1 //===- bolt/Passes/RegReAssign.cpp ----------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the RegReAssign class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "bolt/Passes/RegReAssign.h" 14 #include "bolt/Core/MCPlus.h" 15 #include "bolt/Passes/BinaryFunctionCallGraph.h" 16 #include "bolt/Passes/DataflowAnalysis.h" 17 #include "bolt/Passes/DataflowInfoManager.h" 18 #include "bolt/Utils/Utils.h" 19 #include <numeric> 20 21 #define DEBUG_TYPE "regreassign" 22 23 using namespace llvm; 24 25 namespace opts { 26 extern cl::OptionCategory BoltOptCategory; 27 extern cl::opt<bool> UpdateDebugSections; 28 29 static cl::opt<bool> AggressiveReAssign( 30 "use-aggr-reg-reassign", 31 cl::desc("use register liveness analysis to try to find more opportunities " 32 "for -reg-reassign optimization"), 33 cl::cat(BoltOptCategory)); 34 } 35 36 namespace llvm { 37 namespace bolt { 38 39 void RegReAssign::swap(BinaryFunction &Function, MCPhysReg A, MCPhysReg B) { 40 BinaryContext &BC = Function.getBinaryContext(); 41 const BitVector &AliasA = BC.MIB->getAliases(A, false); 42 const BitVector &AliasB = BC.MIB->getAliases(B, false); 43 44 // Regular instructions 45 for (BinaryBasicBlock &BB : Function) { 46 for (MCInst &Inst : BB) { 47 for (MCOperand &Operand : MCPlus::primeOperands(Inst)) { 48 if (!Operand.isReg()) 49 continue; 50 51 unsigned Reg = Operand.getReg(); 52 if (AliasA.test(Reg)) { 53 Operand.setReg(BC.MIB->getAliasSized(B, BC.MIB->getRegSize(Reg))); 54 --StaticBytesSaved; 55 DynBytesSaved -= BB.getKnownExecutionCount(); 56 continue; 57 } 58 if (!AliasB.test(Reg)) 59 continue; 60 Operand.setReg(BC.MIB->getAliasSized(A, BC.MIB->getRegSize(Reg))); 61 ++StaticBytesSaved; 62 DynBytesSaved += BB.getKnownExecutionCount(); 63 } 64 } 65 } 66 67 // CFI 68 DenseSet<const MCCFIInstruction *> Changed; 69 for (BinaryBasicBlock &BB : Function) { 70 for (MCInst &Inst : BB) { 71 if (!BC.MIB->isCFI(Inst)) 72 continue; 73 const MCCFIInstruction *CFI = Function.getCFIFor(Inst); 74 if (Changed.count(CFI)) 75 continue; 76 Changed.insert(CFI); 77 78 switch (CFI->getOperation()) { 79 case MCCFIInstruction::OpRegister: { 80 const unsigned CFIReg2 = CFI->getRegister2(); 81 const MCPhysReg Reg2 = *BC.MRI->getLLVMRegNum(CFIReg2, /*isEH=*/false); 82 if (AliasA.test(Reg2)) { 83 Function.setCFIFor( 84 Inst, MCCFIInstruction::createRegister( 85 nullptr, CFI->getRegister(), 86 BC.MRI->getDwarfRegNum( 87 BC.MIB->getAliasSized(B, BC.MIB->getRegSize(Reg2)), 88 false))); 89 } else if (AliasB.test(Reg2)) { 90 Function.setCFIFor( 91 Inst, MCCFIInstruction::createRegister( 92 nullptr, CFI->getRegister(), 93 BC.MRI->getDwarfRegNum( 94 BC.MIB->getAliasSized(A, BC.MIB->getRegSize(Reg2)), 95 false))); 96 } 97 } 98 [[fallthrough]]; 99 case MCCFIInstruction::OpUndefined: 100 case MCCFIInstruction::OpDefCfa: 101 case MCCFIInstruction::OpOffset: 102 case MCCFIInstruction::OpRestore: 103 case MCCFIInstruction::OpSameValue: 104 case MCCFIInstruction::OpDefCfaRegister: 105 case MCCFIInstruction::OpRelOffset: 106 case MCCFIInstruction::OpEscape: { 107 unsigned CFIReg; 108 if (CFI->getOperation() != MCCFIInstruction::OpEscape) { 109 CFIReg = CFI->getRegister(); 110 } else { 111 std::optional<uint8_t> Reg = 112 readDWARFExpressionTargetReg(CFI->getValues()); 113 // Handle DW_CFA_def_cfa_expression 114 if (!Reg) 115 break; 116 CFIReg = *Reg; 117 } 118 const MCPhysReg Reg = *BC.MRI->getLLVMRegNum(CFIReg, /*isEH=*/false); 119 if (AliasA.test(Reg)) 120 Function.mutateCFIRegisterFor( 121 Inst, 122 BC.MRI->getDwarfRegNum( 123 BC.MIB->getAliasSized(B, BC.MIB->getRegSize(Reg)), false)); 124 else if (AliasB.test(Reg)) 125 Function.mutateCFIRegisterFor( 126 Inst, 127 BC.MRI->getDwarfRegNum( 128 BC.MIB->getAliasSized(A, BC.MIB->getRegSize(Reg)), false)); 129 break; 130 } 131 default: 132 break; 133 } 134 } 135 } 136 } 137 138 void RegReAssign::rankRegisters(BinaryFunction &Function) { 139 BinaryContext &BC = Function.getBinaryContext(); 140 std::fill(RegScore.begin(), RegScore.end(), 0); 141 std::fill(RankedRegs.begin(), RankedRegs.end(), 0); 142 143 auto countRegScore = [&](BinaryBasicBlock &BB) { 144 for (MCInst &Inst : BB) { 145 const bool CannotUseREX = BC.MIB->cannotUseREX(Inst); 146 const MCInstrDesc &Desc = BC.MII->get(Inst.getOpcode()); 147 148 // Disallow substituitions involving regs in implicit uses lists 149 for (MCPhysReg ImplicitUse : Desc.implicit_uses()) { 150 const size_t RegEC = 151 BC.MIB->getAliases(ImplicitUse, false).find_first(); 152 RegScore[RegEC] = 153 std::numeric_limits<decltype(RegScore)::value_type>::min(); 154 } 155 156 // Disallow substituitions involving regs in implicit defs lists 157 for (MCPhysReg ImplicitDef : Desc.implicit_defs()) { 158 const size_t RegEC = 159 BC.MIB->getAliases(ImplicitDef, false).find_first(); 160 RegScore[RegEC] = 161 std::numeric_limits<decltype(RegScore)::value_type>::min(); 162 } 163 164 for (int I = 0, E = MCPlus::getNumPrimeOperands(Inst); I != E; ++I) { 165 const MCOperand &Operand = Inst.getOperand(I); 166 if (!Operand.isReg()) 167 continue; 168 169 if (Desc.getOperandConstraint(I, MCOI::TIED_TO) != -1) 170 continue; 171 172 unsigned Reg = Operand.getReg(); 173 size_t RegEC = BC.MIB->getAliases(Reg, false).find_first(); 174 if (RegEC == 0) 175 continue; 176 177 // Disallow substituitions involving regs in instrs that cannot use REX 178 // The relationship of X86 registers is shown in the diagram. BL and BH 179 // do not have a direct alias relationship. However, if the BH register 180 // cannot be swapped, then the BX/EBX/RBX registers cannot be swapped as 181 // well, which means that BL register also cannot be swapped. Therefore, 182 // in the presence of BX/EBX/RBX registers, BL and BH have an alias 183 // relationship. 184 // ┌─────────────────┐ 185 // │ RBX │ 186 // ├─────┬───────────┤ 187 // │ │ EBX │ 188 // ├─────┴──┬────────┤ 189 // │ │ BX │ 190 // ├────────┼───┬────┤ 191 // │ │BH │BL │ 192 // └────────┴───┴────┘ 193 if (CannotUseREX) { 194 RegScore[RegEC] = 195 std::numeric_limits<decltype(RegScore)::value_type>::min(); 196 RegScore[BC.MIB->getAliasSized(Reg, 1)] = RegScore[RegEC]; 197 continue; 198 } 199 200 // Unsupported substitution, cannot swap BH with R* regs, bail 201 if (BC.MIB->isUpper8BitReg(Reg) && ClassicCSR.test(Reg)) { 202 RegScore[RegEC] = 203 std::numeric_limits<decltype(RegScore)::value_type>::min(); 204 RegScore[BC.MIB->getAliasSized(Reg, 1)] = RegScore[RegEC]; 205 continue; 206 } 207 208 RegScore[RegEC] += BB.getKnownExecutionCount(); 209 } 210 } 211 }; 212 for (BinaryBasicBlock &BB : Function) 213 countRegScore(BB); 214 215 for (BinaryFunction *ChildFrag : Function.getFragments()) { 216 for (BinaryBasicBlock &BB : *ChildFrag) 217 countRegScore(BB); 218 } 219 220 std::iota(RankedRegs.begin(), RankedRegs.end(), 0); // 0, 1, 2, 3... 221 llvm::sort(RankedRegs, 222 [&](size_t A, size_t B) { return RegScore[A] > RegScore[B]; }); 223 224 LLVM_DEBUG({ 225 for (size_t Reg : RankedRegs) { 226 if (RegScore[Reg] == 0) 227 continue; 228 dbgs() << Reg << " "; 229 if (RegScore[Reg] > 0) 230 dbgs() << BC.MRI->getName(Reg) << ": " << RegScore[Reg] << "\n"; 231 else 232 dbgs() << BC.MRI->getName(Reg) << ": (blacklisted)\n"; 233 } 234 }); 235 } 236 237 void RegReAssign::aggressivePassOverFunction(BinaryFunction &Function) { 238 BinaryContext &BC = Function.getBinaryContext(); 239 rankRegisters(Function); 240 241 // If there is a situation where function: 242 // A() -> A.cold() 243 // A.localalias() -> A.cold() 244 // simply swapping these two calls can cause issues. 245 for (BinaryFunction *ChildFrag : Function.getFragments()) { 246 if (ChildFrag->getParentFragments()->size() > 1) 247 return; 248 if (ChildFrag->empty()) 249 return; 250 } 251 252 // Bail early if our registers are all black listed, before running expensive 253 // analysis passes 254 bool Bail = true; 255 int64_t LowScoreClassic = std::numeric_limits<int64_t>::max(); 256 for (int J : ClassicRegs.set_bits()) { 257 if (RegScore[J] <= 0) 258 continue; 259 Bail = false; 260 if (RegScore[J] < LowScoreClassic) 261 LowScoreClassic = RegScore[J]; 262 } 263 if (Bail) 264 return; 265 BitVector Extended = ClassicRegs; 266 Extended.flip(); 267 Extended &= GPRegs; 268 Bail = true; 269 int64_t HighScoreExtended = 0; 270 for (int J : Extended.set_bits()) { 271 if (RegScore[J] <= 0) 272 continue; 273 Bail = false; 274 if (RegScore[J] > HighScoreExtended) 275 HighScoreExtended = RegScore[J]; 276 } 277 // Also bail early if there is no profitable substitution even if we assume 278 // all registers can be exchanged 279 if (Bail || (LowScoreClassic << 1) >= HighScoreExtended) 280 return; 281 282 // -- expensive pass -- determine all regs alive during func start 283 DataflowInfoManager Info(Function, RA.get(), nullptr); 284 BitVector AliveAtStart = *Info.getLivenessAnalysis().getStateAt( 285 ProgramPoint::getFirstPointAt(*Function.begin())); 286 for (BinaryBasicBlock &BB : Function) 287 if (BB.pred_size() == 0) 288 AliveAtStart |= *Info.getLivenessAnalysis().getStateAt( 289 ProgramPoint::getFirstPointAt(BB)); 290 291 // Mark frame pointer alive because of CFI 292 AliveAtStart |= BC.MIB->getAliases(BC.MIB->getFramePointer(), false); 293 // Never touch return registers 294 BC.MIB->getDefaultLiveOut(AliveAtStart); 295 296 // Try swapping more profitable options first 297 auto Begin = RankedRegs.begin(); 298 auto End = std::prev(RankedRegs.end()); 299 while (Begin != End) { 300 MCPhysReg ClassicReg = *End; 301 if (!ClassicRegs[ClassicReg] || RegScore[ClassicReg] <= 0) { 302 --End; 303 continue; 304 } 305 306 MCPhysReg ExtReg = *Begin; 307 if (!Extended[ExtReg] || RegScore[ExtReg] <= 0) { 308 ++Begin; 309 continue; 310 } 311 312 if (RegScore[ClassicReg] << 1 >= RegScore[ExtReg]) { 313 LLVM_DEBUG(dbgs() << " Ending at " << BC.MRI->getName(ClassicReg) 314 << " with " << BC.MRI->getName(ExtReg) 315 << " because exchange is not profitable\n"); 316 break; 317 } 318 319 BitVector AnyAliasAlive = AliveAtStart; 320 AnyAliasAlive &= BC.MIB->getAliases(ClassicReg); 321 if (AnyAliasAlive.any()) { 322 LLVM_DEBUG(dbgs() << " Bailed on " << BC.MRI->getName(ClassicReg) 323 << " with " << BC.MRI->getName(ExtReg) 324 << " because classic reg is alive\n"); 325 --End; 326 continue; 327 } 328 AnyAliasAlive = AliveAtStart; 329 AnyAliasAlive &= BC.MIB->getAliases(ExtReg); 330 if (AnyAliasAlive.any()) { 331 LLVM_DEBUG(dbgs() << " Bailed on " << BC.MRI->getName(ClassicReg) 332 << " with " << BC.MRI->getName(ExtReg) 333 << " because extended reg is alive\n"); 334 ++Begin; 335 continue; 336 } 337 338 // Opportunity detected. Swap. 339 LLVM_DEBUG(dbgs() << "\n ** Swapping " << BC.MRI->getName(ClassicReg) 340 << " with " << BC.MRI->getName(ExtReg) << "\n\n"); 341 swap(Function, ClassicReg, ExtReg); 342 FuncsChanged.insert(&Function); 343 for (BinaryFunction *ChildFrag : Function.getFragments()) { 344 swap(*ChildFrag, ClassicReg, ExtReg); 345 FuncsChanged.insert(ChildFrag); 346 } 347 ++Begin; 348 if (Begin == End) 349 break; 350 --End; 351 } 352 } 353 354 bool RegReAssign::conservativePassOverFunction(BinaryFunction &Function) { 355 BinaryContext &BC = Function.getBinaryContext(); 356 rankRegisters(Function); 357 358 for (BinaryFunction *ChildFrag : Function.getFragments()) { 359 if (ChildFrag->getParentFragments()->size() > 1) 360 return false; 361 if (ChildFrag->empty()) 362 return false; 363 } 364 365 // Try swapping R12, R13, R14 or R15 with RBX (we work with all callee-saved 366 // regs except RBP) 367 MCPhysReg Candidate = 0; 368 for (int J : ExtendedCSR.set_bits()) 369 if (RegScore[J] > RegScore[Candidate]) 370 Candidate = J; 371 372 if (!Candidate || RegScore[Candidate] < 0) 373 return false; 374 375 // Check if our classic callee-saved reg (RBX is the only one) has lower 376 // score / utilization rate 377 MCPhysReg RBX = 0; 378 for (int I : ClassicCSR.set_bits()) { 379 int64_t ScoreRBX = RegScore[I]; 380 if (ScoreRBX <= 0) 381 continue; 382 383 if (RegScore[Candidate] > (ScoreRBX + 10)) 384 RBX = I; 385 } 386 387 if (!RBX) 388 return false; 389 390 // The high 8 bits of the register will never be swapped. To prevent the high 391 // 8 bits from being swapped incorrectly, we should switched to swapping the 392 // low 8 bits of the register instead. 393 if (BC.MIB->isUpper8BitReg(RBX)) { 394 RBX = BC.MIB->getAliasSized(RBX, 1); 395 if (RegScore[RBX] < 0 || RegScore[RBX] > RegScore[Candidate]) 396 return false; 397 } 398 399 LLVM_DEBUG(dbgs() << "\n ** Swapping " << BC.MRI->getName(RBX) << " with " 400 << BC.MRI->getName(Candidate) << "\n\n"); 401 (void)BC; 402 swap(Function, RBX, Candidate); 403 FuncsChanged.insert(&Function); 404 for (BinaryFunction *ChildFrag : Function.getFragments()) { 405 swap(*ChildFrag, RBX, Candidate); 406 FuncsChanged.insert(ChildFrag); 407 } 408 return true; 409 } 410 411 void RegReAssign::setupAggressivePass(BinaryContext &BC, 412 std::map<uint64_t, BinaryFunction> &BFs) { 413 setupConservativePass(BC, BFs); 414 CG.reset(new BinaryFunctionCallGraph(buildCallGraph(BC))); 415 RA.reset(new RegAnalysis(BC, &BFs, &*CG)); 416 417 GPRegs = BitVector(BC.MRI->getNumRegs(), false); 418 BC.MIB->getGPRegs(GPRegs); 419 } 420 421 void RegReAssign::setupConservativePass( 422 BinaryContext &BC, std::map<uint64_t, BinaryFunction> &BFs) { 423 // Set up constant bitvectors used throughout this analysis 424 ClassicRegs = BitVector(BC.MRI->getNumRegs(), false); 425 CalleeSaved = BitVector(BC.MRI->getNumRegs(), false); 426 ClassicCSR = BitVector(BC.MRI->getNumRegs(), false); 427 ExtendedCSR = BitVector(BC.MRI->getNumRegs(), false); 428 // Never consider the frame pointer 429 BC.MIB->getClassicGPRegs(ClassicRegs); 430 ClassicRegs.flip(); 431 ClassicRegs |= BC.MIB->getAliases(BC.MIB->getFramePointer(), false); 432 ClassicRegs.flip(); 433 BC.MIB->getCalleeSavedRegs(CalleeSaved); 434 ClassicCSR |= ClassicRegs; 435 ClassicCSR &= CalleeSaved; 436 BC.MIB->getClassicGPRegs(ClassicRegs); 437 ExtendedCSR |= ClassicRegs; 438 ExtendedCSR.flip(); 439 ExtendedCSR &= CalleeSaved; 440 441 LLVM_DEBUG({ 442 RegStatePrinter P(BC); 443 dbgs() << "Starting register reassignment\nClassicRegs: "; 444 P.print(dbgs(), ClassicRegs); 445 dbgs() << "\nCalleeSaved: "; 446 P.print(dbgs(), CalleeSaved); 447 dbgs() << "\nClassicCSR: "; 448 P.print(dbgs(), ClassicCSR); 449 dbgs() << "\nExtendedCSR: "; 450 P.print(dbgs(), ExtendedCSR); 451 dbgs() << "\n"; 452 }); 453 } 454 455 Error RegReAssign::runOnFunctions(BinaryContext &BC) { 456 RegScore = std::vector<int64_t>(BC.MRI->getNumRegs(), 0); 457 RankedRegs = std::vector<size_t>(BC.MRI->getNumRegs(), 0); 458 459 if (opts::AggressiveReAssign) 460 setupAggressivePass(BC, BC.getBinaryFunctions()); 461 else 462 setupConservativePass(BC, BC.getBinaryFunctions()); 463 464 for (auto &I : BC.getBinaryFunctions()) { 465 BinaryFunction &Function = I.second; 466 467 if (!Function.isSimple() || Function.isIgnored() || Function.isFragment()) 468 continue; 469 470 LLVM_DEBUG(dbgs() << "====================================\n"); 471 LLVM_DEBUG(dbgs() << " - " << Function.getPrintName() << "\n"); 472 if (!conservativePassOverFunction(Function) && opts::AggressiveReAssign) { 473 aggressivePassOverFunction(Function); 474 LLVM_DEBUG({ 475 if (FuncsChanged.count(&Function)) 476 dbgs() << "Aggressive pass successful on " << Function.getPrintName() 477 << "\n"; 478 }); 479 } 480 } 481 482 if (FuncsChanged.empty()) { 483 outs() << "BOLT-INFO: Reg Reassignment Pass: no changes were made.\n"; 484 return Error::success(); 485 } 486 if (opts::UpdateDebugSections) 487 outs() << "BOLT-WARNING: You used -reg-reassign and -update-debug-sections." 488 << " Some registers were changed but associated AT_LOCATION for " 489 << "impacted variables were NOT updated! This operation is " 490 << "currently unsupported by BOLT.\n"; 491 outs() << "BOLT-INFO: Reg Reassignment Pass Stats:\n"; 492 outs() << "\t " << FuncsChanged.size() << " functions affected.\n"; 493 outs() << "\t " << StaticBytesSaved << " static bytes saved.\n"; 494 outs() << "\t " << DynBytesSaved << " dynamic bytes saved.\n"; 495 return Error::success(); 496 } 497 498 } // namespace bolt 499 } // namespace llvm 500