1 //===- bolt/Passes/RegReAssign.cpp ----------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the RegReAssign class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "bolt/Passes/RegReAssign.h" 14 #include "bolt/Core/MCPlus.h" 15 #include "bolt/Passes/BinaryFunctionCallGraph.h" 16 #include "bolt/Passes/DataflowAnalysis.h" 17 #include "bolt/Passes/DataflowInfoManager.h" 18 #include "bolt/Utils/Utils.h" 19 #include <numeric> 20 21 #define DEBUG_TYPE "regreassign" 22 23 using namespace llvm; 24 25 namespace opts { 26 extern cl::OptionCategory BoltOptCategory; 27 extern cl::opt<bool> UpdateDebugSections; 28 29 static cl::opt<bool> AggressiveReAssign( 30 "use-aggr-reg-reassign", 31 cl::desc("use register liveness analysis to try to find more opportunities " 32 "for -reg-reassign optimization"), 33 cl::cat(BoltOptCategory)); 34 } 35 36 namespace llvm { 37 namespace bolt { 38 39 void RegReAssign::swap(BinaryFunction &Function, MCPhysReg A, MCPhysReg B) { 40 BinaryContext &BC = Function.getBinaryContext(); 41 const BitVector &AliasA = BC.MIB->getAliases(A, false); 42 const BitVector &AliasB = BC.MIB->getAliases(B, false); 43 44 // Regular instructions 45 for (BinaryBasicBlock &BB : Function) { 46 for (MCInst &Inst : BB) { 47 for (MCOperand &Operand : MCPlus::primeOperands(Inst)) { 48 if (!Operand.isReg()) 49 continue; 50 51 unsigned Reg = Operand.getReg(); 52 if (AliasA.test(Reg)) { 53 Operand.setReg(BC.MIB->getAliasSized(B, BC.MIB->getRegSize(Reg))); 54 --StaticBytesSaved; 55 DynBytesSaved -= BB.getKnownExecutionCount(); 56 continue; 57 } 58 if (!AliasB.test(Reg)) 59 continue; 60 Operand.setReg(BC.MIB->getAliasSized(A, BC.MIB->getRegSize(Reg))); 61 ++StaticBytesSaved; 62 DynBytesSaved += BB.getKnownExecutionCount(); 63 } 64 } 65 } 66 67 // CFI 68 DenseSet<const MCCFIInstruction *> Changed; 69 for (BinaryBasicBlock &BB : Function) { 70 for (MCInst &Inst : BB) { 71 if (!BC.MIB->isCFI(Inst)) 72 continue; 73 const MCCFIInstruction *CFI = Function.getCFIFor(Inst); 74 if (Changed.count(CFI)) 75 continue; 76 Changed.insert(CFI); 77 78 switch (CFI->getOperation()) { 79 case MCCFIInstruction::OpRegister: { 80 const unsigned CFIReg2 = CFI->getRegister2(); 81 const MCPhysReg Reg2 = *BC.MRI->getLLVMRegNum(CFIReg2, /*isEH=*/false); 82 if (AliasA.test(Reg2)) { 83 Function.setCFIFor( 84 Inst, MCCFIInstruction::createRegister( 85 nullptr, CFI->getRegister(), 86 BC.MRI->getDwarfRegNum( 87 BC.MIB->getAliasSized(B, BC.MIB->getRegSize(Reg2)), 88 false))); 89 } else if (AliasB.test(Reg2)) { 90 Function.setCFIFor( 91 Inst, MCCFIInstruction::createRegister( 92 nullptr, CFI->getRegister(), 93 BC.MRI->getDwarfRegNum( 94 BC.MIB->getAliasSized(A, BC.MIB->getRegSize(Reg2)), 95 false))); 96 } 97 } 98 LLVM_FALLTHROUGH; 99 case MCCFIInstruction::OpUndefined: 100 case MCCFIInstruction::OpDefCfa: 101 case MCCFIInstruction::OpOffset: 102 case MCCFIInstruction::OpRestore: 103 case MCCFIInstruction::OpSameValue: 104 case MCCFIInstruction::OpDefCfaRegister: 105 case MCCFIInstruction::OpRelOffset: 106 case MCCFIInstruction::OpEscape: { 107 unsigned CFIReg; 108 if (CFI->getOperation() != MCCFIInstruction::OpEscape) { 109 CFIReg = CFI->getRegister(); 110 } else { 111 Optional<uint8_t> Reg = 112 readDWARFExpressionTargetReg(CFI->getValues()); 113 // Handle DW_CFA_def_cfa_expression 114 if (!Reg) 115 break; 116 CFIReg = *Reg; 117 } 118 const MCPhysReg Reg = *BC.MRI->getLLVMRegNum(CFIReg, /*isEH=*/false); 119 if (AliasA.test(Reg)) 120 Function.mutateCFIRegisterFor( 121 Inst, 122 BC.MRI->getDwarfRegNum( 123 BC.MIB->getAliasSized(B, BC.MIB->getRegSize(Reg)), false)); 124 else if (AliasB.test(Reg)) 125 Function.mutateCFIRegisterFor( 126 Inst, 127 BC.MRI->getDwarfRegNum( 128 BC.MIB->getAliasSized(A, BC.MIB->getRegSize(Reg)), false)); 129 break; 130 } 131 default: 132 break; 133 } 134 } 135 } 136 } 137 138 void RegReAssign::rankRegisters(BinaryFunction &Function) { 139 BinaryContext &BC = Function.getBinaryContext(); 140 std::fill(RegScore.begin(), RegScore.end(), 0); 141 std::fill(RankedRegs.begin(), RankedRegs.end(), 0); 142 143 for (BinaryBasicBlock &BB : Function) { 144 for (MCInst &Inst : BB) { 145 const bool CannotUseREX = BC.MIB->cannotUseREX(Inst); 146 const MCInstrDesc &Desc = BC.MII->get(Inst.getOpcode()); 147 148 // Disallow substituitions involving regs in implicit uses lists 149 const MCPhysReg *ImplicitUses = Desc.getImplicitUses(); 150 while (ImplicitUses && *ImplicitUses) { 151 const size_t RegEC = 152 BC.MIB->getAliases(*ImplicitUses, false).find_first(); 153 RegScore[RegEC] = 154 std::numeric_limits<decltype(RegScore)::value_type>::min(); 155 ++ImplicitUses; 156 } 157 158 // Disallow substituitions involving regs in implicit defs lists 159 const MCPhysReg *ImplicitDefs = Desc.getImplicitDefs(); 160 while (ImplicitDefs && *ImplicitDefs) { 161 const size_t RegEC = 162 BC.MIB->getAliases(*ImplicitDefs, false).find_first(); 163 RegScore[RegEC] = 164 std::numeric_limits<decltype(RegScore)::value_type>::min(); 165 ++ImplicitDefs; 166 } 167 168 for (int I = 0, E = MCPlus::getNumPrimeOperands(Inst); I != E; ++I) { 169 const MCOperand &Operand = Inst.getOperand(I); 170 if (!Operand.isReg()) 171 continue; 172 173 if (Desc.getOperandConstraint(I, MCOI::TIED_TO) != -1) 174 continue; 175 176 unsigned Reg = Operand.getReg(); 177 size_t RegEC = BC.MIB->getAliases(Reg, false).find_first(); 178 if (RegEC == 0) 179 continue; 180 181 // Disallow substituitions involving regs in instrs that cannot use REX 182 if (CannotUseREX) { 183 RegScore[RegEC] = 184 std::numeric_limits<decltype(RegScore)::value_type>::min(); 185 continue; 186 } 187 188 // Unsupported substitution, cannot swap BH with R* regs, bail 189 if (BC.MIB->isUpper8BitReg(Reg) && ClassicCSR.test(Reg)) { 190 RegScore[RegEC] = 191 std::numeric_limits<decltype(RegScore)::value_type>::min(); 192 continue; 193 } 194 195 RegScore[RegEC] += BB.getKnownExecutionCount(); 196 } 197 } 198 } 199 std::iota(RankedRegs.begin(), RankedRegs.end(), 0); // 0, 1, 2, 3... 200 llvm::sort(RankedRegs, 201 [&](size_t A, size_t B) { return RegScore[A] > RegScore[B]; }); 202 203 LLVM_DEBUG({ 204 for (size_t Reg : RankedRegs) { 205 if (RegScore[Reg] == 0) 206 continue; 207 dbgs() << Reg << " "; 208 if (RegScore[Reg] > 0) 209 dbgs() << BC.MRI->getName(Reg) << ": " << RegScore[Reg] << "\n"; 210 else 211 dbgs() << BC.MRI->getName(Reg) << ": (blacklisted)\n"; 212 } 213 }); 214 } 215 216 void RegReAssign::aggressivePassOverFunction(BinaryFunction &Function) { 217 BinaryContext &BC = Function.getBinaryContext(); 218 rankRegisters(Function); 219 220 // Bail early if our registers are all black listed, before running expensive 221 // analysis passes 222 bool Bail = true; 223 int64_t LowScoreClassic = std::numeric_limits<int64_t>::max(); 224 for (int J : ClassicRegs.set_bits()) { 225 if (RegScore[J] <= 0) 226 continue; 227 Bail = false; 228 if (RegScore[J] < LowScoreClassic) 229 LowScoreClassic = RegScore[J]; 230 } 231 if (Bail) 232 return; 233 BitVector Extended = ClassicRegs; 234 Extended.flip(); 235 Extended &= GPRegs; 236 Bail = true; 237 int64_t HighScoreExtended = 0; 238 for (int J : Extended.set_bits()) { 239 if (RegScore[J] <= 0) 240 continue; 241 Bail = false; 242 if (RegScore[J] > HighScoreExtended) 243 HighScoreExtended = RegScore[J]; 244 } 245 // Also bail early if there is no profitable substitution even if we assume 246 // all registers can be exchanged 247 if (Bail || (LowScoreClassic << 1) >= HighScoreExtended) 248 return; 249 250 // -- expensive pass -- determine all regs alive during func start 251 DataflowInfoManager Info(Function, RA.get(), nullptr); 252 BitVector AliveAtStart = *Info.getLivenessAnalysis().getStateAt( 253 ProgramPoint::getFirstPointAt(*Function.begin())); 254 for (BinaryBasicBlock &BB : Function) 255 if (BB.pred_size() == 0) 256 AliveAtStart |= *Info.getLivenessAnalysis().getStateAt( 257 ProgramPoint::getFirstPointAt(BB)); 258 259 // Mark frame pointer alive because of CFI 260 AliveAtStart |= BC.MIB->getAliases(BC.MIB->getFramePointer(), false); 261 // Never touch return registers 262 BC.MIB->getDefaultLiveOut(AliveAtStart); 263 264 // Try swapping more profitable options first 265 auto Begin = RankedRegs.begin(); 266 auto End = std::prev(RankedRegs.end()); 267 while (Begin != End) { 268 MCPhysReg ClassicReg = *End; 269 if (!ClassicRegs[ClassicReg] || RegScore[ClassicReg] <= 0) { 270 --End; 271 continue; 272 } 273 274 MCPhysReg ExtReg = *Begin; 275 if (!Extended[ExtReg] || RegScore[ExtReg] <= 0) { 276 ++Begin; 277 continue; 278 } 279 280 if (RegScore[ClassicReg] << 1 >= RegScore[ExtReg]) { 281 LLVM_DEBUG(dbgs() << " Ending at " << BC.MRI->getName(ClassicReg) 282 << " with " << BC.MRI->getName(ExtReg) 283 << " because exchange is not profitable\n"); 284 break; 285 } 286 287 BitVector AnyAliasAlive = AliveAtStart; 288 AnyAliasAlive &= BC.MIB->getAliases(ClassicReg); 289 if (AnyAliasAlive.any()) { 290 LLVM_DEBUG(dbgs() << " Bailed on " << BC.MRI->getName(ClassicReg) 291 << " with " << BC.MRI->getName(ExtReg) 292 << " because classic reg is alive\n"); 293 --End; 294 continue; 295 } 296 AnyAliasAlive = AliveAtStart; 297 AnyAliasAlive &= BC.MIB->getAliases(ExtReg); 298 if (AnyAliasAlive.any()) { 299 LLVM_DEBUG(dbgs() << " Bailed on " << BC.MRI->getName(ClassicReg) 300 << " with " << BC.MRI->getName(ExtReg) 301 << " because extended reg is alive\n"); 302 ++Begin; 303 continue; 304 } 305 306 // Opportunity detected. Swap. 307 LLVM_DEBUG(dbgs() << "\n ** Swapping " << BC.MRI->getName(ClassicReg) 308 << " with " << BC.MRI->getName(ExtReg) << "\n\n"); 309 swap(Function, ClassicReg, ExtReg); 310 FuncsChanged.insert(&Function); 311 ++Begin; 312 if (Begin == End) 313 break; 314 --End; 315 } 316 } 317 318 bool RegReAssign::conservativePassOverFunction(BinaryFunction &Function) { 319 BinaryContext &BC = Function.getBinaryContext(); 320 rankRegisters(Function); 321 322 // Try swapping R12, R13, R14 or R15 with RBX (we work with all callee-saved 323 // regs except RBP) 324 MCPhysReg Candidate = 0; 325 for (int J : ExtendedCSR.set_bits()) 326 if (RegScore[J] > RegScore[Candidate]) 327 Candidate = J; 328 329 if (!Candidate || RegScore[Candidate] < 0) 330 return false; 331 332 // Check if our classic callee-saved reg (RBX is the only one) has lower 333 // score / utilization rate 334 MCPhysReg RBX = 0; 335 for (int I : ClassicCSR.set_bits()) { 336 int64_t ScoreRBX = RegScore[I]; 337 if (ScoreRBX <= 0) 338 continue; 339 340 if (RegScore[Candidate] > (ScoreRBX + 10)) 341 RBX = I; 342 } 343 344 if (!RBX) 345 return false; 346 347 LLVM_DEBUG(dbgs() << "\n ** Swapping " << BC.MRI->getName(RBX) << " with " 348 << BC.MRI->getName(Candidate) << "\n\n"); 349 (void)BC; 350 swap(Function, RBX, Candidate); 351 FuncsChanged.insert(&Function); 352 return true; 353 } 354 355 void RegReAssign::setupAggressivePass(BinaryContext &BC, 356 std::map<uint64_t, BinaryFunction> &BFs) { 357 setupConservativePass(BC, BFs); 358 CG.reset(new BinaryFunctionCallGraph(buildCallGraph(BC))); 359 RA.reset(new RegAnalysis(BC, &BFs, &*CG)); 360 361 GPRegs = BitVector(BC.MRI->getNumRegs(), false); 362 BC.MIB->getGPRegs(GPRegs); 363 } 364 365 void RegReAssign::setupConservativePass( 366 BinaryContext &BC, std::map<uint64_t, BinaryFunction> &BFs) { 367 // Set up constant bitvectors used throughout this analysis 368 ClassicRegs = BitVector(BC.MRI->getNumRegs(), false); 369 CalleeSaved = BitVector(BC.MRI->getNumRegs(), false); 370 ClassicCSR = BitVector(BC.MRI->getNumRegs(), false); 371 ExtendedCSR = BitVector(BC.MRI->getNumRegs(), false); 372 // Never consider the frame pointer 373 BC.MIB->getClassicGPRegs(ClassicRegs); 374 ClassicRegs.flip(); 375 ClassicRegs |= BC.MIB->getAliases(BC.MIB->getFramePointer(), false); 376 ClassicRegs.flip(); 377 BC.MIB->getCalleeSavedRegs(CalleeSaved); 378 ClassicCSR |= ClassicRegs; 379 ClassicCSR &= CalleeSaved; 380 BC.MIB->getClassicGPRegs(ClassicRegs); 381 ExtendedCSR |= ClassicRegs; 382 ExtendedCSR.flip(); 383 ExtendedCSR &= CalleeSaved; 384 385 LLVM_DEBUG({ 386 RegStatePrinter P(BC); 387 dbgs() << "Starting register reassignment\nClassicRegs: "; 388 P.print(dbgs(), ClassicRegs); 389 dbgs() << "\nCalleeSaved: "; 390 P.print(dbgs(), CalleeSaved); 391 dbgs() << "\nClassicCSR: "; 392 P.print(dbgs(), ClassicCSR); 393 dbgs() << "\nExtendedCSR: "; 394 P.print(dbgs(), ExtendedCSR); 395 dbgs() << "\n"; 396 }); 397 } 398 399 void RegReAssign::runOnFunctions(BinaryContext &BC) { 400 RegScore = std::vector<int64_t>(BC.MRI->getNumRegs(), 0); 401 RankedRegs = std::vector<size_t>(BC.MRI->getNumRegs(), 0); 402 403 if (opts::AggressiveReAssign) 404 setupAggressivePass(BC, BC.getBinaryFunctions()); 405 else 406 setupConservativePass(BC, BC.getBinaryFunctions()); 407 408 for (auto &I : BC.getBinaryFunctions()) { 409 BinaryFunction &Function = I.second; 410 411 if (!Function.isSimple() || Function.isIgnored()) 412 continue; 413 414 LLVM_DEBUG(dbgs() << "====================================\n"); 415 LLVM_DEBUG(dbgs() << " - " << Function.getPrintName() << "\n"); 416 if (!conservativePassOverFunction(Function) && opts::AggressiveReAssign) { 417 aggressivePassOverFunction(Function); 418 LLVM_DEBUG({ 419 if (FuncsChanged.count(&Function)) 420 dbgs() << "Aggressive pass successful on " << Function.getPrintName() 421 << "\n"; 422 }); 423 } 424 } 425 426 if (FuncsChanged.empty()) { 427 outs() << "BOLT-INFO: Reg Reassignment Pass: no changes were made.\n"; 428 return; 429 } 430 if (opts::UpdateDebugSections) 431 outs() << "BOLT-WARNING: You used -reg-reassign and -update-debug-sections." 432 << " Some registers were changed but associated AT_LOCATION for " 433 << "impacted variables were NOT updated! This operation is " 434 << "currently unsupported by BOLT.\n"; 435 outs() << "BOLT-INFO: Reg Reassignment Pass Stats:\n"; 436 outs() << "\t " << FuncsChanged.size() << " functions affected.\n"; 437 outs() << "\t " << StaticBytesSaved << " static bytes saved.\n"; 438 outs() << "\t " << DynBytesSaved << " dynamic bytes saved.\n"; 439 } 440 441 } // namespace bolt 442 } // namespace llvm 443