//===-- SIFoldOperands.cpp - Fold operands --- ----------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
/// \file
//===----------------------------------------------------------------------===//
//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "si-fold-operands"
using namespace llvm;

namespace {

class SIFoldOperands : public MachineFunctionPass {
public:
  static char ID;

public:
  SIFoldOperands() : MachineFunctionPass(ID) {
    initializeSIFoldOperandsPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override { return "SI Fold Operands"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

struct FoldCandidate {
  MachineInstr *UseMI;
  union {
    MachineOperand *OpToFold;
    uint64_t ImmToFold;
    int FrameIndexToFold;
  };
  unsigned char UseOpNo;
  MachineOperand::MachineOperandType Kind;

  FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp) :
    UseMI(MI), OpToFold(nullptr), UseOpNo(OpNo), Kind(FoldOp->getType()) {
    if (FoldOp->isImm()) {
      ImmToFold = FoldOp->getImm();
    } else if (FoldOp->isFI()) {
      FrameIndexToFold = FoldOp->getIndex();
    } else {
      assert(FoldOp->isReg());
      OpToFold = FoldOp;
    }
  }

  bool isFI() const {
    return Kind == MachineOperand::MO_FrameIndex;
  }

  bool isImm() const {
    return Kind == MachineOperand::MO_Immediate;
  }

  bool isReg() const {
    return Kind == MachineOperand::MO_Register;
  }
};

} // End anonymous namespace.

INITIALIZE_PASS(SIFoldOperands, DEBUG_TYPE,
                "SI Fold Operands", false, false)

char SIFoldOperands::ID = 0;

char &llvm::SIFoldOperandsID = SIFoldOperands::ID;

FunctionPass *llvm::createSIFoldOperandsPass() {
  return new SIFoldOperands();
}

static bool isSafeToFold(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::V_MOV_B64_PSEUDO: {
    // If there are additional implicit register operands, this may be used for
    // register indexing so the source register operand isn't simply copied.
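    // The opcode declares a fixed number of explicit operands plus implicit
    // uses (e.g. exec on VALU movs); anything beyond that was added for
    // indirect indexing, so the source is not a plain copyable value.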
    unsigned NumOps = MI.getDesc().getNumOperands() +
      MI.getDesc().getNumImplicitUses();

    return MI.getNumOperands() == NumOps;
  }
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::COPY:
    return true;
  default:
    return false;
  }
}

static bool updateOperand(FoldCandidate &Fold,
                          const TargetRegisterInfo &TRI) {
  MachineInstr *MI = Fold.UseMI;
  MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
  assert(Old.isReg());

  if (Fold.isImm()) {
    Old.ChangeToImmediate(Fold.ImmToFold);
    return true;
  }

  if (Fold.isFI()) {
    Old.ChangeToFrameIndex(Fold.FrameIndexToFold);
    return true;
  }

  MachineOperand *New = Fold.OpToFold;
  if (TargetRegisterInfo::isVirtualRegister(Old.getReg()) &&
      TargetRegisterInfo::isVirtualRegister(New->getReg())) {
    Old.substVirtReg(New->getReg(), New->getSubReg(), TRI);
    return true;
  }

  // FIXME: Handle physical registers.

  return false;
}

static bool isUseMIInFoldList(const std::vector<FoldCandidate> &FoldList,
                              const MachineInstr *MI) {
  for (auto Candidate : FoldList) {
    if (Candidate.UseMI == MI)
      return true;
  }
  return false;
}

static bool tryAddToFoldList(std::vector<FoldCandidate> &FoldList,
                             MachineInstr *MI, unsigned OpNo,
                             MachineOperand *OpToFold,
                             const SIInstrInfo *TII) {
  if (!TII->isOperandLegal(*MI, OpNo, OpToFold)) {

    // Special case for v_mac_{f16, f32}_e64 if we are trying to fold into src2
    unsigned Opc = MI->getOpcode();
    if ((Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64) &&
        (int)OpNo == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)) {
      bool IsF32 = Opc == AMDGPU::V_MAC_F32_e64;

      // Check if changing this to a v_mad_{f16, f32} instruction will allow us
      // to fold the operand.
      MI->setDesc(TII->get(IsF32 ? AMDGPU::V_MAD_F32 : AMDGPU::V_MAD_F16));
      bool FoldAsMAD = tryAddToFoldList(FoldList, MI, OpNo, OpToFold, TII);
      if (FoldAsMAD) {
        MI->untieRegOperand(OpNo);
        return true;
      }
      MI->setDesc(TII->get(Opc));
    }

    // Special case for s_setreg_b32
    if (Opc == AMDGPU::S_SETREG_B32 && OpToFold->isImm()) {
      MI->setDesc(TII->get(AMDGPU::S_SETREG_IMM32_B32));
      FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold));
      return true;
    }

    // If we are already folding into another operand of MI, then
    // we can't commute the instruction, otherwise we risk making the
    // other fold illegal.
    if (isUseMIInFoldList(FoldList, MI))
      return false;

    // Operand is not legal, so try to commute the instruction to
    // see if this makes it possible to fold.
    unsigned CommuteIdx0 = TargetInstrInfo::CommuteAnyOperandIndex;
    unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
    bool CanCommute = TII->findCommutedOpIndices(*MI, CommuteIdx0, CommuteIdx1);

    if (CanCommute) {
      if (CommuteIdx0 == OpNo)
        OpNo = CommuteIdx1;
      else if (CommuteIdx1 == OpNo)
        OpNo = CommuteIdx0;
    }

    // One of the operands might be an Imm operand, and OpNo may refer to it
    // after the call of commuteInstruction() below. Such situations are
    // avoided here explicitly as OpNo must be a register operand to be a
    // candidate for memory folding.
    if (CanCommute && (!MI->getOperand(CommuteIdx0).isReg() ||
                       !MI->getOperand(CommuteIdx1).isReg()))
      return false;

    if (!CanCommute ||
        !TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1))
      return false;

    if (!TII->isOperandLegal(*MI, OpNo, OpToFold))
      return false;
  }

  FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold));
  return true;
}

// If the use operand doesn't care about the value, this may be an operand only
// used for register indexing, in which case it is unsafe to fold.
static bool isUseSafeToFold(const MachineInstr &MI,
                            const MachineOperand &UseMO) {
  return !UseMO.isUndef();
  //return !MI.hasRegisterImplicitUseOperand(UseMO.getReg());
}

static void foldOperand(MachineOperand &OpToFold, MachineInstr *UseMI,
                        unsigned UseOpIdx,
                        std::vector<FoldCandidate> &FoldList,
                        SmallVectorImpl<MachineInstr *> &CopiesToReplace,
                        const SIInstrInfo *TII, const SIRegisterInfo &TRI,
                        MachineRegisterInfo &MRI) {
  const MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);

  if (!isUseSafeToFold(*UseMI, UseOp))
    return;

  // FIXME: Fold operands with subregs.
  if (UseOp.isReg() && OpToFold.isReg()) {
    if (UseOp.isImplicit() || UseOp.getSubReg() != AMDGPU::NoSubRegister)
      return;

    // Don't fold subregister extracts into tied operands; only fold a full
    // copy, since a subregister use tied to a full register def doesn't
    // really make sense. e.g. don't fold:
    //
    //   %vreg1 = COPY %vreg0:sub1
    //   %vreg2<tied3> = V_MAC_{F16, F32} %vreg3, %vreg4, %vreg1<tied0>
    //
    // into
    //   %vreg2<tied3> = V_MAC_{F16, F32} %vreg3, %vreg4, %vreg0:sub1<tied0>
    if (UseOp.isTied() && OpToFold.getSubReg() != AMDGPU::NoSubRegister)
      return;
  }

  // Special case for REG_SEQUENCE: We can't fold literals into
  // REG_SEQUENCE instructions, so we have to fold them into the
  // uses of REG_SEQUENCE.
  if (UseMI->isRegSequence()) {
    unsigned RegSeqDstReg = UseMI->getOperand(0).getReg();
    unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm();

    for (MachineRegisterInfo::use_iterator
           RSUse = MRI.use_begin(RegSeqDstReg), RSE = MRI.use_end();
         RSUse != RSE; ++RSUse) {

      MachineInstr *RSUseMI = RSUse->getParent();
      if (RSUse->getSubReg() != RegSeqDstSubReg)
        continue;

      foldOperand(OpToFold, RSUseMI, RSUse.getOperandNo(), FoldList,
                  CopiesToReplace, TII, TRI, MRI);
    }

    return;
  }


  bool FoldingImm = OpToFold.isImm();

  // In order to fold immediates into copies, we need to change the
  // copy to a MOV.
  if (FoldingImm && UseMI->isCopy()) {
    unsigned DestReg = UseMI->getOperand(0).getReg();
    const TargetRegisterClass *DestRC
      = TargetRegisterInfo::isVirtualRegister(DestReg) ?
      MRI.getRegClass(DestReg) :
      TRI.getPhysRegClass(DestReg);

    unsigned MovOp = TII->getMovOpcode(DestRC);
    if (MovOp == AMDGPU::COPY)
      return;

    UseMI->setDesc(TII->get(MovOp));
    CopiesToReplace.push_back(UseMI);
  } else {
    const MCInstrDesc &UseDesc = UseMI->getDesc();

    // Don't fold into target independent nodes. Target independent opcodes
    // don't have defined register classes.
    if (UseDesc.isVariadic() ||
        UseDesc.OpInfo[UseOpIdx].RegClass == -1)
      return;
  }

  if (!FoldingImm) {
    tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);

    // FIXME: We could try to change the instruction from 64-bit to 32-bit
    // to enable more folding opportunities. The shrink operands pass
    // already does this.
    return;
  }


  const MCInstrDesc &FoldDesc = OpToFold.getParent()->getDesc();
  const TargetRegisterClass *FoldRC =
    TRI.getRegClass(FoldDesc.OpInfo[0].RegClass);

  APInt Imm(TII->operandBitWidth(FoldDesc.OpInfo[1].OperandType),
            OpToFold.getImm());

  // Split 64-bit constants into 32-bits for folding.
  if (UseOp.getSubReg() && AMDGPU::getRegBitWidth(FoldRC->getID()) == 64) {
    unsigned UseReg = UseOp.getReg();
    const TargetRegisterClass *UseRC
      = TargetRegisterInfo::isVirtualRegister(UseReg) ?
      MRI.getRegClass(UseReg) :
      TRI.getPhysRegClass(UseReg);

    assert(Imm.getBitWidth() == 64);

    if (AMDGPU::getRegBitWidth(UseRC->getID()) != 64)
      return;

    if (UseOp.getSubReg() == AMDGPU::sub0) {
      Imm = Imm.getLoBits(32);
    } else {
      assert(UseOp.getSubReg() == AMDGPU::sub1);
      Imm = Imm.getHiBits(32);
    }
  }

  MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
  tryAddToFoldList(FoldList, UseMI, UseOpIdx, &ImmOp, TII);
}

static bool evalBinaryInstruction(unsigned Opcode, int32_t &Result,
                                  int32_t LHS, int32_t RHS) {
  switch (Opcode) {
  case AMDGPU::V_AND_B32_e64:
  case AMDGPU::S_AND_B32:
    Result = LHS & RHS;
    return true;
  case AMDGPU::V_OR_B32_e64:
  case AMDGPU::S_OR_B32:
    Result = LHS | RHS;
    return true;
  case AMDGPU::V_XOR_B32_e64:
  case AMDGPU::S_XOR_B32:
    Result = LHS ^ RHS;
    return true;
  default:
    return false;
  }
}

static unsigned getMovOpc(bool IsScalar) {
  return IsScalar ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
}

/// Remove any leftover implicit operands from mutating the instruction. e.g.
/// if we replace an s_and_b32 with a copy, we don't need the implicit scc def
/// anymore.
static void stripExtraCopyOperands(MachineInstr &MI) {
  const MCInstrDesc &Desc = MI.getDesc();
  unsigned NumOps = Desc.getNumOperands() +
                    Desc.getNumImplicitUses() +
                    Desc.getNumImplicitDefs();

  for (unsigned I = MI.getNumOperands() - 1; I >= NumOps; --I)
    MI.RemoveOperand(I);
}

static void mutateCopyOp(MachineInstr &MI, const MCInstrDesc &NewDesc) {
  MI.setDesc(NewDesc);
  stripExtraCopyOperands(MI);
}

// Try to simplify operations with a constant that may appear after instruction
// selection.
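// A few of the patterns handled below, for illustration:
//   v_not_b32 k       -> v_mov_b32 ~k
//   v_and_b32 k0, k1  -> v_mov_b32 (k0 & k1)
//   v_or_b32  x, 0    -> copy x
//   v_and_b32 x, -1   -> copy x
//   v_xor_b32 x, 0    -> copy x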
static bool tryConstantFoldOp(MachineRegisterInfo &MRI,
                              const SIInstrInfo *TII,
                              MachineInstr *MI) {
  unsigned Opc = MI->getOpcode();

  if (Opc == AMDGPU::V_NOT_B32_e64 || Opc == AMDGPU::V_NOT_B32_e32 ||
      Opc == AMDGPU::S_NOT_B32) {
    MachineOperand &Src0 = MI->getOperand(1);
    if (Src0.isImm()) {
      Src0.setImm(~Src0.getImm());
      mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_NOT_B32)));
      return true;
    }

    return false;
  }

  if (!MI->isCommutable())
    return false;

  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);

  MachineOperand *Src0 = &MI->getOperand(Src0Idx);
  MachineOperand *Src1 = &MI->getOperand(Src1Idx);
  if (!Src0->isImm() && !Src1->isImm())
    return false;

  // and k0, k1 -> v_mov_b32 (k0 & k1)
  // or k0, k1 -> v_mov_b32 (k0 | k1)
  // xor k0, k1 -> v_mov_b32 (k0 ^ k1)
  if (Src0->isImm() && Src1->isImm()) {
    int32_t NewImm;
    if (!evalBinaryInstruction(Opc, NewImm, Src0->getImm(), Src1->getImm()))
      return false;

    const SIRegisterInfo &TRI = TII->getRegisterInfo();
    bool IsSGPR = TRI.isSGPRReg(MRI, MI->getOperand(0).getReg());

    Src0->setImm(NewImm);
    MI->RemoveOperand(Src1Idx);
    mutateCopyOp(*MI, TII->get(getMovOpc(IsSGPR)));
    return true;
  }

  if (Src0->isImm() && !Src1->isImm()) {
    std::swap(Src0, Src1);
    std::swap(Src0Idx, Src1Idx);
  }

  int32_t Src1Val = static_cast<int32_t>(Src1->getImm());
  if (Opc == AMDGPU::V_OR_B32_e64 || Opc == AMDGPU::S_OR_B32) {
    if (Src1Val == 0) {
      // y = or x, 0 => y = copy x
      MI->RemoveOperand(Src1Idx);
      mutateCopyOp(*MI, TII->get(AMDGPU::COPY));
    } else if (Src1Val == -1) {
      // y = or x, -1 => y = v_mov_b32 -1
      MI->RemoveOperand(Src1Idx);
      mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_OR_B32)));
    } else
      return false;

    return true;
  }

  if (MI->getOpcode() == AMDGPU::V_AND_B32_e64 ||
      MI->getOpcode() == AMDGPU::S_AND_B32) {
    if (Src1Val == 0) {
      // y = and x, 0 => y = v_mov_b32 0
      MI->RemoveOperand(Src0Idx);
      mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_AND_B32)));
    } else if (Src1Val == -1) {
      // y = and x, -1 => y = copy x
      MI->RemoveOperand(Src1Idx);
      mutateCopyOp(*MI, TII->get(AMDGPU::COPY));
      stripExtraCopyOperands(*MI);
    } else
      return false;

    return true;
  }

  if (MI->getOpcode() == AMDGPU::V_XOR_B32_e64 ||
      MI->getOpcode() == AMDGPU::S_XOR_B32) {
    if (Src1Val == 0) {
      // y = xor x, 0 => y = copy x
      MI->RemoveOperand(Src1Idx);
      mutateCopyOp(*MI, TII->get(AMDGPU::COPY));
    }
  }

  return false;
}

bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(*MF.getFunction()))
    return false;

  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();

  MachineRegisterInfo &MRI = MF.getRegInfo();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
       BI != BE; ++BI) {

    MachineBasicBlock &MBB = *BI;
    MachineBasicBlock::iterator I, Next;
    for (I = MBB.begin(); I != MBB.end(); I = Next) {
      Next = std::next(I);
      MachineInstr &MI = *I;

      if (!isSafeToFold(MI))
        continue;

      MachineOperand &OpToFold = MI.getOperand(1);
      bool FoldingImm = OpToFold.isImm() ||
        OpToFold.isFI();

      // FIXME: We could also be folding things like FrameIndexes and
      // TargetIndexes.
      if (!FoldingImm && !OpToFold.isReg())
        continue;

      if (OpToFold.isReg() &&
          !TargetRegisterInfo::isVirtualRegister(OpToFold.getReg()))
        continue;

      // Prevent folding operands backwards in the function. For example,
      // the COPY opcode must not be replaced by 1 in this example:
      //
      //   %vreg3<def> = COPY %VGPR0; VGPR_32:%vreg3
      //   ...
      //   %VGPR0<def> = V_MOV_B32_e32 1, %EXEC<imp-use>
      MachineOperand &Dst = MI.getOperand(0);
      if (Dst.isReg() &&
          !TargetRegisterInfo::isVirtualRegister(Dst.getReg()))
        continue;

      // We need to mutate the operands of new mov instructions to add implicit
      // uses of EXEC, but adding them invalidates the use_iterator, so defer
      // this.
      SmallVector<MachineInstr *, 4> CopiesToReplace;

      std::vector<FoldCandidate> FoldList;
      if (FoldingImm) {
        unsigned NumLiteralUses = 0;
        MachineOperand *NonInlineUse = nullptr;
        int NonInlineUseOpNo = -1;

        // Try to fold any inline immediate uses, and then only fold other
        // constants if they have one use.
        //
        // The legality of the inline immediate must be checked based on the use
        // operand, not the defining instruction, because 32-bit instructions
        // with 32-bit inline immediate sources may be used to materialize
        // constants used in 16-bit operands.
        //
        // e.g. it is unsafe to fold:
        //  s_mov_b32 s0, 1.0    // materializes 0x3f800000
        //  v_add_f16 v0, v1, s0 // 1.0 f16 inline immediate sees 0x00003c00

        // Folding immediates with more than one use will increase program size.
        // FIXME: This will also reduce register usage, which may be better
        // in some cases. A better heuristic is needed.
        for (MachineRegisterInfo::use_iterator
               Use = MRI.use_begin(Dst.getReg()), E = MRI.use_end();
             Use != E; ++Use) {
          MachineInstr *UseMI = Use->getParent();
          unsigned OpNo = Use.getOperandNo();

          if (TII->isInlineConstant(*UseMI, OpNo, OpToFold)) {
            foldOperand(OpToFold, UseMI, OpNo, FoldList,
                        CopiesToReplace, TII, TRI, MRI);
          } else {
            if (++NumLiteralUses == 1) {
              NonInlineUse = &*Use;
              NonInlineUseOpNo = OpNo;
            }
          }
        }

        if (NumLiteralUses == 1) {
          MachineInstr *UseMI = NonInlineUse->getParent();
          foldOperand(OpToFold, UseMI, NonInlineUseOpNo, FoldList,
                      CopiesToReplace, TII, TRI, MRI);
        }
      } else {
        // Folding register.
        for (MachineRegisterInfo::use_iterator
               Use = MRI.use_begin(Dst.getReg()), E = MRI.use_end();
             Use != E; ++Use) {
          MachineInstr *UseMI = Use->getParent();

          foldOperand(OpToFold, UseMI, Use.getOperandNo(), FoldList,
                      CopiesToReplace, TII, TRI, MRI);
        }
      }

      // Make sure we add EXEC uses to any new v_mov instructions created.
      for (MachineInstr *Copy : CopiesToReplace)
        Copy->addImplicitDefUseOperands(MF);

      for (FoldCandidate &Fold : FoldList) {
        if (updateOperand(Fold, TRI)) {
          // Clear kill flags.
          if (Fold.isReg()) {
            assert(Fold.OpToFold && Fold.OpToFold->isReg());
            // FIXME: Probably shouldn't bother trying to fold if not an
            // SGPR. PeepholeOptimizer can eliminate redundant VGPR->VGPR
            // copies.
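            // updateOperand() gave the folded register a new use in
            // Fold.UseMI, so kill flags recorded for it elsewhere may now
            // be stale and must be cleared.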
            MRI.clearKillFlags(Fold.OpToFold->getReg());
          }
          DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " <<
                static_cast<int>(Fold.UseOpNo) << " of " << *Fold.UseMI << '\n');

          // Folding the immediate may reveal operations that can be constant
          // folded or replaced with a copy. This can happen for example after
          // frame indices are lowered to constants or from splitting 64-bit
          // constants.
          tryConstantFoldOp(MRI, TII, Fold.UseMI);
        }
      }
    }
  }
  return false;
}