//====- X86FlagsCopyLowering.cpp - Lowers COPY nodes of EFLAGS ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// Lowers COPY nodes of EFLAGS by directly extracting and preserving
/// individual flag bits.
///
/// We have to do this by carefully analyzing and rewriting the usage of the
/// copied EFLAGS register because there is no general way to rematerialize the
/// entire EFLAGS register safely and efficiently. Using `popf` both forces
/// dynamic stack adjustment and can create correctness issues due to IF, TF,
/// and other non-status flags being overwritten. Sequences involving SAHF
/// don't work on all x86 processors and are often quite slow compared to
/// directly testing a single status flag preserved in its own GPR.
///
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineSSAUpdater.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCSchedule.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <iterator>
#include <utility>

using namespace llvm;

#define PASS_KEY "x86-flags-copy-lowering"
#define DEBUG_TYPE PASS_KEY

STATISTIC(NumCopiesEliminated, "Number of copies of EFLAGS eliminated");
STATISTIC(NumSetCCsInserted, "Number of setCC instructions inserted");
STATISTIC(NumTestsInserted, "Number of test instructions inserted");
STATISTIC(NumAddsInserted, "Number of add instructions inserted");
STATISTIC(NumNFsConvertedTo, "Number of NF instructions converted to");

namespace {

// Convenient array type for storing registers associated with each condition.
using CondRegArray = std::array<unsigned, X86::LAST_VALID_COND + 1>;

class X86FlagsCopyLoweringPass : public MachineFunctionPass {
public:
  X86FlagsCopyLoweringPass() : MachineFunctionPass(ID) {}

  StringRef getPassName() const override { return "X86 EFLAGS copy lowering"; }
  bool runOnMachineFunction(MachineFunction &MF) override;
  void getAnalysisUsage(AnalysisUsage &AU) const override;

  /// Pass identification, replacement for typeid.
  static char ID;

private:
  MachineRegisterInfo *MRI = nullptr;
  const X86Subtarget *Subtarget = nullptr;
  const X86InstrInfo *TII = nullptr;
  const TargetRegisterInfo *TRI = nullptr;
  const TargetRegisterClass *PromoteRC = nullptr;
  MachineDominatorTree *MDT = nullptr;

  CondRegArray collectCondsInRegs(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator CopyDefI);

  Register promoteCondToReg(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator TestPos,
                            const DebugLoc &TestLoc, X86::CondCode Cond);
  std::pair<unsigned, bool> getCondOrInverseInReg(
      MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
      const DebugLoc &TestLoc, X86::CondCode Cond, CondRegArray &CondRegs);
  void insertTest(MachineBasicBlock &MBB, MachineBasicBlock::iterator Pos,
                  const DebugLoc &Loc, unsigned Reg);

  void rewriteSetCC(MachineBasicBlock &MBB, MachineBasicBlock::iterator Pos,
                    const DebugLoc &Loc, MachineInstr &MI,
                    CondRegArray &CondRegs);
  void rewriteArithmetic(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator Pos, const DebugLoc &Loc,
                         MachineInstr &MI, CondRegArray &CondRegs);
  void rewriteMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator Pos,
                 const DebugLoc &Loc, MachineInstr &MI, CondRegArray &CondRegs);
};

} // end anonymous namespace

INITIALIZE_PASS_BEGIN(X86FlagsCopyLoweringPass, DEBUG_TYPE,
                      "X86 EFLAGS copy lowering", false, false)
INITIALIZE_PASS_END(X86FlagsCopyLoweringPass, DEBUG_TYPE,
                    "X86 EFLAGS copy lowering", false, false)

FunctionPass *llvm::createX86FlagsCopyLoweringPass() {
  return new X86FlagsCopyLoweringPass();
}

char X86FlagsCopyLoweringPass::ID = 0;

void X86FlagsCopyLoweringPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addUsedIfAvailable<MachineDominatorTreeWrapperPass>();
  MachineFunctionPass::getAnalysisUsage(AU);
}

static bool isArithmeticOp(unsigned Opc) {
  return X86::isADC(Opc) || X86::isSBB(Opc) || X86::isRCL(Opc) ||
         X86::isRCR(Opc) || (Opc == X86::SETB_C32r || Opc == X86::SETB_C64r);
}

static MachineBasicBlock &splitBlock(MachineBasicBlock &MBB,
                                     MachineInstr &SplitI,
                                     const X86InstrInfo &TII) {
  MachineFunction &MF = *MBB.getParent();

  assert(SplitI.getParent() == &MBB &&
         "Split instruction must be in the split block!");
  assert(SplitI.isBranch() &&
         "Only designed to split a tail of branch instructions!");
  assert(X86::getCondFromBranch(SplitI) != X86::COND_INVALID &&
         "Must split on an actual jCC instruction!");

  // Dig out the previous instruction to the split point.
  MachineInstr &PrevI = *std::prev(SplitI.getIterator());
  assert(PrevI.isBranch() && "Must split after a branch!");
  assert(X86::getCondFromBranch(PrevI) != X86::COND_INVALID &&
         "Must split after an actual jCC instruction!");
  assert(!std::prev(PrevI.getIterator())->isTerminator() &&
         "Must only have this one terminator prior to the split!");

  // Grab the one successor edge that will stay in `MBB`.
  MachineBasicBlock &UnsplitSucc = *PrevI.getOperand(0).getMBB();

  // Analyze the original block to see if we are actually splitting an edge
  // into two edges. This can happen when we have multiple conditional jumps to
  // the same successor.
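  // For example (illustrative MIR, not taken from a specific test):
  //   JCC_1 %bb.2, <cc1>
  //   JCC_1 %bb.2, <cc2>   <-- SplitI
  // Both branches target %bb.2, so splitting at SplitI turns the single
  // MBB -> %bb.2 edge into two distinct edges.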
  bool IsEdgeSplit =
      std::any_of(SplitI.getIterator(), MBB.instr_end(),
                  [&](MachineInstr &MI) {
                    assert(MI.isTerminator() &&
                           "Should only have spliced terminators!");
                    return llvm::any_of(
                        MI.operands(), [&](MachineOperand &MOp) {
                          return MOp.isMBB() && MOp.getMBB() == &UnsplitSucc;
                        });
                  }) ||
      MBB.getFallThrough() == &UnsplitSucc;

  MachineBasicBlock &NewMBB = *MF.CreateMachineBasicBlock();

  // Insert the new block immediately after the current one. Any existing
  // fallthrough will be sunk into this new block anyways.
  MF.insert(std::next(MachineFunction::iterator(&MBB)), &NewMBB);

  // Splice the tail of instructions into the new block.
  NewMBB.splice(NewMBB.end(), &MBB, SplitI.getIterator(), MBB.end());

  // Copy the necessary successors (and their probability info) into the new
  // block.
  for (auto SI = MBB.succ_begin(), SE = MBB.succ_end(); SI != SE; ++SI)
    if (IsEdgeSplit || *SI != &UnsplitSucc)
      NewMBB.copySuccessor(&MBB, SI);
  // Normalize the probabilities if we didn't end up splitting the edge.
  if (!IsEdgeSplit)
    NewMBB.normalizeSuccProbs();

  // Now replace all of the moved successors in the original block with the new
  // block. This will merge their probabilities.
  for (MachineBasicBlock *Succ : NewMBB.successors())
    if (Succ != &UnsplitSucc)
      MBB.replaceSuccessor(Succ, &NewMBB);

  // We should always end up replacing at least one successor.
  assert(MBB.isSuccessor(&NewMBB) &&
         "Failed to make the new block a successor!");

  // Now update all the PHIs.
  for (MachineBasicBlock *Succ : NewMBB.successors()) {
    for (MachineInstr &MI : *Succ) {
      if (!MI.isPHI())
        break;

      for (int OpIdx = 1, NumOps = MI.getNumOperands(); OpIdx < NumOps;
           OpIdx += 2) {
        MachineOperand &OpV = MI.getOperand(OpIdx);
        MachineOperand &OpMBB = MI.getOperand(OpIdx + 1);
        assert(OpMBB.isMBB() && "Block operand to a PHI is not a block!");
        if (OpMBB.getMBB() != &MBB)
          continue;

        // Replace the operand for unsplit successors.
        if (!IsEdgeSplit || Succ != &UnsplitSucc) {
          OpMBB.setMBB(&NewMBB);

          // We have to continue scanning as there may be multiple entries in
          // the PHI.
          continue;
        }

        // When we have split the edge, append a new successor entry.
        MI.addOperand(MF, OpV);
        MI.addOperand(MF, MachineOperand::CreateMBB(&NewMBB));
        break;
      }
    }
  }

  return NewMBB;
}

enum EFLAGSClobber { NoClobber, EvitableClobber, InevitableClobber };

static EFLAGSClobber getClobberType(const MachineInstr &MI) {
  const MachineOperand *FlagDef =
      MI.findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr);
  if (!FlagDef)
    return NoClobber;
  if (FlagDef->isDead() && X86::getNFVariant(MI.getOpcode()))
    return EvitableClobber;

  return InevitableClobber;
}

bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
  LLVM_DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName()
                    << " **********\n");

  Subtarget = &MF.getSubtarget<X86Subtarget>();
  MRI = &MF.getRegInfo();
  TII = Subtarget->getInstrInfo();
  TRI = Subtarget->getRegisterInfo();
  PromoteRC = &X86::GR8RegClass;

  if (MF.empty())
    // Nothing to do for a degenerate empty function...
    return false;

  if (none_of(MRI->def_instructions(X86::EFLAGS), [](const MachineInstr &MI) {
        return MI.getOpcode() == TargetOpcode::COPY;
      }))
    return false;

  // We change the code, so we don't preserve the dominator tree anyway. If we
  // got a valid MDT from the pass manager, use that, otherwise construct one
  // now. This is an optimization that avoids unnecessary MDT construction for
  // functions that have no flag copies.

  auto MDTWrapper = getAnalysisIfAvailable<MachineDominatorTreeWrapperPass>();
  std::unique_ptr<MachineDominatorTree> OwnedMDT;
  if (MDTWrapper) {
    MDT = &MDTWrapper->getDomTree();
  } else {
    OwnedMDT = std::make_unique<MachineDominatorTree>(MF);
    MDT = OwnedMDT.get();
  }

  // Collect the copies in RPO so that when there are chains where a copy is in
  // turn copied again we visit the first one first. This ensures we can find
  // viable locations for testing the original EFLAGS that dominate all the
  // uses across complex CFGs.
  SmallSetVector<MachineInstr *, 4> Copies;
  ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
  for (MachineBasicBlock *MBB : RPOT)
    for (MachineInstr &MI : *MBB)
      if (MI.getOpcode() == TargetOpcode::COPY &&
          MI.getOperand(0).getReg() == X86::EFLAGS)
        Copies.insert(&MI);

  // Try to eliminate the copies by transforming the instructions between the
  // copy and the copy's def into NF (no flags update) variants, e.g.
  //
  // %1:gr64 = COPY $eflags
  // OP1 implicit-def dead $eflags
  // $eflags = COPY %1
  // OP2 cc, implicit $eflags
  //
  // ->
  //
  // OP1_NF
  // OP2 implicit $eflags
  if (Subtarget->hasNF()) {
    SmallSetVector<MachineInstr *, 4> RemovedCopies;
    // CopyIIt may be invalidated by removing copies.
    auto CopyIIt = Copies.begin(), CopyIEnd = Copies.end();
    while (CopyIIt != CopyIEnd) {
      auto NCopyIIt = std::next(CopyIIt);
      SmallSetVector<MachineInstr *, 4> EvitableClobbers;
      MachineInstr *CopyI = *CopyIIt;
      MachineOperand &VOp = CopyI->getOperand(1);
      MachineInstr *CopyDefI = MRI->getVRegDef(VOp.getReg());
      MachineBasicBlock *CopyIMBB = CopyI->getParent();
      MachineBasicBlock *CopyDefIMBB = CopyDefI->getParent();
      // Walk all basic blocks reachable in depth-first iteration on the
      // inverse CFG from CopyIMBB to CopyDefIMBB. These blocks are all the
      // blocks that may be executed between the execution of CopyDefIMBB and
      // CopyIMBB. On all execution paths, instructions from CopyDefI to CopyI
      // (exclusive) have to be NF-convertible if they clobber flags.
      for (auto BI = idf_begin(CopyIMBB), BE = idf_end(CopyDefIMBB); BI != BE;
           ++BI) {
        MachineBasicBlock *MBB = *BI;
        for (auto I = (MBB != CopyDefIMBB)
                          ? MBB->begin()
                          : std::next(MachineBasicBlock::iterator(CopyDefI)),
                  E = (MBB != CopyIMBB) ? MBB->end()
                                        : MachineBasicBlock::iterator(CopyI);
             I != E; ++I) {
          MachineInstr &MI = *I;
          EFLAGSClobber ClobberType = getClobberType(MI);
          if (ClobberType == NoClobber)
            continue;

          if (ClobberType == InevitableClobber)
            goto ProcessNextCopyI;

          assert(ClobberType == EvitableClobber && "unexpected workflow");
          EvitableClobbers.insert(&MI);
        }
      }
      // Convert evitable clobbers into NF variants and remove the copies.
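      // At this point every EFLAGS clobber between CopyDefI and CopyI is known
      // to have an NF variant, so the copy pair itself is no longer needed and
      // the clobbers can be rewritten in place below.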
      RemovedCopies.insert(CopyI);
      CopyI->eraseFromParent();
      if (MRI->use_nodbg_empty(CopyDefI->getOperand(0).getReg())) {
        RemovedCopies.insert(CopyDefI);
        CopyDefI->eraseFromParent();
      }
      ++NumCopiesEliminated;
      for (auto *Clobber : EvitableClobbers) {
        unsigned NewOpc = X86::getNFVariant(Clobber->getOpcode());
        assert(NewOpc && "evitable clobber must have a NF variant");
        Clobber->setDesc(TII->get(NewOpc));
        Clobber->removeOperand(
            Clobber->findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr)
                ->getOperandNo());
        ++NumNFsConvertedTo;
      }
      // Update liveins for basic blocks in the path.
      for (auto BI = idf_begin(CopyIMBB), BE = idf_end(CopyDefIMBB); BI != BE;
           ++BI)
        if (*BI != CopyDefIMBB)
          BI->addLiveIn(X86::EFLAGS);
    ProcessNextCopyI:
      CopyIIt = NCopyIIt;
    }
    Copies.set_subtract(RemovedCopies);
  }

  // For the remaining copies that cannot be eliminated by the NF transform, we
  // use setcc to preserve the flags in a GPR before OP1, and recheck its value
  // before using the flags, e.g.
  //
  // %1:gr64 = COPY $eflags
  // OP1 implicit-def dead $eflags
  // $eflags = COPY %1
  // OP2 cc, implicit $eflags
  //
  // ->
  //
  // %1:gr8 = SETCCr cc, implicit $eflags
  // OP1 implicit-def dead $eflags
  // TEST8rr %1, %1, implicit-def $eflags
  // OP2 ne, implicit $eflags
  for (MachineInstr *CopyI : Copies) {
    MachineBasicBlock &MBB = *CopyI->getParent();

    MachineOperand &VOp = CopyI->getOperand(1);
    assert(VOp.isReg() &&
           "The input to the copy for EFLAGS should always be a register!");
    MachineInstr &CopyDefI = *MRI->getVRegDef(VOp.getReg());
    if (CopyDefI.getOpcode() != TargetOpcode::COPY) {
      // FIXME: The most likely candidates here are PHI nodes. We could in
      // theory handle PHI nodes, but it gets really, really hard. Insanely
      // hard. Hard enough that it is probably better to change every other
      // part of LLVM to avoid creating them. The issue is that once we have
      // PHIs we won't know which original EFLAGS value we need to capture with
      // our setCCs below. The end result will be computing a complete set of
      // setCCs that we *might* want, computing them in every place where we
      // copy *out* of EFLAGS and then doing SSA formation on all of them to
      // insert necessary PHI nodes and consume those here. Then hoping that
      // somehow we DCE the unnecessary ones. This DCE seems very unlikely to
      // be successful and so we will almost certainly end up with a glut of
      // dead setCC instructions. Until we have a motivating test case and fail
      // to avoid it by changing other parts of LLVM's lowering, we refuse to
      // handle this complex case here.
      LLVM_DEBUG(
          dbgs() << "ERROR: Encountered unexpected def of an eflags copy: ";
          CopyDefI.dump());
      report_fatal_error(
          "Cannot lower EFLAGS copy unless it is defined in turn by a copy!");
    }

    auto Cleanup = make_scope_exit([&] {
      // All uses of the EFLAGS copy are now rewritten; kill the copy into
      // EFLAGS and, if it is dead, the copy from EFLAGS as well.
      CopyI->eraseFromParent();
      if (MRI->use_empty(CopyDefI.getOperand(0).getReg()))
        CopyDefI.eraseFromParent();
      ++NumCopiesEliminated;
    });

    MachineOperand &DOp = CopyI->getOperand(0);
    assert(DOp.isDef() && "Expected register def!");
    assert(DOp.getReg() == X86::EFLAGS && "Unexpected copy def register!");
    if (DOp.isDead())
      continue;

    MachineBasicBlock *TestMBB = CopyDefI.getParent();
    auto TestPos = CopyDefI.getIterator();
    DebugLoc TestLoc = CopyDefI.getDebugLoc();

    LLVM_DEBUG(dbgs() << "Rewriting copy: "; CopyI->dump());

    // Walk up across live-in EFLAGS to find where they were actually def'ed.
    //
    // This copy's def may just be part of a region of blocks covered by
    // a single def of EFLAGS and we want to find the top of that region where
    // possible.
    //
    // This is essentially a search for a *candidate* reaching definition
    // location. We don't need to ever find the actual reaching definition
    // here, but we want to walk up the dominator tree to find the highest
    // point which would be viable for such a definition.
    auto HasEFLAGSClobber = [&](MachineBasicBlock::iterator Begin,
                                MachineBasicBlock::iterator End) {
      // Scan backwards as we expect these to be relatively short and often
      // find a clobber near the end.
      return llvm::any_of(
          llvm::reverse(llvm::make_range(Begin, End)), [&](MachineInstr &MI) {
            // Flag any instruction (other than the copy we are
            // currently rewriting) that defs EFLAGS.
            return &MI != CopyI &&
                   MI.findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr);
          });
    };
    auto HasEFLAGSClobberPath = [&](MachineBasicBlock *BeginMBB,
                                    MachineBasicBlock *EndMBB) {
      assert(MDT->dominates(BeginMBB, EndMBB) &&
             "Only support paths down the dominator tree!");
      SmallPtrSet<MachineBasicBlock *, 4> Visited;
      SmallVector<MachineBasicBlock *, 4> Worklist;
      // We terminate at the beginning. No need to scan it.
      Visited.insert(BeginMBB);
      Worklist.push_back(EndMBB);
      do {
        auto *MBB = Worklist.pop_back_val();
        for (auto *PredMBB : MBB->predecessors()) {
          if (!Visited.insert(PredMBB).second)
            continue;
          if (HasEFLAGSClobber(PredMBB->begin(), PredMBB->end()))
            return true;
          // Enqueue this block to walk its predecessors.
          Worklist.push_back(PredMBB);
        }
      } while (!Worklist.empty());
      // No clobber found along any path from the begin block to the end block.
      return false;
    };
    while (TestMBB->isLiveIn(X86::EFLAGS) && !TestMBB->pred_empty() &&
           !HasEFLAGSClobber(TestMBB->begin(), TestPos)) {
      // Find the nearest common dominator of the predecessors, as
      // that will be the best candidate to hoist into.
      MachineBasicBlock *HoistMBB =
          std::accumulate(std::next(TestMBB->pred_begin()), TestMBB->pred_end(),
                          *TestMBB->pred_begin(),
                          [&](MachineBasicBlock *LHS, MachineBasicBlock *RHS) {
                            return MDT->findNearestCommonDominator(LHS, RHS);
                          });

      // Now we need to scan all predecessors that may be reached along paths
      // to the hoist block. A clobber anywhere in any of these blocks blocks
      // the hoist. Note that this even handles loops because we require *no*
      // clobbers.
      if (HasEFLAGSClobberPath(HoistMBB, TestMBB))
        break;

      // We also need the terminators to not sneakily clobber flags.
      if (HasEFLAGSClobber(HoistMBB->getFirstTerminator()->getIterator(),
                           HoistMBB->instr_end()))
        break;

      // We found a viable location, hoist our test position to it.
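      // (The test position becomes the hoist block's first terminator, so any
      // SETcc we emit there lands after the block's body, which we have just
      // verified contains no EFLAGS clobbers, and before its branches.)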
      TestMBB = HoistMBB;
      TestPos = TestMBB->getFirstTerminator()->getIterator();
      // Clear the debug location as it would just be confusing after hoisting.
      TestLoc = DebugLoc();
    }
    LLVM_DEBUG({
      auto DefIt = llvm::find_if(
          llvm::reverse(llvm::make_range(TestMBB->instr_begin(), TestPos)),
          [&](MachineInstr &MI) {
            return MI.findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr);
          });
      if (DefIt.base() != TestMBB->instr_begin()) {
        dbgs() << " Using EFLAGS defined by: ";
        DefIt->dump();
      } else {
        dbgs() << " Using live-in flags for BB:\n";
        TestMBB->dump();
      }
    });

    // While rewriting uses, we buffer jumps and rewrite them in a second pass
    // because doing so will perturb the CFG that we are walking to find the
    // uses in the first place.
    SmallVector<MachineInstr *, 4> JmpIs;

    // Gather the condition flags that have already been preserved in
    // registers. We do this from scratch each time as we expect there to be
    // very few of them and we expect to not revisit the same copy definition
    // many times. If either of those change sufficiently we could build a map
    // of these up front instead.
    CondRegArray CondRegs = collectCondsInRegs(*TestMBB, TestPos);

    // Collect the basic blocks we need to scan. Typically this will just be
    // a single basic block but we may have to scan multiple blocks if the
    // EFLAGS copy lives into successors.
    SmallVector<MachineBasicBlock *, 2> Blocks;
    SmallPtrSet<MachineBasicBlock *, 2> VisitedBlocks;
    Blocks.push_back(&MBB);

    do {
      MachineBasicBlock &UseMBB = *Blocks.pop_back_val();

      // Track if/when we find a kill of the flags in this block.
      bool FlagsKilled = false;

      // In most cases, we walk from the beginning to the end of the block. But
      // when the block is the same block as the copy is from, we will visit it
      // twice. The first time we start from the copy and go to the end. The
      // second time we start from the beginning and go to the copy. This lets
      // us handle copies inside of cycles.
      // FIXME: This loop is *super* confusing. This is at least in part
      // a symptom of all of this routine needing to be refactored into
      // documentable components. Once done, there may be a better way to write
      // this loop.
      for (auto MII = (&UseMBB == &MBB && !VisitedBlocks.count(&UseMBB))
                          ? std::next(CopyI->getIterator())
                          : UseMBB.instr_begin(),
                MIE = UseMBB.instr_end();
           MII != MIE;) {
        MachineInstr &MI = *MII++;
        // If we are in the original copy block and encounter either the copy
        // def or the copy itself, break so that we don't re-process any part of
        // the block or process the instructions in the range that was copied
        // over.
        if (&MI == CopyI || &MI == &CopyDefI) {
          assert(&UseMBB == &MBB && VisitedBlocks.count(&MBB) &&
                 "Should only encounter these on the second pass over the "
                 "original block.");
          break;
        }

        MachineOperand *FlagUse =
            MI.findRegisterUseOperand(X86::EFLAGS, /*TRI=*/nullptr);
        FlagsKilled = MI.modifiesRegister(X86::EFLAGS, TRI);

        if (!FlagUse && FlagsKilled)
          break;
        else if (!FlagUse)
          continue;

        LLVM_DEBUG(dbgs() << " Rewriting use: "; MI.dump());

        // Check the kill flag before we rewrite as that may change it.
        if (FlagUse->isKill())
          FlagsKilled = true;

        // Once we encounter a branch, the rest of the instructions must also
        // be branches. We can't rewrite in place here, so we handle them below.
        //
        // Note that we don't have to handle tail calls here, even conditional
        // tail calls, as those are not introduced into the X86 MI until
        // post-RA branch folding or block placement. As a consequence, we get
        // to deal with the simpler formulation of conditional branches
        // followed by tail calls.
        if (X86::getCondFromBranch(MI) != X86::COND_INVALID) {
          auto JmpIt = MI.getIterator();
          do {
            JmpIs.push_back(&*JmpIt);
            ++JmpIt;
          } while (JmpIt != UseMBB.instr_end() &&
                   X86::getCondFromBranch(*JmpIt) != X86::COND_INVALID);
          break;
        }

        // Otherwise we can just rewrite in-place.
        unsigned Opc = MI.getOpcode();
        if (Opc == TargetOpcode::COPY) {
          // Just replace this copy with the original copy def.
          MRI->replaceRegWith(MI.getOperand(0).getReg(),
                              CopyDefI.getOperand(0).getReg());
          MI.eraseFromParent();
        } else if (X86::isSETCC(Opc)) {
          rewriteSetCC(*TestMBB, TestPos, TestLoc, MI, CondRegs);
        } else if (isArithmeticOp(Opc)) {
          rewriteArithmetic(*TestMBB, TestPos, TestLoc, MI, CondRegs);
        } else {
          rewriteMI(*TestMBB, TestPos, TestLoc, MI, CondRegs);
        }

        // If this was the last use of the flags, we're done.
        if (FlagsKilled)
          break;
      }

      // If the flags were killed, we're done with this block.
      if (FlagsKilled)
        continue;

      // Otherwise we need to scan successors for ones where the flags are
      // live-in and queue those up for processing.
      for (MachineBasicBlock *SuccMBB : UseMBB.successors())
        if (SuccMBB->isLiveIn(X86::EFLAGS) &&
            VisitedBlocks.insert(SuccMBB).second) {
          // We currently don't do any PHI insertion and so we require that the
          // test basic block dominates all of the use basic blocks. Further,
          // we can't have a cycle from the test block back to itself as that
          // would create a cycle requiring a PHI to break it.
          //
          // We could in theory do PHI insertion here if it becomes useful by
          // just taking undef values in along every edge that we don't trace
          // this EFLAGS copy along. This isn't as bad as fully general PHI
          // insertion, but still seems like a great deal of complexity.
          //
          // Because it is theoretically possible that some earlier MI pass or
          // other lowering transformation could induce this to happen, we do
          // a hard check even in non-debug builds here.
          if (SuccMBB == TestMBB || !MDT->dominates(TestMBB, SuccMBB)) {
            LLVM_DEBUG({
              dbgs()
                  << "ERROR: Encountered use that is not dominated by our test "
                     "basic block! Rewriting this would require inserting PHI "
                     "nodes to track the flag state across the CFG.\n\nTest "
                     "block:\n";
              TestMBB->dump();
              dbgs() << "Use block:\n";
              SuccMBB->dump();
            });
            report_fatal_error(
                "Cannot lower EFLAGS copy when original copy def "
                "does not dominate all uses.");
          }

          Blocks.push_back(SuccMBB);

          // After this, EFLAGS will be recreated before each use.
          SuccMBB->removeLiveIn(X86::EFLAGS);
        }
    } while (!Blocks.empty());

    // Now rewrite the jumps that use the flags. These we handle specially
    // because if there are multiple jumps in a single basic block we'll have
    // to do surgery on the CFG.
    MachineBasicBlock *LastJmpMBB = nullptr;
    for (MachineInstr *JmpI : JmpIs) {
      // Past the first jump within a basic block we need to split the blocks
      // apart.
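      // (Each rewritten jump gets a fresh TEST of the saved condition register
      // inserted immediately before it; splitting keeps that TEST from landing
      // in the middle of an existing terminator sequence.)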
      if (JmpI->getParent() == LastJmpMBB)
        splitBlock(*JmpI->getParent(), *JmpI, *TII);
      else
        LastJmpMBB = JmpI->getParent();

      rewriteMI(*TestMBB, TestPos, TestLoc, *JmpI, CondRegs);
    }

    // FIXME: Mark the last use of EFLAGS before the copy's def as a kill if
    // the copy's def operand is itself a kill.
  }

#ifndef NDEBUG
  for (MachineBasicBlock &MBB : MF)
    for (MachineInstr &MI : MBB)
      if (MI.getOpcode() == TargetOpcode::COPY &&
          (MI.getOperand(0).getReg() == X86::EFLAGS ||
           MI.getOperand(1).getReg() == X86::EFLAGS)) {
        LLVM_DEBUG(dbgs() << "ERROR: Found a COPY involving EFLAGS: ";
                   MI.dump());
        llvm_unreachable("Unlowered EFLAGS copy!");
      }
#endif

  return true;
}

/// Collect any conditions that have already been set in registers so that we
/// can re-use them rather than adding duplicates.
CondRegArray X86FlagsCopyLoweringPass::collectCondsInRegs(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator TestPos) {
  CondRegArray CondRegs = {};

  // Scan backwards across the range of instructions with live EFLAGS.
  for (MachineInstr &MI :
       llvm::reverse(llvm::make_range(MBB.begin(), TestPos))) {
    X86::CondCode Cond = X86::getCondFromSETCC(MI);
    if (Cond != X86::COND_INVALID && !MI.mayStore() &&
        MI.getOperand(0).isReg() && MI.getOperand(0).getReg().isVirtual()) {
      assert(MI.getOperand(0).isDef() &&
             "A non-storing SETcc should always define a register!");
      CondRegs[Cond] = MI.getOperand(0).getReg();
    }

    // Stop scanning when we see the first definition of the EFLAGS as prior to
    // this we would potentially capture the wrong flag state.
    if (MI.findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr))
      break;
  }
  return CondRegs;
}

Register X86FlagsCopyLoweringPass::promoteCondToReg(
    MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
    const DebugLoc &TestLoc, X86::CondCode Cond) {
  Register Reg = MRI->createVirtualRegister(PromoteRC);
  auto SetI = BuildMI(TestMBB, TestPos, TestLoc, TII->get(X86::SETCCr), Reg)
                  .addImm(Cond);
  (void)SetI;
  LLVM_DEBUG(dbgs() << " save cond: "; SetI->dump());
  ++NumSetCCsInserted;
  return Reg;
}

std::pair<unsigned, bool> X86FlagsCopyLoweringPass::getCondOrInverseInReg(
    MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
    const DebugLoc &TestLoc, X86::CondCode Cond, CondRegArray &CondRegs) {
  unsigned &CondReg = CondRegs[Cond];
  unsigned &InvCondReg = CondRegs[X86::GetOppositeBranchCondition(Cond)];
  if (!CondReg && !InvCondReg)
    CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, Cond);

  if (CondReg)
    return {CondReg, false};
  else
    return {InvCondReg, true};
}

void X86FlagsCopyLoweringPass::insertTest(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator Pos,
                                          const DebugLoc &Loc, unsigned Reg) {
  auto TestI =
      BuildMI(MBB, Pos, Loc, TII->get(X86::TEST8rr)).addReg(Reg).addReg(Reg);
  (void)TestI;
  LLVM_DEBUG(dbgs() << " test cond: "; TestI->dump());
  ++NumTestsInserted;
}

void X86FlagsCopyLoweringPass::rewriteSetCC(MachineBasicBlock &MBB,
                                            MachineBasicBlock::iterator Pos,
                                            const DebugLoc &Loc,
                                            MachineInstr &MI,
                                            CondRegArray &CondRegs) {
  X86::CondCode Cond = X86::getCondFromSETCC(MI);
  // Note that we can't usefully rewrite this to the inverse without complex
  // analysis of the users of the setCC.
  // Largely we rely on any avoidable duplicates having already been avoided
  // by this point.
  unsigned &CondReg = CondRegs[Cond];
  if (!CondReg)
    CondReg = promoteCondToReg(MBB, Pos, Loc, Cond);

  // Rewriting a register def is trivial: we just replace the register and
  // remove the setcc.
  if (!MI.mayStore()) {
    assert(MI.getOperand(0).isReg() &&
           "Cannot have a non-register defined operand to SETcc!");
    Register OldReg = MI.getOperand(0).getReg();
    // Drop Kill flags on the old register before replacing. CondReg may have
    // a longer live range.
    MRI->clearKillFlags(OldReg);
    MRI->replaceRegWith(OldReg, CondReg);
    MI.eraseFromParent();
    return;
  }

  // Otherwise, we need to emit a store.
  auto MIB = BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
                     TII->get(X86::MOV8mr));
  // Copy the address operands.
  for (int i = 0; i < X86::AddrNumOperands; ++i)
    MIB.add(MI.getOperand(i));

  MIB.addReg(CondReg);
  MIB.setMemRefs(MI.memoperands());
  MI.eraseFromParent();
}

void X86FlagsCopyLoweringPass::rewriteArithmetic(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator Pos,
    const DebugLoc &Loc, MachineInstr &MI, CondRegArray &CondRegs) {
  // Arithmetic is either reading CF or OF.
  X86::CondCode Cond = X86::COND_B; // CF == 1
  // The addend to use to reset CF or OF when added to the flag value.
  // Set up an addend such that adding a flag value of one overflows the 8-bit
  // register and produces a carry, since there is no higher bit available.
  int Addend = 255;

  // Now get a register that contains the value of the flag input to the
  // arithmetic. We require exactly this flag to simplify the arithmetic
  // required to materialize it back into the flag.
  unsigned &CondReg = CondRegs[Cond];
  if (!CondReg)
    CondReg = promoteCondToReg(MBB, Pos, Loc, Cond);

  // Insert an instruction that will set the flag back to the desired value.
  Register TmpReg = MRI->createVirtualRegister(PromoteRC);
  auto AddI =
      BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
              TII->get(Subtarget->hasNDD() ? X86::ADD8ri_ND : X86::ADD8ri))
          .addDef(TmpReg, RegState::Dead)
          .addReg(CondReg)
          .addImm(Addend);
  (void)AddI;
  LLVM_DEBUG(dbgs() << " add cond: "; AddI->dump());
  ++NumAddsInserted;
  MI.findRegisterUseOperand(X86::EFLAGS, /*TRI=*/nullptr)->setIsKill(true);
}

static X86::CondCode getImplicitCondFromMI(unsigned Opc) {
#define FROM_TO(A, B)                                                          \
  case X86::CMOV##A##_Fp32:                                                    \
  case X86::CMOV##A##_Fp64:                                                    \
  case X86::CMOV##A##_Fp80:                                                    \
    return X86::COND_##B;

  switch (Opc) {
  default:
    return X86::COND_INVALID;
    FROM_TO(B, B)
    FROM_TO(E, E)
    FROM_TO(P, P)
    FROM_TO(BE, BE)
    FROM_TO(NB, AE)
    FROM_TO(NE, NE)
    FROM_TO(NP, NP)
    FROM_TO(NBE, A)
  }
#undef FROM_TO
}

static unsigned getOpcodeWithCC(unsigned Opc, X86::CondCode CC) {
  assert((CC == X86::COND_E || CC == X86::COND_NE) && "Unexpected CC");
#define CASE(A)                                                                \
  case X86::CMOVB_##A:                                                         \
  case X86::CMOVE_##A:                                                         \
  case X86::CMOVP_##A:                                                         \
  case X86::CMOVBE_##A:                                                        \
  case X86::CMOVNB_##A:                                                        \
  case X86::CMOVNE_##A:                                                        \
  case X86::CMOVNP_##A:                                                        \
  case X86::CMOVNBE_##A:                                                       \
    return (CC == X86::COND_E) ? X86::CMOVE_##A : X86::CMOVNE_##A;
  switch (Opc) {
  default:
    llvm_unreachable("Unexpected opcode");
    CASE(Fp32)
    CASE(Fp64)
    CASE(Fp80)
  }
#undef CASE
}

void X86FlagsCopyLoweringPass::rewriteMI(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator Pos,
                                         const DebugLoc &Loc, MachineInstr &MI,
                                         CondRegArray &CondRegs) {
  // First get the register containing this specific condition.
  bool IsImplicitCC = false;
  X86::CondCode CC = X86::getCondFromMI(MI);
  if (CC == X86::COND_INVALID) {
    CC = getImplicitCondFromMI(MI.getOpcode());
    IsImplicitCC = true;
  }
  assert(CC != X86::COND_INVALID && "Unknown EFLAG user!");
  unsigned CondReg;
  bool Inverted;
  std::tie(CondReg, Inverted) =
      getCondOrInverseInReg(MBB, Pos, Loc, CC, CondRegs);

  // Insert a direct test of the saved register.
  insertTest(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(), CondReg);

  // Rewrite the instruction to use the !ZF flag from the test, and then kill
  // its use of the flags afterward.
  X86::CondCode NewCC = Inverted ? X86::COND_E : X86::COND_NE;
  if (IsImplicitCC)
    MI.setDesc(TII->get(getOpcodeWithCC(MI.getOpcode(), NewCC)));
  else
    MI.getOperand(MI.getDesc().getNumOperands() - 1).setImm(NewCC);

  MI.findRegisterUseOperand(X86::EFLAGS, /*TRI=*/nullptr)->setIsKill(true);
  LLVM_DEBUG(dbgs() << " fixed instruction: "; MI.dump());
}