1 //===---- X86FixupSetCC.cpp - optimize usage of LEA instructions ----------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines a pass that fixes zero-extension of setcc patterns. 11 // X86 setcc instructions are modeled to have no input arguments, and a single 12 // GR8 output argument. This is consistent with other similar instructions 13 // (e.g. movb), but means it is impossible to directly generate a setcc into 14 // the lower GR8 of a specified GR32. 15 // This means that ISel must select (zext (setcc)) into something like 16 // seta %al; movzbl %al, %eax. 17 // Unfortunately, this can cause a stall due to the partial register write 18 // performed by the setcc. Instead, we can use: 19 // xor %eax, %eax; seta %al 20 // This both avoids the stall, and encodes shorter. 21 //===----------------------------------------------------------------------===// 22 23 #include "X86.h" 24 #include "X86InstrInfo.h" 25 #include "X86Subtarget.h" 26 #include "llvm/ADT/Statistic.h" 27 #include "llvm/CodeGen/MachineFunctionPass.h" 28 #include "llvm/CodeGen/MachineInstrBuilder.h" 29 #include "llvm/CodeGen/MachineRegisterInfo.h" 30 31 using namespace llvm; 32 33 #define DEBUG_TYPE "x86-fixup-setcc" 34 35 STATISTIC(NumSubstZexts, "Number of setcc + zext pairs substituted"); 36 37 namespace { 38 class X86FixupSetCCPass : public MachineFunctionPass { 39 public: 40 X86FixupSetCCPass() : MachineFunctionPass(ID) {} 41 42 const char *getPassName() const override { return "X86 Fixup SetCC"; } 43 44 bool runOnMachineFunction(MachineFunction &MF) override; 45 46 private: 47 // Find the preceding instruction that imp-defs eflags. 48 MachineInstr *findFlagsImpDef(MachineBasicBlock *MBB, 49 MachineBasicBlock::reverse_iterator MI); 50 51 // Return true if MI imp-uses eflags. 52 bool impUsesFlags(MachineInstr *MI); 53 54 // Return true if this is the opcode of a SetCC instruction with a register 55 // output. 56 bool isSetCCr(unsigned Opode); 57 58 MachineRegisterInfo *MRI; 59 const X86InstrInfo *TII; 60 61 enum { SearchBound = 16 }; 62 63 static char ID; 64 }; 65 66 char X86FixupSetCCPass::ID = 0; 67 } 68 69 FunctionPass *llvm::createX86FixupSetCC() { return new X86FixupSetCCPass(); } 70 71 bool X86FixupSetCCPass::isSetCCr(unsigned Opcode) { 72 switch (Opcode) { 73 default: 74 return false; 75 case X86::SETOr: 76 case X86::SETNOr: 77 case X86::SETBr: 78 case X86::SETAEr: 79 case X86::SETEr: 80 case X86::SETNEr: 81 case X86::SETBEr: 82 case X86::SETAr: 83 case X86::SETSr: 84 case X86::SETNSr: 85 case X86::SETPr: 86 case X86::SETNPr: 87 case X86::SETLr: 88 case X86::SETGEr: 89 case X86::SETLEr: 90 case X86::SETGr: 91 return true; 92 } 93 } 94 95 // We expect the instruction *immediately* before the setcc to imp-def 96 // EFLAGS (because of scheduling glue). To make this less brittle w.r.t 97 // scheduling, look backwards until we hit the beginning of the 98 // basic-block, or a small bound (to avoid quadratic behavior). 99 MachineInstr * 100 X86FixupSetCCPass::findFlagsImpDef(MachineBasicBlock *MBB, 101 MachineBasicBlock::reverse_iterator MI) { 102 // FIXME: Should this be instr_rend(), and MI be reverse_instr_iterator? 103 auto MBBStart = MBB->rend(); 104 for (int i = 0; (i < SearchBound) && (MI != MBBStart); ++i, ++MI) 105 for (auto &Op : MI->implicit_operands()) 106 if ((Op.getReg() == X86::EFLAGS) && (Op.isDef())) 107 return &*MI; 108 109 return nullptr; 110 } 111 112 bool X86FixupSetCCPass::impUsesFlags(MachineInstr *MI) { 113 for (auto &Op : MI->implicit_operands()) 114 if ((Op.getReg() == X86::EFLAGS) && (Op.isUse())) 115 return true; 116 117 return false; 118 } 119 120 bool X86FixupSetCCPass::runOnMachineFunction(MachineFunction &MF) { 121 bool Changed = false; 122 MRI = &MF.getRegInfo(); 123 TII = MF.getSubtarget<X86Subtarget>().getInstrInfo(); 124 125 SmallVector<MachineInstr*, 4> ToErase; 126 127 for (auto &MBB : MF) { 128 for (auto &MI : MBB) { 129 // Find a setcc that is used by a zext. 130 // This doesn't have to be the only use, the transformation is safe 131 // regardless. 132 if (!isSetCCr(MI.getOpcode())) 133 continue; 134 135 MachineInstr *ZExt = nullptr; 136 for (auto &Use : MRI->use_instructions(MI.getOperand(0).getReg())) 137 if (Use.getOpcode() == X86::MOVZX32rr8) 138 ZExt = &Use; 139 140 if (!ZExt) 141 continue; 142 143 // Find the preceding instruction that imp-defs eflags. 144 MachineInstr *FlagsDefMI = findFlagsImpDef( 145 MI.getParent(), MachineBasicBlock::reverse_iterator(&MI)); 146 if (!FlagsDefMI) 147 continue; 148 149 // We'd like to put something that clobbers eflags directly before 150 // FlagsDefMI. This can't hurt anything after FlagsDefMI, because 151 // it, itself, by definition, clobbers eflags. But it may happen that 152 // FlagsDefMI also *uses* eflags, in which case the transformation is 153 // invalid. 154 if (impUsesFlags(FlagsDefMI)) 155 continue; 156 157 ++NumSubstZexts; 158 Changed = true; 159 160 // On 32-bit, we need to be careful to force an ABCD register. 161 const TargetRegisterClass *RC = MF.getSubtarget<X86Subtarget>().is64Bit() 162 ? &X86::GR32RegClass 163 : &X86::GR32_ABCDRegClass; 164 unsigned ZeroReg = MRI->createVirtualRegister(RC); 165 unsigned InsertReg = MRI->createVirtualRegister(RC); 166 167 // Initialize a register with 0. This must go before the eflags def 168 BuildMI(MBB, FlagsDefMI, MI.getDebugLoc(), TII->get(X86::MOV32r0), 169 ZeroReg); 170 171 // X86 setcc only takes an output GR8, so fake a GR32 input by inserting 172 // the setcc result into the low byte of the zeroed register. 173 BuildMI(*ZExt->getParent(), ZExt, ZExt->getDebugLoc(), 174 TII->get(X86::INSERT_SUBREG), InsertReg) 175 .addReg(ZeroReg) 176 .addReg(MI.getOperand(0).getReg()) 177 .addImm(X86::sub_8bit); 178 MRI->replaceRegWith(ZExt->getOperand(0).getReg(), InsertReg); 179 ToErase.push_back(ZExt); 180 } 181 } 182 183 for (auto &I : ToErase) 184 I->eraseFromParent(); 185 186 return Changed; 187 } 188