//===- X86InstructionSelector.cpp -----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// X86.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/X86BaseInfo.h"
#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86RegisterBankInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterBank.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGenTypes/LowLevelType.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <tuple>

#define DEBUG_TYPE "X86-isel"

using namespace llvm;

namespace {

#define GET_GLOBALISEL_PREDICATE_BITSET
#include "X86GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATE_BITSET

class X86InstructionSelector : public InstructionSelector {
public:
  X86InstructionSelector(const X86TargetMachine &TM, const X86Subtarget &STI,
                         const X86RegisterBankInfo &RBI);

  bool select(MachineInstr &I) override;
  static const char *getName() { return DEBUG_TYPE; }

private:
  /// tblgen-erated 'select' implementation, used as the initial selector for
  /// the patterns that don't require complex C++.
  bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;

  // TODO: remove after supported by Tablegen-erated instruction selection.
74 unsigned getLoadStoreOp(const LLT &Ty, const RegisterBank &RB, unsigned Opc, 75 Align Alignment) const; 76 77 bool selectLoadStoreOp(MachineInstr &I, MachineRegisterInfo &MRI, 78 MachineFunction &MF) const; 79 bool selectFrameIndexOrGep(MachineInstr &I, MachineRegisterInfo &MRI, 80 MachineFunction &MF) const; 81 bool selectGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI, 82 MachineFunction &MF) const; 83 bool selectConstant(MachineInstr &I, MachineRegisterInfo &MRI, 84 MachineFunction &MF) const; 85 bool selectTruncOrPtrToInt(MachineInstr &I, MachineRegisterInfo &MRI, 86 MachineFunction &MF) const; 87 bool selectZext(MachineInstr &I, MachineRegisterInfo &MRI, 88 MachineFunction &MF) const; 89 bool selectAnyext(MachineInstr &I, MachineRegisterInfo &MRI, 90 MachineFunction &MF) const; 91 bool selectCmp(MachineInstr &I, MachineRegisterInfo &MRI, 92 MachineFunction &MF) const; 93 bool selectFCmp(MachineInstr &I, MachineRegisterInfo &MRI, 94 MachineFunction &MF) const; 95 bool selectUAddSub(MachineInstr &I, MachineRegisterInfo &MRI, 96 MachineFunction &MF) const; 97 bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI) const; 98 bool selectCopy(MachineInstr &I, MachineRegisterInfo &MRI) const; 99 bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI, 100 MachineFunction &MF); 101 bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI, 102 MachineFunction &MF); 103 bool selectInsert(MachineInstr &I, MachineRegisterInfo &MRI, 104 MachineFunction &MF) const; 105 bool selectExtract(MachineInstr &I, MachineRegisterInfo &MRI, 106 MachineFunction &MF) const; 107 bool selectCondBranch(MachineInstr &I, MachineRegisterInfo &MRI, 108 MachineFunction &MF) const; 109 bool selectTurnIntoCOPY(MachineInstr &I, MachineRegisterInfo &MRI, 110 const unsigned DstReg, 111 const TargetRegisterClass *DstRC, 112 const unsigned SrcReg, 113 const TargetRegisterClass *SrcRC) const; 114 bool materializeFP(MachineInstr &I, MachineRegisterInfo &MRI, 115 MachineFunction &MF) const; 116 bool selectImplicitDefOrPHI(MachineInstr &I, MachineRegisterInfo &MRI) const; 117 bool selectMulDivRem(MachineInstr &I, MachineRegisterInfo &MRI, 118 MachineFunction &MF) const; 119 bool selectSelect(MachineInstr &I, MachineRegisterInfo &MRI, 120 MachineFunction &MF) const; 121 122 // emit insert subreg instruction and insert it before MachineInstr &I 123 bool emitInsertSubreg(unsigned DstReg, unsigned SrcReg, MachineInstr &I, 124 MachineRegisterInfo &MRI, MachineFunction &MF) const; 125 // emit extract subreg instruction and insert it before MachineInstr &I 126 bool emitExtractSubreg(unsigned DstReg, unsigned SrcReg, MachineInstr &I, 127 MachineRegisterInfo &MRI, MachineFunction &MF) const; 128 129 const TargetRegisterClass *getRegClass(LLT Ty, const RegisterBank &RB) const; 130 const TargetRegisterClass *getRegClass(LLT Ty, unsigned Reg, 131 MachineRegisterInfo &MRI) const; 132 133 const X86TargetMachine &TM; 134 const X86Subtarget &STI; 135 const X86InstrInfo &TII; 136 const X86RegisterInfo &TRI; 137 const X86RegisterBankInfo &RBI; 138 139 #define GET_GLOBALISEL_PREDICATES_DECL 140 #include "X86GenGlobalISel.inc" 141 #undef GET_GLOBALISEL_PREDICATES_DECL 142 143 #define GET_GLOBALISEL_TEMPORARIES_DECL 144 #include "X86GenGlobalISel.inc" 145 #undef GET_GLOBALISEL_TEMPORARIES_DECL 146 }; 147 148 } // end anonymous namespace 149 150 #define GET_GLOBALISEL_IMPL 151 #include "X86GenGlobalISel.inc" 152 #undef GET_GLOBALISEL_IMPL 153 154 X86InstructionSelector::X86InstructionSelector(const X86TargetMachine 
&TM, 155 const X86Subtarget &STI, 156 const X86RegisterBankInfo &RBI) 157 : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), 158 RBI(RBI), 159 #define GET_GLOBALISEL_PREDICATES_INIT 160 #include "X86GenGlobalISel.inc" 161 #undef GET_GLOBALISEL_PREDICATES_INIT 162 #define GET_GLOBALISEL_TEMPORARIES_INIT 163 #include "X86GenGlobalISel.inc" 164 #undef GET_GLOBALISEL_TEMPORARIES_INIT 165 { 166 } 167 168 // FIXME: This should be target-independent, inferred from the types declared 169 // for each class in the bank. 170 const TargetRegisterClass * 171 X86InstructionSelector::getRegClass(LLT Ty, const RegisterBank &RB) const { 172 if (RB.getID() == X86::GPRRegBankID) { 173 if (Ty.getSizeInBits() <= 8) 174 return &X86::GR8RegClass; 175 if (Ty.getSizeInBits() == 16) 176 return &X86::GR16RegClass; 177 if (Ty.getSizeInBits() == 32) 178 return &X86::GR32RegClass; 179 if (Ty.getSizeInBits() == 64) 180 return &X86::GR64RegClass; 181 } 182 if (RB.getID() == X86::VECRRegBankID) { 183 if (Ty.getSizeInBits() == 16) 184 return STI.hasAVX512() ? &X86::FR16XRegClass : &X86::FR16RegClass; 185 if (Ty.getSizeInBits() == 32) 186 return STI.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass; 187 if (Ty.getSizeInBits() == 64) 188 return STI.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass; 189 if (Ty.getSizeInBits() == 128) 190 return STI.hasAVX512() ? &X86::VR128XRegClass : &X86::VR128RegClass; 191 if (Ty.getSizeInBits() == 256) 192 return STI.hasAVX512() ? &X86::VR256XRegClass : &X86::VR256RegClass; 193 if (Ty.getSizeInBits() == 512) 194 return &X86::VR512RegClass; 195 } 196 197 if (RB.getID() == X86::PSRRegBankID) { 198 if (Ty.getSizeInBits() == 80) 199 return &X86::RFP80RegClass; 200 if (Ty.getSizeInBits() == 64) 201 return &X86::RFP64RegClass; 202 if (Ty.getSizeInBits() == 32) 203 return &X86::RFP32RegClass; 204 } 205 206 llvm_unreachable("Unknown RegBank!"); 207 } 208 209 const TargetRegisterClass * 210 X86InstructionSelector::getRegClass(LLT Ty, unsigned Reg, 211 MachineRegisterInfo &MRI) const { 212 const RegisterBank &RegBank = *RBI.getRegBank(Reg, MRI, TRI); 213 return getRegClass(Ty, RegBank); 214 } 215 216 static unsigned getSubRegIndex(const TargetRegisterClass *RC) { 217 unsigned SubIdx = X86::NoSubRegister; 218 if (RC == &X86::GR32RegClass) { 219 SubIdx = X86::sub_32bit; 220 } else if (RC == &X86::GR16RegClass) { 221 SubIdx = X86::sub_16bit; 222 } else if (RC == &X86::GR8RegClass) { 223 SubIdx = X86::sub_8bit; 224 } 225 226 return SubIdx; 227 } 228 229 static const TargetRegisterClass *getRegClassFromGRPhysReg(Register Reg) { 230 assert(Reg.isPhysical()); 231 if (X86::GR64RegClass.contains(Reg)) 232 return &X86::GR64RegClass; 233 if (X86::GR32RegClass.contains(Reg)) 234 return &X86::GR32RegClass; 235 if (X86::GR16RegClass.contains(Reg)) 236 return &X86::GR16RegClass; 237 if (X86::GR8RegClass.contains(Reg)) 238 return &X86::GR8RegClass; 239 240 llvm_unreachable("Unknown RegClass for PhysReg!"); 241 } 242 243 // FIXME: We need some sort of API in RBI/TRI to allow generic code to 244 // constrain operands of simple instructions given a TargetRegisterClass 245 // and LLT 246 bool X86InstructionSelector::selectDebugInstr(MachineInstr &I, 247 MachineRegisterInfo &MRI) const { 248 for (MachineOperand &MO : I.operands()) { 249 if (!MO.isReg()) 250 continue; 251 Register Reg = MO.getReg(); 252 if (!Reg) 253 continue; 254 if (Reg.isPhysical()) 255 continue; 256 LLT Ty = MRI.getType(Reg); 257 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg); 258 const 
TargetRegisterClass *RC =
        dyn_cast_if_present<const TargetRegisterClass *>(RegClassOrBank);
    if (!RC) {
      const RegisterBank &RB = *cast<const RegisterBank *>(RegClassOrBank);
      RC = getRegClass(Ty, RB);
      if (!RC) {
        LLVM_DEBUG(
            dbgs() << "Warning: DBG_VALUE operand has unexpected size/bank\n");
        break;
      }
    }
    RBI.constrainGenericRegister(Reg, *RC, MRI);
  }

  return true;
}

// Set X86 Opcode and constrain DestReg.
bool X86InstructionSelector::selectCopy(MachineInstr &I,
                                        MachineRegisterInfo &MRI) const {
  Register DstReg = I.getOperand(0).getReg();
  const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);

  Register SrcReg = I.getOperand(1).getReg();
  const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);

  if (DstReg.isPhysical()) {
    assert(I.isCopy() && "Generic operators do not allow physical registers");

    if (DstSize > SrcSize && SrcRegBank.getID() == X86::GPRRegBankID &&
        DstRegBank.getID() == X86::GPRRegBankID) {

      const TargetRegisterClass *SrcRC =
          getRegClass(MRI.getType(SrcReg), SrcRegBank);
      const TargetRegisterClass *DstRC = getRegClassFromGRPhysReg(DstReg);

      if (SrcRC != DstRC) {
        // This case can be generated by ABI lowering; perform an anyext.
        Register ExtSrc = MRI.createVirtualRegister(DstRC);
        BuildMI(*I.getParent(), I, I.getDebugLoc(),
                TII.get(TargetOpcode::SUBREG_TO_REG))
            .addDef(ExtSrc)
            .addImm(0)
            .addReg(SrcReg)
            .addImm(getSubRegIndex(SrcRC));

        I.getOperand(1).setReg(ExtSrc);
      }
    }

    return true;
  }

  assert((!SrcReg.isPhysical() || I.isCopy()) &&
         "No phys reg on generic operators");
  assert((DstSize == SrcSize ||
          // Copies are a means to set up initial types; the number of
          // bits may not exactly match.
          (SrcReg.isPhysical() &&
           DstSize <= RBI.getSizeInBits(SrcReg, MRI, TRI))) &&
         "Copy with different width?!");

  const TargetRegisterClass *DstRC =
      getRegClass(MRI.getType(DstReg), DstRegBank);

  if (SrcRegBank.getID() == X86::GPRRegBankID &&
      DstRegBank.getID() == X86::GPRRegBankID && SrcSize > DstSize &&
      SrcReg.isPhysical()) {
    // Change the physical register to perform the truncate.

    const TargetRegisterClass *SrcRC = getRegClassFromGRPhysReg(SrcReg);

    if (DstRC != SrcRC) {
      I.getOperand(1).setSubReg(getSubRegIndex(DstRC));
      I.getOperand(1).substPhysReg(SrcReg, TRI);
    }
  }

  // No need to constrain SrcReg. It will get constrained when
  // we hit another of its uses or defs.
  // Copies do not have constraints.
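  // Only (re)constrain the destination when its current class is not already
  // a subclass of DstRC.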
341 const TargetRegisterClass *OldRC = MRI.getRegClassOrNull(DstReg); 342 if (!OldRC || !DstRC->hasSubClassEq(OldRC)) { 343 if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) { 344 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode()) 345 << " operand\n"); 346 return false; 347 } 348 } 349 I.setDesc(TII.get(X86::COPY)); 350 return true; 351 } 352 353 bool X86InstructionSelector::select(MachineInstr &I) { 354 assert(I.getParent() && "Instruction should be in a basic block!"); 355 assert(I.getParent()->getParent() && "Instruction should be in a function!"); 356 357 MachineBasicBlock &MBB = *I.getParent(); 358 MachineFunction &MF = *MBB.getParent(); 359 MachineRegisterInfo &MRI = MF.getRegInfo(); 360 361 unsigned Opcode = I.getOpcode(); 362 if (!isPreISelGenericOpcode(Opcode)) { 363 // Certain non-generic instructions also need some special handling. 364 365 if (Opcode == TargetOpcode::LOAD_STACK_GUARD) 366 return false; 367 368 if (I.isCopy()) 369 return selectCopy(I, MRI); 370 371 if (I.isDebugInstr()) 372 return selectDebugInstr(I, MRI); 373 374 return true; 375 } 376 377 assert(I.getNumOperands() == I.getNumExplicitOperands() && 378 "Generic instruction has unexpected implicit operands\n"); 379 380 if (selectImpl(I, *CoverageInfo)) 381 return true; 382 383 LLVM_DEBUG(dbgs() << " C++ instruction selection: "; I.print(dbgs())); 384 385 // TODO: This should be implemented by tblgen. 386 switch (I.getOpcode()) { 387 default: 388 return false; 389 case TargetOpcode::G_STORE: 390 case TargetOpcode::G_LOAD: 391 return selectLoadStoreOp(I, MRI, MF); 392 case TargetOpcode::G_PTR_ADD: 393 case TargetOpcode::G_FRAME_INDEX: 394 return selectFrameIndexOrGep(I, MRI, MF); 395 case TargetOpcode::G_GLOBAL_VALUE: 396 return selectGlobalValue(I, MRI, MF); 397 case TargetOpcode::G_CONSTANT: 398 return selectConstant(I, MRI, MF); 399 case TargetOpcode::G_FCONSTANT: 400 return materializeFP(I, MRI, MF); 401 case TargetOpcode::G_PTRTOINT: 402 case TargetOpcode::G_TRUNC: 403 return selectTruncOrPtrToInt(I, MRI, MF); 404 case TargetOpcode::G_INTTOPTR: 405 return selectCopy(I, MRI); 406 case TargetOpcode::G_ZEXT: 407 return selectZext(I, MRI, MF); 408 case TargetOpcode::G_ANYEXT: 409 return selectAnyext(I, MRI, MF); 410 case TargetOpcode::G_ICMP: 411 return selectCmp(I, MRI, MF); 412 case TargetOpcode::G_FCMP: 413 return selectFCmp(I, MRI, MF); 414 case TargetOpcode::G_UADDE: 415 case TargetOpcode::G_UADDO: 416 case TargetOpcode::G_USUBE: 417 case TargetOpcode::G_USUBO: 418 return selectUAddSub(I, MRI, MF); 419 case TargetOpcode::G_UNMERGE_VALUES: 420 return selectUnmergeValues(I, MRI, MF); 421 case TargetOpcode::G_MERGE_VALUES: 422 case TargetOpcode::G_CONCAT_VECTORS: 423 return selectMergeValues(I, MRI, MF); 424 case TargetOpcode::G_EXTRACT: 425 return selectExtract(I, MRI, MF); 426 case TargetOpcode::G_INSERT: 427 return selectInsert(I, MRI, MF); 428 case TargetOpcode::G_BRCOND: 429 return selectCondBranch(I, MRI, MF); 430 case TargetOpcode::G_IMPLICIT_DEF: 431 case TargetOpcode::G_PHI: 432 return selectImplicitDefOrPHI(I, MRI); 433 case TargetOpcode::G_MUL: 434 case TargetOpcode::G_SMULH: 435 case TargetOpcode::G_UMULH: 436 case TargetOpcode::G_SDIV: 437 case TargetOpcode::G_UDIV: 438 case TargetOpcode::G_SREM: 439 case TargetOpcode::G_UREM: 440 return selectMulDivRem(I, MRI, MF); 441 case TargetOpcode::G_SELECT: 442 return selectSelect(I, MRI, MF); 443 } 444 445 return false; 446 } 447 448 unsigned X86InstructionSelector::getLoadStoreOp(const LLT &Ty, 449 const RegisterBank &RB, 450 
unsigned Opc, 451 Align Alignment) const { 452 bool Isload = (Opc == TargetOpcode::G_LOAD); 453 bool HasAVX = STI.hasAVX(); 454 bool HasAVX512 = STI.hasAVX512(); 455 bool HasVLX = STI.hasVLX(); 456 457 if (Ty == LLT::scalar(8)) { 458 if (X86::GPRRegBankID == RB.getID()) 459 return Isload ? X86::MOV8rm : X86::MOV8mr; 460 } else if (Ty == LLT::scalar(16)) { 461 if (X86::GPRRegBankID == RB.getID()) 462 return Isload ? X86::MOV16rm : X86::MOV16mr; 463 } else if (Ty == LLT::scalar(32) || Ty == LLT::pointer(0, 32)) { 464 if (X86::GPRRegBankID == RB.getID()) 465 return Isload ? X86::MOV32rm : X86::MOV32mr; 466 if (X86::VECRRegBankID == RB.getID()) 467 return Isload ? (HasAVX512 ? X86::VMOVSSZrm_alt : 468 HasAVX ? X86::VMOVSSrm_alt : 469 X86::MOVSSrm_alt) 470 : (HasAVX512 ? X86::VMOVSSZmr : 471 HasAVX ? X86::VMOVSSmr : 472 X86::MOVSSmr); 473 if (X86::PSRRegBankID == RB.getID()) 474 return Isload ? X86::LD_Fp32m : X86::ST_Fp32m; 475 } else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64)) { 476 if (X86::GPRRegBankID == RB.getID()) 477 return Isload ? X86::MOV64rm : X86::MOV64mr; 478 if (X86::VECRRegBankID == RB.getID()) 479 return Isload ? (HasAVX512 ? X86::VMOVSDZrm_alt : 480 HasAVX ? X86::VMOVSDrm_alt : 481 X86::MOVSDrm_alt) 482 : (HasAVX512 ? X86::VMOVSDZmr : 483 HasAVX ? X86::VMOVSDmr : 484 X86::MOVSDmr); 485 if (X86::PSRRegBankID == RB.getID()) 486 return Isload ? X86::LD_Fp64m : X86::ST_Fp64m; 487 } else if (Ty == LLT::scalar(80)) { 488 return Isload ? X86::LD_Fp80m : X86::ST_FpP80m; 489 } else if (Ty.isVector() && Ty.getSizeInBits() == 128) { 490 if (Alignment >= Align(16)) 491 return Isload ? (HasVLX ? X86::VMOVAPSZ128rm 492 : HasAVX512 493 ? X86::VMOVAPSZ128rm_NOVLX 494 : HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm) 495 : (HasVLX ? X86::VMOVAPSZ128mr 496 : HasAVX512 497 ? X86::VMOVAPSZ128mr_NOVLX 498 : HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr); 499 else 500 return Isload ? (HasVLX ? X86::VMOVUPSZ128rm 501 : HasAVX512 502 ? X86::VMOVUPSZ128rm_NOVLX 503 : HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm) 504 : (HasVLX ? X86::VMOVUPSZ128mr 505 : HasAVX512 506 ? X86::VMOVUPSZ128mr_NOVLX 507 : HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr); 508 } else if (Ty.isVector() && Ty.getSizeInBits() == 256) { 509 if (Alignment >= Align(32)) 510 return Isload ? (HasVLX ? X86::VMOVAPSZ256rm 511 : HasAVX512 ? X86::VMOVAPSZ256rm_NOVLX 512 : X86::VMOVAPSYrm) 513 : (HasVLX ? X86::VMOVAPSZ256mr 514 : HasAVX512 ? X86::VMOVAPSZ256mr_NOVLX 515 : X86::VMOVAPSYmr); 516 else 517 return Isload ? (HasVLX ? X86::VMOVUPSZ256rm 518 : HasAVX512 ? X86::VMOVUPSZ256rm_NOVLX 519 : X86::VMOVUPSYrm) 520 : (HasVLX ? X86::VMOVUPSZ256mr 521 : HasAVX512 ? X86::VMOVUPSZ256mr_NOVLX 522 : X86::VMOVUPSYmr); 523 } else if (Ty.isVector() && Ty.getSizeInBits() == 512) { 524 if (Alignment >= Align(64)) 525 return Isload ? X86::VMOVAPSZrm : X86::VMOVAPSZmr; 526 else 527 return Isload ? X86::VMOVUPSZrm : X86::VMOVUPSZmr; 528 } 529 return Opc; 530 } 531 532 // Fill in an address from the given instruction. 533 static void X86SelectAddress(const MachineInstr &I, 534 const MachineRegisterInfo &MRI, 535 X86AddressMode &AM) { 536 assert(I.getOperand(0).isReg() && "unsupported opperand."); 537 assert(MRI.getType(I.getOperand(0).getReg()).isPointer() && 538 "unsupported type."); 539 540 if (I.getOpcode() == TargetOpcode::G_PTR_ADD) { 541 if (auto COff = getIConstantVRegSExtVal(I.getOperand(2).getReg(), MRI)) { 542 int64_t Imm = *COff; 543 if (isInt<32>(Imm)) { // Check for displacement overflow. 
544 AM.Disp = static_cast<int32_t>(Imm); 545 AM.Base.Reg = I.getOperand(1).getReg(); 546 return; 547 } 548 } 549 } else if (I.getOpcode() == TargetOpcode::G_FRAME_INDEX) { 550 AM.Base.FrameIndex = I.getOperand(1).getIndex(); 551 AM.BaseType = X86AddressMode::FrameIndexBase; 552 return; 553 } 554 555 // Default behavior. 556 AM.Base.Reg = I.getOperand(0).getReg(); 557 } 558 559 bool X86InstructionSelector::selectLoadStoreOp(MachineInstr &I, 560 MachineRegisterInfo &MRI, 561 MachineFunction &MF) const { 562 unsigned Opc = I.getOpcode(); 563 564 assert((Opc == TargetOpcode::G_STORE || Opc == TargetOpcode::G_LOAD) && 565 "Only G_STORE and G_LOAD are expected for selection"); 566 567 const Register DefReg = I.getOperand(0).getReg(); 568 LLT Ty = MRI.getType(DefReg); 569 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI); 570 571 assert(I.hasOneMemOperand()); 572 auto &MemOp = **I.memoperands_begin(); 573 if (MemOp.isAtomic()) { 574 // Note: for unordered operations, we rely on the fact the appropriate MMO 575 // is already on the instruction we're mutating, and thus we don't need to 576 // make any changes. So long as we select an opcode which is capable of 577 // loading or storing the appropriate size atomically, the rest of the 578 // backend is required to respect the MMO state. 579 if (!MemOp.isUnordered()) { 580 LLVM_DEBUG(dbgs() << "Atomic ordering not supported yet\n"); 581 return false; 582 } 583 if (MemOp.getAlign() < Ty.getSizeInBits() / 8) { 584 LLVM_DEBUG(dbgs() << "Unaligned atomics not supported yet\n"); 585 return false; 586 } 587 } 588 589 unsigned NewOpc = getLoadStoreOp(Ty, RB, Opc, MemOp.getAlign()); 590 if (NewOpc == Opc) 591 return false; 592 593 I.setDesc(TII.get(NewOpc)); 594 MachineInstrBuilder MIB(MF, I); 595 const MachineInstr *Ptr = MRI.getVRegDef(I.getOperand(1).getReg()); 596 597 if (Ptr->getOpcode() == TargetOpcode::G_CONSTANT_POOL) { 598 assert(Opc == TargetOpcode::G_LOAD && 599 "Only G_LOAD from constant pool is expected"); 600 // TODO: Need a separate move for Large model 601 if (TM.getCodeModel() == CodeModel::Large) 602 return false; 603 604 unsigned char OpFlag = STI.classifyLocalReference(nullptr); 605 unsigned PICBase = 0; 606 if (OpFlag == X86II::MO_GOTOFF) 607 PICBase = TII.getGlobalBaseReg(&MF); 608 else if (STI.is64Bit()) 609 PICBase = X86::RIP; 610 611 I.removeOperand(1); 612 addConstantPoolReference(MIB, Ptr->getOperand(1).getIndex(), PICBase, 613 OpFlag); 614 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 615 } 616 617 X86AddressMode AM; 618 X86SelectAddress(*Ptr, MRI, AM); 619 if (Opc == TargetOpcode::G_LOAD) { 620 I.removeOperand(1); 621 addFullAddress(MIB, AM); 622 } else { 623 // G_STORE (VAL, Addr), X86Store instruction (Addr, VAL) 624 I.removeOperand(1); 625 I.removeOperand(0); 626 addFullAddress(MIB, AM).addUse(DefReg); 627 } 628 bool Constrained = constrainSelectedInstRegOperands(I, TII, TRI, RBI); 629 I.addImplicitDefUseOperands(MF); 630 return Constrained; 631 } 632 633 static unsigned getLeaOP(LLT Ty, const X86Subtarget &STI) { 634 if (Ty == LLT::pointer(0, 64)) 635 return X86::LEA64r; 636 else if (Ty == LLT::pointer(0, 32)) 637 return STI.isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r; 638 else 639 llvm_unreachable("Can't get LEA opcode. 
Unsupported type."); 640 } 641 642 bool X86InstructionSelector::selectFrameIndexOrGep(MachineInstr &I, 643 MachineRegisterInfo &MRI, 644 MachineFunction &MF) const { 645 unsigned Opc = I.getOpcode(); 646 647 assert((Opc == TargetOpcode::G_FRAME_INDEX || Opc == TargetOpcode::G_PTR_ADD) && 648 "unexpected instruction"); 649 650 const Register DefReg = I.getOperand(0).getReg(); 651 LLT Ty = MRI.getType(DefReg); 652 653 // Use LEA to calculate frame index and GEP 654 unsigned NewOpc = getLeaOP(Ty, STI); 655 I.setDesc(TII.get(NewOpc)); 656 MachineInstrBuilder MIB(MF, I); 657 658 if (Opc == TargetOpcode::G_FRAME_INDEX) { 659 addOffset(MIB, 0); 660 } else { 661 MachineOperand &InxOp = I.getOperand(2); 662 I.addOperand(InxOp); // set IndexReg 663 InxOp.ChangeToImmediate(1); // set Scale 664 MIB.addImm(0).addReg(0); 665 } 666 667 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 668 } 669 670 bool X86InstructionSelector::selectGlobalValue(MachineInstr &I, 671 MachineRegisterInfo &MRI, 672 MachineFunction &MF) const { 673 assert((I.getOpcode() == TargetOpcode::G_GLOBAL_VALUE) && 674 "unexpected instruction"); 675 676 auto GV = I.getOperand(1).getGlobal(); 677 if (GV->isThreadLocal()) { 678 return false; // TODO: we don't support TLS yet. 679 } 680 681 // Can't handle alternate code models yet. 682 if (TM.getCodeModel() != CodeModel::Small) 683 return false; 684 685 X86AddressMode AM; 686 AM.GV = GV; 687 AM.GVOpFlags = STI.classifyGlobalReference(GV); 688 689 // TODO: The ABI requires an extra load. not supported yet. 690 if (isGlobalStubReference(AM.GVOpFlags)) 691 return false; 692 693 // TODO: This reference is relative to the pic base. not supported yet. 694 if (isGlobalRelativeToPICBase(AM.GVOpFlags)) 695 return false; 696 697 if (STI.isPICStyleRIPRel()) { 698 // Use rip-relative addressing. 
699 assert(AM.Base.Reg == 0 && AM.IndexReg == 0); 700 AM.Base.Reg = X86::RIP; 701 } 702 703 const Register DefReg = I.getOperand(0).getReg(); 704 LLT Ty = MRI.getType(DefReg); 705 unsigned NewOpc = getLeaOP(Ty, STI); 706 707 I.setDesc(TII.get(NewOpc)); 708 MachineInstrBuilder MIB(MF, I); 709 710 I.removeOperand(1); 711 addFullAddress(MIB, AM); 712 713 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 714 } 715 716 bool X86InstructionSelector::selectConstant(MachineInstr &I, 717 MachineRegisterInfo &MRI, 718 MachineFunction &MF) const { 719 assert((I.getOpcode() == TargetOpcode::G_CONSTANT) && 720 "unexpected instruction"); 721 722 const Register DefReg = I.getOperand(0).getReg(); 723 LLT Ty = MRI.getType(DefReg); 724 725 if (RBI.getRegBank(DefReg, MRI, TRI)->getID() != X86::GPRRegBankID) 726 return false; 727 728 uint64_t Val = 0; 729 if (I.getOperand(1).isCImm()) { 730 Val = I.getOperand(1).getCImm()->getZExtValue(); 731 I.getOperand(1).ChangeToImmediate(Val); 732 } else if (I.getOperand(1).isImm()) { 733 Val = I.getOperand(1).getImm(); 734 } else 735 llvm_unreachable("Unsupported operand type."); 736 737 unsigned NewOpc; 738 switch (Ty.getSizeInBits()) { 739 case 8: 740 NewOpc = X86::MOV8ri; 741 break; 742 case 16: 743 NewOpc = X86::MOV16ri; 744 break; 745 case 32: 746 NewOpc = X86::MOV32ri; 747 break; 748 case 64: 749 // TODO: in case isUInt<32>(Val), X86::MOV32ri can be used 750 if (isInt<32>(Val)) 751 NewOpc = X86::MOV64ri32; 752 else 753 NewOpc = X86::MOV64ri; 754 break; 755 default: 756 llvm_unreachable("Can't select G_CONSTANT, unsupported type."); 757 } 758 759 I.setDesc(TII.get(NewOpc)); 760 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 761 } 762 763 // Helper function for selectTruncOrPtrToInt and selectAnyext. 764 // Returns true if DstRC lives on a floating register class and 765 // SrcRC lives on a 128-bit vector class. 
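// Both classes are allocated to the XMM registers, so such a cast is just a
// plain register-to-register COPY.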
766 static bool canTurnIntoCOPY(const TargetRegisterClass *DstRC, 767 const TargetRegisterClass *SrcRC) { 768 return (DstRC == &X86::FR32RegClass || DstRC == &X86::FR32XRegClass || 769 DstRC == &X86::FR64RegClass || DstRC == &X86::FR64XRegClass) && 770 (SrcRC == &X86::VR128RegClass || SrcRC == &X86::VR128XRegClass); 771 } 772 773 bool X86InstructionSelector::selectTurnIntoCOPY( 774 MachineInstr &I, MachineRegisterInfo &MRI, const unsigned DstReg, 775 const TargetRegisterClass *DstRC, const unsigned SrcReg, 776 const TargetRegisterClass *SrcRC) const { 777 778 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) || 779 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) { 780 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode()) 781 << " operand\n"); 782 return false; 783 } 784 I.setDesc(TII.get(X86::COPY)); 785 return true; 786 } 787 788 bool X86InstructionSelector::selectTruncOrPtrToInt(MachineInstr &I, 789 MachineRegisterInfo &MRI, 790 MachineFunction &MF) const { 791 assert((I.getOpcode() == TargetOpcode::G_TRUNC || 792 I.getOpcode() == TargetOpcode::G_PTRTOINT) && 793 "unexpected instruction"); 794 795 const Register DstReg = I.getOperand(0).getReg(); 796 const Register SrcReg = I.getOperand(1).getReg(); 797 798 const LLT DstTy = MRI.getType(DstReg); 799 const LLT SrcTy = MRI.getType(SrcReg); 800 801 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); 802 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI); 803 804 if (DstRB.getID() != SrcRB.getID()) { 805 LLVM_DEBUG(dbgs() << TII.getName(I.getOpcode()) 806 << " input/output on different banks\n"); 807 return false; 808 } 809 810 const TargetRegisterClass *DstRC = getRegClass(DstTy, DstRB); 811 const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcRB); 812 813 if (!DstRC || !SrcRC) 814 return false; 815 816 // If that's truncation of the value that lives on the vector class and goes 817 // into the floating class, just replace it with copy, as we are able to 818 // select it as a regular move. 
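  // For example, a G_TRUNC of an s128 value on the VECR bank down to s32 or
  // s64 becomes a plain VR128 -> FR32/FR64 copy.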
819 if (canTurnIntoCOPY(DstRC, SrcRC)) 820 return selectTurnIntoCOPY(I, MRI, DstReg, DstRC, SrcReg, SrcRC); 821 822 if (DstRB.getID() != X86::GPRRegBankID) 823 return false; 824 825 unsigned SubIdx; 826 if (DstRC == SrcRC) { 827 // Nothing to be done 828 SubIdx = X86::NoSubRegister; 829 } else if (DstRC == &X86::GR32RegClass) { 830 SubIdx = X86::sub_32bit; 831 } else if (DstRC == &X86::GR16RegClass) { 832 SubIdx = X86::sub_16bit; 833 } else if (DstRC == &X86::GR8RegClass) { 834 SubIdx = X86::sub_8bit; 835 } else { 836 return false; 837 } 838 839 SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubIdx); 840 841 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) || 842 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) { 843 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode()) 844 << "\n"); 845 return false; 846 } 847 848 I.getOperand(1).setSubReg(SubIdx); 849 850 I.setDesc(TII.get(X86::COPY)); 851 return true; 852 } 853 854 bool X86InstructionSelector::selectZext(MachineInstr &I, 855 MachineRegisterInfo &MRI, 856 MachineFunction &MF) const { 857 assert((I.getOpcode() == TargetOpcode::G_ZEXT) && "unexpected instruction"); 858 859 const Register DstReg = I.getOperand(0).getReg(); 860 const Register SrcReg = I.getOperand(1).getReg(); 861 862 const LLT DstTy = MRI.getType(DstReg); 863 const LLT SrcTy = MRI.getType(SrcReg); 864 865 assert(!(SrcTy == LLT::scalar(8) && DstTy == LLT::scalar(16)) && 866 "8=>16 Zext is handled by tablegen"); 867 assert(!(SrcTy == LLT::scalar(8) && DstTy == LLT::scalar(32)) && 868 "8=>32 Zext is handled by tablegen"); 869 assert(!(SrcTy == LLT::scalar(16) && DstTy == LLT::scalar(32)) && 870 "16=>32 Zext is handled by tablegen"); 871 assert(!(SrcTy == LLT::scalar(8) && DstTy == LLT::scalar(64)) && 872 "8=>64 Zext is handled by tablegen"); 873 assert(!(SrcTy == LLT::scalar(16) && DstTy == LLT::scalar(64)) && 874 "16=>64 Zext is handled by tablegen"); 875 assert(!(SrcTy == LLT::scalar(32) && DstTy == LLT::scalar(64)) && 876 "32=>64 Zext is handled by tablegen"); 877 878 if (SrcTy != LLT::scalar(1)) 879 return false; 880 881 unsigned AndOpc; 882 if (DstTy == LLT::scalar(8)) 883 AndOpc = X86::AND8ri; 884 else if (DstTy == LLT::scalar(16)) 885 AndOpc = X86::AND16ri; 886 else if (DstTy == LLT::scalar(32)) 887 AndOpc = X86::AND32ri; 888 else if (DstTy == LLT::scalar(64)) 889 AndOpc = X86::AND64ri32; 890 else 891 return false; 892 893 Register DefReg = SrcReg; 894 if (DstTy != LLT::scalar(8)) { 895 Register ImpDefReg = 896 MRI.createVirtualRegister(getRegClass(DstTy, DstReg, MRI)); 897 BuildMI(*I.getParent(), I, I.getDebugLoc(), 898 TII.get(TargetOpcode::IMPLICIT_DEF), ImpDefReg); 899 900 DefReg = MRI.createVirtualRegister(getRegClass(DstTy, DstReg, MRI)); 901 BuildMI(*I.getParent(), I, I.getDebugLoc(), 902 TII.get(TargetOpcode::INSERT_SUBREG), DefReg) 903 .addReg(ImpDefReg) 904 .addReg(SrcReg) 905 .addImm(X86::sub_8bit); 906 } 907 908 MachineInstr &AndInst = 909 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AndOpc), DstReg) 910 .addReg(DefReg) 911 .addImm(1); 912 913 constrainSelectedInstRegOperands(AndInst, TII, TRI, RBI); 914 915 I.eraseFromParent(); 916 return true; 917 } 918 919 bool X86InstructionSelector::selectAnyext(MachineInstr &I, 920 MachineRegisterInfo &MRI, 921 MachineFunction &MF) const { 922 assert((I.getOpcode() == TargetOpcode::G_ANYEXT) && "unexpected instruction"); 923 924 const Register DstReg = I.getOperand(0).getReg(); 925 const Register SrcReg = I.getOperand(1).getReg(); 926 927 const LLT DstTy = MRI.getType(DstReg); 928 const 
LLT SrcTy = MRI.getType(SrcReg); 929 930 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); 931 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI); 932 933 assert(DstRB.getID() == SrcRB.getID() && 934 "G_ANYEXT input/output on different banks\n"); 935 936 assert(DstTy.getSizeInBits() > SrcTy.getSizeInBits() && 937 "G_ANYEXT incorrect operand size"); 938 939 const TargetRegisterClass *DstRC = getRegClass(DstTy, DstRB); 940 const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcRB); 941 942 // If that's ANY_EXT of the value that lives on the floating class and goes 943 // into the vector class, just replace it with copy, as we are able to select 944 // it as a regular move. 945 if (canTurnIntoCOPY(SrcRC, DstRC)) 946 return selectTurnIntoCOPY(I, MRI, SrcReg, SrcRC, DstReg, DstRC); 947 948 if (DstRB.getID() != X86::GPRRegBankID) 949 return false; 950 951 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) || 952 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) { 953 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode()) 954 << " operand\n"); 955 return false; 956 } 957 958 if (SrcRC == DstRC) { 959 I.setDesc(TII.get(X86::COPY)); 960 return true; 961 } 962 963 BuildMI(*I.getParent(), I, I.getDebugLoc(), 964 TII.get(TargetOpcode::SUBREG_TO_REG)) 965 .addDef(DstReg) 966 .addImm(0) 967 .addReg(SrcReg) 968 .addImm(getSubRegIndex(SrcRC)); 969 970 I.eraseFromParent(); 971 return true; 972 } 973 974 bool X86InstructionSelector::selectCmp(MachineInstr &I, 975 MachineRegisterInfo &MRI, 976 MachineFunction &MF) const { 977 assert((I.getOpcode() == TargetOpcode::G_ICMP) && "unexpected instruction"); 978 979 X86::CondCode CC; 980 bool SwapArgs; 981 std::tie(CC, SwapArgs) = X86::getX86ConditionCode( 982 (CmpInst::Predicate)I.getOperand(1).getPredicate()); 983 984 Register LHS = I.getOperand(2).getReg(); 985 Register RHS = I.getOperand(3).getReg(); 986 987 if (SwapArgs) 988 std::swap(LHS, RHS); 989 990 unsigned OpCmp; 991 LLT Ty = MRI.getType(LHS); 992 993 switch (Ty.getSizeInBits()) { 994 default: 995 return false; 996 case 8: 997 OpCmp = X86::CMP8rr; 998 break; 999 case 16: 1000 OpCmp = X86::CMP16rr; 1001 break; 1002 case 32: 1003 OpCmp = X86::CMP32rr; 1004 break; 1005 case 64: 1006 OpCmp = X86::CMP64rr; 1007 break; 1008 } 1009 1010 MachineInstr &CmpInst = 1011 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpCmp)) 1012 .addReg(LHS) 1013 .addReg(RHS); 1014 1015 MachineInstr &SetInst = *BuildMI(*I.getParent(), I, I.getDebugLoc(), 1016 TII.get(X86::SETCCr), I.getOperand(0).getReg()).addImm(CC); 1017 1018 constrainSelectedInstRegOperands(CmpInst, TII, TRI, RBI); 1019 constrainSelectedInstRegOperands(SetInst, TII, TRI, RBI); 1020 1021 I.eraseFromParent(); 1022 return true; 1023 } 1024 1025 bool X86InstructionSelector::selectFCmp(MachineInstr &I, 1026 MachineRegisterInfo &MRI, 1027 MachineFunction &MF) const { 1028 assert((I.getOpcode() == TargetOpcode::G_FCMP) && "unexpected instruction"); 1029 1030 Register LhsReg = I.getOperand(2).getReg(); 1031 Register RhsReg = I.getOperand(3).getReg(); 1032 CmpInst::Predicate Predicate = 1033 (CmpInst::Predicate)I.getOperand(1).getPredicate(); 1034 1035 // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction. 
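  // UCOMISS/UCOMISD set ZF/PF/CF: OEQ is ZF==1 && PF==0 (SETE AND SETNP),
  // while UNE is ZF==0 || PF==1 (SETNE OR SETP), hence the two-SETcc
  // sequences below.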
1036 static const uint16_t SETFOpcTable[2][3] = { 1037 {X86::COND_E, X86::COND_NP, X86::AND8rr}, 1038 {X86::COND_NE, X86::COND_P, X86::OR8rr}}; 1039 const uint16_t *SETFOpc = nullptr; 1040 switch (Predicate) { 1041 default: 1042 break; 1043 case CmpInst::FCMP_OEQ: 1044 SETFOpc = &SETFOpcTable[0][0]; 1045 break; 1046 case CmpInst::FCMP_UNE: 1047 SETFOpc = &SETFOpcTable[1][0]; 1048 break; 1049 } 1050 1051 // Compute the opcode for the CMP instruction. 1052 unsigned OpCmp; 1053 LLT Ty = MRI.getType(LhsReg); 1054 switch (Ty.getSizeInBits()) { 1055 default: 1056 return false; 1057 case 32: 1058 OpCmp = X86::UCOMISSrr; 1059 break; 1060 case 64: 1061 OpCmp = X86::UCOMISDrr; 1062 break; 1063 } 1064 1065 Register ResultReg = I.getOperand(0).getReg(); 1066 RBI.constrainGenericRegister( 1067 ResultReg, 1068 *getRegClass(LLT::scalar(8), *RBI.getRegBank(ResultReg, MRI, TRI)), MRI); 1069 if (SETFOpc) { 1070 MachineInstr &CmpInst = 1071 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpCmp)) 1072 .addReg(LhsReg) 1073 .addReg(RhsReg); 1074 1075 Register FlagReg1 = MRI.createVirtualRegister(&X86::GR8RegClass); 1076 Register FlagReg2 = MRI.createVirtualRegister(&X86::GR8RegClass); 1077 MachineInstr &Set1 = *BuildMI(*I.getParent(), I, I.getDebugLoc(), 1078 TII.get(X86::SETCCr), FlagReg1).addImm(SETFOpc[0]); 1079 MachineInstr &Set2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(), 1080 TII.get(X86::SETCCr), FlagReg2).addImm(SETFOpc[1]); 1081 MachineInstr &Set3 = *BuildMI(*I.getParent(), I, I.getDebugLoc(), 1082 TII.get(SETFOpc[2]), ResultReg) 1083 .addReg(FlagReg1) 1084 .addReg(FlagReg2); 1085 constrainSelectedInstRegOperands(CmpInst, TII, TRI, RBI); 1086 constrainSelectedInstRegOperands(Set1, TII, TRI, RBI); 1087 constrainSelectedInstRegOperands(Set2, TII, TRI, RBI); 1088 constrainSelectedInstRegOperands(Set3, TII, TRI, RBI); 1089 1090 I.eraseFromParent(); 1091 return true; 1092 } 1093 1094 X86::CondCode CC; 1095 bool SwapArgs; 1096 std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate); 1097 assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code."); 1098 1099 if (SwapArgs) 1100 std::swap(LhsReg, RhsReg); 1101 1102 // Emit a compare of LHS/RHS. 
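  // Every remaining predicate maps onto a single condition code (possibly
  // with swapped operands), so one UCOMIS* followed by one SETcc suffices.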
1103 MachineInstr &CmpInst = 1104 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpCmp)) 1105 .addReg(LhsReg) 1106 .addReg(RhsReg); 1107 1108 MachineInstr &Set = 1109 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::SETCCr), ResultReg).addImm(CC); 1110 constrainSelectedInstRegOperands(CmpInst, TII, TRI, RBI); 1111 constrainSelectedInstRegOperands(Set, TII, TRI, RBI); 1112 I.eraseFromParent(); 1113 return true; 1114 } 1115 1116 bool X86InstructionSelector::selectUAddSub(MachineInstr &I, 1117 MachineRegisterInfo &MRI, 1118 MachineFunction &MF) const { 1119 assert((I.getOpcode() == TargetOpcode::G_UADDE || 1120 I.getOpcode() == TargetOpcode::G_UADDO || 1121 I.getOpcode() == TargetOpcode::G_USUBE || 1122 I.getOpcode() == TargetOpcode::G_USUBO) && 1123 "unexpected instruction"); 1124 1125 const Register DstReg = I.getOperand(0).getReg(); 1126 const Register CarryOutReg = I.getOperand(1).getReg(); 1127 const Register Op0Reg = I.getOperand(2).getReg(); 1128 const Register Op1Reg = I.getOperand(3).getReg(); 1129 bool IsSub = I.getOpcode() == TargetOpcode::G_USUBE || 1130 I.getOpcode() == TargetOpcode::G_USUBO; 1131 bool HasCarryIn = I.getOpcode() == TargetOpcode::G_UADDE || 1132 I.getOpcode() == TargetOpcode::G_USUBE; 1133 1134 const LLT DstTy = MRI.getType(DstReg); 1135 assert(DstTy.isScalar() && "selectUAddSub only supported for scalar types"); 1136 1137 // TODO: Handle immediate argument variants? 1138 unsigned OpADC, OpADD, OpSBB, OpSUB; 1139 switch (DstTy.getSizeInBits()) { 1140 case 8: 1141 OpADC = X86::ADC8rr; 1142 OpADD = X86::ADD8rr; 1143 OpSBB = X86::SBB8rr; 1144 OpSUB = X86::SUB8rr; 1145 break; 1146 case 16: 1147 OpADC = X86::ADC16rr; 1148 OpADD = X86::ADD16rr; 1149 OpSBB = X86::SBB16rr; 1150 OpSUB = X86::SUB16rr; 1151 break; 1152 case 32: 1153 OpADC = X86::ADC32rr; 1154 OpADD = X86::ADD32rr; 1155 OpSBB = X86::SBB32rr; 1156 OpSUB = X86::SUB32rr; 1157 break; 1158 case 64: 1159 OpADC = X86::ADC64rr; 1160 OpADD = X86::ADD64rr; 1161 OpSBB = X86::SBB64rr; 1162 OpSUB = X86::SUB64rr; 1163 break; 1164 default: 1165 llvm_unreachable("selectUAddSub unsupported type."); 1166 } 1167 1168 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); 1169 const TargetRegisterClass *DstRC = getRegClass(DstTy, DstRB); 1170 1171 unsigned Opcode = IsSub ? OpSUB : OpADD; 1172 1173 // G_UADDE/G_USUBE - find CarryIn def instruction. 1174 if (HasCarryIn) { 1175 Register CarryInReg = I.getOperand(4).getReg(); 1176 MachineInstr *Def = MRI.getVRegDef(CarryInReg); 1177 while (Def->getOpcode() == TargetOpcode::G_TRUNC) { 1178 CarryInReg = Def->getOperand(1).getReg(); 1179 Def = MRI.getVRegDef(CarryInReg); 1180 } 1181 1182 // TODO - handle more CF generating instructions 1183 if (Def->getOpcode() == TargetOpcode::G_UADDE || 1184 Def->getOpcode() == TargetOpcode::G_UADDO || 1185 Def->getOpcode() == TargetOpcode::G_USUBE || 1186 Def->getOpcode() == TargetOpcode::G_USUBO) { 1187 // carry set by prev ADD/SUB. 1188 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY), 1189 X86::EFLAGS) 1190 .addReg(CarryInReg); 1191 1192 if (!RBI.constrainGenericRegister(CarryInReg, *DstRC, MRI)) 1193 return false; 1194 1195 Opcode = IsSub ? OpSBB : OpADC; 1196 } else if (auto val = getIConstantVRegVal(CarryInReg, MRI)) { 1197 // carry is constant, support only 0. 1198 if (*val != 0) 1199 return false; 1200 1201 Opcode = IsSub ? 
OpSUB : OpADD;
    } else
      return false;
  }

  MachineInstr &Inst =
      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Opcode), DstReg)
           .addReg(Op0Reg)
           .addReg(Op1Reg);

  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY), CarryOutReg)
      .addReg(X86::EFLAGS);

  if (!constrainSelectedInstRegOperands(Inst, TII, TRI, RBI) ||
      !RBI.constrainGenericRegister(CarryOutReg, *DstRC, MRI))
    return false;

  I.eraseFromParent();
  return true;
}

bool X86InstructionSelector::selectExtract(MachineInstr &I,
                                           MachineRegisterInfo &MRI,
                                           MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_EXTRACT) &&
         "unexpected instruction");

  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();
  int64_t Index = I.getOperand(2).getImm();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);

  // For now, handle vector types only.
  if (!DstTy.isVector())
    return false;

  if (Index % DstTy.getSizeInBits() != 0)
    return false; // Not a subvector extract.

  if (Index == 0) {
    // Replace by extract subreg copy.
    if (!emitExtractSubreg(DstReg, SrcReg, I, MRI, MF))
      return false;

    I.eraseFromParent();
    return true;
  }

  bool HasAVX = STI.hasAVX();
  bool HasAVX512 = STI.hasAVX512();
  bool HasVLX = STI.hasVLX();

  if (SrcTy.getSizeInBits() == 256 && DstTy.getSizeInBits() == 128) {
    if (HasVLX)
      I.setDesc(TII.get(X86::VEXTRACTF32X4Z256rri));
    else if (HasAVX)
      I.setDesc(TII.get(X86::VEXTRACTF128rri));
    else
      return false;
  } else if (SrcTy.getSizeInBits() == 512 && HasAVX512) {
    if (DstTy.getSizeInBits() == 128)
      I.setDesc(TII.get(X86::VEXTRACTF32X4Zrri));
    else if (DstTy.getSizeInBits() == 256)
      I.setDesc(TII.get(X86::VEXTRACTF64X4Zrri));
    else
      return false;
  } else
    return false;

  // Convert to X86 VEXTRACT immediate.
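  // The hardware immediate counts destination-sized lanes, e.g. extracting
  // bits [255:128] of a 256-bit source into a 128-bit destination uses
  // immediate 1.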
  Index = Index / DstTy.getSizeInBits();
  I.getOperand(2).setImm(Index);

  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}

bool X86InstructionSelector::emitExtractSubreg(unsigned DstReg, unsigned SrcReg,
                                               MachineInstr &I,
                                               MachineRegisterInfo &MRI,
                                               MachineFunction &MF) const {
  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);
  unsigned SubIdx = X86::NoSubRegister;

  if (!DstTy.isVector() || !SrcTy.isVector())
    return false;

  assert(SrcTy.getSizeInBits() > DstTy.getSizeInBits() &&
         "Incorrect Src/Dst register size");

  if (DstTy.getSizeInBits() == 128)
    SubIdx = X86::sub_xmm;
  else if (DstTy.getSizeInBits() == 256)
    SubIdx = X86::sub_ymm;
  else
    return false;

  const TargetRegisterClass *DstRC = getRegClass(DstTy, DstReg, MRI);
  const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcReg, MRI);

  SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubIdx);

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain EXTRACT_SUBREG\n");
    return false;
  }

  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY), DstReg)
      .addReg(SrcReg, 0, SubIdx);

  return true;
}

bool X86InstructionSelector::emitInsertSubreg(unsigned DstReg, unsigned SrcReg,
                                              MachineInstr &I,
                                              MachineRegisterInfo &MRI,
                                              MachineFunction &MF) const {
  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);
  unsigned SubIdx = X86::NoSubRegister;

  // TODO: support scalar types
  if (!DstTy.isVector() || !SrcTy.isVector())
    return false;

  assert(SrcTy.getSizeInBits() < DstTy.getSizeInBits() &&
         "Incorrect Src/Dst register size");

  if (SrcTy.getSizeInBits() == 128)
    SubIdx = X86::sub_xmm;
  else if (SrcTy.getSizeInBits() == 256)
    SubIdx = X86::sub_ymm;
  else
    return false;

  const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcReg, MRI);
  const TargetRegisterClass *DstRC = getRegClass(DstTy, DstReg, MRI);

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain INSERT_SUBREG\n");
    return false;
  }

  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY))
      .addReg(DstReg, RegState::DefineNoRead, SubIdx)
      .addReg(SrcReg);

  return true;
}

bool X86InstructionSelector::selectInsert(MachineInstr &I,
                                          MachineRegisterInfo &MRI,
                                          MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_INSERT) && "unexpected instruction");

  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();
  const Register InsertReg = I.getOperand(2).getReg();
  int64_t Index = I.getOperand(3).getImm();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT InsertRegTy = MRI.getType(InsertReg);

  // For now, handle vector types only.
  if (!DstTy.isVector())
    return false;

  if (Index % InsertRegTy.getSizeInBits() != 0)
    return false; // Not a subvector insert.

  if (Index == 0 && MRI.getVRegDef(SrcReg)->isImplicitDef()) {
    // Replace by subreg copy.
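    // The source is an IMPLICIT_DEF, so defining just the low subregister
    // with a subregister-defining COPY (emitInsertSubreg above) fully
    // describes the result.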
1377 if (!emitInsertSubreg(DstReg, InsertReg, I, MRI, MF)) 1378 return false; 1379 1380 I.eraseFromParent(); 1381 return true; 1382 } 1383 1384 bool HasAVX = STI.hasAVX(); 1385 bool HasAVX512 = STI.hasAVX512(); 1386 bool HasVLX = STI.hasVLX(); 1387 1388 if (DstTy.getSizeInBits() == 256 && InsertRegTy.getSizeInBits() == 128) { 1389 if (HasVLX) 1390 I.setDesc(TII.get(X86::VINSERTF32X4Z256rri)); 1391 else if (HasAVX) 1392 I.setDesc(TII.get(X86::VINSERTF128rri)); 1393 else 1394 return false; 1395 } else if (DstTy.getSizeInBits() == 512 && HasAVX512) { 1396 if (InsertRegTy.getSizeInBits() == 128) 1397 I.setDesc(TII.get(X86::VINSERTF32X4Zrri)); 1398 else if (InsertRegTy.getSizeInBits() == 256) 1399 I.setDesc(TII.get(X86::VINSERTF64X4Zrri)); 1400 else 1401 return false; 1402 } else 1403 return false; 1404 1405 // Convert to X86 VINSERT immediate. 1406 Index = Index / InsertRegTy.getSizeInBits(); 1407 1408 I.getOperand(3).setImm(Index); 1409 1410 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 1411 } 1412 1413 bool X86InstructionSelector::selectUnmergeValues( 1414 MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) { 1415 assert((I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES) && 1416 "unexpected instruction"); 1417 1418 // Split to extracts. 1419 unsigned NumDefs = I.getNumOperands() - 1; 1420 Register SrcReg = I.getOperand(NumDefs).getReg(); 1421 unsigned DefSize = MRI.getType(I.getOperand(0).getReg()).getSizeInBits(); 1422 1423 for (unsigned Idx = 0; Idx < NumDefs; ++Idx) { 1424 MachineInstr &ExtrInst = 1425 *BuildMI(*I.getParent(), I, I.getDebugLoc(), 1426 TII.get(TargetOpcode::G_EXTRACT), I.getOperand(Idx).getReg()) 1427 .addReg(SrcReg) 1428 .addImm(Idx * DefSize); 1429 1430 if (!select(ExtrInst)) 1431 return false; 1432 } 1433 1434 I.eraseFromParent(); 1435 return true; 1436 } 1437 1438 bool X86InstructionSelector::selectMergeValues( 1439 MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) { 1440 assert((I.getOpcode() == TargetOpcode::G_MERGE_VALUES || 1441 I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS) && 1442 "unexpected instruction"); 1443 1444 // Split to inserts. 1445 Register DstReg = I.getOperand(0).getReg(); 1446 Register SrcReg0 = I.getOperand(1).getReg(); 1447 1448 const LLT DstTy = MRI.getType(DstReg); 1449 const LLT SrcTy = MRI.getType(SrcReg0); 1450 unsigned SrcSize = SrcTy.getSizeInBits(); 1451 1452 const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI); 1453 1454 // For the first src use insertSubReg. 
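  // The remaining sources are then chained in with G_INSERT instructions at
  // increasing offsets and re-selected below.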
1455 Register DefReg = MRI.createGenericVirtualRegister(DstTy); 1456 MRI.setRegBank(DefReg, RegBank); 1457 if (!emitInsertSubreg(DefReg, I.getOperand(1).getReg(), I, MRI, MF)) 1458 return false; 1459 1460 for (unsigned Idx = 2; Idx < I.getNumOperands(); ++Idx) { 1461 Register Tmp = MRI.createGenericVirtualRegister(DstTy); 1462 MRI.setRegBank(Tmp, RegBank); 1463 1464 MachineInstr &InsertInst = *BuildMI(*I.getParent(), I, I.getDebugLoc(), 1465 TII.get(TargetOpcode::G_INSERT), Tmp) 1466 .addReg(DefReg) 1467 .addReg(I.getOperand(Idx).getReg()) 1468 .addImm((Idx - 1) * SrcSize); 1469 1470 DefReg = Tmp; 1471 1472 if (!select(InsertInst)) 1473 return false; 1474 } 1475 1476 MachineInstr &CopyInst = *BuildMI(*I.getParent(), I, I.getDebugLoc(), 1477 TII.get(TargetOpcode::COPY), DstReg) 1478 .addReg(DefReg); 1479 1480 if (!select(CopyInst)) 1481 return false; 1482 1483 I.eraseFromParent(); 1484 return true; 1485 } 1486 1487 bool X86InstructionSelector::selectCondBranch(MachineInstr &I, 1488 MachineRegisterInfo &MRI, 1489 MachineFunction &MF) const { 1490 assert((I.getOpcode() == TargetOpcode::G_BRCOND) && "unexpected instruction"); 1491 1492 const Register CondReg = I.getOperand(0).getReg(); 1493 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB(); 1494 1495 MachineInstr &TestInst = 1496 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::TEST8ri)) 1497 .addReg(CondReg) 1498 .addImm(1); 1499 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::JCC_1)) 1500 .addMBB(DestMBB).addImm(X86::COND_NE); 1501 1502 constrainSelectedInstRegOperands(TestInst, TII, TRI, RBI); 1503 1504 I.eraseFromParent(); 1505 return true; 1506 } 1507 1508 bool X86InstructionSelector::materializeFP(MachineInstr &I, 1509 MachineRegisterInfo &MRI, 1510 MachineFunction &MF) const { 1511 assert((I.getOpcode() == TargetOpcode::G_FCONSTANT) && 1512 "unexpected instruction"); 1513 1514 // Can't handle alternate code models yet. 1515 CodeModel::Model CM = TM.getCodeModel(); 1516 if (CM != CodeModel::Small && CM != CodeModel::Large) 1517 return false; 1518 1519 const Register DstReg = I.getOperand(0).getReg(); 1520 const LLT DstTy = MRI.getType(DstReg); 1521 const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI); 1522 // Create the load from the constant pool. 1523 const ConstantFP *CFP = I.getOperand(1).getFPImm(); 1524 const auto &DL = MF.getDataLayout(); 1525 Align Alignment = DL.getPrefTypeAlign(CFP->getType()); 1526 const DebugLoc &DbgLoc = I.getDebugLoc(); 1527 1528 unsigned Opc = 1529 getLoadStoreOp(DstTy, RegBank, TargetOpcode::G_LOAD, Alignment); 1530 1531 unsigned CPI = MF.getConstantPool()->getConstantPoolIndex(CFP, Alignment); 1532 MachineInstr *LoadInst = nullptr; 1533 unsigned char OpFlag = STI.classifyLocalReference(nullptr); 1534 1535 if (CM == CodeModel::Large && STI.is64Bit()) { 1536 // Under X86-64 non-small code model, GV (and friends) are 64-bits, so 1537 // they cannot be folded into immediate fields. 
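    // Materialize the constant-pool address into a register first and load
    // through it.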
    Register AddrReg = MRI.createVirtualRegister(&X86::GR64RegClass);
    BuildMI(*I.getParent(), I, DbgLoc, TII.get(X86::MOV64ri), AddrReg)
        .addConstantPoolIndex(CPI, 0, OpFlag);

    MachineMemOperand *MMO = MF.getMachineMemOperand(
        MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad,
        LLT::pointer(0, DL.getPointerSizeInBits()), Alignment);

    LoadInst =
        addDirectMem(BuildMI(*I.getParent(), I, DbgLoc, TII.get(Opc), DstReg),
                     AddrReg)
            .addMemOperand(MMO);

  } else if (CM == CodeModel::Small || !STI.is64Bit()) {
    // Handle the case when globals fit in our immediate field.
    // This is always true for X86-32, and true for X86-64 in -mcmodel=small
    // mode.

    // x86-32 PIC requires a PIC base register for constant pools.
    unsigned PICBase = 0;
    if (OpFlag == X86II::MO_PIC_BASE_OFFSET || OpFlag == X86II::MO_GOTOFF) {
      // PICBase can be allocated by TII.getGlobalBaseReg(&MF).
      // In DAG ISel, the code that initializes it is generated by the CGBR
      // pass.
      return false; // TODO: support this mode.
    } else if (STI.is64Bit() && TM.getCodeModel() == CodeModel::Small)
      PICBase = X86::RIP;

    LoadInst = addConstantPoolReference(
        BuildMI(*I.getParent(), I, DbgLoc, TII.get(Opc), DstReg), CPI, PICBase,
        OpFlag);
  } else
    return false;

  constrainSelectedInstRegOperands(*LoadInst, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

bool X86InstructionSelector::selectImplicitDefOrPHI(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert((I.getOpcode() == TargetOpcode::G_IMPLICIT_DEF ||
          I.getOpcode() == TargetOpcode::G_PHI) &&
         "unexpected instruction");

  Register DstReg = I.getOperand(0).getReg();

  if (!MRI.getRegClassOrNull(DstReg)) {
    const LLT DstTy = MRI.getType(DstReg);
    const TargetRegisterClass *RC = getRegClass(DstTy, DstReg, MRI);

    if (!RBI.constrainGenericRegister(DstReg, *RC, MRI)) {
      LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                        << " operand\n");
      return false;
    }
  }

  if (I.getOpcode() == TargetOpcode::G_IMPLICIT_DEF)
    I.setDesc(TII.get(X86::IMPLICIT_DEF));
  else
    I.setDesc(TII.get(X86::PHI));

  return true;
}

bool X86InstructionSelector::selectMulDivRem(MachineInstr &I,
                                             MachineRegisterInfo &MRI,
                                             MachineFunction &MF) const {
  // The implementation of this function is adapted from X86FastISel.
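  // x86's one-operand MUL/IMUL/DIV/IDIV forms use fixed registers: the low
  // half of the input/result lives in the AL/AX/EAX/RAX family and the high
  // half (or remainder) in AH/DX/EDX/RDX, as encoded in OpTable below.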
1607 assert((I.getOpcode() == TargetOpcode::G_MUL || 1608 I.getOpcode() == TargetOpcode::G_SMULH || 1609 I.getOpcode() == TargetOpcode::G_UMULH || 1610 I.getOpcode() == TargetOpcode::G_SDIV || 1611 I.getOpcode() == TargetOpcode::G_SREM || 1612 I.getOpcode() == TargetOpcode::G_UDIV || 1613 I.getOpcode() == TargetOpcode::G_UREM) && 1614 "unexpected instruction"); 1615 1616 const Register DstReg = I.getOperand(0).getReg(); 1617 const Register Op1Reg = I.getOperand(1).getReg(); 1618 const Register Op2Reg = I.getOperand(2).getReg(); 1619 1620 const LLT RegTy = MRI.getType(DstReg); 1621 assert(RegTy == MRI.getType(Op1Reg) && RegTy == MRI.getType(Op2Reg) && 1622 "Arguments and return value types must match"); 1623 1624 const RegisterBank *RegRB = RBI.getRegBank(DstReg, MRI, TRI); 1625 if (!RegRB || RegRB->getID() != X86::GPRRegBankID) 1626 return false; 1627 1628 const static unsigned NumTypes = 4; // i8, i16, i32, i64 1629 const static unsigned NumOps = 7; // SDiv/SRem/UDiv/URem/Mul/SMulH/UMulh 1630 const static bool S = true; // IsSigned 1631 const static bool U = false; // !IsSigned 1632 const static unsigned Copy = TargetOpcode::COPY; 1633 1634 // For the X86 IDIV instruction, in most cases the dividend 1635 // (numerator) must be in a specific register pair highreg:lowreg, 1636 // producing the quotient in lowreg and the remainder in highreg. 1637 // For most data types, to set up the instruction, the dividend is 1638 // copied into lowreg, and lowreg is sign-extended into highreg. The 1639 // exception is i8, where the dividend is defined as a single register rather 1640 // than a register pair, and we therefore directly sign-extend the dividend 1641 // into lowreg, instead of copying, and ignore the highreg. 1642 const static struct MulDivRemEntry { 1643 // The following portion depends only on the data type. 1644 unsigned SizeInBits; 1645 unsigned LowInReg; // low part of the register pair 1646 unsigned HighInReg; // high part of the register pair 1647 // The following portion depends on both the data type and the operation. 1648 struct MulDivRemResult { 1649 unsigned OpMulDivRem; // The specific MUL/DIV opcode to use. 1650 unsigned OpSignExtend; // Opcode for sign-extending lowreg into 1651 // highreg, or copying a zero into highreg. 1652 unsigned OpCopy; // Opcode for copying dividend into lowreg, or 1653 // zero/sign-extending into lowreg for i8. 1654 unsigned ResultReg; // Register containing the desired result. 1655 bool IsOpSigned; // Whether to use signed or unsigned form. 
    } ResultTable[NumOps];
  } OpTable[NumTypes] = {
      {8,
       X86::AX,
       0,
       {
           {X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AL, S}, // SDiv
           {X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AH, S}, // SRem
           {X86::DIV8r, 0, X86::MOVZX16rr8, X86::AL, U},  // UDiv
           {X86::DIV8r, 0, X86::MOVZX16rr8, X86::AH, U},  // URem
           {X86::IMUL8r, 0, X86::MOVSX16rr8, X86::AL, S}, // Mul
           {X86::IMUL8r, 0, X86::MOVSX16rr8, X86::AH, S}, // SMulH
           {X86::MUL8r, 0, X86::MOVZX16rr8, X86::AH, U},  // UMulH
       }}, // i8
      {16,
       X86::AX,
       X86::DX,
       {
           {X86::IDIV16r, X86::CWD, Copy, X86::AX, S},     // SDiv
           {X86::IDIV16r, X86::CWD, Copy, X86::DX, S},     // SRem
           {X86::DIV16r, X86::MOV32r0, Copy, X86::AX, U},  // UDiv
           {X86::DIV16r, X86::MOV32r0, Copy, X86::DX, U},  // URem
           {X86::IMUL16r, X86::MOV32r0, Copy, X86::AX, S}, // Mul
           {X86::IMUL16r, X86::MOV32r0, Copy, X86::DX, S}, // SMulH
           {X86::MUL16r, X86::MOV32r0, Copy, X86::DX, U},  // UMulH
       }}, // i16
      {32,
       X86::EAX,
       X86::EDX,
       {
           {X86::IDIV32r, X86::CDQ, Copy, X86::EAX, S},     // SDiv
           {X86::IDIV32r, X86::CDQ, Copy, X86::EDX, S},     // SRem
           {X86::DIV32r, X86::MOV32r0, Copy, X86::EAX, U},  // UDiv
           {X86::DIV32r, X86::MOV32r0, Copy, X86::EDX, U},  // URem
           {X86::IMUL32r, X86::MOV32r0, Copy, X86::EAX, S}, // Mul
           {X86::IMUL32r, X86::MOV32r0, Copy, X86::EDX, S}, // SMulH
           {X86::MUL32r, X86::MOV32r0, Copy, X86::EDX, U},  // UMulH
       }}, // i32
      {64,
       X86::RAX,
       X86::RDX,
       {
           {X86::IDIV64r, X86::CQO, Copy, X86::RAX, S},     // SDiv
           {X86::IDIV64r, X86::CQO, Copy, X86::RDX, S},     // SRem
           {X86::DIV64r, X86::MOV32r0, Copy, X86::RAX, U},  // UDiv
           {X86::DIV64r, X86::MOV32r0, Copy, X86::RDX, U},  // URem
           {X86::IMUL64r, X86::MOV32r0, Copy, X86::RAX, S}, // Mul
           {X86::IMUL64r, X86::MOV32r0, Copy, X86::RDX, S}, // SMulH
           {X86::MUL64r, X86::MOV32r0, Copy, X86::RDX, U},  // UMulH
       }}, // i64
  };

  auto OpEntryIt = llvm::find_if(OpTable, [RegTy](const MulDivRemEntry &El) {
    return El.SizeInBits == RegTy.getSizeInBits();
  });
  if (OpEntryIt == std::end(OpTable))
    return false;

  unsigned OpIndex;
  switch (I.getOpcode()) {
  default:
    llvm_unreachable("Unexpected mul/div/rem opcode");
  case TargetOpcode::G_SDIV:
    OpIndex = 0;
    break;
  case TargetOpcode::G_SREM:
    OpIndex = 1;
    break;
  case TargetOpcode::G_UDIV:
    OpIndex = 2;
    break;
  case TargetOpcode::G_UREM:
    OpIndex = 3;
    break;
  case TargetOpcode::G_MUL:
    OpIndex = 4;
    break;
  case TargetOpcode::G_SMULH:
    OpIndex = 5;
    break;
  case TargetOpcode::G_UMULH:
    OpIndex = 6;
    break;
  }

  const MulDivRemEntry &TypeEntry = *OpEntryIt;
  const MulDivRemEntry::MulDivRemResult &OpEntry =
      TypeEntry.ResultTable[OpIndex];

  const TargetRegisterClass *RegRC = getRegClass(RegTy, *RegRB);
  if (!RBI.constrainGenericRegister(Op1Reg, *RegRC, MRI) ||
      !RBI.constrainGenericRegister(Op2Reg, *RegRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *RegRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                      << " operand\n");
    return false;
  }

  // Move op1 into low-order input register.
  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpEntry.OpCopy),
          TypeEntry.LowInReg)
      .addReg(Op1Reg);

  // Zero-extend or sign-extend into high-order input register.
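  // For signed operations this is CWD/CDQ/CQO, which sign-extends AX/EAX/RAX
  // into DX/EDX/RDX implicitly. For unsigned operations a zero is materialized
  // with MOV32r0 and copied (or SUBREG_TO_REG'd, for i64) into the high-order
  // register below. i8 operations skip this step (OpSignExtend is 0) because
  // the MOVSX/MOVZX OpCopy above already extended the dividend into AX.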
  if (OpEntry.OpSignExtend) {
    if (OpEntry.IsOpSigned)
      BuildMI(*I.getParent(), I, I.getDebugLoc(),
              TII.get(OpEntry.OpSignExtend));
    else {
      Register Zero32 = MRI.createVirtualRegister(&X86::GR32RegClass);
      BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::MOV32r0),
              Zero32);

      // Copy the zero into the appropriate sub/super/identical physical
      // register. Unfortunately the operations needed are not uniform enough
      // to fit neatly into the table above.
      if (RegTy.getSizeInBits() == 16) {
        BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Copy),
                TypeEntry.HighInReg)
            .addReg(Zero32, 0, X86::sub_16bit);
      } else if (RegTy.getSizeInBits() == 32) {
        BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Copy),
                TypeEntry.HighInReg)
            .addReg(Zero32);
      } else if (RegTy.getSizeInBits() == 64) {
        BuildMI(*I.getParent(), I, I.getDebugLoc(),
                TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg)
            .addImm(0)
            .addReg(Zero32)
            .addImm(X86::sub_32bit);
      }
    }
  }

  // Generate the DIV/IDIV/MUL/IMUL instruction.
  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpEntry.OpMulDivRem))
      .addReg(Op2Reg);

  // For i8 remainder, we can't reference ah directly, as we'll end
  // up with bogus copies like %r9b = COPY %ah. Reference ax
  // instead to prevent ah references in a rex instruction.
  //
  // The current assumption of the fast register allocator is that isel
  // won't generate explicit references to the GR8_NOREX registers. If
  // the allocator and/or the backend get enhanced to be more robust in
  // that regard, this can be, and should be, removed.
  if (OpEntry.ResultReg == X86::AH && STI.is64Bit()) {
    Register SourceSuperReg = MRI.createVirtualRegister(&X86::GR16RegClass);
    Register ResultSuperReg = MRI.createVirtualRegister(&X86::GR16RegClass);
    BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Copy), SourceSuperReg)
        .addReg(X86::AX);

    // Shift AX right by 8 bits instead of using AH.
    BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::SHR16ri),
            ResultSuperReg)
        .addReg(SourceSuperReg)
        .addImm(8);

    // Now reference the 8-bit subreg of the result.
    BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::COPY),
            DstReg)
        .addReg(ResultSuperReg, 0, X86::sub_8bit);
  } else {
    BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::COPY),
            DstReg)
        .addReg(OpEntry.ResultReg);
  }
  I.eraseFromParent();

  return true;
}

bool X86InstructionSelector::selectSelect(MachineInstr &I,
                                          MachineRegisterInfo &MRI,
                                          MachineFunction &MF) const {
  GSelect &Sel = cast<GSelect>(I);
  unsigned DstReg = Sel.getReg(0);
  BuildMI(*Sel.getParent(), Sel, Sel.getDebugLoc(), TII.get(X86::TEST32rr))
      .addReg(Sel.getCondReg())
      .addReg(Sel.getCondReg());

  unsigned OpCmp;
  LLT Ty = MRI.getType(DstReg);
  switch (Ty.getSizeInBits()) {
  default:
    return false;
  case 8:
    OpCmp = X86::CMOV_GR8;
    break;
  case 16:
    OpCmp = STI.canUseCMOV() ? X86::CMOV16rr : X86::CMOV_GR16;
    break;
  case 32:
    OpCmp = STI.canUseCMOV() ? X86::CMOV32rr : X86::CMOV_GR32;
    break;
  case 64:
    assert(STI.is64Bit() && STI.canUseCMOV());
    OpCmp = X86::CMOV64rr;
    break;
  }
  BuildMI(*Sel.getParent(), Sel, Sel.getDebugLoc(), TII.get(OpCmp), DstReg)
      .addReg(Sel.getTrueReg())
      .addReg(Sel.getFalseReg())
      .addImm(X86::COND_E);

  const TargetRegisterClass *DstRC = getRegClass(Ty, DstReg, MRI);
  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain CMOV\n");
    return false;
  }

  Sel.eraseFromParent();
  return true;
}

InstructionSelector *
llvm::createX86InstructionSelector(const X86TargetMachine &TM,
                                   const X86Subtarget &Subtarget,
                                   const X86RegisterBankInfo &RBI) {
  return new X86InstructionSelector(TM, Subtarget, RBI);
}
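
// Note: this factory is typically invoked while the subtarget wires up its
// GlobalISel support (e.g. from X86Subtarget), so the selector is constructed
// alongside the register bank info it depends on.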