10b57cec5SDimitry Andric //===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric /// \file 100b57cec5SDimitry Andric /// Custom DAG lowering for R600 110b57cec5SDimitry Andric // 120b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 130b57cec5SDimitry Andric 140b57cec5SDimitry Andric #include "R600ISelLowering.h" 15e8d8bef9SDimitry Andric #include "AMDGPU.h" 16349cc55cSDimitry Andric #include "MCTargetDesc/R600MCTargetDesc.h" 170b57cec5SDimitry Andric #include "R600Defines.h" 180b57cec5SDimitry Andric #include "R600InstrInfo.h" 190b57cec5SDimitry Andric #include "R600MachineFunctionInfo.h" 20e8d8bef9SDimitry Andric #include "R600Subtarget.h" 21bdd1243dSDimitry Andric #include "R600TargetMachine.h" 22349cc55cSDimitry Andric #include "llvm/CodeGen/MachineFunction.h" 23e8d8bef9SDimitry Andric #include "llvm/IR/IntrinsicsAMDGPU.h" 24480093f4SDimitry Andric #include "llvm/IR/IntrinsicsR600.h" 250b57cec5SDimitry Andric 260b57cec5SDimitry Andric using namespace llvm; 270b57cec5SDimitry Andric 280b57cec5SDimitry Andric #include "R600GenCallingConv.inc" 290b57cec5SDimitry Andric 300b57cec5SDimitry Andric R600TargetLowering::R600TargetLowering(const TargetMachine &TM, 310b57cec5SDimitry Andric const R600Subtarget &STI) 320b57cec5SDimitry Andric : AMDGPUTargetLowering(TM, STI), Subtarget(&STI), Gen(STI.getGeneration()) { 330b57cec5SDimitry Andric addRegisterClass(MVT::f32, &R600::R600_Reg32RegClass); 340b57cec5SDimitry Andric addRegisterClass(MVT::i32, &R600::R600_Reg32RegClass); 350b57cec5SDimitry Andric addRegisterClass(MVT::v2f32, &R600::R600_Reg64RegClass); 360b57cec5SDimitry Andric addRegisterClass(MVT::v2i32, &R600::R600_Reg64RegClass); 370b57cec5SDimitry Andric addRegisterClass(MVT::v4f32, &R600::R600_Reg128RegClass); 380b57cec5SDimitry Andric addRegisterClass(MVT::v4i32, &R600::R600_Reg128RegClass); 390b57cec5SDimitry Andric 40480093f4SDimitry Andric setBooleanContents(ZeroOrNegativeOneBooleanContent); 41480093f4SDimitry Andric setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); 42480093f4SDimitry Andric 430b57cec5SDimitry Andric computeRegisterProperties(Subtarget->getRegisterInfo()); 440b57cec5SDimitry Andric 450b57cec5SDimitry Andric // Legalize loads and stores to the private address space. 4681ad6265SDimitry Andric setOperationAction(ISD::LOAD, {MVT::i32, MVT::v2i32, MVT::v4i32}, Custom); 470b57cec5SDimitry Andric 480b57cec5SDimitry Andric // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address 490b57cec5SDimitry Andric // spaces, so it is custom lowered to handle those where it isn't. 5081ad6265SDimitry Andric for (auto Op : {ISD::SEXTLOAD, ISD::ZEXTLOAD, ISD::EXTLOAD}) 510b57cec5SDimitry Andric for (MVT VT : MVT::integer_valuetypes()) { 5281ad6265SDimitry Andric setLoadExtAction(Op, VT, MVT::i1, Promote); 5381ad6265SDimitry Andric setLoadExtAction(Op, VT, MVT::i8, Custom); 5481ad6265SDimitry Andric setLoadExtAction(Op, VT, MVT::i16, Custom); 550b57cec5SDimitry Andric } 560b57cec5SDimitry Andric 570b57cec5SDimitry Andric // Workaround for LegalizeDAG asserting on expansion of i1 vector loads. 5881ad6265SDimitry Andric setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::v2i32, 5981ad6265SDimitry Andric MVT::v2i1, Expand); 600b57cec5SDimitry Andric 6181ad6265SDimitry Andric setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::v4i32, 6281ad6265SDimitry Andric MVT::v4i1, Expand); 630b57cec5SDimitry Andric 6481ad6265SDimitry Andric setOperationAction(ISD::STORE, {MVT::i8, MVT::i32, MVT::v2i32, MVT::v4i32}, 6581ad6265SDimitry Andric Custom); 660b57cec5SDimitry Andric 670b57cec5SDimitry Andric setTruncStoreAction(MVT::i32, MVT::i8, Custom); 680b57cec5SDimitry Andric setTruncStoreAction(MVT::i32, MVT::i16, Custom); 690b57cec5SDimitry Andric // We need to include these since trunc STORES to PRIVATE need 700b57cec5SDimitry Andric // special handling to accommodate RMW 710b57cec5SDimitry Andric setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom); 720b57cec5SDimitry Andric setTruncStoreAction(MVT::v4i32, MVT::v4i16, Custom); 730b57cec5SDimitry Andric setTruncStoreAction(MVT::v8i32, MVT::v8i16, Custom); 740b57cec5SDimitry Andric setTruncStoreAction(MVT::v16i32, MVT::v16i16, Custom); 750b57cec5SDimitry Andric setTruncStoreAction(MVT::v32i32, MVT::v32i16, Custom); 760b57cec5SDimitry Andric setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom); 770b57cec5SDimitry Andric setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom); 780b57cec5SDimitry Andric setTruncStoreAction(MVT::v8i32, MVT::v8i8, Custom); 790b57cec5SDimitry Andric setTruncStoreAction(MVT::v16i32, MVT::v16i8, Custom); 800b57cec5SDimitry Andric setTruncStoreAction(MVT::v32i32, MVT::v32i8, Custom); 810b57cec5SDimitry Andric 820b57cec5SDimitry Andric // Workaround for LegalizeDAG asserting on expansion of i1 vector stores. 830b57cec5SDimitry Andric setTruncStoreAction(MVT::v2i32, MVT::v2i1, Expand); 840b57cec5SDimitry Andric setTruncStoreAction(MVT::v4i32, MVT::v4i1, Expand); 850b57cec5SDimitry Andric 860b57cec5SDimitry Andric // Set condition code actions 8781ad6265SDimitry Andric setCondCodeAction({ISD::SETO, ISD::SETUO, ISD::SETLT, ISD::SETLE, ISD::SETOLT, 8881ad6265SDimitry Andric ISD::SETOLE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGE, 8981ad6265SDimitry Andric ISD::SETUGT, ISD::SETULT, ISD::SETULE}, 9081ad6265SDimitry Andric MVT::f32, Expand); 910b57cec5SDimitry Andric 9281ad6265SDimitry Andric setCondCodeAction({ISD::SETLE, ISD::SETLT, ISD::SETULE, ISD::SETULT}, 9381ad6265SDimitry Andric MVT::i32, Expand); 940b57cec5SDimitry Andric 9581ad6265SDimitry Andric setOperationAction({ISD::FCOS, ISD::FSIN}, MVT::f32, Custom); 960b57cec5SDimitry Andric 9781ad6265SDimitry Andric setOperationAction(ISD::SETCC, {MVT::v4i32, MVT::v2i32}, Expand); 980b57cec5SDimitry Andric 9981ad6265SDimitry Andric setOperationAction(ISD::BR_CC, {MVT::i32, MVT::f32}, Expand); 1000b57cec5SDimitry Andric setOperationAction(ISD::BRCOND, MVT::Other, Custom); 1010b57cec5SDimitry Andric 1020b57cec5SDimitry Andric setOperationAction(ISD::FSUB, MVT::f32, Expand); 1030b57cec5SDimitry Andric 1045f757f3fSDimitry Andric setOperationAction({ISD::FCEIL, ISD::FTRUNC, ISD::FROUNDEVEN, ISD::FFLOOR}, 10581ad6265SDimitry Andric MVT::f64, Custom); 1060b57cec5SDimitry Andric 10781ad6265SDimitry Andric setOperationAction(ISD::SELECT_CC, {MVT::f32, MVT::i32}, Custom); 1080b57cec5SDimitry Andric 10981ad6265SDimitry Andric setOperationAction(ISD::SETCC, {MVT::i32, MVT::f32}, Expand); 11081ad6265SDimitry Andric setOperationAction({ISD::FP_TO_UINT, ISD::FP_TO_SINT}, {MVT::i1, MVT::i64}, 11181ad6265SDimitry Andric Custom); 1120b57cec5SDimitry Andric 11381ad6265SDimitry Andric setOperationAction(ISD::SELECT, {MVT::i32, MVT::f32, MVT::v2i32, MVT::v4i32}, 11481ad6265SDimitry Andric Expand); 1150b57cec5SDimitry Andric 1160b57cec5SDimitry Andric // ADD, SUB overflow. 1170b57cec5SDimitry Andric // TODO: turn these into Legal? 1180b57cec5SDimitry Andric if (Subtarget->hasCARRY()) 1190b57cec5SDimitry Andric setOperationAction(ISD::UADDO, MVT::i32, Custom); 1200b57cec5SDimitry Andric 1210b57cec5SDimitry Andric if (Subtarget->hasBORROW()) 1220b57cec5SDimitry Andric setOperationAction(ISD::USUBO, MVT::i32, Custom); 1230b57cec5SDimitry Andric 1240b57cec5SDimitry Andric // Expand sign extension of vectors 1250b57cec5SDimitry Andric if (!Subtarget->hasBFE()) 1260b57cec5SDimitry Andric setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); 1270b57cec5SDimitry Andric 12881ad6265SDimitry Andric setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::v2i1, MVT::v4i1}, Expand); 1290b57cec5SDimitry Andric 1300b57cec5SDimitry Andric if (!Subtarget->hasBFE()) 1310b57cec5SDimitry Andric setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); 13281ad6265SDimitry Andric setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::v2i8, MVT::v4i8}, Expand); 1330b57cec5SDimitry Andric 1340b57cec5SDimitry Andric if (!Subtarget->hasBFE()) 1350b57cec5SDimitry Andric setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); 13681ad6265SDimitry Andric setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::v2i16, MVT::v4i16}, Expand); 1370b57cec5SDimitry Andric 1380b57cec5SDimitry Andric setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal); 13981ad6265SDimitry Andric setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::v2i32, MVT::v4i32}, Expand); 1400b57cec5SDimitry Andric 1410b57cec5SDimitry Andric setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand); 1420b57cec5SDimitry Andric 1430b57cec5SDimitry Andric setOperationAction(ISD::FrameIndex, MVT::i32, Custom); 1440b57cec5SDimitry Andric 14581ad6265SDimitry Andric setOperationAction(ISD::EXTRACT_VECTOR_ELT, 14681ad6265SDimitry Andric {MVT::v2i32, MVT::v2f32, MVT::v4i32, MVT::v4f32}, Custom); 1470b57cec5SDimitry Andric 14881ad6265SDimitry Andric setOperationAction(ISD::INSERT_VECTOR_ELT, 14981ad6265SDimitry Andric {MVT::v2i32, MVT::v2f32, MVT::v4i32, MVT::v4f32}, Custom); 1500b57cec5SDimitry Andric 1510b57cec5SDimitry Andric // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32 1520b57cec5SDimitry Andric // to be Legal/Custom in order to avoid library calls. 15381ad6265SDimitry Andric setOperationAction({ISD::SHL_PARTS, ISD::SRL_PARTS, ISD::SRA_PARTS}, MVT::i32, 15481ad6265SDimitry Andric Custom); 1550b57cec5SDimitry Andric 15681ad6265SDimitry Andric if (!Subtarget->hasFMA()) 15781ad6265SDimitry Andric setOperationAction(ISD::FMA, {MVT::f32, MVT::f64}, Expand); 1580b57cec5SDimitry Andric 159480093f4SDimitry Andric // FIXME: May need no denormals check 1600b57cec5SDimitry Andric setOperationAction(ISD::FMAD, MVT::f32, Legal); 1610b57cec5SDimitry Andric 16281ad6265SDimitry Andric if (!Subtarget->hasBFI()) 1630b57cec5SDimitry Andric // fcopysign can be done in a single instruction with BFI. 16481ad6265SDimitry Andric setOperationAction(ISD::FCOPYSIGN, {MVT::f32, MVT::f64}, Expand); 1650b57cec5SDimitry Andric 1660b57cec5SDimitry Andric if (!Subtarget->hasBCNT(32)) 1670b57cec5SDimitry Andric setOperationAction(ISD::CTPOP, MVT::i32, Expand); 1680b57cec5SDimitry Andric 1690b57cec5SDimitry Andric if (!Subtarget->hasBCNT(64)) 1700b57cec5SDimitry Andric setOperationAction(ISD::CTPOP, MVT::i64, Expand); 1710b57cec5SDimitry Andric 1720b57cec5SDimitry Andric if (Subtarget->hasFFBH()) 1730b57cec5SDimitry Andric setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom); 1740b57cec5SDimitry Andric 1750b57cec5SDimitry Andric if (Subtarget->hasFFBL()) 1760b57cec5SDimitry Andric setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom); 1770b57cec5SDimitry Andric 1780b57cec5SDimitry Andric // FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we 1790b57cec5SDimitry Andric // need it for R600. 1800b57cec5SDimitry Andric if (Subtarget->hasBFE()) 1810b57cec5SDimitry Andric setHasExtractBitsInsn(true); 1820b57cec5SDimitry Andric 1830b57cec5SDimitry Andric setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); 184bdd1243dSDimitry Andric setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom); 1850b57cec5SDimitry Andric 1860b57cec5SDimitry Andric const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 }; 18781ad6265SDimitry Andric for (MVT VT : ScalarIntVTs) 18881ad6265SDimitry Andric setOperationAction({ISD::ADDC, ISD::SUBC, ISD::ADDE, ISD::SUBE}, VT, 18981ad6265SDimitry Andric Expand); 1900b57cec5SDimitry Andric 1910b57cec5SDimitry Andric // LLVM will expand these to atomic_cmp_swap(0) 1920b57cec5SDimitry Andric // and atomic_swap, respectively. 19381ad6265SDimitry Andric setOperationAction({ISD::ATOMIC_LOAD, ISD::ATOMIC_STORE}, MVT::i32, Expand); 1940b57cec5SDimitry Andric 1950b57cec5SDimitry Andric // We need to custom lower some of the intrinsics 19681ad6265SDimitry Andric setOperationAction({ISD::INTRINSIC_VOID, ISD::INTRINSIC_WO_CHAIN}, MVT::Other, 19781ad6265SDimitry Andric Custom); 1980b57cec5SDimitry Andric 1990b57cec5SDimitry Andric setSchedulingPreference(Sched::Source); 2000b57cec5SDimitry Andric 20181ad6265SDimitry Andric setTargetDAGCombine({ISD::FP_ROUND, ISD::FP_TO_SINT, ISD::EXTRACT_VECTOR_ELT, 20281ad6265SDimitry Andric ISD::SELECT_CC, ISD::INSERT_VECTOR_ELT, ISD::LOAD}); 2030b57cec5SDimitry Andric } 2040b57cec5SDimitry Andric 2050b57cec5SDimitry Andric static inline bool isEOP(MachineBasicBlock::iterator I) { 2060b57cec5SDimitry Andric if (std::next(I) == I->getParent()->end()) 2070b57cec5SDimitry Andric return false; 2080b57cec5SDimitry Andric return std::next(I)->getOpcode() == R600::RETURN; 2090b57cec5SDimitry Andric } 2100b57cec5SDimitry Andric 2110b57cec5SDimitry Andric MachineBasicBlock * 2120b57cec5SDimitry Andric R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, 2130b57cec5SDimitry Andric MachineBasicBlock *BB) const { 2140b57cec5SDimitry Andric MachineFunction *MF = BB->getParent(); 2150b57cec5SDimitry Andric MachineRegisterInfo &MRI = MF->getRegInfo(); 2160b57cec5SDimitry Andric MachineBasicBlock::iterator I = MI; 2170b57cec5SDimitry Andric const R600InstrInfo *TII = Subtarget->getInstrInfo(); 2180b57cec5SDimitry Andric 2190b57cec5SDimitry Andric switch (MI.getOpcode()) { 2200b57cec5SDimitry Andric default: 2210b57cec5SDimitry Andric // Replace LDS_*_RET instruction that don't have any uses with the 2220b57cec5SDimitry Andric // equivalent LDS_*_NORET instruction. 2230b57cec5SDimitry Andric if (TII->isLDSRetInstr(MI.getOpcode())) { 2240b57cec5SDimitry Andric int DstIdx = TII->getOperandIdx(MI.getOpcode(), R600::OpName::dst); 2250b57cec5SDimitry Andric assert(DstIdx != -1); 2260b57cec5SDimitry Andric MachineInstrBuilder NewMI; 2270b57cec5SDimitry Andric // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add 2280b57cec5SDimitry Andric // LDS_1A2D support and remove this special case. 2290b57cec5SDimitry Andric if (!MRI.use_empty(MI.getOperand(DstIdx).getReg()) || 2300b57cec5SDimitry Andric MI.getOpcode() == R600::LDS_CMPST_RET) 2310b57cec5SDimitry Andric return BB; 2320b57cec5SDimitry Andric 2330b57cec5SDimitry Andric NewMI = BuildMI(*BB, I, BB->findDebugLoc(I), 2340b57cec5SDimitry Andric TII->get(R600::getLDSNoRetOp(MI.getOpcode()))); 2354824e7fdSDimitry Andric for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) 2364824e7fdSDimitry Andric NewMI.add(MO); 2370b57cec5SDimitry Andric } else { 2380b57cec5SDimitry Andric return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); 2390b57cec5SDimitry Andric } 2400b57cec5SDimitry Andric break; 2410b57cec5SDimitry Andric 2420b57cec5SDimitry Andric case R600::FABS_R600: { 2430b57cec5SDimitry Andric MachineInstr *NewMI = TII->buildDefaultInstruction( 2440b57cec5SDimitry Andric *BB, I, R600::MOV, MI.getOperand(0).getReg(), 2450b57cec5SDimitry Andric MI.getOperand(1).getReg()); 2460b57cec5SDimitry Andric TII->addFlag(*NewMI, 0, MO_FLAG_ABS); 2470b57cec5SDimitry Andric break; 2480b57cec5SDimitry Andric } 2490b57cec5SDimitry Andric 2500b57cec5SDimitry Andric case R600::FNEG_R600: { 2510b57cec5SDimitry Andric MachineInstr *NewMI = TII->buildDefaultInstruction( 2520b57cec5SDimitry Andric *BB, I, R600::MOV, MI.getOperand(0).getReg(), 2530b57cec5SDimitry Andric MI.getOperand(1).getReg()); 2540b57cec5SDimitry Andric TII->addFlag(*NewMI, 0, MO_FLAG_NEG); 2550b57cec5SDimitry Andric break; 2560b57cec5SDimitry Andric } 2570b57cec5SDimitry Andric 2580b57cec5SDimitry Andric case R600::MASK_WRITE: { 2598bcb0991SDimitry Andric Register maskedRegister = MI.getOperand(0).getReg(); 260e8d8bef9SDimitry Andric assert(maskedRegister.isVirtual()); 2610b57cec5SDimitry Andric MachineInstr * defInstr = MRI.getVRegDef(maskedRegister); 2620b57cec5SDimitry Andric TII->addFlag(*defInstr, 0, MO_FLAG_MASK); 2630b57cec5SDimitry Andric break; 2640b57cec5SDimitry Andric } 2650b57cec5SDimitry Andric 2660b57cec5SDimitry Andric case R600::MOV_IMM_F32: 2670b57cec5SDimitry Andric TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(), MI.getOperand(1) 2680b57cec5SDimitry Andric .getFPImm() 2690b57cec5SDimitry Andric ->getValueAPF() 2700b57cec5SDimitry Andric .bitcastToAPInt() 2710b57cec5SDimitry Andric .getZExtValue()); 2720b57cec5SDimitry Andric break; 2730b57cec5SDimitry Andric 2740b57cec5SDimitry Andric case R600::MOV_IMM_I32: 2750b57cec5SDimitry Andric TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(), 2760b57cec5SDimitry Andric MI.getOperand(1).getImm()); 2770b57cec5SDimitry Andric break; 2780b57cec5SDimitry Andric 2790b57cec5SDimitry Andric case R600::MOV_IMM_GLOBAL_ADDR: { 2800b57cec5SDimitry Andric //TODO: Perhaps combine this instruction with the next if possible 2810b57cec5SDimitry Andric auto MIB = TII->buildDefaultInstruction( 2820b57cec5SDimitry Andric *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_LITERAL_X); 2830b57cec5SDimitry Andric int Idx = TII->getOperandIdx(*MIB, R600::OpName::literal); 2840b57cec5SDimitry Andric //TODO: Ugh this is rather ugly 285349cc55cSDimitry Andric const MachineOperand &MO = MI.getOperand(1); 286349cc55cSDimitry Andric MIB->getOperand(Idx).ChangeToGA(MO.getGlobal(), MO.getOffset(), 287349cc55cSDimitry Andric MO.getTargetFlags()); 2880b57cec5SDimitry Andric break; 2890b57cec5SDimitry Andric } 2900b57cec5SDimitry Andric 2910b57cec5SDimitry Andric case R600::CONST_COPY: { 2920b57cec5SDimitry Andric MachineInstr *NewMI = TII->buildDefaultInstruction( 2930b57cec5SDimitry Andric *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_CONST); 2940b57cec5SDimitry Andric TII->setImmOperand(*NewMI, R600::OpName::src0_sel, 2950b57cec5SDimitry Andric MI.getOperand(1).getImm()); 2960b57cec5SDimitry Andric break; 2970b57cec5SDimitry Andric } 2980b57cec5SDimitry Andric 2990b57cec5SDimitry Andric case R600::RAT_WRITE_CACHELESS_32_eg: 3000b57cec5SDimitry Andric case R600::RAT_WRITE_CACHELESS_64_eg: 3010b57cec5SDimitry Andric case R600::RAT_WRITE_CACHELESS_128_eg: 3020b57cec5SDimitry Andric BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode())) 3030b57cec5SDimitry Andric .add(MI.getOperand(0)) 3040b57cec5SDimitry Andric .add(MI.getOperand(1)) 3050b57cec5SDimitry Andric .addImm(isEOP(I)); // Set End of program bit 3060b57cec5SDimitry Andric break; 3070b57cec5SDimitry Andric 3080b57cec5SDimitry Andric case R600::RAT_STORE_TYPED_eg: 3090b57cec5SDimitry Andric BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode())) 3100b57cec5SDimitry Andric .add(MI.getOperand(0)) 3110b57cec5SDimitry Andric .add(MI.getOperand(1)) 3120b57cec5SDimitry Andric .add(MI.getOperand(2)) 3130b57cec5SDimitry Andric .addImm(isEOP(I)); // Set End of program bit 3140b57cec5SDimitry Andric break; 3150b57cec5SDimitry Andric 3160b57cec5SDimitry Andric case R600::BRANCH: 3170b57cec5SDimitry Andric BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP)) 3180b57cec5SDimitry Andric .add(MI.getOperand(0)); 3190b57cec5SDimitry Andric break; 3200b57cec5SDimitry Andric 3210b57cec5SDimitry Andric case R600::BRANCH_COND_f32: { 3220b57cec5SDimitry Andric MachineInstr *NewMI = 3230b57cec5SDimitry Andric BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X), 3240b57cec5SDimitry Andric R600::PREDICATE_BIT) 3250b57cec5SDimitry Andric .add(MI.getOperand(1)) 3260b57cec5SDimitry Andric .addImm(R600::PRED_SETNE) 3270b57cec5SDimitry Andric .addImm(0); // Flags 3280b57cec5SDimitry Andric TII->addFlag(*NewMI, 0, MO_FLAG_PUSH); 3290b57cec5SDimitry Andric BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND)) 3300b57cec5SDimitry Andric .add(MI.getOperand(0)) 3310b57cec5SDimitry Andric .addReg(R600::PREDICATE_BIT, RegState::Kill); 3320b57cec5SDimitry Andric break; 3330b57cec5SDimitry Andric } 3340b57cec5SDimitry Andric 3350b57cec5SDimitry Andric case R600::BRANCH_COND_i32: { 3360b57cec5SDimitry Andric MachineInstr *NewMI = 3370b57cec5SDimitry Andric BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X), 3380b57cec5SDimitry Andric R600::PREDICATE_BIT) 3390b57cec5SDimitry Andric .add(MI.getOperand(1)) 3400b57cec5SDimitry Andric .addImm(R600::PRED_SETNE_INT) 3410b57cec5SDimitry Andric .addImm(0); // Flags 3420b57cec5SDimitry Andric TII->addFlag(*NewMI, 0, MO_FLAG_PUSH); 3430b57cec5SDimitry Andric BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND)) 3440b57cec5SDimitry Andric .add(MI.getOperand(0)) 3450b57cec5SDimitry Andric .addReg(R600::PREDICATE_BIT, RegState::Kill); 3460b57cec5SDimitry Andric break; 3470b57cec5SDimitry Andric } 3480b57cec5SDimitry Andric 3490b57cec5SDimitry Andric case R600::EG_ExportSwz: 3500b57cec5SDimitry Andric case R600::R600_ExportSwz: { 3510b57cec5SDimitry Andric // Instruction is left unmodified if its not the last one of its type 3520b57cec5SDimitry Andric bool isLastInstructionOfItsType = true; 3530b57cec5SDimitry Andric unsigned InstExportType = MI.getOperand(1).getImm(); 3540b57cec5SDimitry Andric for (MachineBasicBlock::iterator NextExportInst = std::next(I), 3550b57cec5SDimitry Andric EndBlock = BB->end(); NextExportInst != EndBlock; 3560b57cec5SDimitry Andric NextExportInst = std::next(NextExportInst)) { 3570b57cec5SDimitry Andric if (NextExportInst->getOpcode() == R600::EG_ExportSwz || 3580b57cec5SDimitry Andric NextExportInst->getOpcode() == R600::R600_ExportSwz) { 3590b57cec5SDimitry Andric unsigned CurrentInstExportType = NextExportInst->getOperand(1) 3600b57cec5SDimitry Andric .getImm(); 3610b57cec5SDimitry Andric if (CurrentInstExportType == InstExportType) { 3620b57cec5SDimitry Andric isLastInstructionOfItsType = false; 3630b57cec5SDimitry Andric break; 3640b57cec5SDimitry Andric } 3650b57cec5SDimitry Andric } 3660b57cec5SDimitry Andric } 3670b57cec5SDimitry Andric bool EOP = isEOP(I); 3680b57cec5SDimitry Andric if (!EOP && !isLastInstructionOfItsType) 3690b57cec5SDimitry Andric return BB; 3700b57cec5SDimitry Andric unsigned CfInst = (MI.getOpcode() == R600::EG_ExportSwz) ? 84 : 40; 3710b57cec5SDimitry Andric BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode())) 3720b57cec5SDimitry Andric .add(MI.getOperand(0)) 3730b57cec5SDimitry Andric .add(MI.getOperand(1)) 3740b57cec5SDimitry Andric .add(MI.getOperand(2)) 3750b57cec5SDimitry Andric .add(MI.getOperand(3)) 3760b57cec5SDimitry Andric .add(MI.getOperand(4)) 3770b57cec5SDimitry Andric .add(MI.getOperand(5)) 3780b57cec5SDimitry Andric .add(MI.getOperand(6)) 3790b57cec5SDimitry Andric .addImm(CfInst) 3800b57cec5SDimitry Andric .addImm(EOP); 3810b57cec5SDimitry Andric break; 3820b57cec5SDimitry Andric } 3830b57cec5SDimitry Andric case R600::RETURN: { 3840b57cec5SDimitry Andric return BB; 3850b57cec5SDimitry Andric } 3860b57cec5SDimitry Andric } 3870b57cec5SDimitry Andric 3880b57cec5SDimitry Andric MI.eraseFromParent(); 3890b57cec5SDimitry Andric return BB; 3900b57cec5SDimitry Andric } 3910b57cec5SDimitry Andric 3920b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 3930b57cec5SDimitry Andric // Custom DAG Lowering Operations 3940b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 3950b57cec5SDimitry Andric 3960b57cec5SDimitry Andric SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { 3970b57cec5SDimitry Andric MachineFunction &MF = DAG.getMachineFunction(); 3980b57cec5SDimitry Andric R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>(); 3990b57cec5SDimitry Andric switch (Op.getOpcode()) { 4000b57cec5SDimitry Andric default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); 4010b57cec5SDimitry Andric case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); 4020b57cec5SDimitry Andric case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); 403fe6060f1SDimitry Andric case ISD::SHL_PARTS: 4040b57cec5SDimitry Andric case ISD::SRA_PARTS: 405fe6060f1SDimitry Andric case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG); 4060b57cec5SDimitry Andric case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY); 4070b57cec5SDimitry Andric case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW); 4080b57cec5SDimitry Andric case ISD::FCOS: 4090b57cec5SDimitry Andric case ISD::FSIN: return LowerTrig(Op, DAG); 4100b57cec5SDimitry Andric case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); 4110b57cec5SDimitry Andric case ISD::STORE: return LowerSTORE(Op, DAG); 4120b57cec5SDimitry Andric case ISD::LOAD: { 4130b57cec5SDimitry Andric SDValue Result = LowerLOAD(Op, DAG); 4140b57cec5SDimitry Andric assert((!Result.getNode() || 4150b57cec5SDimitry Andric Result.getNode()->getNumValues() == 2) && 4160b57cec5SDimitry Andric "Load should return a value and a chain"); 4170b57cec5SDimitry Andric return Result; 4180b57cec5SDimitry Andric } 4190b57cec5SDimitry Andric 4200b57cec5SDimitry Andric case ISD::BRCOND: return LowerBRCOND(Op, DAG); 4210b57cec5SDimitry Andric case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG); 4220b57cec5SDimitry Andric case ISD::FrameIndex: return lowerFrameIndex(Op, DAG); 423bdd1243dSDimitry Andric case ISD::ADDRSPACECAST: 424bdd1243dSDimitry Andric return lowerADDRSPACECAST(Op, DAG); 4250b57cec5SDimitry Andric case ISD::INTRINSIC_VOID: { 4260b57cec5SDimitry Andric SDValue Chain = Op.getOperand(0); 427647cbc5dSDimitry Andric unsigned IntrinsicID = Op.getConstantOperandVal(1); 4280b57cec5SDimitry Andric switch (IntrinsicID) { 4290b57cec5SDimitry Andric case Intrinsic::r600_store_swizzle: { 4300b57cec5SDimitry Andric SDLoc DL(Op); 4310b57cec5SDimitry Andric const SDValue Args[8] = { 4320b57cec5SDimitry Andric Chain, 4330b57cec5SDimitry Andric Op.getOperand(2), // Export Value 4340b57cec5SDimitry Andric Op.getOperand(3), // ArrayBase 4350b57cec5SDimitry Andric Op.getOperand(4), // Type 4360b57cec5SDimitry Andric DAG.getConstant(0, DL, MVT::i32), // SWZ_X 4370b57cec5SDimitry Andric DAG.getConstant(1, DL, MVT::i32), // SWZ_Y 4380b57cec5SDimitry Andric DAG.getConstant(2, DL, MVT::i32), // SWZ_Z 4390b57cec5SDimitry Andric DAG.getConstant(3, DL, MVT::i32) // SWZ_W 4400b57cec5SDimitry Andric }; 4410b57cec5SDimitry Andric return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, Op.getValueType(), Args); 4420b57cec5SDimitry Andric } 4430b57cec5SDimitry Andric 4440b57cec5SDimitry Andric // default for switch(IntrinsicID) 4450b57cec5SDimitry Andric default: break; 4460b57cec5SDimitry Andric } 4470b57cec5SDimitry Andric // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode()) 4480b57cec5SDimitry Andric break; 4490b57cec5SDimitry Andric } 4500b57cec5SDimitry Andric case ISD::INTRINSIC_WO_CHAIN: { 451647cbc5dSDimitry Andric unsigned IntrinsicID = Op.getConstantOperandVal(0); 4520b57cec5SDimitry Andric EVT VT = Op.getValueType(); 4530b57cec5SDimitry Andric SDLoc DL(Op); 4540b57cec5SDimitry Andric switch (IntrinsicID) { 4550b57cec5SDimitry Andric case Intrinsic::r600_tex: 4560b57cec5SDimitry Andric case Intrinsic::r600_texc: { 4570b57cec5SDimitry Andric unsigned TextureOp; 4580b57cec5SDimitry Andric switch (IntrinsicID) { 4590b57cec5SDimitry Andric case Intrinsic::r600_tex: 4600b57cec5SDimitry Andric TextureOp = 0; 4610b57cec5SDimitry Andric break; 4620b57cec5SDimitry Andric case Intrinsic::r600_texc: 4630b57cec5SDimitry Andric TextureOp = 1; 4640b57cec5SDimitry Andric break; 4650b57cec5SDimitry Andric default: 4660b57cec5SDimitry Andric llvm_unreachable("unhandled texture operation"); 4670b57cec5SDimitry Andric } 4680b57cec5SDimitry Andric 4690b57cec5SDimitry Andric SDValue TexArgs[19] = { 4700b57cec5SDimitry Andric DAG.getConstant(TextureOp, DL, MVT::i32), 4710b57cec5SDimitry Andric Op.getOperand(1), 4720b57cec5SDimitry Andric DAG.getConstant(0, DL, MVT::i32), 4730b57cec5SDimitry Andric DAG.getConstant(1, DL, MVT::i32), 4740b57cec5SDimitry Andric DAG.getConstant(2, DL, MVT::i32), 4750b57cec5SDimitry Andric DAG.getConstant(3, DL, MVT::i32), 4760b57cec5SDimitry Andric Op.getOperand(2), 4770b57cec5SDimitry Andric Op.getOperand(3), 4780b57cec5SDimitry Andric Op.getOperand(4), 4790b57cec5SDimitry Andric DAG.getConstant(0, DL, MVT::i32), 4800b57cec5SDimitry Andric DAG.getConstant(1, DL, MVT::i32), 4810b57cec5SDimitry Andric DAG.getConstant(2, DL, MVT::i32), 4820b57cec5SDimitry Andric DAG.getConstant(3, DL, MVT::i32), 4830b57cec5SDimitry Andric Op.getOperand(5), 4840b57cec5SDimitry Andric Op.getOperand(6), 4850b57cec5SDimitry Andric Op.getOperand(7), 4860b57cec5SDimitry Andric Op.getOperand(8), 4870b57cec5SDimitry Andric Op.getOperand(9), 4880b57cec5SDimitry Andric Op.getOperand(10) 4890b57cec5SDimitry Andric }; 4900b57cec5SDimitry Andric return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs); 4910b57cec5SDimitry Andric } 4920b57cec5SDimitry Andric case Intrinsic::r600_dot4: { 4930b57cec5SDimitry Andric SDValue Args[8] = { 4940b57cec5SDimitry Andric DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1), 4950b57cec5SDimitry Andric DAG.getConstant(0, DL, MVT::i32)), 4960b57cec5SDimitry Andric DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2), 4970b57cec5SDimitry Andric DAG.getConstant(0, DL, MVT::i32)), 4980b57cec5SDimitry Andric DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1), 4990b57cec5SDimitry Andric DAG.getConstant(1, DL, MVT::i32)), 5000b57cec5SDimitry Andric DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2), 5010b57cec5SDimitry Andric DAG.getConstant(1, DL, MVT::i32)), 5020b57cec5SDimitry Andric DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1), 5030b57cec5SDimitry Andric DAG.getConstant(2, DL, MVT::i32)), 5040b57cec5SDimitry Andric DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2), 5050b57cec5SDimitry Andric DAG.getConstant(2, DL, MVT::i32)), 5060b57cec5SDimitry Andric DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1), 5070b57cec5SDimitry Andric DAG.getConstant(3, DL, MVT::i32)), 5080b57cec5SDimitry Andric DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2), 5090b57cec5SDimitry Andric DAG.getConstant(3, DL, MVT::i32)) 5100b57cec5SDimitry Andric }; 5110b57cec5SDimitry Andric return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args); 5120b57cec5SDimitry Andric } 5130b57cec5SDimitry Andric 5140b57cec5SDimitry Andric case Intrinsic::r600_implicitarg_ptr: { 5150b57cec5SDimitry Andric MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUAS::PARAM_I_ADDRESS); 5160b57cec5SDimitry Andric uint32_t ByteOffset = getImplicitParameterOffset(MF, FIRST_IMPLICIT); 5170b57cec5SDimitry Andric return DAG.getConstant(ByteOffset, DL, PtrVT); 5180b57cec5SDimitry Andric } 5190b57cec5SDimitry Andric case Intrinsic::r600_read_ngroups_x: 5200b57cec5SDimitry Andric return LowerImplicitParameter(DAG, VT, DL, 0); 5210b57cec5SDimitry Andric case Intrinsic::r600_read_ngroups_y: 5220b57cec5SDimitry Andric return LowerImplicitParameter(DAG, VT, DL, 1); 5230b57cec5SDimitry Andric case Intrinsic::r600_read_ngroups_z: 5240b57cec5SDimitry Andric return LowerImplicitParameter(DAG, VT, DL, 2); 5250b57cec5SDimitry Andric case Intrinsic::r600_read_global_size_x: 5260b57cec5SDimitry Andric return LowerImplicitParameter(DAG, VT, DL, 3); 5270b57cec5SDimitry Andric case Intrinsic::r600_read_global_size_y: 5280b57cec5SDimitry Andric return LowerImplicitParameter(DAG, VT, DL, 4); 5290b57cec5SDimitry Andric case Intrinsic::r600_read_global_size_z: 5300b57cec5SDimitry Andric return LowerImplicitParameter(DAG, VT, DL, 5); 5310b57cec5SDimitry Andric case Intrinsic::r600_read_local_size_x: 5320b57cec5SDimitry Andric return LowerImplicitParameter(DAG, VT, DL, 6); 5330b57cec5SDimitry Andric case Intrinsic::r600_read_local_size_y: 5340b57cec5SDimitry Andric return LowerImplicitParameter(DAG, VT, DL, 7); 5350b57cec5SDimitry Andric case Intrinsic::r600_read_local_size_z: 5360b57cec5SDimitry Andric return LowerImplicitParameter(DAG, VT, DL, 8); 5370b57cec5SDimitry Andric 5380b57cec5SDimitry Andric case Intrinsic::r600_read_tgid_x: 5395ffd83dbSDimitry Andric case Intrinsic::amdgcn_workgroup_id_x: 5400b57cec5SDimitry Andric return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass, 5410b57cec5SDimitry Andric R600::T1_X, VT); 5420b57cec5SDimitry Andric case Intrinsic::r600_read_tgid_y: 5435ffd83dbSDimitry Andric case Intrinsic::amdgcn_workgroup_id_y: 5440b57cec5SDimitry Andric return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass, 5450b57cec5SDimitry Andric R600::T1_Y, VT); 5460b57cec5SDimitry Andric case Intrinsic::r600_read_tgid_z: 5475ffd83dbSDimitry Andric case Intrinsic::amdgcn_workgroup_id_z: 5480b57cec5SDimitry Andric return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass, 5490b57cec5SDimitry Andric R600::T1_Z, VT); 5500b57cec5SDimitry Andric case Intrinsic::r600_read_tidig_x: 5515ffd83dbSDimitry Andric case Intrinsic::amdgcn_workitem_id_x: 5520b57cec5SDimitry Andric return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass, 5530b57cec5SDimitry Andric R600::T0_X, VT); 5540b57cec5SDimitry Andric case Intrinsic::r600_read_tidig_y: 5555ffd83dbSDimitry Andric case Intrinsic::amdgcn_workitem_id_y: 5560b57cec5SDimitry Andric return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass, 5570b57cec5SDimitry Andric R600::T0_Y, VT); 5580b57cec5SDimitry Andric case Intrinsic::r600_read_tidig_z: 5595ffd83dbSDimitry Andric case Intrinsic::amdgcn_workitem_id_z: 5600b57cec5SDimitry Andric return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass, 5610b57cec5SDimitry Andric R600::T0_Z, VT); 5620b57cec5SDimitry Andric 5630b57cec5SDimitry Andric case Intrinsic::r600_recipsqrt_ieee: 5640b57cec5SDimitry Andric return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1)); 5650b57cec5SDimitry Andric 5660b57cec5SDimitry Andric case Intrinsic::r600_recipsqrt_clamped: 5670b57cec5SDimitry Andric return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1)); 5680b57cec5SDimitry Andric default: 5690b57cec5SDimitry Andric return Op; 5700b57cec5SDimitry Andric } 5710b57cec5SDimitry Andric 5720b57cec5SDimitry Andric // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode()) 5730b57cec5SDimitry Andric break; 5740b57cec5SDimitry Andric } 5750b57cec5SDimitry Andric } // end switch(Op.getOpcode()) 5760b57cec5SDimitry Andric return SDValue(); 5770b57cec5SDimitry Andric } 5780b57cec5SDimitry Andric 5790b57cec5SDimitry Andric void R600TargetLowering::ReplaceNodeResults(SDNode *N, 5800b57cec5SDimitry Andric SmallVectorImpl<SDValue> &Results, 5810b57cec5SDimitry Andric SelectionDAG &DAG) const { 5820b57cec5SDimitry Andric switch (N->getOpcode()) { 5830b57cec5SDimitry Andric default: 5840b57cec5SDimitry Andric AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG); 5850b57cec5SDimitry Andric return; 5860b57cec5SDimitry Andric case ISD::FP_TO_UINT: 5870b57cec5SDimitry Andric if (N->getValueType(0) == MVT::i1) { 5880b57cec5SDimitry Andric Results.push_back(lowerFP_TO_UINT(N->getOperand(0), DAG)); 5890b57cec5SDimitry Andric return; 5900b57cec5SDimitry Andric } 5910b57cec5SDimitry Andric // Since we don't care about out of bounds values we can use FP_TO_SINT for 5920b57cec5SDimitry Andric // uints too. The DAGLegalizer code for uint considers some extra cases 5930b57cec5SDimitry Andric // which are not necessary here. 594bdd1243dSDimitry Andric [[fallthrough]]; 5950b57cec5SDimitry Andric case ISD::FP_TO_SINT: { 5960b57cec5SDimitry Andric if (N->getValueType(0) == MVT::i1) { 5970b57cec5SDimitry Andric Results.push_back(lowerFP_TO_SINT(N->getOperand(0), DAG)); 5980b57cec5SDimitry Andric return; 5990b57cec5SDimitry Andric } 6000b57cec5SDimitry Andric 6010b57cec5SDimitry Andric SDValue Result; 6020b57cec5SDimitry Andric if (expandFP_TO_SINT(N, Result, DAG)) 6030b57cec5SDimitry Andric Results.push_back(Result); 6040b57cec5SDimitry Andric return; 6050b57cec5SDimitry Andric } 6060b57cec5SDimitry Andric case ISD::SDIVREM: { 6070b57cec5SDimitry Andric SDValue Op = SDValue(N, 1); 6080b57cec5SDimitry Andric SDValue RES = LowerSDIVREM(Op, DAG); 6090b57cec5SDimitry Andric Results.push_back(RES); 6100b57cec5SDimitry Andric Results.push_back(RES.getValue(1)); 6110b57cec5SDimitry Andric break; 6120b57cec5SDimitry Andric } 6130b57cec5SDimitry Andric case ISD::UDIVREM: { 6140b57cec5SDimitry Andric SDValue Op = SDValue(N, 0); 6150b57cec5SDimitry Andric LowerUDIVREM64(Op, DAG, Results); 6160b57cec5SDimitry Andric break; 6170b57cec5SDimitry Andric } 6180b57cec5SDimitry Andric } 6190b57cec5SDimitry Andric } 6200b57cec5SDimitry Andric 6210b57cec5SDimitry Andric SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG, 6220b57cec5SDimitry Andric SDValue Vector) const { 6230b57cec5SDimitry Andric SDLoc DL(Vector); 6240b57cec5SDimitry Andric EVT VecVT = Vector.getValueType(); 6250b57cec5SDimitry Andric EVT EltVT = VecVT.getVectorElementType(); 6260b57cec5SDimitry Andric SmallVector<SDValue, 8> Args; 6270b57cec5SDimitry Andric 6280b57cec5SDimitry Andric for (unsigned i = 0, e = VecVT.getVectorNumElements(); i != e; ++i) { 6295ffd83dbSDimitry Andric Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector, 6305ffd83dbSDimitry Andric DAG.getVectorIdxConstant(i, DL))); 6310b57cec5SDimitry Andric } 6320b57cec5SDimitry Andric 6330b57cec5SDimitry Andric return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args); 6340b57cec5SDimitry Andric } 6350b57cec5SDimitry Andric 6360b57cec5SDimitry Andric SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, 6370b57cec5SDimitry Andric SelectionDAG &DAG) const { 6380b57cec5SDimitry Andric SDLoc DL(Op); 6390b57cec5SDimitry Andric SDValue Vector = Op.getOperand(0); 6400b57cec5SDimitry Andric SDValue Index = Op.getOperand(1); 6410b57cec5SDimitry Andric 6420b57cec5SDimitry Andric if (isa<ConstantSDNode>(Index) || 6430b57cec5SDimitry Andric Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR) 6440b57cec5SDimitry Andric return Op; 6450b57cec5SDimitry Andric 6460b57cec5SDimitry Andric Vector = vectorToVerticalVector(DAG, Vector); 6470b57cec5SDimitry Andric return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(), 6480b57cec5SDimitry Andric Vector, Index); 6490b57cec5SDimitry Andric } 6500b57cec5SDimitry Andric 6510b57cec5SDimitry Andric SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, 6520b57cec5SDimitry Andric SelectionDAG &DAG) const { 6530b57cec5SDimitry Andric SDLoc DL(Op); 6540b57cec5SDimitry Andric SDValue Vector = Op.getOperand(0); 6550b57cec5SDimitry Andric SDValue Value = Op.getOperand(1); 6560b57cec5SDimitry Andric SDValue Index = Op.getOperand(2); 6570b57cec5SDimitry Andric 6580b57cec5SDimitry Andric if (isa<ConstantSDNode>(Index) || 6590b57cec5SDimitry Andric Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR) 6600b57cec5SDimitry Andric return Op; 6610b57cec5SDimitry Andric 6620b57cec5SDimitry Andric Vector = vectorToVerticalVector(DAG, Vector); 6630b57cec5SDimitry Andric SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(), 6640b57cec5SDimitry Andric Vector, Value, Index); 6650b57cec5SDimitry Andric return vectorToVerticalVector(DAG, Insert); 6660b57cec5SDimitry Andric } 6670b57cec5SDimitry Andric 6680b57cec5SDimitry Andric SDValue R600TargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI, 6690b57cec5SDimitry Andric SDValue Op, 6700b57cec5SDimitry Andric SelectionDAG &DAG) const { 6710b57cec5SDimitry Andric GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op); 6720b57cec5SDimitry Andric if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS) 6730b57cec5SDimitry Andric return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG); 6740b57cec5SDimitry Andric 6750b57cec5SDimitry Andric const DataLayout &DL = DAG.getDataLayout(); 6760b57cec5SDimitry Andric const GlobalValue *GV = GSD->getGlobal(); 6770b57cec5SDimitry Andric MVT ConstPtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS); 6780b57cec5SDimitry Andric 6790b57cec5SDimitry Andric SDValue GA = DAG.getTargetGlobalAddress(GV, SDLoc(GSD), ConstPtrVT); 6800b57cec5SDimitry Andric return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SDLoc(GSD), ConstPtrVT, GA); 6810b57cec5SDimitry Andric } 6820b57cec5SDimitry Andric 6830b57cec5SDimitry Andric SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const { 6840b57cec5SDimitry Andric // On hw >= R700, COS/SIN input must be between -1. and 1. 6850b57cec5SDimitry Andric // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5) 6860b57cec5SDimitry Andric EVT VT = Op.getValueType(); 6870b57cec5SDimitry Andric SDValue Arg = Op.getOperand(0); 6880b57cec5SDimitry Andric SDLoc DL(Op); 6890b57cec5SDimitry Andric 6900b57cec5SDimitry Andric // TODO: Should this propagate fast-math-flags? 6910b57cec5SDimitry Andric SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT, 6920b57cec5SDimitry Andric DAG.getNode(ISD::FADD, DL, VT, 6930b57cec5SDimitry Andric DAG.getNode(ISD::FMUL, DL, VT, Arg, 6940b57cec5SDimitry Andric DAG.getConstantFP(0.15915494309, DL, MVT::f32)), 6950b57cec5SDimitry Andric DAG.getConstantFP(0.5, DL, MVT::f32))); 6960b57cec5SDimitry Andric unsigned TrigNode; 6970b57cec5SDimitry Andric switch (Op.getOpcode()) { 6980b57cec5SDimitry Andric case ISD::FCOS: 6990b57cec5SDimitry Andric TrigNode = AMDGPUISD::COS_HW; 7000b57cec5SDimitry Andric break; 7010b57cec5SDimitry Andric case ISD::FSIN: 7020b57cec5SDimitry Andric TrigNode = AMDGPUISD::SIN_HW; 7030b57cec5SDimitry Andric break; 7040b57cec5SDimitry Andric default: 7050b57cec5SDimitry Andric llvm_unreachable("Wrong trig opcode"); 7060b57cec5SDimitry Andric } 7070b57cec5SDimitry Andric SDValue TrigVal = DAG.getNode(TrigNode, DL, VT, 7080b57cec5SDimitry Andric DAG.getNode(ISD::FADD, DL, VT, FractPart, 7090b57cec5SDimitry Andric DAG.getConstantFP(-0.5, DL, MVT::f32))); 7100b57cec5SDimitry Andric if (Gen >= AMDGPUSubtarget::R700) 7110b57cec5SDimitry Andric return TrigVal; 7120b57cec5SDimitry Andric // On R600 hw, COS/SIN input must be between -Pi and Pi. 7130b57cec5SDimitry Andric return DAG.getNode(ISD::FMUL, DL, VT, TrigVal, 7148bcb0991SDimitry Andric DAG.getConstantFP(numbers::pif, DL, MVT::f32)); 7150b57cec5SDimitry Andric } 7160b57cec5SDimitry Andric 717fe6060f1SDimitry Andric SDValue R600TargetLowering::LowerShiftParts(SDValue Op, 718fe6060f1SDimitry Andric SelectionDAG &DAG) const { 719fe6060f1SDimitry Andric SDValue Lo, Hi; 720fe6060f1SDimitry Andric expandShiftParts(Op.getNode(), Lo, Hi, DAG); 721fe6060f1SDimitry Andric return DAG.getMergeValues({Lo, Hi}, SDLoc(Op)); 7220b57cec5SDimitry Andric } 7230b57cec5SDimitry Andric 7240b57cec5SDimitry Andric SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG, 7250b57cec5SDimitry Andric unsigned mainop, unsigned ovf) const { 7260b57cec5SDimitry Andric SDLoc DL(Op); 7270b57cec5SDimitry Andric EVT VT = Op.getValueType(); 7280b57cec5SDimitry Andric 7290b57cec5SDimitry Andric SDValue Lo = Op.getOperand(0); 7300b57cec5SDimitry Andric SDValue Hi = Op.getOperand(1); 7310b57cec5SDimitry Andric 7320b57cec5SDimitry Andric SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi); 7330b57cec5SDimitry Andric // Extend sign. 7340b57cec5SDimitry Andric OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF, 7350b57cec5SDimitry Andric DAG.getValueType(MVT::i1)); 7360b57cec5SDimitry Andric 7370b57cec5SDimitry Andric SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi); 7380b57cec5SDimitry Andric 7390b57cec5SDimitry Andric return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF); 7400b57cec5SDimitry Andric } 7410b57cec5SDimitry Andric 7420b57cec5SDimitry Andric SDValue R600TargetLowering::lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const { 7430b57cec5SDimitry Andric SDLoc DL(Op); 7440b57cec5SDimitry Andric return DAG.getNode( 7450b57cec5SDimitry Andric ISD::SETCC, 7460b57cec5SDimitry Andric DL, 7470b57cec5SDimitry Andric MVT::i1, 7480b57cec5SDimitry Andric Op, DAG.getConstantFP(1.0f, DL, MVT::f32), 7490b57cec5SDimitry Andric DAG.getCondCode(ISD::SETEQ)); 7500b57cec5SDimitry Andric } 7510b57cec5SDimitry Andric 7520b57cec5SDimitry Andric SDValue R600TargetLowering::lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const { 7530b57cec5SDimitry Andric SDLoc DL(Op); 7540b57cec5SDimitry Andric return DAG.getNode( 7550b57cec5SDimitry Andric ISD::SETCC, 7560b57cec5SDimitry Andric DL, 7570b57cec5SDimitry Andric MVT::i1, 7580b57cec5SDimitry Andric Op, DAG.getConstantFP(-1.0f, DL, MVT::f32), 7590b57cec5SDimitry Andric DAG.getCondCode(ISD::SETEQ)); 7600b57cec5SDimitry Andric } 7610b57cec5SDimitry Andric 7620b57cec5SDimitry Andric SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT, 7630b57cec5SDimitry Andric const SDLoc &DL, 7640b57cec5SDimitry Andric unsigned DwordOffset) const { 7650b57cec5SDimitry Andric unsigned ByteOffset = DwordOffset * 4; 7660b57cec5SDimitry Andric PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()), 7670b57cec5SDimitry Andric AMDGPUAS::PARAM_I_ADDRESS); 7680b57cec5SDimitry Andric 7690b57cec5SDimitry Andric // We shouldn't be using an offset wider than 16-bits for implicit parameters. 7700b57cec5SDimitry Andric assert(isInt<16>(ByteOffset)); 7710b57cec5SDimitry Andric 7720b57cec5SDimitry Andric return DAG.getLoad(VT, DL, DAG.getEntryNode(), 7730b57cec5SDimitry Andric DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR 7740b57cec5SDimitry Andric MachinePointerInfo(ConstantPointerNull::get(PtrType))); 7750b57cec5SDimitry Andric } 7760b57cec5SDimitry Andric 7770b57cec5SDimitry Andric bool R600TargetLowering::isZero(SDValue Op) const { 778*0fca6ea1SDimitry Andric if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) 779349cc55cSDimitry Andric return Cst->isZero(); 780*0fca6ea1SDimitry Andric if (ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)) 7810b57cec5SDimitry Andric return CstFP->isZero(); 7820b57cec5SDimitry Andric return false; 7830b57cec5SDimitry Andric } 7840b57cec5SDimitry Andric 7850b57cec5SDimitry Andric bool R600TargetLowering::isHWTrueValue(SDValue Op) const { 7860b57cec5SDimitry Andric if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) { 7870b57cec5SDimitry Andric return CFP->isExactlyValue(1.0); 7880b57cec5SDimitry Andric } 7890b57cec5SDimitry Andric return isAllOnesConstant(Op); 7900b57cec5SDimitry Andric } 7910b57cec5SDimitry Andric 7920b57cec5SDimitry Andric bool R600TargetLowering::isHWFalseValue(SDValue Op) const { 7930b57cec5SDimitry Andric if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) { 7940b57cec5SDimitry Andric return CFP->getValueAPF().isZero(); 7950b57cec5SDimitry Andric } 7960b57cec5SDimitry Andric return isNullConstant(Op); 7970b57cec5SDimitry Andric } 7980b57cec5SDimitry Andric 7990b57cec5SDimitry Andric SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { 8000b57cec5SDimitry Andric SDLoc DL(Op); 8010b57cec5SDimitry Andric EVT VT = Op.getValueType(); 8020b57cec5SDimitry Andric 8030b57cec5SDimitry Andric SDValue LHS = Op.getOperand(0); 8040b57cec5SDimitry Andric SDValue RHS = Op.getOperand(1); 8050b57cec5SDimitry Andric SDValue True = Op.getOperand(2); 8060b57cec5SDimitry Andric SDValue False = Op.getOperand(3); 8070b57cec5SDimitry Andric SDValue CC = Op.getOperand(4); 8080b57cec5SDimitry Andric SDValue Temp; 8090b57cec5SDimitry Andric 8100b57cec5SDimitry Andric if (VT == MVT::f32) { 8110b57cec5SDimitry Andric DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr); 8120b57cec5SDimitry Andric SDValue MinMax = combineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI); 8130b57cec5SDimitry Andric if (MinMax) 8140b57cec5SDimitry Andric return MinMax; 8150b57cec5SDimitry Andric } 8160b57cec5SDimitry Andric 8170b57cec5SDimitry Andric // LHS and RHS are guaranteed to be the same value type 8180b57cec5SDimitry Andric EVT CompareVT = LHS.getValueType(); 8190b57cec5SDimitry Andric 8200b57cec5SDimitry Andric // Check if we can lower this to a native operation. 8210b57cec5SDimitry Andric 8220b57cec5SDimitry Andric // Try to lower to a SET* instruction: 8230b57cec5SDimitry Andric // 8240b57cec5SDimitry Andric // SET* can match the following patterns: 8250b57cec5SDimitry Andric // 8260b57cec5SDimitry Andric // select_cc f32, f32, -1, 0, cc_supported 8270b57cec5SDimitry Andric // select_cc f32, f32, 1.0f, 0.0f, cc_supported 8280b57cec5SDimitry Andric // select_cc i32, i32, -1, 0, cc_supported 8290b57cec5SDimitry Andric // 8300b57cec5SDimitry Andric 8310b57cec5SDimitry Andric // Move hardware True/False values to the correct operand. 8320b57cec5SDimitry Andric if (isHWTrueValue(False) && isHWFalseValue(True)) { 833480093f4SDimitry Andric ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get(); 834480093f4SDimitry Andric ISD::CondCode InverseCC = ISD::getSetCCInverse(CCOpcode, CompareVT); 8350b57cec5SDimitry Andric if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) { 8360b57cec5SDimitry Andric std::swap(False, True); 8370b57cec5SDimitry Andric CC = DAG.getCondCode(InverseCC); 8380b57cec5SDimitry Andric } else { 8390b57cec5SDimitry Andric ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC); 8400b57cec5SDimitry Andric if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) { 8410b57cec5SDimitry Andric std::swap(False, True); 8420b57cec5SDimitry Andric std::swap(LHS, RHS); 8430b57cec5SDimitry Andric CC = DAG.getCondCode(SwapInvCC); 8440b57cec5SDimitry Andric } 8450b57cec5SDimitry Andric } 8460b57cec5SDimitry Andric } 8470b57cec5SDimitry Andric 8480b57cec5SDimitry Andric if (isHWTrueValue(True) && isHWFalseValue(False) && 8490b57cec5SDimitry Andric (CompareVT == VT || VT == MVT::i32)) { 8500b57cec5SDimitry Andric // This can be matched by a SET* instruction. 8510b57cec5SDimitry Andric return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC); 8520b57cec5SDimitry Andric } 8530b57cec5SDimitry Andric 8540b57cec5SDimitry Andric // Try to lower to a CND* instruction: 8550b57cec5SDimitry Andric // 8560b57cec5SDimitry Andric // CND* can match the following patterns: 8570b57cec5SDimitry Andric // 8580b57cec5SDimitry Andric // select_cc f32, 0.0, f32, f32, cc_supported 8590b57cec5SDimitry Andric // select_cc f32, 0.0, i32, i32, cc_supported 8600b57cec5SDimitry Andric // select_cc i32, 0, f32, f32, cc_supported 8610b57cec5SDimitry Andric // select_cc i32, 0, i32, i32, cc_supported 8620b57cec5SDimitry Andric // 8630b57cec5SDimitry Andric 8640b57cec5SDimitry Andric // Try to move the zero value to the RHS 8650b57cec5SDimitry Andric if (isZero(LHS)) { 8660b57cec5SDimitry Andric ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get(); 8670b57cec5SDimitry Andric // Try swapping the operands 8680b57cec5SDimitry Andric ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode); 8690b57cec5SDimitry Andric if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) { 8700b57cec5SDimitry Andric std::swap(LHS, RHS); 8710b57cec5SDimitry Andric CC = DAG.getCondCode(CCSwapped); 8720b57cec5SDimitry Andric } else { 873349cc55cSDimitry Andric // Try inverting the condition and then swapping the operands 874480093f4SDimitry Andric ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT); 8750b57cec5SDimitry Andric CCSwapped = ISD::getSetCCSwappedOperands(CCInv); 8760b57cec5SDimitry Andric if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) { 8770b57cec5SDimitry Andric std::swap(True, False); 8780b57cec5SDimitry Andric std::swap(LHS, RHS); 8790b57cec5SDimitry Andric CC = DAG.getCondCode(CCSwapped); 8800b57cec5SDimitry Andric } 8810b57cec5SDimitry Andric } 8820b57cec5SDimitry Andric } 8830b57cec5SDimitry Andric if (isZero(RHS)) { 8840b57cec5SDimitry Andric SDValue Cond = LHS; 8850b57cec5SDimitry Andric SDValue Zero = RHS; 8860b57cec5SDimitry Andric ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get(); 8870b57cec5SDimitry Andric if (CompareVT != VT) { 8880b57cec5SDimitry Andric // Bitcast True / False to the correct types. This will end up being 8890b57cec5SDimitry Andric // a nop, but it allows us to define only a single pattern in the 8900b57cec5SDimitry Andric // .TD files for each CND* instruction rather than having to have 8910b57cec5SDimitry Andric // one pattern for integer True/False and one for fp True/False 8920b57cec5SDimitry Andric True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True); 8930b57cec5SDimitry Andric False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False); 8940b57cec5SDimitry Andric } 8950b57cec5SDimitry Andric 8960b57cec5SDimitry Andric switch (CCOpcode) { 8970b57cec5SDimitry Andric case ISD::SETONE: 8980b57cec5SDimitry Andric case ISD::SETUNE: 8990b57cec5SDimitry Andric case ISD::SETNE: 900480093f4SDimitry Andric CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT); 9010b57cec5SDimitry Andric Temp = True; 9020b57cec5SDimitry Andric True = False; 9030b57cec5SDimitry Andric False = Temp; 9040b57cec5SDimitry Andric break; 9050b57cec5SDimitry Andric default: 9060b57cec5SDimitry Andric break; 9070b57cec5SDimitry Andric } 9080b57cec5SDimitry Andric SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, 9090b57cec5SDimitry Andric Cond, Zero, 9100b57cec5SDimitry Andric True, False, 9110b57cec5SDimitry Andric DAG.getCondCode(CCOpcode)); 9120b57cec5SDimitry Andric return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode); 9130b57cec5SDimitry Andric } 9140b57cec5SDimitry Andric 9150b57cec5SDimitry Andric // If we make it this for it means we have no native instructions to handle 9160b57cec5SDimitry Andric // this SELECT_CC, so we must lower it. 9170b57cec5SDimitry Andric SDValue HWTrue, HWFalse; 9180b57cec5SDimitry Andric 9190b57cec5SDimitry Andric if (CompareVT == MVT::f32) { 9200b57cec5SDimitry Andric HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT); 9210b57cec5SDimitry Andric HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT); 9220b57cec5SDimitry Andric } else if (CompareVT == MVT::i32) { 9230b57cec5SDimitry Andric HWTrue = DAG.getConstant(-1, DL, CompareVT); 9240b57cec5SDimitry Andric HWFalse = DAG.getConstant(0, DL, CompareVT); 9250b57cec5SDimitry Andric } 9260b57cec5SDimitry Andric else { 9270b57cec5SDimitry Andric llvm_unreachable("Unhandled value type in LowerSELECT_CC"); 9280b57cec5SDimitry Andric } 9290b57cec5SDimitry Andric 9300b57cec5SDimitry Andric // Lower this unsupported SELECT_CC into a combination of two supported 9310b57cec5SDimitry Andric // SELECT_CC operations. 9320b57cec5SDimitry Andric SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC); 9330b57cec5SDimitry Andric 9340b57cec5SDimitry Andric return DAG.getNode(ISD::SELECT_CC, DL, VT, 9350b57cec5SDimitry Andric Cond, HWFalse, 9360b57cec5SDimitry Andric True, False, 9370b57cec5SDimitry Andric DAG.getCondCode(ISD::SETNE)); 9380b57cec5SDimitry Andric } 9390b57cec5SDimitry Andric 940bdd1243dSDimitry Andric SDValue R600TargetLowering::lowerADDRSPACECAST(SDValue Op, 941bdd1243dSDimitry Andric SelectionDAG &DAG) const { 942bdd1243dSDimitry Andric SDLoc SL(Op); 943bdd1243dSDimitry Andric EVT VT = Op.getValueType(); 944bdd1243dSDimitry Andric 945bdd1243dSDimitry Andric const R600TargetMachine &TM = 946bdd1243dSDimitry Andric static_cast<const R600TargetMachine &>(getTargetMachine()); 947bdd1243dSDimitry Andric 948bdd1243dSDimitry Andric const AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(Op); 949bdd1243dSDimitry Andric unsigned SrcAS = ASC->getSrcAddressSpace(); 950bdd1243dSDimitry Andric unsigned DestAS = ASC->getDestAddressSpace(); 951bdd1243dSDimitry Andric 95206c3fb27SDimitry Andric if (isNullConstant(Op.getOperand(0)) && SrcAS == AMDGPUAS::FLAT_ADDRESS) 953bdd1243dSDimitry Andric return DAG.getConstant(TM.getNullPointerValue(DestAS), SL, VT); 954bdd1243dSDimitry Andric 955bdd1243dSDimitry Andric return Op; 956bdd1243dSDimitry Andric } 957bdd1243dSDimitry Andric 9580b57cec5SDimitry Andric /// LLVM generates byte-addressed pointers. For indirect addressing, we need to 9590b57cec5SDimitry Andric /// convert these pointers to a register index. Each register holds 9600b57cec5SDimitry Andric /// 16 bytes, (4 x 32bit sub-register), but we need to take into account the 96181ad6265SDimitry Andric /// \p StackWidth, which tells us how many of the 4 sub-registers will be used 9620b57cec5SDimitry Andric /// for indirect addressing. 9630b57cec5SDimitry Andric SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr, 9640b57cec5SDimitry Andric unsigned StackWidth, 9650b57cec5SDimitry Andric SelectionDAG &DAG) const { 9660b57cec5SDimitry Andric unsigned SRLPad; 9670b57cec5SDimitry Andric switch(StackWidth) { 9680b57cec5SDimitry Andric case 1: 9690b57cec5SDimitry Andric SRLPad = 2; 9700b57cec5SDimitry Andric break; 9710b57cec5SDimitry Andric case 2: 9720b57cec5SDimitry Andric SRLPad = 3; 9730b57cec5SDimitry Andric break; 9740b57cec5SDimitry Andric case 4: 9750b57cec5SDimitry Andric SRLPad = 4; 9760b57cec5SDimitry Andric break; 9770b57cec5SDimitry Andric default: llvm_unreachable("Invalid stack width"); 9780b57cec5SDimitry Andric } 9790b57cec5SDimitry Andric 9800b57cec5SDimitry Andric SDLoc DL(Ptr); 9810b57cec5SDimitry Andric return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr, 9820b57cec5SDimitry Andric DAG.getConstant(SRLPad, DL, MVT::i32)); 9830b57cec5SDimitry Andric } 9840b57cec5SDimitry Andric 9850b57cec5SDimitry Andric void R600TargetLowering::getStackAddress(unsigned StackWidth, 9860b57cec5SDimitry Andric unsigned ElemIdx, 9870b57cec5SDimitry Andric unsigned &Channel, 9880b57cec5SDimitry Andric unsigned &PtrIncr) const { 9890b57cec5SDimitry Andric switch (StackWidth) { 9900b57cec5SDimitry Andric default: 9910b57cec5SDimitry Andric case 1: 9920b57cec5SDimitry Andric Channel = 0; 9930b57cec5SDimitry Andric if (ElemIdx > 0) { 9940b57cec5SDimitry Andric PtrIncr = 1; 9950b57cec5SDimitry Andric } else { 9960b57cec5SDimitry Andric PtrIncr = 0; 9970b57cec5SDimitry Andric } 9980b57cec5SDimitry Andric break; 9990b57cec5SDimitry Andric case 2: 10000b57cec5SDimitry Andric Channel = ElemIdx % 2; 10010b57cec5SDimitry Andric if (ElemIdx == 2) { 10020b57cec5SDimitry Andric PtrIncr = 1; 10030b57cec5SDimitry Andric } else { 10040b57cec5SDimitry Andric PtrIncr = 0; 10050b57cec5SDimitry Andric } 10060b57cec5SDimitry Andric break; 10070b57cec5SDimitry Andric case 4: 10080b57cec5SDimitry Andric Channel = ElemIdx; 10090b57cec5SDimitry Andric PtrIncr = 0; 10100b57cec5SDimitry Andric break; 10110b57cec5SDimitry Andric } 10120b57cec5SDimitry Andric } 10130b57cec5SDimitry Andric 10140b57cec5SDimitry Andric SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store, 10150b57cec5SDimitry Andric SelectionDAG &DAG) const { 10160b57cec5SDimitry Andric SDLoc DL(Store); 10170b57cec5SDimitry Andric //TODO: Who creates the i8 stores? 10180b57cec5SDimitry Andric assert(Store->isTruncatingStore() 10190b57cec5SDimitry Andric || Store->getValue().getValueType() == MVT::i8); 10200b57cec5SDimitry Andric assert(Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS); 10210b57cec5SDimitry Andric 10220b57cec5SDimitry Andric SDValue Mask; 10230b57cec5SDimitry Andric if (Store->getMemoryVT() == MVT::i8) { 1024bdd1243dSDimitry Andric assert(Store->getAlign() >= 1); 10250b57cec5SDimitry Andric Mask = DAG.getConstant(0xff, DL, MVT::i32); 10260b57cec5SDimitry Andric } else if (Store->getMemoryVT() == MVT::i16) { 1027bdd1243dSDimitry Andric assert(Store->getAlign() >= 2); 10280b57cec5SDimitry Andric Mask = DAG.getConstant(0xffff, DL, MVT::i32); 10290b57cec5SDimitry Andric } else { 10300b57cec5SDimitry Andric llvm_unreachable("Unsupported private trunc store"); 10310b57cec5SDimitry Andric } 10320b57cec5SDimitry Andric 10330b57cec5SDimitry Andric SDValue OldChain = Store->getChain(); 10340b57cec5SDimitry Andric bool VectorTrunc = (OldChain.getOpcode() == AMDGPUISD::DUMMY_CHAIN); 10350b57cec5SDimitry Andric // Skip dummy 10360b57cec5SDimitry Andric SDValue Chain = VectorTrunc ? OldChain->getOperand(0) : OldChain; 10370b57cec5SDimitry Andric SDValue BasePtr = Store->getBasePtr(); 10380b57cec5SDimitry Andric SDValue Offset = Store->getOffset(); 10390b57cec5SDimitry Andric EVT MemVT = Store->getMemoryVT(); 10400b57cec5SDimitry Andric 10410b57cec5SDimitry Andric SDValue LoadPtr = BasePtr; 10420b57cec5SDimitry Andric if (!Offset.isUndef()) { 10430b57cec5SDimitry Andric LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset); 10440b57cec5SDimitry Andric } 10450b57cec5SDimitry Andric 10460b57cec5SDimitry Andric // Get dword location 10470b57cec5SDimitry Andric // TODO: this should be eliminated by the future SHR ptr, 2 10480b57cec5SDimitry Andric SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr, 10490b57cec5SDimitry Andric DAG.getConstant(0xfffffffc, DL, MVT::i32)); 10500b57cec5SDimitry Andric 10510b57cec5SDimitry Andric // Load dword 10520b57cec5SDimitry Andric // TODO: can we be smarter about machine pointer info? 1053480093f4SDimitry Andric MachinePointerInfo PtrInfo(AMDGPUAS::PRIVATE_ADDRESS); 10540b57cec5SDimitry Andric SDValue Dst = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo); 10550b57cec5SDimitry Andric 10560b57cec5SDimitry Andric Chain = Dst.getValue(1); 10570b57cec5SDimitry Andric 10580b57cec5SDimitry Andric // Get offset in dword 10590b57cec5SDimitry Andric SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr, 10600b57cec5SDimitry Andric DAG.getConstant(0x3, DL, MVT::i32)); 10610b57cec5SDimitry Andric 10620b57cec5SDimitry Andric // Convert byte offset to bit shift 10630b57cec5SDimitry Andric SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx, 10640b57cec5SDimitry Andric DAG.getConstant(3, DL, MVT::i32)); 10650b57cec5SDimitry Andric 106681ad6265SDimitry Andric // TODO: Contrary to the name of the function, 10670b57cec5SDimitry Andric // it also handles sub i32 non-truncating stores (like i1) 10680b57cec5SDimitry Andric SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32, 10690b57cec5SDimitry Andric Store->getValue()); 10700b57cec5SDimitry Andric 10710b57cec5SDimitry Andric // Mask the value to the right type 10720b57cec5SDimitry Andric SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT); 10730b57cec5SDimitry Andric 10740b57cec5SDimitry Andric // Shift the value in place 10750b57cec5SDimitry Andric SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32, 10760b57cec5SDimitry Andric MaskedValue, ShiftAmt); 10770b57cec5SDimitry Andric 10780b57cec5SDimitry Andric // Shift the mask in place 10790b57cec5SDimitry Andric SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32, Mask, ShiftAmt); 10800b57cec5SDimitry Andric 10810b57cec5SDimitry Andric // Invert the mask. NOTE: if we had native ROL instructions we could 10820b57cec5SDimitry Andric // use inverted mask 10830b57cec5SDimitry Andric DstMask = DAG.getNOT(DL, DstMask, MVT::i32); 10840b57cec5SDimitry Andric 10850b57cec5SDimitry Andric // Cleanup the target bits 10860b57cec5SDimitry Andric Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask); 10870b57cec5SDimitry Andric 10880b57cec5SDimitry Andric // Add the new bits 10890b57cec5SDimitry Andric SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue); 10900b57cec5SDimitry Andric 10910b57cec5SDimitry Andric // Store dword 10920b57cec5SDimitry Andric // TODO: Can we be smarter about MachinePointerInfo? 10930b57cec5SDimitry Andric SDValue NewStore = DAG.getStore(Chain, DL, Value, Ptr, PtrInfo); 10940b57cec5SDimitry Andric 10950b57cec5SDimitry Andric // If we are part of expanded vector, make our neighbors depend on this store 10960b57cec5SDimitry Andric if (VectorTrunc) { 10970b57cec5SDimitry Andric // Make all other vector elements depend on this store 10980b57cec5SDimitry Andric Chain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, NewStore); 10990b57cec5SDimitry Andric DAG.ReplaceAllUsesOfValueWith(OldChain, Chain); 11000b57cec5SDimitry Andric } 11010b57cec5SDimitry Andric return NewStore; 11020b57cec5SDimitry Andric } 11030b57cec5SDimitry Andric 11040b57cec5SDimitry Andric SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { 11050b57cec5SDimitry Andric StoreSDNode *StoreNode = cast<StoreSDNode>(Op); 11060b57cec5SDimitry Andric unsigned AS = StoreNode->getAddressSpace(); 11070b57cec5SDimitry Andric 11080b57cec5SDimitry Andric SDValue Chain = StoreNode->getChain(); 11090b57cec5SDimitry Andric SDValue Ptr = StoreNode->getBasePtr(); 11100b57cec5SDimitry Andric SDValue Value = StoreNode->getValue(); 11110b57cec5SDimitry Andric 11120b57cec5SDimitry Andric EVT VT = Value.getValueType(); 11130b57cec5SDimitry Andric EVT MemVT = StoreNode->getMemoryVT(); 11140b57cec5SDimitry Andric EVT PtrVT = Ptr.getValueType(); 11150b57cec5SDimitry Andric 11160b57cec5SDimitry Andric SDLoc DL(Op); 11170b57cec5SDimitry Andric 11180b57cec5SDimitry Andric const bool TruncatingStore = StoreNode->isTruncatingStore(); 11190b57cec5SDimitry Andric 11200b57cec5SDimitry Andric // Neither LOCAL nor PRIVATE can do vectors at the moment 11210b57cec5SDimitry Andric if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS || 11220b57cec5SDimitry Andric TruncatingStore) && 11230b57cec5SDimitry Andric VT.isVector()) { 11240b57cec5SDimitry Andric if ((AS == AMDGPUAS::PRIVATE_ADDRESS) && TruncatingStore) { 11250b57cec5SDimitry Andric // Add an extra level of chain to isolate this vector 11260b57cec5SDimitry Andric SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain); 11270b57cec5SDimitry Andric // TODO: can the chain be replaced without creating a new store? 11280b57cec5SDimitry Andric SDValue NewStore = DAG.getTruncStore( 112981ad6265SDimitry Andric NewChain, DL, Value, Ptr, StoreNode->getPointerInfo(), MemVT, 113081ad6265SDimitry Andric StoreNode->getAlign(), StoreNode->getMemOperand()->getFlags(), 113181ad6265SDimitry Andric StoreNode->getAAInfo()); 11320b57cec5SDimitry Andric StoreNode = cast<StoreSDNode>(NewStore); 11330b57cec5SDimitry Andric } 11340b57cec5SDimitry Andric 11350b57cec5SDimitry Andric return scalarizeVectorStore(StoreNode, DAG); 11360b57cec5SDimitry Andric } 11370b57cec5SDimitry Andric 11385ffd83dbSDimitry Andric Align Alignment = StoreNode->getAlign(); 11395ffd83dbSDimitry Andric if (Alignment < MemVT.getStoreSize() && 1140fe6060f1SDimitry Andric !allowsMisalignedMemoryAccesses(MemVT, AS, Alignment, 11415ffd83dbSDimitry Andric StoreNode->getMemOperand()->getFlags(), 11425ffd83dbSDimitry Andric nullptr)) { 11430b57cec5SDimitry Andric return expandUnalignedStore(StoreNode, DAG); 11440b57cec5SDimitry Andric } 11450b57cec5SDimitry Andric 11460b57cec5SDimitry Andric SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, PtrVT, Ptr, 11470b57cec5SDimitry Andric DAG.getConstant(2, DL, PtrVT)); 11480b57cec5SDimitry Andric 11490b57cec5SDimitry Andric if (AS == AMDGPUAS::GLOBAL_ADDRESS) { 11500b57cec5SDimitry Andric // It is beneficial to create MSKOR here instead of combiner to avoid 11510b57cec5SDimitry Andric // artificial dependencies introduced by RMW 11520b57cec5SDimitry Andric if (TruncatingStore) { 11530b57cec5SDimitry Andric assert(VT.bitsLE(MVT::i32)); 11540b57cec5SDimitry Andric SDValue MaskConstant; 11550b57cec5SDimitry Andric if (MemVT == MVT::i8) { 11560b57cec5SDimitry Andric MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32); 11570b57cec5SDimitry Andric } else { 11580b57cec5SDimitry Andric assert(MemVT == MVT::i16); 1159bdd1243dSDimitry Andric assert(StoreNode->getAlign() >= 2); 11600b57cec5SDimitry Andric MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32); 11610b57cec5SDimitry Andric } 11620b57cec5SDimitry Andric 11630b57cec5SDimitry Andric SDValue ByteIndex = DAG.getNode(ISD::AND, DL, PtrVT, Ptr, 11640b57cec5SDimitry Andric DAG.getConstant(0x00000003, DL, PtrVT)); 11650b57cec5SDimitry Andric SDValue BitShift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex, 11660b57cec5SDimitry Andric DAG.getConstant(3, DL, VT)); 11670b57cec5SDimitry Andric 11680b57cec5SDimitry Andric // Put the mask in correct place 11690b57cec5SDimitry Andric SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, BitShift); 11700b57cec5SDimitry Andric 11710b57cec5SDimitry Andric // Put the value bits in correct place 11720b57cec5SDimitry Andric SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant); 11730b57cec5SDimitry Andric SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, BitShift); 11740b57cec5SDimitry Andric 11750b57cec5SDimitry Andric // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32 11760b57cec5SDimitry Andric // vector instead. 11770b57cec5SDimitry Andric SDValue Src[4] = { 11780b57cec5SDimitry Andric ShiftedValue, 11790b57cec5SDimitry Andric DAG.getConstant(0, DL, MVT::i32), 11800b57cec5SDimitry Andric DAG.getConstant(0, DL, MVT::i32), 11810b57cec5SDimitry Andric Mask 11820b57cec5SDimitry Andric }; 11830b57cec5SDimitry Andric SDValue Input = DAG.getBuildVector(MVT::v4i32, DL, Src); 11840b57cec5SDimitry Andric SDValue Args[3] = { Chain, Input, DWordAddr }; 11850b57cec5SDimitry Andric return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL, 11860b57cec5SDimitry Andric Op->getVTList(), Args, MemVT, 11870b57cec5SDimitry Andric StoreNode->getMemOperand()); 1188*0fca6ea1SDimitry Andric } 1189*0fca6ea1SDimitry Andric if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR && VT.bitsGE(MVT::i32)) { 11900b57cec5SDimitry Andric // Convert pointer from byte address to dword address. 11910b57cec5SDimitry Andric Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr); 11920b57cec5SDimitry Andric 11930b57cec5SDimitry Andric if (StoreNode->isIndexed()) { 11940b57cec5SDimitry Andric llvm_unreachable("Indexed stores not supported yet"); 11950b57cec5SDimitry Andric } else { 11960b57cec5SDimitry Andric Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand()); 11970b57cec5SDimitry Andric } 11980b57cec5SDimitry Andric return Chain; 11990b57cec5SDimitry Andric } 12000b57cec5SDimitry Andric } 12010b57cec5SDimitry Andric 12020b57cec5SDimitry Andric // GLOBAL_ADDRESS has been handled above, LOCAL_ADDRESS allows all sizes 12030b57cec5SDimitry Andric if (AS != AMDGPUAS::PRIVATE_ADDRESS) 12040b57cec5SDimitry Andric return SDValue(); 12050b57cec5SDimitry Andric 12060b57cec5SDimitry Andric if (MemVT.bitsLT(MVT::i32)) 12070b57cec5SDimitry Andric return lowerPrivateTruncStore(StoreNode, DAG); 12080b57cec5SDimitry Andric 12090b57cec5SDimitry Andric // Standard i32+ store, tag it with DWORDADDR to note that the address 12100b57cec5SDimitry Andric // has been shifted 12110b57cec5SDimitry Andric if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) { 12120b57cec5SDimitry Andric Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr); 12130b57cec5SDimitry Andric return DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand()); 12140b57cec5SDimitry Andric } 12150b57cec5SDimitry Andric 12160b57cec5SDimitry Andric // Tagged i32+ stores will be matched by patterns 12170b57cec5SDimitry Andric return SDValue(); 12180b57cec5SDimitry Andric } 12190b57cec5SDimitry Andric 12200b57cec5SDimitry Andric // return (512 + (kc_bank << 12) 12210b57cec5SDimitry Andric static int 12220b57cec5SDimitry Andric ConstantAddressBlock(unsigned AddressSpace) { 12230b57cec5SDimitry Andric switch (AddressSpace) { 12240b57cec5SDimitry Andric case AMDGPUAS::CONSTANT_BUFFER_0: 12250b57cec5SDimitry Andric return 512; 12260b57cec5SDimitry Andric case AMDGPUAS::CONSTANT_BUFFER_1: 12270b57cec5SDimitry Andric return 512 + 4096; 12280b57cec5SDimitry Andric case AMDGPUAS::CONSTANT_BUFFER_2: 12290b57cec5SDimitry Andric return 512 + 4096 * 2; 12300b57cec5SDimitry Andric case AMDGPUAS::CONSTANT_BUFFER_3: 12310b57cec5SDimitry Andric return 512 + 4096 * 3; 12320b57cec5SDimitry Andric case AMDGPUAS::CONSTANT_BUFFER_4: 12330b57cec5SDimitry Andric return 512 + 4096 * 4; 12340b57cec5SDimitry Andric case AMDGPUAS::CONSTANT_BUFFER_5: 12350b57cec5SDimitry Andric return 512 + 4096 * 5; 12360b57cec5SDimitry Andric case AMDGPUAS::CONSTANT_BUFFER_6: 12370b57cec5SDimitry Andric return 512 + 4096 * 6; 12380b57cec5SDimitry Andric case AMDGPUAS::CONSTANT_BUFFER_7: 12390b57cec5SDimitry Andric return 512 + 4096 * 7; 12400b57cec5SDimitry Andric case AMDGPUAS::CONSTANT_BUFFER_8: 12410b57cec5SDimitry Andric return 512 + 4096 * 8; 12420b57cec5SDimitry Andric case AMDGPUAS::CONSTANT_BUFFER_9: 12430b57cec5SDimitry Andric return 512 + 4096 * 9; 12440b57cec5SDimitry Andric case AMDGPUAS::CONSTANT_BUFFER_10: 12450b57cec5SDimitry Andric return 512 + 4096 * 10; 12460b57cec5SDimitry Andric case AMDGPUAS::CONSTANT_BUFFER_11: 12470b57cec5SDimitry Andric return 512 + 4096 * 11; 12480b57cec5SDimitry Andric case AMDGPUAS::CONSTANT_BUFFER_12: 12490b57cec5SDimitry Andric return 512 + 4096 * 12; 12500b57cec5SDimitry Andric case AMDGPUAS::CONSTANT_BUFFER_13: 12510b57cec5SDimitry Andric return 512 + 4096 * 13; 12520b57cec5SDimitry Andric case AMDGPUAS::CONSTANT_BUFFER_14: 12530b57cec5SDimitry Andric return 512 + 4096 * 14; 12540b57cec5SDimitry Andric case AMDGPUAS::CONSTANT_BUFFER_15: 12550b57cec5SDimitry Andric return 512 + 4096 * 15; 12560b57cec5SDimitry Andric default: 12570b57cec5SDimitry Andric return -1; 12580b57cec5SDimitry Andric } 12590b57cec5SDimitry Andric } 12600b57cec5SDimitry Andric 12610b57cec5SDimitry Andric SDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op, 12620b57cec5SDimitry Andric SelectionDAG &DAG) const { 12630b57cec5SDimitry Andric SDLoc DL(Op); 12640b57cec5SDimitry Andric LoadSDNode *Load = cast<LoadSDNode>(Op); 12650b57cec5SDimitry Andric ISD::LoadExtType ExtType = Load->getExtensionType(); 12660b57cec5SDimitry Andric EVT MemVT = Load->getMemoryVT(); 1267bdd1243dSDimitry Andric assert(Load->getAlign() >= MemVT.getStoreSize()); 12680b57cec5SDimitry Andric 12690b57cec5SDimitry Andric SDValue BasePtr = Load->getBasePtr(); 12700b57cec5SDimitry Andric SDValue Chain = Load->getChain(); 12710b57cec5SDimitry Andric SDValue Offset = Load->getOffset(); 12720b57cec5SDimitry Andric 12730b57cec5SDimitry Andric SDValue LoadPtr = BasePtr; 12740b57cec5SDimitry Andric if (!Offset.isUndef()) { 12750b57cec5SDimitry Andric LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset); 12760b57cec5SDimitry Andric } 12770b57cec5SDimitry Andric 12780b57cec5SDimitry Andric // Get dword location 12790b57cec5SDimitry Andric // NOTE: this should be eliminated by the future SHR ptr, 2 12800b57cec5SDimitry Andric SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr, 12810b57cec5SDimitry Andric DAG.getConstant(0xfffffffc, DL, MVT::i32)); 12820b57cec5SDimitry Andric 12830b57cec5SDimitry Andric // Load dword 12840b57cec5SDimitry Andric // TODO: can we be smarter about machine pointer info? 1285480093f4SDimitry Andric MachinePointerInfo PtrInfo(AMDGPUAS::PRIVATE_ADDRESS); 12860b57cec5SDimitry Andric SDValue Read = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo); 12870b57cec5SDimitry Andric 12880b57cec5SDimitry Andric // Get offset within the register. 12890b57cec5SDimitry Andric SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, 12900b57cec5SDimitry Andric LoadPtr, DAG.getConstant(0x3, DL, MVT::i32)); 12910b57cec5SDimitry Andric 12920b57cec5SDimitry Andric // Bit offset of target byte (byteIdx * 8). 12930b57cec5SDimitry Andric SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx, 12940b57cec5SDimitry Andric DAG.getConstant(3, DL, MVT::i32)); 12950b57cec5SDimitry Andric 12960b57cec5SDimitry Andric // Shift to the right. 12970b57cec5SDimitry Andric SDValue Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Read, ShiftAmt); 12980b57cec5SDimitry Andric 12990b57cec5SDimitry Andric // Eliminate the upper bits by setting them to ... 13000b57cec5SDimitry Andric EVT MemEltVT = MemVT.getScalarType(); 13010b57cec5SDimitry Andric 13020b57cec5SDimitry Andric if (ExtType == ISD::SEXTLOAD) { // ... ones. 13030b57cec5SDimitry Andric SDValue MemEltVTNode = DAG.getValueType(MemEltVT); 13040b57cec5SDimitry Andric Ret = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode); 13050b57cec5SDimitry Andric } else { // ... or zeros. 13060b57cec5SDimitry Andric Ret = DAG.getZeroExtendInReg(Ret, DL, MemEltVT); 13070b57cec5SDimitry Andric } 13080b57cec5SDimitry Andric 13090b57cec5SDimitry Andric SDValue Ops[] = { 13100b57cec5SDimitry Andric Ret, 13110b57cec5SDimitry Andric Read.getValue(1) // This should be our output chain 13120b57cec5SDimitry Andric }; 13130b57cec5SDimitry Andric 13140b57cec5SDimitry Andric return DAG.getMergeValues(Ops, DL); 13150b57cec5SDimitry Andric } 13160b57cec5SDimitry Andric 13170b57cec5SDimitry Andric SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { 13180b57cec5SDimitry Andric LoadSDNode *LoadNode = cast<LoadSDNode>(Op); 13190b57cec5SDimitry Andric unsigned AS = LoadNode->getAddressSpace(); 13200b57cec5SDimitry Andric EVT MemVT = LoadNode->getMemoryVT(); 13210b57cec5SDimitry Andric ISD::LoadExtType ExtType = LoadNode->getExtensionType(); 13220b57cec5SDimitry Andric 13230b57cec5SDimitry Andric if (AS == AMDGPUAS::PRIVATE_ADDRESS && 13240b57cec5SDimitry Andric ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) { 13250b57cec5SDimitry Andric return lowerPrivateExtLoad(Op, DAG); 13260b57cec5SDimitry Andric } 13270b57cec5SDimitry Andric 13280b57cec5SDimitry Andric SDLoc DL(Op); 13290b57cec5SDimitry Andric EVT VT = Op.getValueType(); 13300b57cec5SDimitry Andric SDValue Chain = LoadNode->getChain(); 13310b57cec5SDimitry Andric SDValue Ptr = LoadNode->getBasePtr(); 13320b57cec5SDimitry Andric 13330b57cec5SDimitry Andric if ((LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS || 13340b57cec5SDimitry Andric LoadNode->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) && 13350b57cec5SDimitry Andric VT.isVector()) { 1336480093f4SDimitry Andric SDValue Ops[2]; 1337480093f4SDimitry Andric std::tie(Ops[0], Ops[1]) = scalarizeVectorLoad(LoadNode, DAG); 1338480093f4SDimitry Andric return DAG.getMergeValues(Ops, DL); 13390b57cec5SDimitry Andric } 13400b57cec5SDimitry Andric 13410b57cec5SDimitry Andric // This is still used for explicit load from addrspace(8) 13420b57cec5SDimitry Andric int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace()); 13430b57cec5SDimitry Andric if (ConstantBlock > -1 && 13440b57cec5SDimitry Andric ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) || 13450b57cec5SDimitry Andric (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) { 13460b57cec5SDimitry Andric SDValue Result; 13470b57cec5SDimitry Andric if (isa<Constant>(LoadNode->getMemOperand()->getValue()) || 13480b57cec5SDimitry Andric isa<ConstantSDNode>(Ptr)) { 13490b57cec5SDimitry Andric return constBufferLoad(LoadNode, LoadNode->getAddressSpace(), DAG); 1350*0fca6ea1SDimitry Andric } 13510b57cec5SDimitry Andric // TODO: Does this even work? 13520b57cec5SDimitry Andric // non-constant ptr can't be folded, keeps it as a v4f32 load 13530b57cec5SDimitry Andric Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32, 13540b57cec5SDimitry Andric DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, 13550b57cec5SDimitry Andric DAG.getConstant(4, DL, MVT::i32)), 13560b57cec5SDimitry Andric DAG.getConstant(LoadNode->getAddressSpace() - 1357*0fca6ea1SDimitry Andric AMDGPUAS::CONSTANT_BUFFER_0, 1358*0fca6ea1SDimitry Andric DL, MVT::i32)); 13590b57cec5SDimitry Andric 13600b57cec5SDimitry Andric if (!VT.isVector()) { 13610b57cec5SDimitry Andric Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result, 13620b57cec5SDimitry Andric DAG.getConstant(0, DL, MVT::i32)); 13630b57cec5SDimitry Andric } 13640b57cec5SDimitry Andric 13650b57cec5SDimitry Andric SDValue MergedValues[2] = { 13660b57cec5SDimitry Andric Result, 13670b57cec5SDimitry Andric Chain 13680b57cec5SDimitry Andric }; 13690b57cec5SDimitry Andric return DAG.getMergeValues(MergedValues, DL); 13700b57cec5SDimitry Andric } 13710b57cec5SDimitry Andric 13720b57cec5SDimitry Andric // For most operations returning SDValue() will result in the node being 13730b57cec5SDimitry Andric // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we 13740b57cec5SDimitry Andric // need to manually expand loads that may be legal in some address spaces and 13750b57cec5SDimitry Andric // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for 13760b57cec5SDimitry Andric // compute shaders, since the data is sign extended when it is uploaded to the 13770b57cec5SDimitry Andric // buffer. However SEXT loads from other address spaces are not supported, so 13780b57cec5SDimitry Andric // we need to expand them here. 13790b57cec5SDimitry Andric if (LoadNode->getExtensionType() == ISD::SEXTLOAD) { 13800b57cec5SDimitry Andric assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8)); 13810b57cec5SDimitry Andric SDValue NewLoad = DAG.getExtLoad( 13820b57cec5SDimitry Andric ISD::EXTLOAD, DL, VT, Chain, Ptr, LoadNode->getPointerInfo(), MemVT, 138381ad6265SDimitry Andric LoadNode->getAlign(), LoadNode->getMemOperand()->getFlags()); 13840b57cec5SDimitry Andric SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad, 13850b57cec5SDimitry Andric DAG.getValueType(MemVT)); 13860b57cec5SDimitry Andric 13870b57cec5SDimitry Andric SDValue MergedValues[2] = { Res, Chain }; 13880b57cec5SDimitry Andric return DAG.getMergeValues(MergedValues, DL); 13890b57cec5SDimitry Andric } 13900b57cec5SDimitry Andric 13910b57cec5SDimitry Andric if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) { 13920b57cec5SDimitry Andric return SDValue(); 13930b57cec5SDimitry Andric } 13940b57cec5SDimitry Andric 13950b57cec5SDimitry Andric // DWORDADDR ISD marks already shifted address 13960b57cec5SDimitry Andric if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) { 13970b57cec5SDimitry Andric assert(VT == MVT::i32); 13980b57cec5SDimitry Andric Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(2, DL, MVT::i32)); 13990b57cec5SDimitry Andric Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, MVT::i32, Ptr); 14000b57cec5SDimitry Andric return DAG.getLoad(MVT::i32, DL, Chain, Ptr, LoadNode->getMemOperand()); 14010b57cec5SDimitry Andric } 14020b57cec5SDimitry Andric return SDValue(); 14030b57cec5SDimitry Andric } 14040b57cec5SDimitry Andric 14050b57cec5SDimitry Andric SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { 14060b57cec5SDimitry Andric SDValue Chain = Op.getOperand(0); 14070b57cec5SDimitry Andric SDValue Cond = Op.getOperand(1); 14080b57cec5SDimitry Andric SDValue Jump = Op.getOperand(2); 14090b57cec5SDimitry Andric 14100b57cec5SDimitry Andric return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(), 14110b57cec5SDimitry Andric Chain, Jump, Cond); 14120b57cec5SDimitry Andric } 14130b57cec5SDimitry Andric 14140b57cec5SDimitry Andric SDValue R600TargetLowering::lowerFrameIndex(SDValue Op, 14150b57cec5SDimitry Andric SelectionDAG &DAG) const { 14160b57cec5SDimitry Andric MachineFunction &MF = DAG.getMachineFunction(); 14170b57cec5SDimitry Andric const R600FrameLowering *TFL = Subtarget->getFrameLowering(); 14180b57cec5SDimitry Andric 14190b57cec5SDimitry Andric FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op); 14200b57cec5SDimitry Andric 14210b57cec5SDimitry Andric unsigned FrameIndex = FIN->getIndex(); 14225ffd83dbSDimitry Andric Register IgnoredFrameReg; 1423e8d8bef9SDimitry Andric StackOffset Offset = 14240b57cec5SDimitry Andric TFL->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg); 1425e8d8bef9SDimitry Andric return DAG.getConstant(Offset.getFixed() * 4 * TFL->getStackWidth(MF), 1426e8d8bef9SDimitry Andric SDLoc(Op), Op.getValueType()); 14270b57cec5SDimitry Andric } 14280b57cec5SDimitry Andric 14290b57cec5SDimitry Andric CCAssignFn *R600TargetLowering::CCAssignFnForCall(CallingConv::ID CC, 14300b57cec5SDimitry Andric bool IsVarArg) const { 14310b57cec5SDimitry Andric switch (CC) { 14320b57cec5SDimitry Andric case CallingConv::AMDGPU_KERNEL: 14330b57cec5SDimitry Andric case CallingConv::SPIR_KERNEL: 14340b57cec5SDimitry Andric case CallingConv::C: 14350b57cec5SDimitry Andric case CallingConv::Fast: 14360b57cec5SDimitry Andric case CallingConv::Cold: 14370b57cec5SDimitry Andric llvm_unreachable("kernels should not be handled here"); 14380b57cec5SDimitry Andric case CallingConv::AMDGPU_VS: 14390b57cec5SDimitry Andric case CallingConv::AMDGPU_GS: 14400b57cec5SDimitry Andric case CallingConv::AMDGPU_PS: 14410b57cec5SDimitry Andric case CallingConv::AMDGPU_CS: 14420b57cec5SDimitry Andric case CallingConv::AMDGPU_HS: 14430b57cec5SDimitry Andric case CallingConv::AMDGPU_ES: 14440b57cec5SDimitry Andric case CallingConv::AMDGPU_LS: 14450b57cec5SDimitry Andric return CC_R600; 14460b57cec5SDimitry Andric default: 14470b57cec5SDimitry Andric report_fatal_error("Unsupported calling convention."); 14480b57cec5SDimitry Andric } 14490b57cec5SDimitry Andric } 14500b57cec5SDimitry Andric 14510b57cec5SDimitry Andric /// XXX Only kernel functions are supported, so we can assume for now that 14520b57cec5SDimitry Andric /// every function is a kernel function, but in the future we should use 14530b57cec5SDimitry Andric /// separate calling conventions for kernel and non-kernel functions. 14540b57cec5SDimitry Andric SDValue R600TargetLowering::LowerFormalArguments( 14550b57cec5SDimitry Andric SDValue Chain, CallingConv::ID CallConv, bool isVarArg, 14560b57cec5SDimitry Andric const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL, 14570b57cec5SDimitry Andric SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { 14580b57cec5SDimitry Andric SmallVector<CCValAssign, 16> ArgLocs; 14590b57cec5SDimitry Andric CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, 14600b57cec5SDimitry Andric *DAG.getContext()); 14610b57cec5SDimitry Andric MachineFunction &MF = DAG.getMachineFunction(); 14620b57cec5SDimitry Andric SmallVector<ISD::InputArg, 8> LocalIns; 14630b57cec5SDimitry Andric 14640b57cec5SDimitry Andric if (AMDGPU::isShader(CallConv)) { 14650b57cec5SDimitry Andric CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg)); 14660b57cec5SDimitry Andric } else { 14670b57cec5SDimitry Andric analyzeFormalArgumentsCompute(CCInfo, Ins); 14680b57cec5SDimitry Andric } 14690b57cec5SDimitry Andric 14700b57cec5SDimitry Andric for (unsigned i = 0, e = Ins.size(); i < e; ++i) { 14710b57cec5SDimitry Andric CCValAssign &VA = ArgLocs[i]; 14720b57cec5SDimitry Andric const ISD::InputArg &In = Ins[i]; 14730b57cec5SDimitry Andric EVT VT = In.VT; 14740b57cec5SDimitry Andric EVT MemVT = VA.getLocVT(); 14750b57cec5SDimitry Andric if (!VT.isVector() && MemVT.isVector()) { 14760b57cec5SDimitry Andric // Get load source type if scalarized. 14770b57cec5SDimitry Andric MemVT = MemVT.getVectorElementType(); 14780b57cec5SDimitry Andric } 14790b57cec5SDimitry Andric 14800b57cec5SDimitry Andric if (AMDGPU::isShader(CallConv)) { 1481e8d8bef9SDimitry Andric Register Reg = MF.addLiveIn(VA.getLocReg(), &R600::R600_Reg128RegClass); 14820b57cec5SDimitry Andric SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT); 14830b57cec5SDimitry Andric InVals.push_back(Register); 14840b57cec5SDimitry Andric continue; 14850b57cec5SDimitry Andric } 14860b57cec5SDimitry Andric 14870b57cec5SDimitry Andric // i64 isn't a legal type, so the register type used ends up as i32, which 14880b57cec5SDimitry Andric // isn't expected here. It attempts to create this sextload, but it ends up 14890b57cec5SDimitry Andric // being invalid. Somehow this seems to work with i64 arguments, but breaks 14900b57cec5SDimitry Andric // for <1 x i64>. 14910b57cec5SDimitry Andric 14920b57cec5SDimitry Andric // The first 36 bytes of the input buffer contains information about 14930b57cec5SDimitry Andric // thread group and global sizes. 14940b57cec5SDimitry Andric ISD::LoadExtType Ext = ISD::NON_EXTLOAD; 14950b57cec5SDimitry Andric if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) { 14960b57cec5SDimitry Andric // FIXME: This should really check the extload type, but the handling of 14970b57cec5SDimitry Andric // extload vector parameters seems to be broken. 14980b57cec5SDimitry Andric 14990b57cec5SDimitry Andric // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD; 15000b57cec5SDimitry Andric Ext = ISD::SEXTLOAD; 15010b57cec5SDimitry Andric } 15020b57cec5SDimitry Andric 15030b57cec5SDimitry Andric // Compute the offset from the value. 15040b57cec5SDimitry Andric // XXX - I think PartOffset should give you this, but it seems to give the 15050b57cec5SDimitry Andric // size of the register which isn't useful. 15060b57cec5SDimitry Andric 15070b57cec5SDimitry Andric unsigned PartOffset = VA.getLocMemOffset(); 1508bdd1243dSDimitry Andric Align Alignment = commonAlignment(Align(VT.getStoreSize()), PartOffset); 15090b57cec5SDimitry Andric 1510480093f4SDimitry Andric MachinePointerInfo PtrInfo(AMDGPUAS::PARAM_I_ADDRESS); 15110b57cec5SDimitry Andric SDValue Arg = DAG.getLoad( 15120b57cec5SDimitry Andric ISD::UNINDEXED, Ext, VT, DL, Chain, 15130b57cec5SDimitry Andric DAG.getConstant(PartOffset, DL, MVT::i32), DAG.getUNDEF(MVT::i32), 15140b57cec5SDimitry Andric PtrInfo, 15150b57cec5SDimitry Andric MemVT, Alignment, MachineMemOperand::MONonTemporal | 15160b57cec5SDimitry Andric MachineMemOperand::MODereferenceable | 15170b57cec5SDimitry Andric MachineMemOperand::MOInvariant); 15180b57cec5SDimitry Andric 15190b57cec5SDimitry Andric InVals.push_back(Arg); 15200b57cec5SDimitry Andric } 15210b57cec5SDimitry Andric return Chain; 15220b57cec5SDimitry Andric } 15230b57cec5SDimitry Andric 15240b57cec5SDimitry Andric EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &, 15250b57cec5SDimitry Andric EVT VT) const { 15260b57cec5SDimitry Andric if (!VT.isVector()) 15270b57cec5SDimitry Andric return MVT::i32; 15280b57cec5SDimitry Andric return VT.changeVectorElementTypeToInteger(); 15290b57cec5SDimitry Andric } 15300b57cec5SDimitry Andric 15310b57cec5SDimitry Andric bool R600TargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT, 1532349cc55cSDimitry Andric const MachineFunction &MF) const { 15330b57cec5SDimitry Andric // Local and Private addresses do not handle vectors. Limit to i32 15340b57cec5SDimitry Andric if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS)) { 15350b57cec5SDimitry Andric return (MemVT.getSizeInBits() <= 32); 15360b57cec5SDimitry Andric } 15370b57cec5SDimitry Andric return true; 15380b57cec5SDimitry Andric } 15390b57cec5SDimitry Andric 15400b57cec5SDimitry Andric bool R600TargetLowering::allowsMisalignedMemoryAccesses( 1541fe6060f1SDimitry Andric EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, 1542bdd1243dSDimitry Andric unsigned *IsFast) const { 15430b57cec5SDimitry Andric if (IsFast) 1544bdd1243dSDimitry Andric *IsFast = 0; 15450b57cec5SDimitry Andric 15460b57cec5SDimitry Andric if (!VT.isSimple() || VT == MVT::Other) 15470b57cec5SDimitry Andric return false; 15480b57cec5SDimitry Andric 15490b57cec5SDimitry Andric if (VT.bitsLT(MVT::i32)) 15500b57cec5SDimitry Andric return false; 15510b57cec5SDimitry Andric 15520b57cec5SDimitry Andric // TODO: This is a rough estimate. 15530b57cec5SDimitry Andric if (IsFast) 1554bdd1243dSDimitry Andric *IsFast = 1; 15550b57cec5SDimitry Andric 1556fe6060f1SDimitry Andric return VT.bitsGT(MVT::i32) && Alignment >= Align(4); 15570b57cec5SDimitry Andric } 15580b57cec5SDimitry Andric 15590b57cec5SDimitry Andric static SDValue CompactSwizzlableVector( 15600b57cec5SDimitry Andric SelectionDAG &DAG, SDValue VectorEntry, 15610b57cec5SDimitry Andric DenseMap<unsigned, unsigned> &RemapSwizzle) { 15620b57cec5SDimitry Andric assert(RemapSwizzle.empty()); 15630b57cec5SDimitry Andric 15640b57cec5SDimitry Andric SDLoc DL(VectorEntry); 15650b57cec5SDimitry Andric EVT EltTy = VectorEntry.getValueType().getVectorElementType(); 15660b57cec5SDimitry Andric 15670b57cec5SDimitry Andric SDValue NewBldVec[4]; 15680b57cec5SDimitry Andric for (unsigned i = 0; i < 4; i++) 15690b57cec5SDimitry Andric NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry, 15700b57cec5SDimitry Andric DAG.getIntPtrConstant(i, DL)); 15710b57cec5SDimitry Andric 15720b57cec5SDimitry Andric for (unsigned i = 0; i < 4; i++) { 15730b57cec5SDimitry Andric if (NewBldVec[i].isUndef()) 15740b57cec5SDimitry Andric // We mask write here to teach later passes that the ith element of this 15750b57cec5SDimitry Andric // vector is undef. Thus we can use it to reduce 128 bits reg usage, 157681ad6265SDimitry Andric // break false dependencies and additionally make assembly easier to read. 15770b57cec5SDimitry Andric RemapSwizzle[i] = 7; // SEL_MASK_WRITE 15780b57cec5SDimitry Andric if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) { 15790b57cec5SDimitry Andric if (C->isZero()) { 15800b57cec5SDimitry Andric RemapSwizzle[i] = 4; // SEL_0 15810b57cec5SDimitry Andric NewBldVec[i] = DAG.getUNDEF(MVT::f32); 15820b57cec5SDimitry Andric } else if (C->isExactlyValue(1.0)) { 15830b57cec5SDimitry Andric RemapSwizzle[i] = 5; // SEL_1 15840b57cec5SDimitry Andric NewBldVec[i] = DAG.getUNDEF(MVT::f32); 15850b57cec5SDimitry Andric } 15860b57cec5SDimitry Andric } 15870b57cec5SDimitry Andric 15880b57cec5SDimitry Andric if (NewBldVec[i].isUndef()) 15890b57cec5SDimitry Andric continue; 1590480093f4SDimitry Andric 15910b57cec5SDimitry Andric for (unsigned j = 0; j < i; j++) { 15920b57cec5SDimitry Andric if (NewBldVec[i] == NewBldVec[j]) { 15930b57cec5SDimitry Andric NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType()); 15940b57cec5SDimitry Andric RemapSwizzle[i] = j; 15950b57cec5SDimitry Andric break; 15960b57cec5SDimitry Andric } 15970b57cec5SDimitry Andric } 15980b57cec5SDimitry Andric } 15990b57cec5SDimitry Andric 16000b57cec5SDimitry Andric return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry), 16010b57cec5SDimitry Andric NewBldVec); 16020b57cec5SDimitry Andric } 16030b57cec5SDimitry Andric 16040b57cec5SDimitry Andric static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry, 16050b57cec5SDimitry Andric DenseMap<unsigned, unsigned> &RemapSwizzle) { 16060b57cec5SDimitry Andric assert(RemapSwizzle.empty()); 16070b57cec5SDimitry Andric 16080b57cec5SDimitry Andric SDLoc DL(VectorEntry); 16090b57cec5SDimitry Andric EVT EltTy = VectorEntry.getValueType().getVectorElementType(); 16100b57cec5SDimitry Andric 16110b57cec5SDimitry Andric SDValue NewBldVec[4]; 16120b57cec5SDimitry Andric bool isUnmovable[4] = {false, false, false, false}; 16130b57cec5SDimitry Andric for (unsigned i = 0; i < 4; i++) 16140b57cec5SDimitry Andric NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry, 16150b57cec5SDimitry Andric DAG.getIntPtrConstant(i, DL)); 16160b57cec5SDimitry Andric 16170b57cec5SDimitry Andric for (unsigned i = 0; i < 4; i++) { 16180b57cec5SDimitry Andric RemapSwizzle[i] = i; 16190b57cec5SDimitry Andric if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) { 16207a6dacacSDimitry Andric unsigned Idx = NewBldVec[i].getConstantOperandVal(1); 16210b57cec5SDimitry Andric if (i == Idx) 16220b57cec5SDimitry Andric isUnmovable[Idx] = true; 16230b57cec5SDimitry Andric } 16240b57cec5SDimitry Andric } 16250b57cec5SDimitry Andric 16260b57cec5SDimitry Andric for (unsigned i = 0; i < 4; i++) { 16270b57cec5SDimitry Andric if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) { 16287a6dacacSDimitry Andric unsigned Idx = NewBldVec[i].getConstantOperandVal(1); 16290b57cec5SDimitry Andric if (isUnmovable[Idx]) 16300b57cec5SDimitry Andric continue; 16310b57cec5SDimitry Andric // Swap i and Idx 16320b57cec5SDimitry Andric std::swap(NewBldVec[Idx], NewBldVec[i]); 16330b57cec5SDimitry Andric std::swap(RemapSwizzle[i], RemapSwizzle[Idx]); 16340b57cec5SDimitry Andric break; 16350b57cec5SDimitry Andric } 16360b57cec5SDimitry Andric } 16370b57cec5SDimitry Andric 16380b57cec5SDimitry Andric return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry), 16390b57cec5SDimitry Andric NewBldVec); 16400b57cec5SDimitry Andric } 16410b57cec5SDimitry Andric 1642753f127fSDimitry Andric SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[], 16430b57cec5SDimitry Andric SelectionDAG &DAG, 16440b57cec5SDimitry Andric const SDLoc &DL) const { 16450b57cec5SDimitry Andric // Old -> New swizzle values 16460b57cec5SDimitry Andric DenseMap<unsigned, unsigned> SwizzleRemap; 16470b57cec5SDimitry Andric 16480b57cec5SDimitry Andric BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap); 16490b57cec5SDimitry Andric for (unsigned i = 0; i < 4; i++) { 16501db9f3b2SDimitry Andric unsigned Idx = Swz[i]->getAsZExtVal(); 165106c3fb27SDimitry Andric if (SwizzleRemap.contains(Idx)) 16520b57cec5SDimitry Andric Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32); 16530b57cec5SDimitry Andric } 16540b57cec5SDimitry Andric 16550b57cec5SDimitry Andric SwizzleRemap.clear(); 16560b57cec5SDimitry Andric BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap); 16570b57cec5SDimitry Andric for (unsigned i = 0; i < 4; i++) { 16581db9f3b2SDimitry Andric unsigned Idx = Swz[i]->getAsZExtVal(); 165906c3fb27SDimitry Andric if (SwizzleRemap.contains(Idx)) 16600b57cec5SDimitry Andric Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32); 16610b57cec5SDimitry Andric } 16620b57cec5SDimitry Andric 16630b57cec5SDimitry Andric return BuildVector; 16640b57cec5SDimitry Andric } 16650b57cec5SDimitry Andric 16660b57cec5SDimitry Andric SDValue R600TargetLowering::constBufferLoad(LoadSDNode *LoadNode, int Block, 16670b57cec5SDimitry Andric SelectionDAG &DAG) const { 16680b57cec5SDimitry Andric SDLoc DL(LoadNode); 16690b57cec5SDimitry Andric EVT VT = LoadNode->getValueType(0); 16700b57cec5SDimitry Andric SDValue Chain = LoadNode->getChain(); 16710b57cec5SDimitry Andric SDValue Ptr = LoadNode->getBasePtr(); 16720b57cec5SDimitry Andric assert (isa<ConstantSDNode>(Ptr)); 16730b57cec5SDimitry Andric 16740b57cec5SDimitry Andric //TODO: Support smaller loads 16750b57cec5SDimitry Andric if (LoadNode->getMemoryVT().getScalarType() != MVT::i32 || !ISD::isNON_EXTLoad(LoadNode)) 16760b57cec5SDimitry Andric return SDValue(); 16770b57cec5SDimitry Andric 167881ad6265SDimitry Andric if (LoadNode->getAlign() < Align(4)) 16790b57cec5SDimitry Andric return SDValue(); 16800b57cec5SDimitry Andric 16810b57cec5SDimitry Andric int ConstantBlock = ConstantAddressBlock(Block); 16820b57cec5SDimitry Andric 16830b57cec5SDimitry Andric SDValue Slots[4]; 16840b57cec5SDimitry Andric for (unsigned i = 0; i < 4; i++) { 16850b57cec5SDimitry Andric // We want Const position encoded with the following formula : 16860b57cec5SDimitry Andric // (((512 + (kc_bank << 12) + const_index) << 2) + chan) 16870b57cec5SDimitry Andric // const_index is Ptr computed by llvm using an alignment of 16. 16880b57cec5SDimitry Andric // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and 16890b57cec5SDimitry Andric // then div by 4 at the ISel step 16900b57cec5SDimitry Andric SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, 16910b57cec5SDimitry Andric DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32)); 16920b57cec5SDimitry Andric Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr); 16930b57cec5SDimitry Andric } 16940b57cec5SDimitry Andric EVT NewVT = MVT::v4i32; 16950b57cec5SDimitry Andric unsigned NumElements = 4; 16960b57cec5SDimitry Andric if (VT.isVector()) { 16970b57cec5SDimitry Andric NewVT = VT; 16980b57cec5SDimitry Andric NumElements = VT.getVectorNumElements(); 16990b57cec5SDimitry Andric } 1700bdd1243dSDimitry Andric SDValue Result = DAG.getBuildVector(NewVT, DL, ArrayRef(Slots, NumElements)); 17010b57cec5SDimitry Andric if (!VT.isVector()) { 17020b57cec5SDimitry Andric Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result, 17030b57cec5SDimitry Andric DAG.getConstant(0, DL, MVT::i32)); 17040b57cec5SDimitry Andric } 17050b57cec5SDimitry Andric SDValue MergedValues[2] = { 17060b57cec5SDimitry Andric Result, 17070b57cec5SDimitry Andric Chain 17080b57cec5SDimitry Andric }; 17090b57cec5SDimitry Andric return DAG.getMergeValues(MergedValues, DL); 17100b57cec5SDimitry Andric } 17110b57cec5SDimitry Andric 17120b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 17130b57cec5SDimitry Andric // Custom DAG Optimizations 17140b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 17150b57cec5SDimitry Andric 17160b57cec5SDimitry Andric SDValue R600TargetLowering::PerformDAGCombine(SDNode *N, 17170b57cec5SDimitry Andric DAGCombinerInfo &DCI) const { 17180b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG; 17190b57cec5SDimitry Andric SDLoc DL(N); 17200b57cec5SDimitry Andric 17210b57cec5SDimitry Andric switch (N->getOpcode()) { 17220b57cec5SDimitry Andric // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a) 17230b57cec5SDimitry Andric case ISD::FP_ROUND: { 17240b57cec5SDimitry Andric SDValue Arg = N->getOperand(0); 17250b57cec5SDimitry Andric if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) { 17260b57cec5SDimitry Andric return DAG.getNode(ISD::UINT_TO_FP, DL, N->getValueType(0), 17270b57cec5SDimitry Andric Arg.getOperand(0)); 17280b57cec5SDimitry Andric } 17290b57cec5SDimitry Andric break; 17300b57cec5SDimitry Andric } 17310b57cec5SDimitry Andric 17320b57cec5SDimitry Andric // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) -> 17330b57cec5SDimitry Andric // (i32 select_cc f32, f32, -1, 0 cc) 17340b57cec5SDimitry Andric // 17350b57cec5SDimitry Andric // Mesa's GLSL frontend generates the above pattern a lot and we can lower 17360b57cec5SDimitry Andric // this to one of the SET*_DX10 instructions. 17370b57cec5SDimitry Andric case ISD::FP_TO_SINT: { 17380b57cec5SDimitry Andric SDValue FNeg = N->getOperand(0); 17390b57cec5SDimitry Andric if (FNeg.getOpcode() != ISD::FNEG) { 17400b57cec5SDimitry Andric return SDValue(); 17410b57cec5SDimitry Andric } 17420b57cec5SDimitry Andric SDValue SelectCC = FNeg.getOperand(0); 17430b57cec5SDimitry Andric if (SelectCC.getOpcode() != ISD::SELECT_CC || 17440b57cec5SDimitry Andric SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS 17450b57cec5SDimitry Andric SelectCC.getOperand(2).getValueType() != MVT::f32 || // True 17460b57cec5SDimitry Andric !isHWTrueValue(SelectCC.getOperand(2)) || 17470b57cec5SDimitry Andric !isHWFalseValue(SelectCC.getOperand(3))) { 17480b57cec5SDimitry Andric return SDValue(); 17490b57cec5SDimitry Andric } 17500b57cec5SDimitry Andric 17510b57cec5SDimitry Andric return DAG.getNode(ISD::SELECT_CC, DL, N->getValueType(0), 17520b57cec5SDimitry Andric SelectCC.getOperand(0), // LHS 17530b57cec5SDimitry Andric SelectCC.getOperand(1), // RHS 17540b57cec5SDimitry Andric DAG.getConstant(-1, DL, MVT::i32), // True 17550b57cec5SDimitry Andric DAG.getConstant(0, DL, MVT::i32), // False 17560b57cec5SDimitry Andric SelectCC.getOperand(4)); // CC 17570b57cec5SDimitry Andric } 17580b57cec5SDimitry Andric 17590b57cec5SDimitry Andric // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx 17600b57cec5SDimitry Andric // => build_vector elt0, ... , NewEltIdx, ... , eltN 17610b57cec5SDimitry Andric case ISD::INSERT_VECTOR_ELT: { 17620b57cec5SDimitry Andric SDValue InVec = N->getOperand(0); 17630b57cec5SDimitry Andric SDValue InVal = N->getOperand(1); 17640b57cec5SDimitry Andric SDValue EltNo = N->getOperand(2); 17650b57cec5SDimitry Andric 17660b57cec5SDimitry Andric // If the inserted element is an UNDEF, just use the input vector. 17670b57cec5SDimitry Andric if (InVal.isUndef()) 17680b57cec5SDimitry Andric return InVec; 17690b57cec5SDimitry Andric 17700b57cec5SDimitry Andric EVT VT = InVec.getValueType(); 17710b57cec5SDimitry Andric 17720b57cec5SDimitry Andric // If we can't generate a legal BUILD_VECTOR, exit 17730b57cec5SDimitry Andric if (!isOperationLegal(ISD::BUILD_VECTOR, VT)) 17740b57cec5SDimitry Andric return SDValue(); 17750b57cec5SDimitry Andric 17760b57cec5SDimitry Andric // Check that we know which element is being inserted 17770b57cec5SDimitry Andric if (!isa<ConstantSDNode>(EltNo)) 17780b57cec5SDimitry Andric return SDValue(); 17791db9f3b2SDimitry Andric unsigned Elt = EltNo->getAsZExtVal(); 17800b57cec5SDimitry Andric 17810b57cec5SDimitry Andric // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially 17820b57cec5SDimitry Andric // be converted to a BUILD_VECTOR). Fill in the Ops vector with the 17830b57cec5SDimitry Andric // vector elements. 17840b57cec5SDimitry Andric SmallVector<SDValue, 8> Ops; 17850b57cec5SDimitry Andric if (InVec.getOpcode() == ISD::BUILD_VECTOR) { 17860b57cec5SDimitry Andric Ops.append(InVec.getNode()->op_begin(), 17870b57cec5SDimitry Andric InVec.getNode()->op_end()); 17880b57cec5SDimitry Andric } else if (InVec.isUndef()) { 17890b57cec5SDimitry Andric unsigned NElts = VT.getVectorNumElements(); 17900b57cec5SDimitry Andric Ops.append(NElts, DAG.getUNDEF(InVal.getValueType())); 17910b57cec5SDimitry Andric } else { 17920b57cec5SDimitry Andric return SDValue(); 17930b57cec5SDimitry Andric } 17940b57cec5SDimitry Andric 17950b57cec5SDimitry Andric // Insert the element 17960b57cec5SDimitry Andric if (Elt < Ops.size()) { 17970b57cec5SDimitry Andric // All the operands of BUILD_VECTOR must have the same type; 17980b57cec5SDimitry Andric // we enforce that here. 17990b57cec5SDimitry Andric EVT OpVT = Ops[0].getValueType(); 18000b57cec5SDimitry Andric if (InVal.getValueType() != OpVT) 18010b57cec5SDimitry Andric InVal = OpVT.bitsGT(InVal.getValueType()) ? 18020b57cec5SDimitry Andric DAG.getNode(ISD::ANY_EXTEND, DL, OpVT, InVal) : 18030b57cec5SDimitry Andric DAG.getNode(ISD::TRUNCATE, DL, OpVT, InVal); 18040b57cec5SDimitry Andric Ops[Elt] = InVal; 18050b57cec5SDimitry Andric } 18060b57cec5SDimitry Andric 18070b57cec5SDimitry Andric // Return the new vector 18080b57cec5SDimitry Andric return DAG.getBuildVector(VT, DL, Ops); 18090b57cec5SDimitry Andric } 18100b57cec5SDimitry Andric 18110b57cec5SDimitry Andric // Extract_vec (Build_vector) generated by custom lowering 18120b57cec5SDimitry Andric // also needs to be customly combined 18130b57cec5SDimitry Andric case ISD::EXTRACT_VECTOR_ELT: { 18140b57cec5SDimitry Andric SDValue Arg = N->getOperand(0); 18150b57cec5SDimitry Andric if (Arg.getOpcode() == ISD::BUILD_VECTOR) { 18160b57cec5SDimitry Andric if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) { 18170b57cec5SDimitry Andric unsigned Element = Const->getZExtValue(); 18180b57cec5SDimitry Andric return Arg->getOperand(Element); 18190b57cec5SDimitry Andric } 18200b57cec5SDimitry Andric } 18210b57cec5SDimitry Andric if (Arg.getOpcode() == ISD::BITCAST && 18220b57cec5SDimitry Andric Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR && 18230b57cec5SDimitry Andric (Arg.getOperand(0).getValueType().getVectorNumElements() == 18240b57cec5SDimitry Andric Arg.getValueType().getVectorNumElements())) { 18250b57cec5SDimitry Andric if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) { 18260b57cec5SDimitry Andric unsigned Element = Const->getZExtValue(); 18270b57cec5SDimitry Andric return DAG.getNode(ISD::BITCAST, DL, N->getVTList(), 18280b57cec5SDimitry Andric Arg->getOperand(0).getOperand(Element)); 18290b57cec5SDimitry Andric } 18300b57cec5SDimitry Andric } 18310b57cec5SDimitry Andric break; 18320b57cec5SDimitry Andric } 18330b57cec5SDimitry Andric 18340b57cec5SDimitry Andric case ISD::SELECT_CC: { 18350b57cec5SDimitry Andric // Try common optimizations 18360b57cec5SDimitry Andric if (SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI)) 18370b57cec5SDimitry Andric return Ret; 18380b57cec5SDimitry Andric 18390b57cec5SDimitry Andric // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq -> 18400b57cec5SDimitry Andric // selectcc x, y, a, b, inv(cc) 18410b57cec5SDimitry Andric // 18420b57cec5SDimitry Andric // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne -> 18430b57cec5SDimitry Andric // selectcc x, y, a, b, cc 18440b57cec5SDimitry Andric SDValue LHS = N->getOperand(0); 18450b57cec5SDimitry Andric if (LHS.getOpcode() != ISD::SELECT_CC) { 18460b57cec5SDimitry Andric return SDValue(); 18470b57cec5SDimitry Andric } 18480b57cec5SDimitry Andric 18490b57cec5SDimitry Andric SDValue RHS = N->getOperand(1); 18500b57cec5SDimitry Andric SDValue True = N->getOperand(2); 18510b57cec5SDimitry Andric SDValue False = N->getOperand(3); 18520b57cec5SDimitry Andric ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get(); 18530b57cec5SDimitry Andric 18540b57cec5SDimitry Andric if (LHS.getOperand(2).getNode() != True.getNode() || 18550b57cec5SDimitry Andric LHS.getOperand(3).getNode() != False.getNode() || 18560b57cec5SDimitry Andric RHS.getNode() != False.getNode()) { 18570b57cec5SDimitry Andric return SDValue(); 18580b57cec5SDimitry Andric } 18590b57cec5SDimitry Andric 18600b57cec5SDimitry Andric switch (NCC) { 18610b57cec5SDimitry Andric default: return SDValue(); 18620b57cec5SDimitry Andric case ISD::SETNE: return LHS; 18630b57cec5SDimitry Andric case ISD::SETEQ: { 18640b57cec5SDimitry Andric ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get(); 1865480093f4SDimitry Andric LHSCC = ISD::getSetCCInverse(LHSCC, LHS.getOperand(0).getValueType()); 18660b57cec5SDimitry Andric if (DCI.isBeforeLegalizeOps() || 18670b57cec5SDimitry Andric isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType())) 18680b57cec5SDimitry Andric return DAG.getSelectCC(DL, 18690b57cec5SDimitry Andric LHS.getOperand(0), 18700b57cec5SDimitry Andric LHS.getOperand(1), 18710b57cec5SDimitry Andric LHS.getOperand(2), 18720b57cec5SDimitry Andric LHS.getOperand(3), 18730b57cec5SDimitry Andric LHSCC); 18740b57cec5SDimitry Andric break; 18750b57cec5SDimitry Andric } 18760b57cec5SDimitry Andric } 18770b57cec5SDimitry Andric return SDValue(); 18780b57cec5SDimitry Andric } 18790b57cec5SDimitry Andric 18800b57cec5SDimitry Andric case AMDGPUISD::R600_EXPORT: { 18810b57cec5SDimitry Andric SDValue Arg = N->getOperand(1); 18820b57cec5SDimitry Andric if (Arg.getOpcode() != ISD::BUILD_VECTOR) 18830b57cec5SDimitry Andric break; 18840b57cec5SDimitry Andric 18850b57cec5SDimitry Andric SDValue NewArgs[8] = { 18860b57cec5SDimitry Andric N->getOperand(0), // Chain 18870b57cec5SDimitry Andric SDValue(), 18880b57cec5SDimitry Andric N->getOperand(2), // ArrayBase 18890b57cec5SDimitry Andric N->getOperand(3), // Type 18900b57cec5SDimitry Andric N->getOperand(4), // SWZ_X 18910b57cec5SDimitry Andric N->getOperand(5), // SWZ_Y 18920b57cec5SDimitry Andric N->getOperand(6), // SWZ_Z 18930b57cec5SDimitry Andric N->getOperand(7) // SWZ_W 18940b57cec5SDimitry Andric }; 18950b57cec5SDimitry Andric NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL); 18960b57cec5SDimitry Andric return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, N->getVTList(), NewArgs); 18970b57cec5SDimitry Andric } 18980b57cec5SDimitry Andric case AMDGPUISD::TEXTURE_FETCH: { 18990b57cec5SDimitry Andric SDValue Arg = N->getOperand(1); 19000b57cec5SDimitry Andric if (Arg.getOpcode() != ISD::BUILD_VECTOR) 19010b57cec5SDimitry Andric break; 19020b57cec5SDimitry Andric 19030b57cec5SDimitry Andric SDValue NewArgs[19] = { 19040b57cec5SDimitry Andric N->getOperand(0), 19050b57cec5SDimitry Andric N->getOperand(1), 19060b57cec5SDimitry Andric N->getOperand(2), 19070b57cec5SDimitry Andric N->getOperand(3), 19080b57cec5SDimitry Andric N->getOperand(4), 19090b57cec5SDimitry Andric N->getOperand(5), 19100b57cec5SDimitry Andric N->getOperand(6), 19110b57cec5SDimitry Andric N->getOperand(7), 19120b57cec5SDimitry Andric N->getOperand(8), 19130b57cec5SDimitry Andric N->getOperand(9), 19140b57cec5SDimitry Andric N->getOperand(10), 19150b57cec5SDimitry Andric N->getOperand(11), 19160b57cec5SDimitry Andric N->getOperand(12), 19170b57cec5SDimitry Andric N->getOperand(13), 19180b57cec5SDimitry Andric N->getOperand(14), 19190b57cec5SDimitry Andric N->getOperand(15), 19200b57cec5SDimitry Andric N->getOperand(16), 19210b57cec5SDimitry Andric N->getOperand(17), 19220b57cec5SDimitry Andric N->getOperand(18), 19230b57cec5SDimitry Andric }; 19240b57cec5SDimitry Andric NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL); 19250b57cec5SDimitry Andric return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs); 19260b57cec5SDimitry Andric } 19270b57cec5SDimitry Andric 19280b57cec5SDimitry Andric case ISD::LOAD: { 19290b57cec5SDimitry Andric LoadSDNode *LoadNode = cast<LoadSDNode>(N); 19300b57cec5SDimitry Andric SDValue Ptr = LoadNode->getBasePtr(); 19310b57cec5SDimitry Andric if (LoadNode->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS && 19320b57cec5SDimitry Andric isa<ConstantSDNode>(Ptr)) 19330b57cec5SDimitry Andric return constBufferLoad(LoadNode, AMDGPUAS::CONSTANT_BUFFER_0, DAG); 19340b57cec5SDimitry Andric break; 19350b57cec5SDimitry Andric } 19360b57cec5SDimitry Andric 19370b57cec5SDimitry Andric default: break; 19380b57cec5SDimitry Andric } 19390b57cec5SDimitry Andric 19400b57cec5SDimitry Andric return AMDGPUTargetLowering::PerformDAGCombine(N, DCI); 19410b57cec5SDimitry Andric } 19420b57cec5SDimitry Andric 19430b57cec5SDimitry Andric bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx, 19440b57cec5SDimitry Andric SDValue &Src, SDValue &Neg, SDValue &Abs, 19450b57cec5SDimitry Andric SDValue &Sel, SDValue &Imm, 19460b57cec5SDimitry Andric SelectionDAG &DAG) const { 19470b57cec5SDimitry Andric const R600InstrInfo *TII = Subtarget->getInstrInfo(); 19480b57cec5SDimitry Andric if (!Src.isMachineOpcode()) 19490b57cec5SDimitry Andric return false; 19500b57cec5SDimitry Andric 19510b57cec5SDimitry Andric switch (Src.getMachineOpcode()) { 19520b57cec5SDimitry Andric case R600::FNEG_R600: 19530b57cec5SDimitry Andric if (!Neg.getNode()) 19540b57cec5SDimitry Andric return false; 19550b57cec5SDimitry Andric Src = Src.getOperand(0); 19560b57cec5SDimitry Andric Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32); 19570b57cec5SDimitry Andric return true; 19580b57cec5SDimitry Andric case R600::FABS_R600: 19590b57cec5SDimitry Andric if (!Abs.getNode()) 19600b57cec5SDimitry Andric return false; 19610b57cec5SDimitry Andric Src = Src.getOperand(0); 19620b57cec5SDimitry Andric Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32); 19630b57cec5SDimitry Andric return true; 19640b57cec5SDimitry Andric case R600::CONST_COPY: { 19650b57cec5SDimitry Andric unsigned Opcode = ParentNode->getMachineOpcode(); 19660b57cec5SDimitry Andric bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1; 19670b57cec5SDimitry Andric 19680b57cec5SDimitry Andric if (!Sel.getNode()) 19690b57cec5SDimitry Andric return false; 19700b57cec5SDimitry Andric 19710b57cec5SDimitry Andric SDValue CstOffset = Src.getOperand(0); 19720b57cec5SDimitry Andric if (ParentNode->getValueType(0).isVector()) 19730b57cec5SDimitry Andric return false; 19740b57cec5SDimitry Andric 19750b57cec5SDimitry Andric // Gather constants values 19760b57cec5SDimitry Andric int SrcIndices[] = { 19770b57cec5SDimitry Andric TII->getOperandIdx(Opcode, R600::OpName::src0), 19780b57cec5SDimitry Andric TII->getOperandIdx(Opcode, R600::OpName::src1), 19790b57cec5SDimitry Andric TII->getOperandIdx(Opcode, R600::OpName::src2), 19800b57cec5SDimitry Andric TII->getOperandIdx(Opcode, R600::OpName::src0_X), 19810b57cec5SDimitry Andric TII->getOperandIdx(Opcode, R600::OpName::src0_Y), 19820b57cec5SDimitry Andric TII->getOperandIdx(Opcode, R600::OpName::src0_Z), 19830b57cec5SDimitry Andric TII->getOperandIdx(Opcode, R600::OpName::src0_W), 19840b57cec5SDimitry Andric TII->getOperandIdx(Opcode, R600::OpName::src1_X), 19850b57cec5SDimitry Andric TII->getOperandIdx(Opcode, R600::OpName::src1_Y), 19860b57cec5SDimitry Andric TII->getOperandIdx(Opcode, R600::OpName::src1_Z), 19870b57cec5SDimitry Andric TII->getOperandIdx(Opcode, R600::OpName::src1_W) 19880b57cec5SDimitry Andric }; 19890b57cec5SDimitry Andric std::vector<unsigned> Consts; 19900b57cec5SDimitry Andric for (int OtherSrcIdx : SrcIndices) { 19910b57cec5SDimitry Andric int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx); 19920b57cec5SDimitry Andric if (OtherSrcIdx < 0 || OtherSelIdx < 0) 19930b57cec5SDimitry Andric continue; 19940b57cec5SDimitry Andric if (HasDst) { 19950b57cec5SDimitry Andric OtherSrcIdx--; 19960b57cec5SDimitry Andric OtherSelIdx--; 19970b57cec5SDimitry Andric } 19980b57cec5SDimitry Andric if (RegisterSDNode *Reg = 19990b57cec5SDimitry Andric dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) { 20000b57cec5SDimitry Andric if (Reg->getReg() == R600::ALU_CONST) { 20017a6dacacSDimitry Andric Consts.push_back(ParentNode->getConstantOperandVal(OtherSelIdx)); 20020b57cec5SDimitry Andric } 20030b57cec5SDimitry Andric } 20040b57cec5SDimitry Andric } 20050b57cec5SDimitry Andric 20060b57cec5SDimitry Andric ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset); 20070b57cec5SDimitry Andric Consts.push_back(Cst->getZExtValue()); 20080b57cec5SDimitry Andric if (!TII->fitsConstReadLimitations(Consts)) { 20090b57cec5SDimitry Andric return false; 20100b57cec5SDimitry Andric } 20110b57cec5SDimitry Andric 20120b57cec5SDimitry Andric Sel = CstOffset; 20130b57cec5SDimitry Andric Src = DAG.getRegister(R600::ALU_CONST, MVT::f32); 20140b57cec5SDimitry Andric return true; 20150b57cec5SDimitry Andric } 20160b57cec5SDimitry Andric case R600::MOV_IMM_GLOBAL_ADDR: 20170b57cec5SDimitry Andric // Check if the Imm slot is used. Taken from below. 20181db9f3b2SDimitry Andric if (Imm->getAsZExtVal()) 20190b57cec5SDimitry Andric return false; 20200b57cec5SDimitry Andric Imm = Src.getOperand(0); 20210b57cec5SDimitry Andric Src = DAG.getRegister(R600::ALU_LITERAL_X, MVT::i32); 20220b57cec5SDimitry Andric return true; 20230b57cec5SDimitry Andric case R600::MOV_IMM_I32: 20240b57cec5SDimitry Andric case R600::MOV_IMM_F32: { 20250b57cec5SDimitry Andric unsigned ImmReg = R600::ALU_LITERAL_X; 20260b57cec5SDimitry Andric uint64_t ImmValue = 0; 20270b57cec5SDimitry Andric 20280b57cec5SDimitry Andric if (Src.getMachineOpcode() == R600::MOV_IMM_F32) { 2029e8d8bef9SDimitry Andric ConstantFPSDNode *FPC = cast<ConstantFPSDNode>(Src.getOperand(0)); 20300b57cec5SDimitry Andric float FloatValue = FPC->getValueAPF().convertToFloat(); 20310b57cec5SDimitry Andric if (FloatValue == 0.0) { 20320b57cec5SDimitry Andric ImmReg = R600::ZERO; 20330b57cec5SDimitry Andric } else if (FloatValue == 0.5) { 20340b57cec5SDimitry Andric ImmReg = R600::HALF; 20350b57cec5SDimitry Andric } else if (FloatValue == 1.0) { 20360b57cec5SDimitry Andric ImmReg = R600::ONE; 20370b57cec5SDimitry Andric } else { 20380b57cec5SDimitry Andric ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue(); 20390b57cec5SDimitry Andric } 20400b57cec5SDimitry Andric } else { 20417a6dacacSDimitry Andric uint64_t Value = Src.getConstantOperandVal(0); 20420b57cec5SDimitry Andric if (Value == 0) { 20430b57cec5SDimitry Andric ImmReg = R600::ZERO; 20440b57cec5SDimitry Andric } else if (Value == 1) { 20450b57cec5SDimitry Andric ImmReg = R600::ONE_INT; 20460b57cec5SDimitry Andric } else { 20470b57cec5SDimitry Andric ImmValue = Value; 20480b57cec5SDimitry Andric } 20490b57cec5SDimitry Andric } 20500b57cec5SDimitry Andric 20510b57cec5SDimitry Andric // Check that we aren't already using an immediate. 20520b57cec5SDimitry Andric // XXX: It's possible for an instruction to have more than one 20530b57cec5SDimitry Andric // immediate operand, but this is not supported yet. 20540b57cec5SDimitry Andric if (ImmReg == R600::ALU_LITERAL_X) { 20550b57cec5SDimitry Andric if (!Imm.getNode()) 20560b57cec5SDimitry Andric return false; 2057e8d8bef9SDimitry Andric ConstantSDNode *C = cast<ConstantSDNode>(Imm); 20580b57cec5SDimitry Andric if (C->getZExtValue()) 20590b57cec5SDimitry Andric return false; 20600b57cec5SDimitry Andric Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32); 20610b57cec5SDimitry Andric } 20620b57cec5SDimitry Andric Src = DAG.getRegister(ImmReg, MVT::i32); 20630b57cec5SDimitry Andric return true; 20640b57cec5SDimitry Andric } 20650b57cec5SDimitry Andric default: 20660b57cec5SDimitry Andric return false; 20670b57cec5SDimitry Andric } 20680b57cec5SDimitry Andric } 20690b57cec5SDimitry Andric 20700b57cec5SDimitry Andric /// Fold the instructions after selecting them 20710b57cec5SDimitry Andric SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node, 20720b57cec5SDimitry Andric SelectionDAG &DAG) const { 20730b57cec5SDimitry Andric const R600InstrInfo *TII = Subtarget->getInstrInfo(); 20740b57cec5SDimitry Andric if (!Node->isMachineOpcode()) 20750b57cec5SDimitry Andric return Node; 20760b57cec5SDimitry Andric 20770b57cec5SDimitry Andric unsigned Opcode = Node->getMachineOpcode(); 20780b57cec5SDimitry Andric SDValue FakeOp; 20790b57cec5SDimitry Andric 20800b57cec5SDimitry Andric std::vector<SDValue> Ops(Node->op_begin(), Node->op_end()); 20810b57cec5SDimitry Andric 20820b57cec5SDimitry Andric if (Opcode == R600::DOT_4) { 20830b57cec5SDimitry Andric int OperandIdx[] = { 20840b57cec5SDimitry Andric TII->getOperandIdx(Opcode, R600::OpName::src0_X), 20850b57cec5SDimitry Andric TII->getOperandIdx(Opcode, R600::OpName::src0_Y), 20860b57cec5SDimitry Andric TII->getOperandIdx(Opcode, R600::OpName::src0_Z), 20870b57cec5SDimitry Andric TII->getOperandIdx(Opcode, R600::OpName::src0_W), 20880b57cec5SDimitry Andric TII->getOperandIdx(Opcode, R600::OpName::src1_X), 20890b57cec5SDimitry Andric TII->getOperandIdx(Opcode, R600::OpName::src1_Y), 20900b57cec5SDimitry Andric TII->getOperandIdx(Opcode, R600::OpName::src1_Z), 20910b57cec5SDimitry Andric TII->getOperandIdx(Opcode, R600::OpName::src1_W) 20920b57cec5SDimitry Andric }; 20930b57cec5SDimitry Andric int NegIdx[] = { 20940b57cec5SDimitry Andric TII->getOperandIdx(Opcode, R600::OpName::src0_neg_X), 20950b57cec5SDimitry Andric TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Y), 20960b57cec5SDimitry Andric TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Z), 20970b57cec5SDimitry Andric TII->getOperandIdx(Opcode, R600::OpName::src0_neg_W), 20980b57cec5SDimitry Andric TII->getOperandIdx(Opcode, R600::OpName::src1_neg_X), 20990b57cec5SDimitry Andric TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Y), 21000b57cec5SDimitry Andric TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Z), 21010b57cec5SDimitry Andric TII->getOperandIdx(Opcode, R600::OpName::src1_neg_W) 21020b57cec5SDimitry Andric }; 21030b57cec5SDimitry Andric int AbsIdx[] = { 21040b57cec5SDimitry Andric TII->getOperandIdx(Opcode, R600::OpName::src0_abs_X), 21050b57cec5SDimitry Andric TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Y), 21060b57cec5SDimitry Andric TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Z), 21070b57cec5SDimitry Andric TII->getOperandIdx(Opcode, R600::OpName::src0_abs_W), 21080b57cec5SDimitry Andric TII->getOperandIdx(Opcode, R600::OpName::src1_abs_X), 21090b57cec5SDimitry Andric TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Y), 21100b57cec5SDimitry Andric TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Z), 21110b57cec5SDimitry Andric TII->getOperandIdx(Opcode, R600::OpName::src1_abs_W) 21120b57cec5SDimitry Andric }; 21130b57cec5SDimitry Andric for (unsigned i = 0; i < 8; i++) { 21140b57cec5SDimitry Andric if (OperandIdx[i] < 0) 21150b57cec5SDimitry Andric return Node; 21160b57cec5SDimitry Andric SDValue &Src = Ops[OperandIdx[i] - 1]; 21170b57cec5SDimitry Andric SDValue &Neg = Ops[NegIdx[i] - 1]; 21180b57cec5SDimitry Andric SDValue &Abs = Ops[AbsIdx[i] - 1]; 21190b57cec5SDimitry Andric bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1; 21200b57cec5SDimitry Andric int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]); 21210b57cec5SDimitry Andric if (HasDst) 21220b57cec5SDimitry Andric SelIdx--; 21230b57cec5SDimitry Andric SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp; 21240b57cec5SDimitry Andric if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG)) 21250b57cec5SDimitry Andric return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops); 21260b57cec5SDimitry Andric } 21270b57cec5SDimitry Andric } else if (Opcode == R600::REG_SEQUENCE) { 21280b57cec5SDimitry Andric for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) { 21290b57cec5SDimitry Andric SDValue &Src = Ops[i]; 21300b57cec5SDimitry Andric if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG)) 21310b57cec5SDimitry Andric return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops); 21320b57cec5SDimitry Andric } 21330b57cec5SDimitry Andric } else { 21340b57cec5SDimitry Andric if (!TII->hasInstrModifiers(Opcode)) 21350b57cec5SDimitry Andric return Node; 21360b57cec5SDimitry Andric int OperandIdx[] = { 21370b57cec5SDimitry Andric TII->getOperandIdx(Opcode, R600::OpName::src0), 21380b57cec5SDimitry Andric TII->getOperandIdx(Opcode, R600::OpName::src1), 21390b57cec5SDimitry Andric TII->getOperandIdx(Opcode, R600::OpName::src2) 21400b57cec5SDimitry Andric }; 21410b57cec5SDimitry Andric int NegIdx[] = { 21420b57cec5SDimitry Andric TII->getOperandIdx(Opcode, R600::OpName::src0_neg), 21430b57cec5SDimitry Andric TII->getOperandIdx(Opcode, R600::OpName::src1_neg), 21440b57cec5SDimitry Andric TII->getOperandIdx(Opcode, R600::OpName::src2_neg) 21450b57cec5SDimitry Andric }; 21460b57cec5SDimitry Andric int AbsIdx[] = { 21470b57cec5SDimitry Andric TII->getOperandIdx(Opcode, R600::OpName::src0_abs), 21480b57cec5SDimitry Andric TII->getOperandIdx(Opcode, R600::OpName::src1_abs), 21490b57cec5SDimitry Andric -1 21500b57cec5SDimitry Andric }; 21510b57cec5SDimitry Andric for (unsigned i = 0; i < 3; i++) { 21520b57cec5SDimitry Andric if (OperandIdx[i] < 0) 21530b57cec5SDimitry Andric return Node; 21540b57cec5SDimitry Andric SDValue &Src = Ops[OperandIdx[i] - 1]; 21550b57cec5SDimitry Andric SDValue &Neg = Ops[NegIdx[i] - 1]; 21560b57cec5SDimitry Andric SDValue FakeAbs; 21570b57cec5SDimitry Andric SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs; 21580b57cec5SDimitry Andric bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1; 21590b57cec5SDimitry Andric int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]); 21600b57cec5SDimitry Andric int ImmIdx = TII->getOperandIdx(Opcode, R600::OpName::literal); 21610b57cec5SDimitry Andric if (HasDst) { 21620b57cec5SDimitry Andric SelIdx--; 21630b57cec5SDimitry Andric ImmIdx--; 21640b57cec5SDimitry Andric } 21650b57cec5SDimitry Andric SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp; 21660b57cec5SDimitry Andric SDValue &Imm = Ops[ImmIdx]; 21670b57cec5SDimitry Andric if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG)) 21680b57cec5SDimitry Andric return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops); 21690b57cec5SDimitry Andric } 21700b57cec5SDimitry Andric } 21710b57cec5SDimitry Andric 21720b57cec5SDimitry Andric return Node; 21730b57cec5SDimitry Andric } 217406c3fb27SDimitry Andric 217506c3fb27SDimitry Andric TargetLowering::AtomicExpansionKind 217606c3fb27SDimitry Andric R600TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { 217706c3fb27SDimitry Andric switch (RMW->getOperation()) { 217806c3fb27SDimitry Andric case AtomicRMWInst::UIncWrap: 217906c3fb27SDimitry Andric case AtomicRMWInst::UDecWrap: 218006c3fb27SDimitry Andric // FIXME: Cayman at least appears to have instructions for this, but the 218106c3fb27SDimitry Andric // instruction defintions appear to be missing. 218206c3fb27SDimitry Andric return AtomicExpansionKind::CmpXChg; 218306c3fb27SDimitry Andric default: 218406c3fb27SDimitry Andric break; 218506c3fb27SDimitry Andric } 218606c3fb27SDimitry Andric 218706c3fb27SDimitry Andric return AMDGPUTargetLowering::shouldExpandAtomicRMWInIR(RMW); 218806c3fb27SDimitry Andric } 2189