//===- AMDGPUGlobalISelUtils.cpp ---------------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUGlobalISelUtils.h"
#include "AMDGPURegisterBankInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGenTypes/LowLevelType.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"

using namespace llvm;
using namespace AMDGPU;
using namespace MIPatternMatch;

std::pair<Register, unsigned>
AMDGPU::getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg,
                                  GISelKnownBits *KnownBits, bool CheckNUW) {
  MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
  if (Def->getOpcode() == TargetOpcode::G_CONSTANT) {
    unsigned Offset;
    const MachineOperand &Op = Def->getOperand(1);
    if (Op.isImm())
      Offset = Op.getImm();
    else
      Offset = Op.getCImm()->getZExtValue();

    return std::pair(Register(), Offset);
  }

  int64_t Offset;
  if (Def->getOpcode() == TargetOpcode::G_ADD) {
    // A 32-bit (address + offset) should not cause unsigned 32-bit integer
    // wraparound, because s_load instructions perform the addition in 64 bits.
    if (CheckNUW && !Def->getFlag(MachineInstr::NoUWrap)) {
      assert(MRI.getType(Reg).getScalarSizeInBits() == 32);
      return std::pair(Reg, 0);
    }
    // TODO: Handle G_OR used for add case
    if (mi_match(Def->getOperand(2).getReg(), MRI, m_ICst(Offset)))
      return std::pair(Def->getOperand(1).getReg(), Offset);

    // FIXME: matcher should ignore copies
    if (mi_match(Def->getOperand(2).getReg(), MRI, m_Copy(m_ICst(Offset))))
      return std::pair(Def->getOperand(1).getReg(), Offset);
  }

  Register Base;
  if (KnownBits && mi_match(Reg, MRI, m_GOr(m_Reg(Base), m_ICst(Offset))) &&
      KnownBits->maskedValueIsZero(Base, APInt(32, Offset, /*isSigned=*/true)))
    return std::pair(Base, Offset);

  // Handle G_PTRTOINT (G_PTR_ADD base, const) case
  if (Def->getOpcode() == TargetOpcode::G_PTRTOINT) {
    MachineInstr *Base;
    if (mi_match(Def->getOperand(1).getReg(), MRI,
                 m_GPtrAdd(m_MInstr(Base), m_ICst(Offset)))) {
      // If Base was int converted to pointer, simply return int and offset.
      if (Base->getOpcode() == TargetOpcode::G_INTTOPTR)
        return std::pair(Base->getOperand(1).getReg(), Offset);

      // Register returned here will be of pointer type.
      return std::pair(Base->getOperand(0).getReg(), Offset);
    }
  }

  return std::pair(Reg, 0);
}

IntrinsicLaneMaskAnalyzer::IntrinsicLaneMaskAnalyzer(MachineFunction &MF)
    : MRI(MF.getRegInfo()) {
  initLaneMaskIntrinsics(MF);
}

bool IntrinsicLaneMaskAnalyzer::isS32S64LaneMask(Register Reg) const {
  return S32S64LaneMask.contains(Reg);
}

void IntrinsicLaneMaskAnalyzer::initLaneMaskIntrinsics(MachineFunction &MF) {
  for (auto &MBB : MF) {
    for (auto &MI : MBB) {
      GIntrinsic *GI = dyn_cast<GIntrinsic>(&MI);
      if (GI && GI->is(Intrinsic::amdgcn_if_break)) {
        S32S64LaneMask.insert(MI.getOperand(3).getReg());
        findLCSSAPhi(MI.getOperand(0).getReg());
      }

      if (MI.getOpcode() == AMDGPU::SI_IF ||
          MI.getOpcode() == AMDGPU::SI_ELSE) {
        findLCSSAPhi(MI.getOperand(0).getReg());
      }
    }
  }
}

void IntrinsicLaneMaskAnalyzer::findLCSSAPhi(Register Reg) {
  S32S64LaneMask.insert(Reg);
  for (const MachineInstr &LCSSAPhi : MRI.use_instructions(Reg)) {
    if (LCSSAPhi.isPHI())
      S32S64LaneMask.insert(LCSSAPhi.getOperand(0).getReg());
  }
}

static LLT getReadAnyLaneSplitTy(LLT Ty) {
  if (Ty.isVector()) {
    LLT ElTy = Ty.getElementType();
    if (ElTy.getSizeInBits() == 16)
      return LLT::fixed_vector(2, ElTy);
    // S32, S64 or pointer
    return ElTy;
  }

  // Large scalars and 64-bit pointers
  return LLT::scalar(32);
}

static Register buildReadAnyLane(MachineIRBuilder &B, Register VgprSrc,
                                 const RegisterBankInfo &RBI);

static void unmergeReadAnyLane(MachineIRBuilder &B,
                               SmallVectorImpl<Register> &SgprDstParts,
                               LLT UnmergeTy, Register VgprSrc,
                               const RegisterBankInfo &RBI) {
  const RegisterBank *VgprRB = &RBI.getRegBank(AMDGPU::VGPRRegBankID);
  auto Unmerge = B.buildUnmerge({VgprRB, UnmergeTy}, VgprSrc);
  for (unsigned i = 0; i < Unmerge->getNumOperands() - 1; ++i) {
    SgprDstParts.push_back(buildReadAnyLane(B, Unmerge.getReg(i), RBI));
  }
}

static Register buildReadAnyLane(MachineIRBuilder &B, Register VgprSrc,
                                 const RegisterBankInfo &RBI) {
  LLT Ty = B.getMRI()->getType(VgprSrc);
  const RegisterBank *SgprRB = &RBI.getRegBank(AMDGPU::SGPRRegBankID);
  if (Ty.getSizeInBits() == 32) {
    return B.buildInstr(AMDGPU::G_AMDGPU_READANYLANE, {{SgprRB, Ty}}, {VgprSrc})
        .getReg(0);
  }

  SmallVector<Register, 8> SgprDstParts;
  unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI);

  return B.buildMergeLikeInstr({SgprRB, Ty}, SgprDstParts).getReg(0);
}

void AMDGPU::buildReadAnyLane(MachineIRBuilder &B, Register SgprDst,
                              Register VgprSrc, const RegisterBankInfo &RBI) {
  LLT Ty = B.getMRI()->getType(VgprSrc);
  if (Ty.getSizeInBits() == 32) {
    B.buildInstr(AMDGPU::G_AMDGPU_READANYLANE, {SgprDst}, {VgprSrc});
    return;
  }

  SmallVector<Register, 8> SgprDstParts;
  unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI);

  B.buildMergeLikeInstr(SgprDst, SgprDstParts);
}