//===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//==-----------------------------------------------------------------------===//
//
/// \file
/// Defines an instruction selector for the AMDGPU target.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUISelLowering.h" // For AMDGPUISD
#include "AMDGPUInstrInfo.h"
#include "AMDGPUPerfHintAnalysis.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "SIDefines.h"
#include "SIISelLowering.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/BasicBlock.h"
#ifdef EXPENSIVE_CHECKS
#include "llvm/IR/Dominators.h"
#endif
#include "llvm/IR/Instruction.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>
#include <new>
#include <vector>

#define DEBUG_TYPE "isel"

using namespace llvm;

namespace llvm {

class R600InstrInfo;

} // end namespace llvm

//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//

namespace {

static bool isNullConstantOrUndef(SDValue V) {
  if (V.isUndef())
    return true;

  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
  return Const != nullptr && Const->isNullValue();
}

static bool getConstantValue(SDValue N, uint32_t &Out) {
  // This is only used for packed vectors, where using 0 for undef should
  // always be good.
  if (N.isUndef()) {
    Out = 0;
    return true;
  }

  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
    Out = C->getAPIntValue().getSExtValue();
    return true;
  }

  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
    Out = C->getValueAPF().bitcastToAPInt().getSExtValue();
    return true;
  }

  return false;
}
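
// Worked example for the packing below: (v2i16 build_vector (i16 1), (i16 -2))
// packs to K = (1 & 0xffff) | ((uint32_t)-2 << 16) = 0xfffe0001, which can
// then be materialized with a single s_mov_b32.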

// TODO: Handle undef as zero
static SDNode *packConstantV2I16(const SDNode *N, SelectionDAG &DAG,
                                 bool Negate = false) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR && N->getNumOperands() == 2);
  uint32_t LHSVal, RHSVal;
  if (getConstantValue(N->getOperand(0), LHSVal) &&
      getConstantValue(N->getOperand(1), RHSVal)) {
    SDLoc SL(N);
    uint32_t K = Negate ?
      (-LHSVal & 0xffff) | (-RHSVal << 16) :
      (LHSVal & 0xffff) | (RHSVal << 16);
    return DAG.getMachineNode(AMDGPU::S_MOV_B32, SL, N->getValueType(0),
                              DAG.getTargetConstant(K, SL, MVT::i32));
  }

  return nullptr;
}

static SDNode *packNegConstantV2I16(const SDNode *N, SelectionDAG &DAG) {
  return packConstantV2I16(N, DAG, true);
}

/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  const GCNSubtarget *Subtarget;
  bool EnableLateStructurizeCFG;

public:
  explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
                              CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
    : SelectionDAGISel(*TM, OptLevel) {
    EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
  }
  ~AMDGPUDAGToDAGISel() override = default;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AMDGPUArgumentUsageInfo>();
    AU.addRequired<LegacyDivergenceAnalysis>();
#ifdef EXPENSIVE_CHECKS
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<LoopInfoWrapperPass>();
#endif
    SelectionDAGISel::getAnalysisUsage(AU);
  }

  bool matchLoadD16FromBuildVector(SDNode *N) const;

  bool runOnMachineFunction(MachineFunction &MF) override;
  void PreprocessISelDAG() override;
  void Select(SDNode *N) override;
  StringRef getPassName() const override;
  void PostprocessISelDAG() override;

protected:
  void SelectBuildVector(SDNode *N, unsigned RegClassID);

private:
  std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
  bool isNoNanSrc(SDValue N) const;
  bool isInlineImmediate(const SDNode *N, bool Negated = false) const;
  bool isNegInlineImmediate(const SDNode *N) const {
    return isInlineImmediate(N, true);
  }

  bool isVGPRImm(const SDNode *N) const;
  bool isUniformLoad(const SDNode *N) const;
  bool isUniformBr(const SDNode *N) const;

  MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const;

  SDNode *glueCopyToM0LDSInit(SDNode *N) const;
  SDNode *glueCopyToM0(SDNode *N, SDValue Val) const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
  virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool isDSOffsetLegal(SDValue Base, unsigned Offset,
                       unsigned OffsetBits) const;
  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                                 SDValue &Offset1) const;
  bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
                   SDValue &TFE, SDValue &DLC) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                         SDValue &SOffset, SDValue &Offset, SDValue &GLC,
                         SDValue &SLC, SDValue &TFE, SDValue &DLC) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                         SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
                         SDValue &SLC) const;
  bool SelectMUBUFScratchOffen(SDNode *Parent,
                               SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                               SDValue &SOffset, SDValue &ImmOffset) const;
  bool SelectMUBUFScratchOffset(SDNode *Parent,
                                SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                                SDValue &Offset) const;

  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
                         SDValue &TFE, SDValue &DLC) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset, SDValue &SLC) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset) const;

  bool SelectFlatAtomic(SDNode *N, SDValue Addr, SDValue &VAddr,
                        SDValue &Offset, SDValue &SLC) const;
  bool SelectFlatAtomicSigned(SDNode *N, SDValue Addr, SDValue &VAddr,
                              SDValue &Offset, SDValue &SLC) const;

  template <bool IsSigned>
  bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
                        SDValue &Offset, SDValue &SLC) const;

  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
                        bool &Imm) const;
  SDValue Expand32BitAddress(SDValue Addr) const;
  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
                  bool &Imm) const;
  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
  bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;

  bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3Mods_f32(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods) const;
  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                       SDValue &Clamp, SDValue &Omod) const;
  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                         SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
                                 SDValue &Clamp,
                                 SDValue &Omod) const;

  bool SelectVOP3OMods(SDValue In, SDValue &Src,
                       SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3PMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp) const;

  bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3OpSel0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp) const;

  bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3OpSelMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                            SDValue &Clamp) const;
  bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const;
  bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;

  SDValue getHi16Elt(SDValue In) const;

  void SelectADD_SUB_I64(SDNode *N);
  void SelectAddcSubb(SDNode *N);
  void SelectUADDO_USUBO(SDNode *N);
  void SelectDIV_SCALE(SDNode *N);
  void SelectDIV_FMAS(SDNode *N);
  void SelectMAD_64_32(SDNode *N);
  void SelectFMA_W_CHAIN(SDNode *N);
  void SelectFMUL_W_CHAIN(SDNode *N);

  SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
                   uint32_t Offset, uint32_t Width);
  void SelectS_BFEFromShifts(SDNode *N);
  void SelectS_BFE(SDNode *N);
  bool isCBranchSCC(const SDNode *N) const;
  void SelectBRCOND(SDNode *N);
  void SelectFMAD_FMA(SDNode *N);
  void SelectATOMIC_CMP_SWAP(SDNode *N);
  void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
  void SelectDS_GWS(SDNode *N, unsigned IntrID);
  void SelectINTRINSIC_W_CHAIN(SDNode *N);
  void SelectINTRINSIC_VOID(SDNode *N);

protected:
  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};

class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
  const R600Subtarget *Subtarget;

  bool isConstantLoad(const MemSDNode *N, int cbID) const;
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue &IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue &Offset);

public:
  explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) :
      AMDGPUDAGToDAGISel(TM, OptLevel) {}

  void Select(SDNode *N) override;

  bool SelectADDRIndirect(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;

  bool runOnMachineFunction(MachineFunction &MF) override;

  void PreprocessISelDAG() override {}

protected:
  // Include the pieces autogenerated from the target description.
#include "R600GenDAGISel.inc"
};

static SDValue stripBitcast(SDValue Val) {
  return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
}

// Figure out if this is really an extract of the high 16-bits of a dword.
static bool isExtractHiElt(SDValue In, SDValue &Out) {
  In = stripBitcast(In);
  if (In.getOpcode() != ISD::TRUNCATE)
    return false;

  SDValue Srl = In.getOperand(0);
  if (Srl.getOpcode() == ISD::SRL) {
    if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
      if (ShiftAmt->getZExtValue() == 16) {
        Out = stripBitcast(Srl.getOperand(0));
        return true;
      }
    }
  }

  return false;
}
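
// For example, (i16 (trunc (srl (i32 %x), 16))) matches above, with Out set
// to %x (after looking through any bitcasts).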

// Look through operations that obscure just looking at the low 16-bits of the
// same register.
static SDValue stripExtractLoElt(SDValue In) {
  if (In.getOpcode() == ISD::TRUNCATE) {
    SDValue Src = In.getOperand(0);
    if (Src.getValueType().getSizeInBits() == 32)
      return stripBitcast(Src);
  }

  return In;
}

} // end anonymous namespace

INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "amdgpu-isel",
                      "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis)
INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
#ifdef EXPENSIVE_CHECKS
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
#endif
INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "amdgpu-isel",
                    "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)

/// This pass converts a legalized DAG into an AMDGPU-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM,
                                        CodeGenOpt::Level OptLevel) {
  return new AMDGPUDAGToDAGISel(TM, OptLevel);
}

/// This pass converts a legalized DAG into an R600-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createR600ISelDag(TargetMachine *TM,
                                      CodeGenOpt::Level OptLevel) {
  return new R600DAGToDAGISel(TM, OptLevel);
}

bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
#ifdef EXPENSIVE_CHECKS
  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  for (auto &L : LI->getLoopsInPreorder()) {
    assert(L->isLCSSAForm(DT));
  }
#endif
  Subtarget = &MF.getSubtarget<GCNSubtarget>();
  return SelectionDAGISel::runOnMachineFunction(MF);
}

bool AMDGPUDAGToDAGISel::matchLoadD16FromBuildVector(SDNode *N) const {
  assert(Subtarget->d16PreservesUnusedBits());
  MVT VT = N->getValueType(0).getSimpleVT();
  if (VT != MVT::v2i16 && VT != MVT::v2f16)
    return false;

  SDValue Lo = N->getOperand(0);
  SDValue Hi = N->getOperand(1);

  LoadSDNode *LdHi = dyn_cast<LoadSDNode>(stripBitcast(Hi));

  // build_vector lo, (load ptr) -> load_d16_hi ptr, lo
  // build_vector lo, (zextload ptr from i8) -> load_d16_hi_u8 ptr, lo
  // build_vector lo, (sextload ptr from i8) -> load_d16_hi_i8 ptr, lo

  // Need to check for possible indirect dependencies on the other half of the
  // vector to avoid introducing a cycle.
  if (LdHi && Hi.hasOneUse() && !LdHi->isPredecessorOf(Lo.getNode())) {
    SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);

    SDValue TiedIn = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Lo);
    SDValue Ops[] = {
      LdHi->getChain(), LdHi->getBasePtr(), TiedIn
    };

    unsigned LoadOp = AMDGPUISD::LOAD_D16_HI;
    if (LdHi->getMemoryVT() == MVT::i8) {
      LoadOp = LdHi->getExtensionType() == ISD::SEXTLOAD ?
        AMDGPUISD::LOAD_D16_HI_I8 : AMDGPUISD::LOAD_D16_HI_U8;
    } else {
      assert(LdHi->getMemoryVT() == MVT::i16);
    }

    SDValue NewLoadHi =
      CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdHi), VTList,
                                  Ops, LdHi->getMemoryVT(),
                                  LdHi->getMemOperand());

    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadHi);
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdHi, 1), NewLoadHi.getValue(1));
    return true;
  }

  // build_vector (load ptr), hi -> load_d16_lo ptr, hi
  // build_vector (zextload ptr from i8), hi -> load_d16_lo_u8 ptr, hi
  // build_vector (sextload ptr from i8), hi -> load_d16_lo_i8 ptr, hi
  LoadSDNode *LdLo = dyn_cast<LoadSDNode>(stripBitcast(Lo));
  if (LdLo && Lo.hasOneUse()) {
    SDValue TiedIn = getHi16Elt(Hi);
    if (!TiedIn || LdLo->isPredecessorOf(TiedIn.getNode()))
      return false;

    SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);
    unsigned LoadOp = AMDGPUISD::LOAD_D16_LO;
    if (LdLo->getMemoryVT() == MVT::i8) {
      LoadOp = LdLo->getExtensionType() == ISD::SEXTLOAD ?
        AMDGPUISD::LOAD_D16_LO_I8 : AMDGPUISD::LOAD_D16_LO_U8;
    } else {
      assert(LdLo->getMemoryVT() == MVT::i16);
    }

    TiedIn = CurDAG->getNode(ISD::BITCAST, SDLoc(N), VT, TiedIn);

    SDValue Ops[] = {
      LdLo->getChain(), LdLo->getBasePtr(), TiedIn
    };

    SDValue NewLoadLo =
      CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdLo), VTList,
                                  Ops, LdLo->getMemoryVT(),
                                  LdLo->getMemOperand());

    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadLo);
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdLo, 1), NewLoadLo.getValue(1));
    return true;
  }

  return false;
}
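
// As a concrete illustration of the rewrite above: a DAG of the form
//   (v2f16 build_vector %lo, (f16 load %ptr))
// becomes
//   (v2f16 load_d16_hi %ptr, (v2f16 scalar_to_vector %lo))
// so the 16-bit load writes only the high half of the 32-bit register while
// the low half is preserved from %lo.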

void AMDGPUDAGToDAGISel::PreprocessISelDAG() {
  if (!Subtarget->d16PreservesUnusedBits())
    return;

  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  bool MadeChange = false;
  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    if (N->use_empty())
      continue;

    switch (N->getOpcode()) {
    case ISD::BUILD_VECTOR:
      MadeChange |= matchLoadD16FromBuildVector(N);
      break;
    default:
      break;
    }
  }

  if (MadeChange) {
    CurDAG->RemoveDeadNodes();
    LLVM_DEBUG(dbgs() << "After PreProcess:\n";
               CurDAG->dump(););
  }
}

bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
  if (TM.Options.NoNaNsFPMath)
    return true;

  // TODO: Move into isKnownNeverNaN
  if (N->getFlags().isDefined())
    return N->getFlags().hasNoNaNs();

  return CurDAG->isKnownNeverNaN(N);
}

bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N,
                                           bool Negated) const {
  if (N->isUndef())
    return true;

  const SIInstrInfo *TII = Subtarget->getInstrInfo();
  if (Negated) {
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
      return TII->isInlineConstant(-C->getAPIntValue());

    if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
      return TII->isInlineConstant(-C->getValueAPF().bitcastToAPInt());

  } else {
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
      return TII->isInlineConstant(C->getAPIntValue());

    if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
      return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());
  }

  return false;
}
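
// Informal note (the exact set is subtarget-dependent; SIInstrInfo's
// isInlineConstant is authoritative): GCN inline constants cover small
// integers (roughly -16..64) and a handful of FP values such as 0.0, +/-0.5,
// +/-1.0, +/-2.0 and +/-4.0. Anything else requires a literal operand and so
// cannot be folded as an inline immediate.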

/// Determine the register class for \p OpNo
/// \returns The register class of the virtual register that will be used for
/// the given operand number \p OpNo or NULL if the register class cannot be
/// determined.
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
                                                                  unsigned OpNo) const {
  if (!N->isMachineOpcode()) {
    if (N->getOpcode() == ISD::CopyToReg) {
      unsigned Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
        MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
        return MRI.getRegClass(Reg);
      }

      const SIRegisterInfo *TRI
        = static_cast<const GCNSubtarget *>(Subtarget)->getRegisterInfo();
      return TRI->getPhysRegClass(Reg);
    }

    return nullptr;
  }

  switch (N->getMachineOpcode()) {
  default: {
    const MCInstrDesc &Desc =
        Subtarget->getInstrInfo()->get(N->getMachineOpcode());
    unsigned OpIdx = Desc.getNumDefs() + OpNo;
    if (OpIdx >= Desc.getNumOperands())
      return nullptr;
    int RegClass = Desc.OpInfo[OpIdx].RegClass;
    if (RegClass == -1)
      return nullptr;

    return Subtarget->getRegisterInfo()->getRegClass(RegClass);
  }
  case AMDGPU::REG_SEQUENCE: {
    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    const TargetRegisterClass *SuperRC =
        Subtarget->getRegisterInfo()->getRegClass(RCID);

    SDValue SubRegOp = N->getOperand(OpNo + 1);
    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
    return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
                                                               SubRegIdx);
  }
  }
}

SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N, SDValue Val) const {
  const SITargetLowering &Lowering =
      *static_cast<const SITargetLowering *>(getTargetLowering());

  assert(N->getOperand(0).getValueType() == MVT::Other && "Expected chain");

  SDValue M0 = Lowering.copyToM0(*CurDAG, N->getOperand(0), SDLoc(N),
                                 Val);

  SDValue Glue = M0.getValue(1);

  SmallVector<SDValue, 8> Ops;
  Ops.push_back(M0); // Replace the chain.
  for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
    Ops.push_back(N->getOperand(i));

  Ops.push_back(Glue);
  return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
}

SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const {
  unsigned AS = cast<MemSDNode>(N)->getAddressSpace();
  if (AS == AMDGPUAS::LOCAL_ADDRESS) {
    if (Subtarget->ldsRequiresM0Init())
      return glueCopyToM0(N, CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
  } else if (AS == AMDGPUAS::REGION_ADDRESS) {
    MachineFunction &MF = CurDAG->getMachineFunction();
    unsigned Value = MF.getInfo<SIMachineFunctionInfo>()->getGDSSize();
    return
      glueCopyToM0(N, CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i32));
  }
  return N;
}

MachineSDNode *AMDGPUDAGToDAGISel::buildSMovImm64(SDLoc &DL, uint64_t Imm,
                                                  EVT VT) const {
  SDNode *Lo = CurDAG->getMachineNode(
      AMDGPU::S_MOV_B32, DL, MVT::i32,
      CurDAG->getTargetConstant(Imm & 0xFFFFFFFF, DL, MVT::i32));
  SDNode *Hi =
      CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                             CurDAG->getTargetConstant(Imm >> 32, DL, MVT::i32));
  const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)};

  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
}
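
// For example, buildSMovImm64(DL, 0x1234567800000042, MVT::i64) emits
//   s_mov_b32 lo, 0x00000042
//   s_mov_b32 hi, 0x12345678
// and ties the two halves into an SReg_64 with a REG_SEQUENCE.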

static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
  switch (NumVectorElts) {
  case 1:
    return AMDGPU::SReg_32_XM0RegClassID;
  case 2:
    return AMDGPU::SReg_64RegClassID;
  case 3:
    return AMDGPU::SGPR_96RegClassID;
  case 4:
    return AMDGPU::SReg_128RegClassID;
  case 5:
    return AMDGPU::SGPR_160RegClassID;
  case 8:
    return AMDGPU::SReg_256RegClassID;
  case 16:
    return AMDGPU::SReg_512RegClassID;
  case 32:
    return AMDGPU::SReg_1024RegClassID;
  }

  llvm_unreachable("invalid vector size");
}
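
// The mapping is simply element count times 32 bits: e.g. 8 x i32 lives in an
// SReg_256 (256-bit) tuple. Element counts with no matching SGPR tuple class
// (6, 7, 9, ...) hit the llvm_unreachable above.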

void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
  EVT VT = N->getValueType(0);
  unsigned NumVectorElts = VT.getVectorNumElements();
  EVT EltVT = VT.getVectorElementType();
  SDLoc DL(N);
  SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);

  if (NumVectorElts == 1) {
    CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
                         RegClass);
    return;
  }

  assert(NumVectorElts <= 32 && "Vectors with more than 32 elements not "
                                "supported yet");
  // 32 = Max Num Vector Elements
  // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
  // 1 = Vector Register Class
  SmallVector<SDValue, 32 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

  RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
  bool IsRegSeq = true;
  unsigned NOps = N->getNumOperands();
  for (unsigned i = 0; i < NOps; i++) {
    // XXX: Why is this here?
    if (isa<RegisterSDNode>(N->getOperand(i))) {
      IsRegSeq = false;
      break;
    }
    unsigned Sub = AMDGPURegisterInfo::getSubRegFromChannel(i);
    RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
    RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32);
  }
  if (NOps != NumVectorElts) {
    // Fill in the missing undef elements if this was a scalar_to_vector.
    assert(N->getOpcode() == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
    MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                   DL, EltVT);
    for (unsigned i = NOps; i < NumVectorElts; ++i) {
      unsigned Sub = AMDGPURegisterInfo::getSubRegFromChannel(i);
      RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
      RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(Sub, DL, MVT::i32);
    }
  }

  if (!IsRegSeq)
    SelectCode(N);
  CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
}
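
// For example, (v4i32 build_vector a, b, c, d) is selected to
//   REG_SEQUENCE SReg_128, a, sub0, b, sub1, c, sub2, d, sub3
// with IMPLICIT_DEF filling any trailing lanes when the input was a
// scalar_to_vector.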

void AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return; // Already selected.
  }

  if (isa<AtomicSDNode>(N) ||
      (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC ||
       Opc == ISD::ATOMIC_LOAD_FADD ||
       Opc == AMDGPUISD::ATOMIC_LOAD_FMIN ||
       Opc == AMDGPUISD::ATOMIC_LOAD_FMAX))
    N = glueCopyToM0LDSInit(N);

  switch (Opc) {
  default:
    break;
  // We are selecting i64 ADD here instead of custom-lowering it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
  case ISD::ADDC:
  case ISD::ADDE:
  case ISD::SUBC:
  case ISD::SUBE: {
    if (N->getValueType(0) != MVT::i64)
      break;

    SelectADD_SUB_I64(N);
    return;
  }
  case ISD::ADDCARRY:
  case ISD::SUBCARRY:
    if (N->getValueType(0) != MVT::i32)
      break;

    SelectAddcSubb(N);
    return;
  case ISD::UADDO:
  case ISD::USUBO: {
    SelectUADDO_USUBO(N);
    return;
  }
  case AMDGPUISD::FMUL_W_CHAIN: {
    SelectFMUL_W_CHAIN(N);
    return;
  }
  case AMDGPUISD::FMA_W_CHAIN: {
    SelectFMA_W_CHAIN(N);
    return;
  }

  case ISD::SCALAR_TO_VECTOR:
  case ISD::BUILD_VECTOR: {
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    if (VT.getScalarSizeInBits() == 16) {
      if (Opc == ISD::BUILD_VECTOR && NumVectorElts == 2) {
        if (SDNode *Packed = packConstantV2I16(N, *CurDAG)) {
          ReplaceNode(N, Packed);
          return;
        }
      }

      break;
    }

    assert(VT.getVectorElementType().bitsEq(MVT::i32));
    unsigned RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
    SelectBuildVector(N, RegClassID);
    return;
  }
  case ISD::BUILD_PAIR: {
    SDValue RC, SubReg0, SubReg1;
    SDLoc DL(N);
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }

  case ISD::Constant:
  case ISD::ConstantFP: {
    if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

    SDLoc DL(N);
    ReplaceNode(N, buildSMovImm64(DL, Imm, N->getValueType(0)));
    return;
  }
  case ISD::LOAD:
  case ISD::STORE:
  case ISD::ATOMIC_LOAD:
  case ISD::ATOMIC_STORE: {
    N = glueCopyToM0LDSInit(N);
    break;
  }

  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    // There is a scalar version available, but unlike the vector version which
    // has a separate operand for the offset and width, the scalar version packs
    // the width and offset into a single operand. Try to move to the scalar
    // version if the offsets are constant, so that we can try to keep extended
    // loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
                            SDLoc(N), N->getOperand(0), OffsetVal, WidthVal));
    return;
  }
  case AMDGPUISD::DIV_SCALE: {
    SelectDIV_SCALE(N);
    return;
  }
  case AMDGPUISD::DIV_FMAS: {
    SelectDIV_FMAS(N);
    return;
  }
  case AMDGPUISD::MAD_I64_I32:
  case AMDGPUISD::MAD_U64_U32: {
    SelectMAD_64_32(N);
    return;
  }
  case ISD::CopyToReg: {
    const SITargetLowering &Lowering =
        *static_cast<const SITargetLowering *>(getTargetLowering());
    N = Lowering.legalizeTargetIndependentNode(N, *CurDAG);
    break;
  }
  case ISD::AND:
  case ISD::SRL:
  case ISD::SRA:
  case ISD::SIGN_EXTEND_INREG:
    if (N->getValueType(0) != MVT::i32)
      break;

    SelectS_BFE(N);
    return;
  case ISD::BRCOND:
    SelectBRCOND(N);
    return;
  case ISD::FMAD:
  case ISD::FMA:
    SelectFMAD_FMA(N);
    return;
  case AMDGPUISD::ATOMIC_CMP_SWAP:
    SelectATOMIC_CMP_SWAP(N);
    return;
  case AMDGPUISD::CVT_PKRTZ_F16_F32:
  case AMDGPUISD::CVT_PKNORM_I16_F32:
  case AMDGPUISD::CVT_PKNORM_U16_F32:
  case AMDGPUISD::CVT_PK_U16_U32:
  case AMDGPUISD::CVT_PK_I16_I32: {
    // Hack around using a legal type if f16 is illegal.
    if (N->getValueType(0) == MVT::i32) {
      MVT NewVT = Opc == AMDGPUISD::CVT_PKRTZ_F16_F32 ? MVT::v2f16 : MVT::v2i16;
      N = CurDAG->MorphNodeTo(N, N->getOpcode(), CurDAG->getVTList(NewVT),
                              { N->getOperand(0), N->getOperand(1) });
      SelectCode(N);
      return;
    }

    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    SelectINTRINSIC_W_CHAIN(N);
    return;
  }
  case ISD::INTRINSIC_VOID: {
    SelectINTRINSIC_VOID(N);
    return;
  }
  }

  SelectCode(N);
}

bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
  const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
  const Instruction *Term = BB->getTerminator();
  return Term->getMetadata("amdgpu.uniform") ||
         Term->getMetadata("structurizecfg.uniform");
}

StringRef AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}

//===----------------------------------------------------------------------===//
// Complex Patterns
//===----------------------------------------------------------------------===//

bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  return false;
}

bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *C;
  SDLoc DL(Addr);

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  }

  return true;
}

// FIXME: Should only handle addcarry/subcarry
void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  unsigned Opcode = N->getOpcode();
  bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
  bool ProduceCarry =
      ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
  bool IsAdd = Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE;

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);

  unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  SDNode *AddLo;
  if (!ConsumeCarry) {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
    AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
  } else {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
    AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
  }
  SDValue AddHiArgs[] = {
    SDValue(Hi0, 0),
    SDValue(Hi1, 0),
    SDValue(AddLo, 1)
  };
  SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);

  SDValue RegSequenceArgs[] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
    SDValue(AddLo, 0),
    Sub0,
    SDValue(AddHi, 0),
    Sub1,
  };
  SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
                                               MVT::i64, RegSequenceArgs);

  if (ProduceCarry) {
    // Replace the carry-use
    ReplaceUses(SDValue(N, 1), SDValue(AddHi, 1));
  }

  // Replace the remaining uses.
  ReplaceNode(N, RegSequence);
}
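
// In other words, a 64-bit add is split into 32-bit halves:
//   s_add_u32  lo, lhs.lo, rhs.lo   ; produces the carry in SCC
//   s_addc_u32 hi, lhs.hi, rhs.hi   ; consumes SCC via the glue edge
// and the two results are recombined into an SReg_64 with a REG_SEQUENCE.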

void AMDGPUDAGToDAGISel::SelectAddcSubb(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDValue CI = N->getOperand(2);

  unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::V_ADDC_U32_e64
                                                 : AMDGPU::V_SUBB_U32_e64;
  CurDAG->SelectNodeTo(
      N, Opc, N->getVTList(),
      {LHS, RHS, CI, CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
}

void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
  // The names of these opcodes are misleading. v_add_i32/v_sub_i32 have an
  // unsigned carry out despite the _i32 name. These were renamed in VI to
  // _U32.
  // FIXME: We should probably rename the opcodes here.
  unsigned Opc = N->getOpcode() == ISD::UADDO ?
      AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;

  CurDAG->SelectNodeTo(
      N, Opc, N->getVTList(),
      {N->getOperand(0), N->getOperand(1),
       CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
}

void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
  SDValue Ops[10];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
  Ops[8] = N->getOperand(0);
  Ops[9] = N->getOperand(4);

  CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32, N->getVTList(), Ops);
}

void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, clamp, omod
  SDValue Ops[8];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  Ops[6] = N->getOperand(0);
  Ops[7] = N->getOperand(3);

  CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
    = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;

  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

void AMDGPUDAGToDAGISel::SelectDIV_FMAS(SDNode *N) {
  const GCNSubtarget *ST = static_cast<const GCNSubtarget *>(Subtarget);
  const SIRegisterInfo *TRI = ST->getRegisterInfo();

  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
    = (VT == MVT::f64) ? AMDGPU::V_DIV_FMAS_F64 : AMDGPU::V_DIV_FMAS_F32;

  SDValue CarryIn = N->getOperand(3);
  // V_DIV_FMAS implicitly reads VCC.
  SDValue VCC = CurDAG->getCopyToReg(CurDAG->getEntryNode(), SL,
                                     TRI->getVCC(), CarryIn, SDValue());

  SDValue Ops[10];

  SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]);

  Ops[8] = VCC;
  Ops[9] = VCC.getValue(1);

  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
  SDLoc SL(N);
  bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32;
  unsigned Opc = Signed ? AMDGPU::V_MAD_I64_I32 : AMDGPU::V_MAD_U64_U32;

  SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                    Clamp };
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

bool AMDGPUDAGToDAGISel::isDSOffsetLegal(SDValue Base, unsigned Offset,
                                         unsigned OffsetBits) const {
  if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
      (OffsetBits == 8 && !isUInt<8>(Offset)))
    return false;

  if (Subtarget->hasUsableDSOffset() ||
      Subtarget->unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands, instructions with a negative base value and an
  // offset don't seem to work.
  return CurDAG->SignBitIsZero(Base);
}
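
// Put differently: single-address DS instructions take a 16-bit unsigned byte
// offset, while the read2/write2 forms checked with OffsetBits == 8 take two
// 8-bit offsets. On Southern Islands the base must additionally be known
// non-negative before an offset may be folded.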

bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) const {
  SDLoc DL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
      // (add n0, c0)
      Base = N0;
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      int64_t ByteOffset = C->getSExtValue();
      if (isUInt<16>(ByteOffset)) {
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
          SmallVector<SDValue, 3> Opnds;
          Opnds.push_back(Zero);
          Opnds.push_back(Addr.getOperand(1));

          // FIXME: Select to VOP3 version for with-carry.
          unsigned SubOp = AMDGPU::V_SUB_I32_e32;
          if (Subtarget->hasAddNoCarry()) {
            SubOp = AMDGPU::V_SUB_U32_e64;
            Opnds.push_back(
                CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
          }

          MachineSDNode *MachineSub =
              CurDAG->getMachineNode(SubOp, DL, MVT::i32, Opnds);

          Base = SDValue(MachineSub, 0);
          Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // If we have a constant address, prefer to put the constant into the
    // offset. This can save moves to load the constant address since multiple
    // operations can share the zero base address register, and enables merging
    // into read2 / write2 instructions.

    SDLoc DL(Addr);

    if (isUInt<16>(CAddr->getZExtValue())) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                      DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
  return true;
}

// TODO: If offset is too big, put low 16-bit into offset.
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                   SDValue &Offset0,
                                                   SDValue &Offset1) const {
  SDLoc DL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    unsigned DWordOffset0 = C1->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    // (add n0, c0)
    if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
      Base = N0;
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      unsigned DWordOffset0 = C->getZExtValue() / 4;
      unsigned DWordOffset1 = DWordOffset0 + 1;

      if (isUInt<8>(DWordOffset0)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
          SmallVector<SDValue, 3> Opnds;
          Opnds.push_back(Zero);
          Opnds.push_back(Addr.getOperand(1));
          unsigned SubOp = AMDGPU::V_SUB_I32_e32;
          if (Subtarget->hasAddNoCarry()) {
            SubOp = AMDGPU::V_SUB_U32_e64;
            Opnds.push_back(
                CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
          }

          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(SubOp, DL, MVT::i32, Opnds);

          Base = SDValue(MachineSub, 0);
          Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
          Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    assert(4 * DWordOffset0 == CAddr->getZExtValue());

    if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero
        = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                 DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  }

  // default case

  Base = Addr;
  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
  return true;
}
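
// The two offsets above are in dword units, not bytes: a byte offset of 40
// yields offset0 = 10 and offset1 = 11, addressing two consecutive dwords for
// a ds_read2_b32 / ds_write2_b32 pair.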

bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                     SDValue &VAddr, SDValue &SOffset,
                                     SDValue &Offset, SDValue &Offen,
                                     SDValue &Idxen, SDValue &Addr64,
                                     SDValue &GLC, SDValue &SLC,
                                     SDValue &TFE, SDValue &DLC) const {
  // Subtarget prefers to use flat instruction
  if (Subtarget->useFlatForGlobal())
    return false;

  SDLoc DL(Addr);

  if (!GLC.getNode())
    GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  if (!SLC.getNode())
    SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);
  DLC = CurDAG->getTargetConstant(0, DL, MVT::i1);

  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

  ConstantSDNode *C1 = nullptr;
  SDValue N0 = Addr;
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    C1 = cast<ConstantSDNode>(Addr.getOperand(1));
    if (isUInt<32>(C1->getZExtValue()))
      N0 = Addr.getOperand(0);
    else
      C1 = nullptr;
  }

  if (N0.getOpcode() == ISD::ADD) {
    // (add N2, N3) -> addr64, or
    // (add (add N2, N3), C1) -> addr64
    SDValue N2 = N0.getOperand(0);
    SDValue N3 = N0.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);

    if (N2->isDivergent()) {
      if (N3->isDivergent()) {
        // Both N2 and N3 are divergent. Use N0 (the result of the add) as the
        // addr64, and construct the resource from a 0 address.
        Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
        VAddr = N0;
      } else {
        // N2 is divergent, N3 is not.
        Ptr = N3;
        VAddr = N2;
      }
    } else {
      // N2 is not divergent.
      Ptr = N2;
      VAddr = N3;
    }
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  } else if (N0->isDivergent()) {
    // N0 is divergent. Use it as the addr64, and construct the resource from a
    // 0 address.
    Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
    VAddr = N0;
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
  } else {
    // N0 -> offset, or
    // (N0 + C1) -> offset
    VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
    Ptr = N0;
  }

  if (!C1) {
    // No offset.
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return true;
  }

  if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
    // Legal offset for instruction.
    Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
    return true;
  }

  // Illegal offset, store it in soffset.
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  SOffset =
      SDValue(CurDAG->getMachineNode(
                  AMDGPU::S_MOV_B32, DL, MVT::i32,
                  CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
              0);
  return true;
}
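
// A MUBUF access effectively computes
//   address = resource base (from the descriptor) + vaddr + soffset + imm offset
// so the code above distributes the pieces of the pointer expression: uniform
// parts go into the descriptor / soffset, the divergent part into vaddr, and
// a small constant into the immediate offset field, falling back to soffset
// when it fails isLegalMUBUFImmOffset.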

bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset, SDValue &GLC,
                                           SDValue &SLC, SDValue &TFE,
                                           SDValue &DLC) const {
  SDValue Ptr, Offen, Idxen, Addr64;

  // addr64 bit was removed for volcanic islands.
  if (!Subtarget->hasAddr64())
    return false;

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE, DLC))
    return false;

  ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
  if (C->getSExtValue()) {
    SDLoc DL(Addr);

    const SITargetLowering &Lowering =
        *static_cast<const SITargetLowering *>(getTargetLowering());

    SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
    return true;
  }

  return false;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset,
                                           SDValue &SLC) const {
  SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
  SDValue GLC, TFE, DLC;

  return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE, DLC);
}

static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
  auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
  return PSV && PSV->isStack();
}

std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
  const MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  if (auto FI = dyn_cast<FrameIndexSDNode>(N)) {
    SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
                                              FI->getValueType(0));

    // If we can resolve this to a frame index access, this will be relative to
    // either the stack or frame pointer SGPR.
    return std::make_pair(
        TFI, CurDAG->getRegister(Info->getStackPtrOffsetReg(), MVT::i32));
  }

  // If we don't know this private access is a local stack object, it needs to
  // be relative to the entry point's scratch wave offset register.
  return std::make_pair(N, CurDAG->getRegister(Info->getScratchWaveOffsetReg(),
                                               MVT::i32));
}

bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
                                                 SDValue Addr, SDValue &Rsrc,
                                                 SDValue &VAddr, SDValue &SOffset,
                                                 SDValue &ImmOffset) const {

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned Imm = CAddr->getZExtValue();

    SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
    MachineSDNode *MovHighBits = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                        DL, MVT::i32, HighBits);
    VAddr = SDValue(MovHighBits, 0);

    // In a call sequence, stores to the argument stack area are relative to the
    // stack pointer.
    const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
    unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
        Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();

    SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);
    ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
    return true;
  }

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    // (add n0, c1)

    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    // Offsets in vaddr must be positive if range checking is enabled.
    //
    // The total computation of vaddr + soffset + offset must not overflow. If
    // vaddr is negative, even if offset is 0 the sgpr offset add will end up
    // overflowing.
    //
    // Prior to gfx9, MUBUF instructions with the vaddr offset enabled would
    // always perform a range check. If a negative vaddr base index was used,
    // this would fail the range check. The overall address computation would
    // compute a valid address, but this doesn't happen due to the range
    // check. For out-of-bounds MUBUF loads, a 0 is returned.
    //
    // Therefore it should be safe to fold any VGPR offset on gfx9 into the
    // MUBUF vaddr, but not on older subtargets which can only do this if the
    // sign bit is known 0.
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue()) &&
        (!Subtarget->privateMemoryResourceIsRangeChecked() ||
         CurDAG->SignBitIsZero(N0))) {
      std::tie(VAddr, SOffset) = foldFrameIndex(N0);
      ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // (node)
  std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
                                                  SDValue Addr,
                                                  SDValue &SRsrc,
                                                  SDValue &SOffset,
                                                  SDValue &Offset) const {
  ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr);
  if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue()))
    return false;

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
  unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
      Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();

  // FIXME: Get from MachinePointerInfo? We should only be using the frame
  // offset if we know this is in a call sequence.
  SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);

  Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &SOffset, SDValue &Offset,
                                           SDValue &GLC, SDValue &SLC,
                                           SDValue &TFE, SDValue &DLC) const {
  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE, DLC))
    return false;

  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
    uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
                    APInt::getAllOnesValue(32).getZExtValue(); // Size
    SDLoc DL(Addr);

    const SITargetLowering &Lowering =
        *static_cast<const SITargetLowering *>(getTargetLowering());

    SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset,
                                           SDValue &Offset) const {
  SDValue GLC, SLC, TFE, DLC;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC);
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset, SDValue &Offset,
                                           SDValue &SLC) const {
  SDValue GLC, TFE, DLC;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC);
}

template <bool IsSigned>
bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N,
                                          SDValue Addr,
                                          SDValue &VAddr,
                                          SDValue &Offset,
                                          SDValue &SLC) const {
  return static_cast<const SITargetLowering *>(getTargetLowering())->
      SelectFlatOffset(IsSigned, *CurDAG, N, Addr, VAddr, Offset, SLC);
}

bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDNode *N,
                                          SDValue Addr,
                                          SDValue &VAddr,
                                          SDValue &Offset,
                                          SDValue &SLC) const {
  return SelectFlatOffset<false>(N, Addr, VAddr, Offset, SLC);
}

bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDNode *N,
                                                SDValue Addr,
                                                SDValue &VAddr,
                                                SDValue &Offset,
                                                SDValue &SLC) const {
  return SelectFlatOffset<true>(N, Addr, VAddr, Offset, SLC);
}

bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
                                          SDValue &Offset, bool &Imm) const {

  // FIXME: Handle non-constant offsets.
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
  if (!C)
    return false;

  SDLoc SL(ByteOffsetNode);
  GCNSubtarget::Generation Gen = Subtarget->getGeneration();
  int64_t ByteOffset = C->getSExtValue();
  int64_t EncodedOffset = AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset);

  if (AMDGPU::isLegalSMRDImmOffset(*Subtarget, ByteOffset)) {
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
    Imm = true;
    return true;
  }

  if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
    return false;

  if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
    // 32-bit Immediates are supported on Sea Islands.
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
  } else {
    SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
    Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
                                            C32Bit), 0);
  }
  Imm = false;
  return true;
}
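
// Informal note on the encoding (AMDGPU::getSMRDEncodedOffset is the
// authoritative rule): older subtargets encode the SMRD immediate offset in
// dword units, while newer ones take a byte offset, which is why the legality
// check and the encoded value are computed separately above.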

SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {
  if (Addr.getValueType() != MVT::i32)
    return Addr;

  // Zero-extend a 32-bit address.
  SDLoc SL(Addr);

  const MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  unsigned AddrHiVal = Info->get32BitAddressHighBits();
  SDValue AddrHi = CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32);

  const SDValue Ops[] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32),
    Addr,
    CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
    SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi),
            0),
    CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32),
  };

  return SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64,
                                        Ops), 0);
}

bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
                                    SDValue &Offset, bool &Imm) const {
  SDLoc SL(Addr);

  // A 32-bit (address + offset) should not cause unsigned 32-bit integer
  // wraparound, because s_load instructions perform the addition in 64 bits.
  if ((Addr.getValueType() != MVT::i32 ||
       Addr->getFlags().hasNoUnsignedWrap()) &&
      CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    if (SelectSMRDOffset(N1, Offset, Imm)) {
      SBase = Expand32BitAddress(N0);
      return true;
    }
  }
  SBase = Expand32BitAddress(Addr);
  Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
  Imm = true;
  return true;
}
  if ((Addr.getValueType() != MVT::i32 ||
       Addr->getFlags().hasNoUnsignedWrap()) &&
      CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    if (SelectSMRDOffset(N1, Offset, Imm)) {
      SBase = Expand32BitAddress(N0);
      return true;
    }
  }
  SBase = Expand32BitAddress(Addr);
  Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
  Imm = true;
  return true;
}

bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
                                       SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
}

bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
                                         SDValue &Offset) const {
  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRD(Addr, SBase, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
                                        SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
         !isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
                                             SDValue &Offset) const {
  bool Imm;
  return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
                                               SDValue &Offset) const {
  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRDOffset(Addr, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
                                            SDValue &Base,
                                            SDValue &Offset) const {
  SDLoc DL(Index);

  if (CurDAG->isBaseWithConstantOffset(Index)) {
    SDValue N0 = Index.getOperand(0);
    SDValue N1 = Index.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    // (add n0, c0)
    // Don't peel off the offset (c0) if doing so could possibly lead
    // the base (n0) to be negative.
    if (C1->getSExtValue() <= 0 || CurDAG->SignBitIsZero(N0)) {
      Base = N0;
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
      return true;
    }
  }

  if (isa<ConstantSDNode>(Index))
    return false;

  Base = Index;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  return true;
}

SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL,
                                     SDValue Val, uint32_t Offset,
                                     uint32_t Width) {
  // Transformation function: pack the offset and width of a BFE into
  // the format expected by S_BFE_I32 / S_BFE_U32. In the second
  // source, bits [5:0] contain the offset and bits [22:16] the width.
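  // For example, Offset = 16 and Width = 8 pack to (16 | (8 << 16)) ==
  // 0x00080010, i.e. "extract 8 bits starting at bit 16".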
  uint32_t PackedVal = Offset | (Width << 16);
  SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);

  return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
}

void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
  // "((a << b) srl c)" ---> "BFE_U32 a, (c - b), (32 - c)"
  // "((a << b) sra c)" ---> "BFE_I32 a, (c - b), (32 - c)"
  // Predicate: 0 < b <= c < 32

  const SDValue &Shl = N->getOperand(0);
  ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));

  if (B && C) {
    uint32_t BVal = B->getZExtValue();
    uint32_t CVal = C->getZExtValue();

    if (0 < BVal && BVal <= CVal && CVal < 32) {
      bool Signed = N->getOpcode() == ISD::SRA;
      unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

      ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal,
                              32 - CVal));
      return;
    }
  }
  SelectCode(N);
}

void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
  switch (N->getOpcode()) {
  case ISD::AND:
    if (N->getOperand(0).getOpcode() == ISD::SRL) {
      // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
      // Predicate: isMask(mask)
      const SDValue &Srl = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue();

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  Srl.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    }
    break;
  case ISD::SRL:
    if (N->getOperand(0).getOpcode() == ISD::AND) {
      // "((a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
      // Predicate: isMask(mask >> b)
      const SDValue &And = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  And.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
      return;
    }
    break;
  case ISD::SRA:
    if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
      return;
    }
    break;

  case ISD::SIGN_EXTEND_INREG: {
    // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
    SDValue Src = N->getOperand(0);
    if (Src.getOpcode() != ISD::SRL)
      break;

    const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
    if (!Amt)
      break;

    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
                            Amt->getZExtValue(), Width));
    return;
  }
  }

  SelectCode(N);
}
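// Illustrative examples of the patterns matched above:
//   (x >> 3) & 0x1f      --> S_BFE_U32 x, (3 | (5 << 16))
//   (x & 0xff00) >> 8    --> S_BFE_U32 x, (8 | (8 << 16))   ; mask >> 8 = 0xff
//   (x << 8) >> 24 (srl) --> S_BFE_U32 x, (16 | (8 << 16))  ; offset c-b, width 32-c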
bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
  assert(N->getOpcode() == ISD::BRCOND);
  if (!N->hasOneUse())
    return false;

  SDValue Cond = N->getOperand(1);
  if (Cond.getOpcode() == ISD::CopyToReg)
    Cond = Cond.getOperand(2);

  if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
    return false;

  MVT VT = Cond.getOperand(0).getSimpleValueType();
  if (VT == MVT::i32)
    return true;

  if (VT == MVT::i64) {
    auto ST = static_cast<const GCNSubtarget *>(Subtarget);

    ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
    return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
  }

  return false;
}

void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
  SDValue Cond = N->getOperand(1);

  if (Cond.isUndef()) {
    CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
                         N->getOperand(2), N->getOperand(0));
    return;
  }

  const GCNSubtarget *ST = static_cast<const GCNSubtarget *>(Subtarget);
  const SIRegisterInfo *TRI = ST->getRegisterInfo();

  bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
  unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ;
  unsigned CondReg = UseSCCBr ? (unsigned)AMDGPU::SCC : TRI->getVCC();
  SDLoc SL(N);

  if (!UseSCCBr) {
    // This is the case that we are selecting to S_CBRANCH_VCCNZ. We have not
    // analyzed what generates the vcc value, so we do not know whether vcc
    // bits for disabled lanes are 0. Thus we need to mask out bits for
    // disabled lanes.
    //
    // For the case that we select S_CBRANCH_SCC1 and it gets changed to
    // S_CBRANCH_VCCNZ in SIFixSGPRCopies, SIFixSGPRCopies calls
    // SIInstrInfo::moveToVALU, which inserts the S_AND.
    //
    // We could add an analysis of what generates the vcc value here and omit
    // the S_AND when it is unnecessary. But it would be better to add a
    // separate pass after SIFixSGPRCopies to do the unnecessary S_AND removal,
    // so it catches both cases.
    Cond = SDValue(CurDAG->getMachineNode(ST->isWave32() ? AMDGPU::S_AND_B32
                                                         : AMDGPU::S_AND_B64,
                     SL, MVT::i1,
                     CurDAG->getRegister(ST->isWave32() ? AMDGPU::EXEC_LO
                                                        : AMDGPU::EXEC,
                                         MVT::i1),
                     Cond),
                   0);
  }

  SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, CondReg, Cond);
  CurDAG->SelectNodeTo(N, BrOp, MVT::Other,
                       N->getOperand(2), // Basic Block
                       VCC.getValue(0));
}

void AMDGPUDAGToDAGISel::SelectFMAD_FMA(SDNode *N) {
  MVT VT = N->getSimpleValueType(0);
  bool IsFMA = N->getOpcode() == ISD::FMA;
  if (VT != MVT::f32 || (!Subtarget->hasMadMixInsts() &&
                         !Subtarget->hasFmaMixInsts()) ||
      ((IsFMA && Subtarget->hasMadMixInsts()) ||
       (!IsFMA && Subtarget->hasFmaMixInsts()))) {
    SelectCode(N);
    return;
  }

  SDValue Src0 = N->getOperand(0);
  SDValue Src1 = N->getOperand(1);
  SDValue Src2 = N->getOperand(2);
  unsigned Src0Mods, Src1Mods, Src2Mods;

  // Avoid using v_mad_mix_f32/v_fma_mix_f32 unless there is actually an
  // operand using the conversion from f16.
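  // For example, fma(a, b, fpext(c)) with c:f16 benefits from V_FMA_MIX_F32
  // with op_sel_hi set on src2, while an fma with all-f32 sources gains
  // nothing from the mix encoding and is left to the normal patterns.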
  bool Sel0 = SelectVOP3PMadMixModsImpl(Src0, Src0, Src0Mods);
  bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods);
  bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods);

  assert((IsFMA || !Subtarget->hasFP32Denormals()) &&
         "fmad selected with denormals enabled");
  // TODO: We can select this with f32 denormals enabled if all the sources are
  // converted from f16 (in which case fmad isn't legal).

  if (Sel0 || Sel1 || Sel2) {
    // For dummy operands.
    SDValue Zero = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
    SDValue Ops[] = {
      CurDAG->getTargetConstant(Src0Mods, SDLoc(), MVT::i32), Src0,
      CurDAG->getTargetConstant(Src1Mods, SDLoc(), MVT::i32), Src1,
      CurDAG->getTargetConstant(Src2Mods, SDLoc(), MVT::i32), Src2,
      CurDAG->getTargetConstant(0, SDLoc(), MVT::i1),
      Zero, Zero
    };

    CurDAG->SelectNodeTo(N,
                         IsFMA ? AMDGPU::V_FMA_MIX_F32 : AMDGPU::V_MAD_MIX_F32,
                         MVT::f32, Ops);
  } else {
    SelectCode(N);
  }
}

// This is here because there isn't a way to use the generated sub0_sub1 as the
// subreg index to EXTRACT_SUBREG in tablegen.
void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
  MemSDNode *Mem = cast<MemSDNode>(N);
  unsigned AS = Mem->getAddressSpace();
  if (AS == AMDGPUAS::FLAT_ADDRESS) {
    SelectCode(N);
    return;
  }

  MVT VT = N->getSimpleValueType(0);
  bool Is32 = (VT == MVT::i32);
  SDLoc SL(N);

  MachineSDNode *CmpSwap = nullptr;
  if (Subtarget->hasAddr64()) {
    SDValue SRsrc, VAddr, SOffset, Offset, SLC;

    if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset,
                          SLC)) {
      unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN :
        AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN;
      SDValue CmpVal = Mem->getOperand(2);

      // XXX - Do we care about glue operands?

      SDValue Ops[] = {
        CmpVal, VAddr, SRsrc, SOffset, Offset, SLC, Mem->getChain()
      };

      CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
    }
  }

  if (!CmpSwap) {
    SDValue SRsrc, SOffset, Offset, SLC;
    if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) {
      unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN :
        AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_OFFSET_RTN;

      SDValue CmpVal = Mem->getOperand(2);
      SDValue Ops[] = {
        CmpVal, SRsrc, SOffset, Offset, SLC, Mem->getChain()
      };

      CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
    }
  }

  if (!CmpSwap) {
    SelectCode(N);
    return;
  }

  MachineMemOperand *MMO = Mem->getMemOperand();
  CurDAG->setNodeMemRefs(CmpSwap, {MMO});

  unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
  SDValue Extract
    = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0));

  ReplaceUses(SDValue(N, 0), Extract);
  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1));
  CurDAG->RemoveDeadNode(N);
}
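// For example, a 32-bit cmpxchg selects to a _RTN instruction whose result
// register is a 64-bit pair holding the packed data operands; the old memory
// value comes back in the low half, hence the sub0 (or sub0_sub1 for the
// 64-bit case) extract above.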
void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) {
  // The address is assumed to be uniform, so if it ends up in a VGPR, it will
  // be copied to an SGPR with readfirstlane.
  unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ?
    AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;

  SDValue Chain = N->getOperand(0);
  SDValue Ptr = N->getOperand(2);
  MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
  MachineMemOperand *MMO = M->getMemOperand();
  bool IsGDS = M->getAddressSpace() == AMDGPUAS::REGION_ADDRESS;

  SDValue Offset;
  if (CurDAG->isBaseWithConstantOffset(Ptr)) {
    SDValue PtrBase = Ptr.getOperand(0);
    SDValue PtrOffset = Ptr.getOperand(1);

    const APInt &OffsetVal = cast<ConstantSDNode>(PtrOffset)->getAPIntValue();
    if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue(), 16)) {
      N = glueCopyToM0(N, PtrBase);
      Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32);
    }
  }

  if (!Offset) {
    N = glueCopyToM0(N, Ptr);
    Offset = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
  }

  SDValue Ops[] = {
    Offset,
    CurDAG->getTargetConstant(IsGDS, SDLoc(), MVT::i32),
    Chain,
    N->getOperand(N->getNumOperands() - 1) // New glue
  };

  SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
}

static unsigned gwsIntrinToOpcode(unsigned IntrID) {
  switch (IntrID) {
  case Intrinsic::amdgcn_ds_gws_init:
    return AMDGPU::DS_GWS_INIT;
  case Intrinsic::amdgcn_ds_gws_barrier:
    return AMDGPU::DS_GWS_BARRIER;
  case Intrinsic::amdgcn_ds_gws_sema_v:
    return AMDGPU::DS_GWS_SEMA_V;
  case Intrinsic::amdgcn_ds_gws_sema_br:
    return AMDGPU::DS_GWS_SEMA_BR;
  case Intrinsic::amdgcn_ds_gws_sema_p:
    return AMDGPU::DS_GWS_SEMA_P;
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
  default:
    llvm_unreachable("not a gws intrinsic");
  }
}

void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
  if (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
      !Subtarget->hasGWSSemaReleaseAll()) {
    // Let this error.
    SelectCode(N);
    return;
  }

  // Chain, intrinsic ID, vsrc, offset
  const bool HasVSrc = N->getNumOperands() == 4;
  assert(HasVSrc || N->getNumOperands() == 3);

  SDLoc SL(N);
  SDValue BaseOffset = N->getOperand(HasVSrc ? 3 : 2);
  int ImmOffset = 0;
  MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
  MachineMemOperand *MMO = M->getMemOperand();

  // Don't worry if the offset ends up in a VGPR. Only one lane will have
  // effect, so SIFixSGPRCopies will validly insert readfirstlane.

  // The resource id offset is computed as (<isa opaque base> + M0[21:16] +
  // offset field) % 64. Some versions of the programming guide omit the m0
  // part, or claim it's from offset 0.
  if (ConstantSDNode *ConstOffset = dyn_cast<ConstantSDNode>(BaseOffset)) {
    // If we have a constant offset, try to use the default value for m0 as a
    // base to possibly avoid setting it up.
    glueCopyToM0(N, CurDAG->getTargetConstant(-1, SL, MVT::i32));
    ImmOffset = ConstOffset->getZExtValue() + 1;
  } else {
    if (CurDAG->isBaseWithConstantOffset(BaseOffset)) {
      ImmOffset = BaseOffset.getConstantOperandVal(1);
      BaseOffset = BaseOffset.getOperand(0);
    }

    // Prefer to do the shift in an SGPR since it should be possible to use m0
    // as the result directly. If it's already an SGPR, it will be eliminated
    // later.
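    // For example, a non-constant resource id %rid (names illustrative)
    // becomes:
    //   %sgpr = V_READFIRSTLANE_B32 %rid
    //   m0    = S_LSHL_B32 %sgpr, 16    ; id placed in m0[21:16]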
    SDNode *SGPROffset
      = CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL, MVT::i32,
                               BaseOffset);
    // Shift to offset in m0.
    SDNode *M0Base
      = CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32,
                               SDValue(SGPROffset, 0),
                               CurDAG->getTargetConstant(16, SL, MVT::i32));
    glueCopyToM0(N, SDValue(M0Base, 0));
  }

  SDValue V0;
  SDValue Chain = N->getOperand(0);
  SDValue Glue;
  if (HasVSrc) {
    SDValue VSrc0 = N->getOperand(2);

    // The manual doesn't mention this, but it seems only v0 works.
    V0 = CurDAG->getRegister(AMDGPU::VGPR0, MVT::i32);

    SDValue CopyToV0 = CurDAG->getCopyToReg(
      N->getOperand(0), SL, V0, VSrc0,
      N->getOperand(N->getNumOperands() - 1));
    Chain = CopyToV0;
    Glue = CopyToV0.getValue(1);
  }

  SDValue OffsetField = CurDAG->getTargetConstant(ImmOffset, SL, MVT::i32);

  // TODO: Can this just be removed from the instruction?
  SDValue GDS = CurDAG->getTargetConstant(1, SL, MVT::i1);

  const unsigned Opc = gwsIntrinToOpcode(IntrID);
  SmallVector<SDValue, 5> Ops;
  if (HasVSrc)
    Ops.push_back(V0);
  Ops.push_back(OffsetField);
  Ops.push_back(GDS);
  Ops.push_back(Chain);

  if (HasVSrc)
    Ops.push_back(Glue);

  SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
}

void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
  unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
  switch (IntrID) {
  case Intrinsic::amdgcn_ds_append:
  case Intrinsic::amdgcn_ds_consume: {
    if (N->getValueType(0) != MVT::i32)
      break;
    SelectDSAppendConsume(N, IntrID);
    return;
  }
  }

  SelectCode(N);
}

void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) {
  unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
  switch (IntrID) {
  case Intrinsic::amdgcn_ds_gws_init:
  case Intrinsic::amdgcn_ds_gws_barrier:
  case Intrinsic::amdgcn_ds_gws_sema_v:
  case Intrinsic::amdgcn_ds_gws_sema_br:
  case Intrinsic::amdgcn_ds_gws_sema_p:
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    SelectDS_GWS(N, IntrID);
    return;
  default:
    break;
  }

  SelectCode(N);
}

bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
                                            unsigned &Mods) const {
  Mods = 0;
  Src = In;

  if (Src.getOpcode() == ISD::FNEG) {
    Mods |= SISrcMods::NEG;
    Src = Src.getOperand(0);
  }

  if (Src.getOpcode() == ISD::FABS) {
    Mods |= SISrcMods::ABS;
    Src = Src.getOperand(0);
  }

  return true;
}
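// For example, In = fneg(fabs(x)) folds to Src = x with Mods = NEG | ABS
// (the hardware applies abs before neg, giving -|x|). In = fabs(fneg(x))
// folds only the outer fabs, since NEG | ABS would wrongly negate the
// result; the inner fneg stays in the DAG.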
bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
                                        SDValue &SrcMods) const {
  unsigned Mods;
  if (SelectVOP3ModsImpl(In, Src, Mods)) {
    SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
    return true;
  }

  return false;
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
                                             SDValue &SrcMods) const {
  SelectVOP3Mods(In, Src, SrcMods);
  return isNoNanSrc(Src);
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods_f32(SDValue In, SDValue &Src,
                                            SDValue &SrcMods) const {
  if (In.getValueType() == MVT::f32)
    return SelectVOP3Mods(In, Src, SrcMods);
  Src = In;
  SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
  if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
    return false;

  Src = In;
  return true;
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
                                         SDValue &SrcMods, SDValue &Clamp,
                                         SDValue &Omod) const {
  SDLoc DL(In);
  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);

  return SelectVOP3Mods(In, Src, SrcMods);
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
                                                   SDValue &SrcMods,
                                                   SDValue &Clamp,
                                                   SDValue &Omod) const {
  Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
  return SelectVOP3Mods(In, Src, SrcMods);
}

bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
                                         SDValue &Clamp, SDValue &Omod) const {
  Src = In;

  SDLoc DL(In);
  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);

  return true;
}

bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
                                         SDValue &SrcMods) const {
  unsigned Mods = 0;
  Src = In;

  if (Src.getOpcode() == ISD::FNEG) {
    Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
    Src = Src.getOperand(0);
  }

  if (Src.getOpcode() == ISD::BUILD_VECTOR) {
    unsigned VecMods = Mods;

    SDValue Lo = stripBitcast(Src.getOperand(0));
    SDValue Hi = stripBitcast(Src.getOperand(1));

    if (Lo.getOpcode() == ISD::FNEG) {
      Lo = stripBitcast(Lo.getOperand(0));
      Mods ^= SISrcMods::NEG;
    }

    if (Hi.getOpcode() == ISD::FNEG) {
      Hi = stripBitcast(Hi.getOperand(0));
      Mods ^= SISrcMods::NEG_HI;
    }

    if (isExtractHiElt(Lo, Lo))
      Mods |= SISrcMods::OP_SEL_0;

    if (isExtractHiElt(Hi, Hi))
      Mods |= SISrcMods::OP_SEL_1;

    Lo = stripExtractLoElt(Lo);
    Hi = stripExtractLoElt(Hi);

    if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
      // Really a scalar input. Just select from the low half of the register
      // to avoid packing.

      Src = Lo;
      SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
      return true;
    }

    Mods = VecMods;
  }

  // Packed instructions do not have abs modifiers.
  Mods |= SISrcMods::OP_SEL_1;

  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
  return true;
}
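// For example, a v2f16 operand built as
//   (build_vector (extract_hi x), (extract_hi x))
// is really the scalar high half of x, so SelectVOP3PMods selects x directly
// with OP_SEL_0 | OP_SEL_1 rather than emitting code to repack it.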
bool AMDGPUDAGToDAGISel::SelectVOP3PMods0(SDValue In, SDValue &Src,
                                          SDValue &SrcMods,
                                          SDValue &Clamp) const {
  SDLoc SL(In);

  // FIXME: Handle clamp and op_sel
  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);

  return SelectVOP3PMods(In, Src, SrcMods);
}

bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
                                         SDValue &SrcMods) const {
  Src = In;
  // FIXME: Handle op_sel
  SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectVOP3OpSel0(SDValue In, SDValue &Src,
                                          SDValue &SrcMods,
                                          SDValue &Clamp) const {
  SDLoc SL(In);

  // FIXME: Handle clamp
  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);

  return SelectVOP3OpSel(In, Src, SrcMods);
}

bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
                                             SDValue &SrcMods) const {
  // FIXME: Handle op_sel
  return SelectVOP3Mods(In, Src, SrcMods);
}

bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods0(SDValue In, SDValue &Src,
                                              SDValue &SrcMods,
                                              SDValue &Clamp) const {
  SDLoc SL(In);

  // FIXME: Handle clamp
  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);

  return SelectVOP3OpSelMods(In, Src, SrcMods);
}

// The return value is not whether the match is possible (which it always is),
// but whether a conversion from f16 is actually used.
bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
                                                   unsigned &Mods) const {
  Mods = 0;
  SelectVOP3ModsImpl(In, Src, Mods);

  if (Src.getOpcode() == ISD::FP_EXTEND) {
    Src = Src.getOperand(0);
    assert(Src.getValueType() == MVT::f16);
    Src = stripBitcast(Src);

    // Be careful about folding modifiers if we already have an abs. fneg is
    // applied last, so we don't want to apply an earlier fneg.
    if ((Mods & SISrcMods::ABS) == 0) {
      unsigned ModsTmp;
      SelectVOP3ModsImpl(Src, Src, ModsTmp);

      if ((ModsTmp & SISrcMods::NEG) != 0)
        Mods ^= SISrcMods::NEG;

      if ((ModsTmp & SISrcMods::ABS) != 0)
        Mods |= SISrcMods::ABS;
    }

    // op_sel/op_sel_hi decide the source type and which half of the source
    // register is used: if op_sel_hi is set, the operand is converted from
    // f16; if op_sel is set, it picks the high half of the source register.

    Mods |= SISrcMods::OP_SEL_1;
    if (isExtractHiElt(Src, Src)) {
      Mods |= SISrcMods::OP_SEL_0;

      // TODO: Should we try to look for neg/abs here?
    }

    return true;
  }

  return false;
}
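// For example, In = fpext(fneg(extract_hi x)) with x:v2f16 yields Src = x
// and Mods = NEG | OP_SEL_0 | OP_SEL_1: converted from f16 (op_sel_hi),
// taken from the high half (op_sel), and negated.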
bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
                                               SDValue &SrcMods) const {
  unsigned Mods = 0;
  SelectVOP3PMadMixModsImpl(In, Src, Mods);
  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
  return true;
}

SDValue AMDGPUDAGToDAGISel::getHi16Elt(SDValue In) const {
  if (In.isUndef())
    return CurDAG->getUNDEF(MVT::i32);

  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(In)) {
    SDLoc SL(In);
    return CurDAG->getConstant(C->getZExtValue() << 16, SL, MVT::i32);
  }

  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(In)) {
    SDLoc SL(In);
    return CurDAG->getConstant(
      C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
  }

  SDValue Src;
  if (isExtractHiElt(In, Src))
    return Src;

  return SDValue();
}
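// For example, getHi16Elt of the constant 0x1234 produces the i32 constant
// 0x12340000, ready to be used as the high half of a packed 32-bit value.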
bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode *N) const {
  assert(CurDAG->getTarget().getTargetTriple().getArch() == Triple::amdgcn);

  const SIRegisterInfo *SIRI =
    static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
  const SIInstrInfo *SII =
    static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());

  unsigned Limit = 0;
  bool AllUsesAcceptSReg = true;
  for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
       Limit < 10 && U != E; ++U, ++Limit) {
    const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());

    // If the register class is unknown, it could be a register class that
    // needs to be an SGPR, e.g. from an inline asm constraint.
    if (!RC || SIRI->isSGPRClass(RC))
      return false;

    if (RC != &AMDGPU::VS_32RegClass) {
      AllUsesAcceptSReg = false;
      SDNode *User = *U;
      if (User->isMachineOpcode()) {
        unsigned Opc = User->getMachineOpcode();
        MCInstrDesc Desc = SII->get(Opc);
        if (Desc.isCommutable()) {
          unsigned OpIdx = Desc.getNumDefs() + U.getOperandNo();
          unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
          if (SII->findCommutedOpIndices(Desc, OpIdx, CommuteIdx1)) {
            unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs();
            const TargetRegisterClass *CommutedRC =
              getOperandRegClass(*U, CommutedOpNo);
            if (CommutedRC == &AMDGPU::VS_32RegClass)
              AllUsesAcceptSReg = true;
          }
        }
      }
      // If "AllUsesAcceptSReg == false" at this point, we have not succeeded
      // in commuting the current user, which means we have at least one use
      // that strictly requires a VGPR. Thus, we will not attempt to commute
      // other user instructions.
      if (!AllUsesAcceptSReg)
        break;
    }
  }
  return !AllUsesAcceptSReg && (Limit < 10);
}

bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode *N) const {
  auto Ld = cast<LoadSDNode>(N);

  if (Ld->getAlignment() < 4)
    return false;

  // Constant address space loads are uniform when the pointer is not
  // divergent.
  if ((Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
       Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
      !N->isDivergent())
    return true;

  // Global loads may also be treated as uniform when the subtarget scalarizes
  // them and the memory is known not to be clobbered.
  return Subtarget->getScalarizeGlobalBehavior() &&
         Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
         !Ld->isVolatile() &&
         !N->isDivergent() &&
         static_cast<const SITargetLowering *>(
           getTargetLowering())->isMemOpHasNoClobberedMemOperand(N);
}

void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
  const AMDGPUTargetLowering& Lowering =
    *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
  bool IsModified = false;
  do {
    IsModified = false;

    // Go over all selected nodes and try to fold them a bit more.
    SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_begin();
    while (Position != CurDAG->allnodes_end()) {
      SDNode *Node = &*Position++;
      MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(Node);
      if (!MachineNode)
        continue;

      SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
      if (ResNode != Node) {
        if (ResNode)
          ReplaceUses(Node, ResNode);
        IsModified = true;
      }
    }
    CurDAG->RemoveDeadNodes();
  } while (IsModified);
}

bool R600DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
  Subtarget = &MF.getSubtarget<R600Subtarget>();
  return SelectionDAGISel::runOnMachineFunction(MF);
}

bool R600DAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
  if (!N->readMem())
    return false;
  if (CbId == -1)
    return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
           N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT;

  return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId;
}

bool R600DAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
                                                       SDValue &IntPtr) {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
    IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
                                       true);
    return true;
  }
  return false;
}

bool R600DAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
                                                       SDValue &BaseReg,
                                                       SDValue &Offset) {
  if (!isa<ConstantSDNode>(Addr)) {
    BaseReg = Addr;
    Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
    return true;
  }
  return false;
}

void R600DAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return; // Already selected.
  }

  switch (Opc) {
  default: break;
  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
  case ISD::SCALAR_TO_VECTOR:
  case ISD::BUILD_VECTOR: {
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    unsigned RegClassID;
    // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG,
    // which adds a 128-bit reg copy when going through the
    // TwoAddressInstructions pass. We want to avoid 128-bit copies as much
    // as possible because they can't be bundled by our scheduler.
    switch (NumVectorElts) {
    case 2: RegClassID = R600::R600_Reg64RegClassID; break;
    case 4:
      if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
        RegClassID = R600::R600_Reg128VerticalRegClassID;
      else
        RegClassID = R600::R600_Reg128RegClassID;
      break;
    default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
    }
    SelectBuildVector(N, RegClassID);
    return;
  }
  }

  SelectCode(N);
}

bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                          SDValue &Offset) {
  ConstantSDNode *C;
  SDLoc DL(Addr);

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  }

  return true;
}

bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                          SDValue &Offset) {
  ConstantSDNode *IMMOffset;

  if (Addr.getOpcode() == ISD::ADD
      && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
      && isInt<16>(IMMOffset->getZExtValue())) {
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
                                       MVT::i32);
    return true;
  // If the pointer address is constant, we can move it to the offset field.
  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
             && isInt<16>(IMMOffset->getZExtValue())) {
    Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                  SDLoc(CurDAG->getEntryNode()),
                                  R600::ZERO, MVT::i32);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
                                       MVT::i32);
    return true;
  }

  // Default case, no offset.
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
  return true;
}