//==- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface --*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
#define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H

#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUMachineFunction.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/CodeGen/MIRYamlMapping.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Support/raw_ostream.h"

namespace llvm {

class MachineFrameInfo;
class MachineFunction;
class SIMachineFunctionInfo;
class SIRegisterInfo;
class TargetRegisterClass;

class AMDGPUPseudoSourceValue : public PseudoSourceValue {
public:
  enum AMDGPUPSVKind : unsigned {
    PSVImage = PseudoSourceValue::TargetCustom,
    GWSResource
  };

protected:
  AMDGPUPseudoSourceValue(unsigned Kind, const AMDGPUTargetMachine &TM)
      : PseudoSourceValue(Kind, TM) {}

public:
  bool isConstant(const MachineFrameInfo *) const override {
    // This should probably be true for most images, but we will start by being
    // conservative.
    return false;
  }

  bool isAliased(const MachineFrameInfo *) const override {
    return true;
  }

  bool mayAlias(const MachineFrameInfo *) const override {
    return true;
  }
};

class AMDGPUImagePseudoSourceValue final : public AMDGPUPseudoSourceValue {
public:
  // TODO: Is the img rsrc useful?
  explicit AMDGPUImagePseudoSourceValue(const AMDGPUTargetMachine &TM)
      : AMDGPUPseudoSourceValue(PSVImage, TM) {}

  static bool classof(const PseudoSourceValue *V) {
    return V->kind() == PSVImage;
  }

  void printCustom(raw_ostream &OS) const override { OS << "ImageResource"; }
};

class AMDGPUGWSResourcePseudoSourceValue final : public AMDGPUPseudoSourceValue {
public:
  explicit AMDGPUGWSResourcePseudoSourceValue(const AMDGPUTargetMachine &TM)
      : AMDGPUPseudoSourceValue(GWSResource, TM) {}

  static bool classof(const PseudoSourceValue *V) {
    return V->kind() == GWSResource;
  }

  // GWS resources are inaccessible from IR, so they cannot be aliased.
  bool isAliased(const MachineFrameInfo *) const override {
    return false;
  }

  // GWS resources are inaccessible from IR, so they cannot alias anything.
  bool mayAlias(const MachineFrameInfo *) const override {
    return false;
  }

  void printCustom(raw_ostream &OS) const override {
    OS << "GWSResource";
  }
};

namespace yaml {
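
// Illustrative serialized forms (the register names and values here are
// hypothetical): an SIArgument is a YAML flow mapping holding either a
// register or a stack offset, plus an optional mask, e.g.
//   workItemIDX: { reg: '$vgpr0', mask: 1023 }
//   privateSegmentSize: { offset: 16 }
// The 'reg' and 'offset' keys are mutually exclusive, mirroring the union
// below.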
struct SIArgument {
  bool IsRegister;
  union {
    StringValue RegisterName;
    unsigned StackOffset;
  };
  Optional<unsigned> Mask;

  // Default constructor, which creates a stack argument.
  SIArgument() : IsRegister(false), StackOffset(0) {}
  SIArgument(const SIArgument &Other) {
    IsRegister = Other.IsRegister;
    if (IsRegister) {
      ::new ((void *)std::addressof(RegisterName))
          StringValue(Other.RegisterName);
    } else
      StackOffset = Other.StackOffset;
    Mask = Other.Mask;
  }
  SIArgument &operator=(const SIArgument &Other) {
    IsRegister = Other.IsRegister;
    if (IsRegister) {
      ::new ((void *)std::addressof(RegisterName))
          StringValue(Other.RegisterName);
    } else
      StackOffset = Other.StackOffset;
    Mask = Other.Mask;
    return *this;
  }
  ~SIArgument() {
    if (IsRegister)
      RegisterName.~StringValue();
  }

  // Helper to create a register or stack argument.
  static inline SIArgument createArgument(bool IsReg) {
    if (IsReg)
      return SIArgument(IsReg);
    return SIArgument();
  }

private:
  // Construct a register argument.
  SIArgument(bool) : IsRegister(true), RegisterName() {}
};

template <> struct MappingTraits<SIArgument> {
  static void mapping(IO &YamlIO, SIArgument &A) {
    if (YamlIO.outputting()) {
      if (A.IsRegister)
        YamlIO.mapRequired("reg", A.RegisterName);
      else
        YamlIO.mapRequired("offset", A.StackOffset);
    } else {
      auto Keys = YamlIO.keys();
      if (is_contained(Keys, "reg")) {
        A = SIArgument::createArgument(true);
        YamlIO.mapRequired("reg", A.RegisterName);
      } else if (is_contained(Keys, "offset"))
        YamlIO.mapRequired("offset", A.StackOffset);
      else
        YamlIO.setError("missing required key 'reg' or 'offset'");
    }
    YamlIO.mapOptional("mask", A.Mask);
  }
  static const bool flow = true;
};

struct SIArgumentInfo {
  Optional<SIArgument> PrivateSegmentBuffer;
  Optional<SIArgument> DispatchPtr;
  Optional<SIArgument> QueuePtr;
  Optional<SIArgument> KernargSegmentPtr;
  Optional<SIArgument> DispatchID;
  Optional<SIArgument> FlatScratchInit;
  Optional<SIArgument> PrivateSegmentSize;

  Optional<SIArgument> WorkGroupIDX;
  Optional<SIArgument> WorkGroupIDY;
  Optional<SIArgument> WorkGroupIDZ;
  Optional<SIArgument> WorkGroupInfo;
  Optional<SIArgument> LDSKernelId;
  Optional<SIArgument> PrivateSegmentWaveByteOffset;

  Optional<SIArgument> ImplicitArgPtr;
  Optional<SIArgument> ImplicitBufferPtr;

  Optional<SIArgument> WorkItemIDX;
  Optional<SIArgument> WorkItemIDY;
  Optional<SIArgument> WorkItemIDZ;
};

template <> struct MappingTraits<SIArgumentInfo> {
  static void mapping(IO &YamlIO, SIArgumentInfo &AI) {
    YamlIO.mapOptional("privateSegmentBuffer", AI.PrivateSegmentBuffer);
    YamlIO.mapOptional("dispatchPtr", AI.DispatchPtr);
    YamlIO.mapOptional("queuePtr", AI.QueuePtr);
    YamlIO.mapOptional("kernargSegmentPtr", AI.KernargSegmentPtr);
    YamlIO.mapOptional("dispatchID", AI.DispatchID);
    YamlIO.mapOptional("flatScratchInit", AI.FlatScratchInit);
    YamlIO.mapOptional("privateSegmentSize", AI.PrivateSegmentSize);

    YamlIO.mapOptional("workGroupIDX", AI.WorkGroupIDX);
    YamlIO.mapOptional("workGroupIDY", AI.WorkGroupIDY);
    YamlIO.mapOptional("workGroupIDZ", AI.WorkGroupIDZ);
    YamlIO.mapOptional("workGroupInfo", AI.WorkGroupInfo);
    YamlIO.mapOptional("LDSKernelId", AI.LDSKernelId);
    YamlIO.mapOptional("privateSegmentWaveByteOffset",
                       AI.PrivateSegmentWaveByteOffset);

    YamlIO.mapOptional("implicitArgPtr", AI.ImplicitArgPtr);
    YamlIO.mapOptional("implicitBufferPtr", AI.ImplicitBufferPtr);

    YamlIO.mapOptional("workItemIDX", AI.WorkItemIDX);
    YamlIO.mapOptional("workItemIDY", AI.WorkItemIDY);
    YamlIO.mapOptional("workItemIDZ", AI.WorkItemIDZ);
  }
};
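
// Illustrative MIR fragment (hypothetical registers): the argument info
// appears under a function's machineFunctionInfo block, e.g.
//   argumentInfo:
//     privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
//     kernargSegmentPtr:    { reg: '$sgpr4_sgpr5' }
//     workGroupIDX:         { reg: '$sgpr6' }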

// Default to the default FP mode for the default calling convention.
struct SIMode {
  bool IEEE = true;
  bool DX10Clamp = true;
  bool FP32InputDenormals = true;
  bool FP32OutputDenormals = true;
  bool FP64FP16InputDenormals = true;
  bool FP64FP16OutputDenormals = true;

  SIMode() = default;

  SIMode(const AMDGPU::SIModeRegisterDefaults &Mode) {
    IEEE = Mode.IEEE;
    DX10Clamp = Mode.DX10Clamp;
    FP32InputDenormals = Mode.FP32InputDenormals;
    FP32OutputDenormals = Mode.FP32OutputDenormals;
    FP64FP16InputDenormals = Mode.FP64FP16InputDenormals;
    FP64FP16OutputDenormals = Mode.FP64FP16OutputDenormals;
  }

  bool operator==(const SIMode Other) const {
    return IEEE == Other.IEEE &&
           DX10Clamp == Other.DX10Clamp &&
           FP32InputDenormals == Other.FP32InputDenormals &&
           FP32OutputDenormals == Other.FP32OutputDenormals &&
           FP64FP16InputDenormals == Other.FP64FP16InputDenormals &&
           FP64FP16OutputDenormals == Other.FP64FP16OutputDenormals;
  }
};

template <> struct MappingTraits<SIMode> {
  static void mapping(IO &YamlIO, SIMode &Mode) {
    YamlIO.mapOptional("ieee", Mode.IEEE, true);
    YamlIO.mapOptional("dx10-clamp", Mode.DX10Clamp, true);
    YamlIO.mapOptional("fp32-input-denormals", Mode.FP32InputDenormals, true);
    YamlIO.mapOptional("fp32-output-denormals", Mode.FP32OutputDenormals, true);
    YamlIO.mapOptional("fp64-fp16-input-denormals", Mode.FP64FP16InputDenormals, true);
    YamlIO.mapOptional("fp64-fp16-output-denormals", Mode.FP64FP16OutputDenormals, true);
  }
};
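
// Illustrative MIR fragment: since every key defaults to true, only values
// that differ from the defaults are printed, so a function with FP32
// denormals flushed might show, e.g.
//   mode:
//     fp32-input-denormals:  false
//     fp32-output-denormals: false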

struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
  uint64_t ExplicitKernArgSize = 0;
  Align MaxKernArgAlign;
  uint32_t LDSSize = 0;
  uint32_t GDSSize = 0;
  Align DynLDSAlign;
  bool IsEntryFunction = false;
  bool NoSignedZerosFPMath = false;
  bool MemoryBound = false;
  bool WaveLimiter = false;
  bool HasSpilledSGPRs = false;
  bool HasSpilledVGPRs = false;
  uint32_t HighBitsOf32BitAddress = 0;

  // TODO: 10 may be a better default since it's the maximum.
  unsigned Occupancy = 0;

  SmallVector<StringValue> WWMReservedRegs;

  StringValue ScratchRSrcReg = "$private_rsrc_reg";
  StringValue FrameOffsetReg = "$fp_reg";
  StringValue StackPtrOffsetReg = "$sp_reg";

  unsigned BytesInStackArgArea = 0;
  bool ReturnsVoid = true;

  Optional<SIArgumentInfo> ArgInfo;
  SIMode Mode;
  Optional<FrameIndex> ScavengeFI;
  StringValue VGPRForAGPRCopy;

  SIMachineFunctionInfo() = default;
  SIMachineFunctionInfo(const llvm::SIMachineFunctionInfo &,
                        const TargetRegisterInfo &TRI,
                        const llvm::MachineFunction &MF);

  void mappingImpl(yaml::IO &YamlIO) override;
  ~SIMachineFunctionInfo() = default;
};

template <> struct MappingTraits<SIMachineFunctionInfo> {
  static void mapping(IO &YamlIO, SIMachineFunctionInfo &MFI) {
    YamlIO.mapOptional("explicitKernArgSize", MFI.ExplicitKernArgSize,
                       UINT64_C(0));
    YamlIO.mapOptional("maxKernArgAlign", MFI.MaxKernArgAlign);
    YamlIO.mapOptional("ldsSize", MFI.LDSSize, 0u);
    YamlIO.mapOptional("gdsSize", MFI.GDSSize, 0u);
    YamlIO.mapOptional("dynLDSAlign", MFI.DynLDSAlign, Align());
    YamlIO.mapOptional("isEntryFunction", MFI.IsEntryFunction, false);
    YamlIO.mapOptional("noSignedZerosFPMath", MFI.NoSignedZerosFPMath, false);
    YamlIO.mapOptional("memoryBound", MFI.MemoryBound, false);
    YamlIO.mapOptional("waveLimiter", MFI.WaveLimiter, false);
    YamlIO.mapOptional("hasSpilledSGPRs", MFI.HasSpilledSGPRs, false);
    YamlIO.mapOptional("hasSpilledVGPRs", MFI.HasSpilledVGPRs, false);
    YamlIO.mapOptional("scratchRSrcReg", MFI.ScratchRSrcReg,
                       StringValue("$private_rsrc_reg"));
    YamlIO.mapOptional("frameOffsetReg", MFI.FrameOffsetReg,
                       StringValue("$fp_reg"));
    YamlIO.mapOptional("stackPtrOffsetReg", MFI.StackPtrOffsetReg,
                       StringValue("$sp_reg"));
    YamlIO.mapOptional("bytesInStackArgArea", MFI.BytesInStackArgArea, 0u);
    YamlIO.mapOptional("returnsVoid", MFI.ReturnsVoid, true);
    YamlIO.mapOptional("argumentInfo", MFI.ArgInfo);
    YamlIO.mapOptional("mode", MFI.Mode, SIMode());
    YamlIO.mapOptional("highBitsOf32BitAddress",
                       MFI.HighBitsOf32BitAddress, 0u);
    YamlIO.mapOptional("occupancy", MFI.Occupancy, 0);
    YamlIO.mapOptional("wwmReservedRegs", MFI.WWMReservedRegs);
    YamlIO.mapOptional("scavengeFI", MFI.ScavengeFI);
    YamlIO.mapOptional("vgprForAGPRCopy", MFI.VGPRForAGPRCopy,
                       StringValue()); // Don't print out when it's empty.
  }
};

} // end namespace yaml
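
// Putting the pieces together, an illustrative (hypothetical) serialized
// block in a .mir file looks like:
//   machineFunctionInfo:
//     explicitKernArgSize: 8
//     isEntryFunction:     true
//     scratchRSrcReg:      '$sgpr0_sgpr1_sgpr2_sgpr3'
//     frameOffsetReg:      '$sgpr33'
//     stackPtrOffsetReg:   '$sgpr32'
//     occupancy:           10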

/// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
/// tells the hardware which interpolation parameters to load.
class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
  friend class GCNTargetMachine;

  // State of MODE register, assumed FP mode.
  AMDGPU::SIModeRegisterDefaults Mode;

  // Registers that may be reserved for spilling purposes. These may be the
  // same as the input registers.
  Register ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG;

  // This is the unswizzled offset from the current dispatch's scratch wave
  // base to the beginning of the current function's frame.
  Register FrameOffsetReg = AMDGPU::FP_REG;

  // This is an ABI register used in the non-entry calling convention to
  // communicate the unswizzled offset from the current dispatch's scratch wave
  // base to the beginning of the new function's frame.
  Register StackPtrOffsetReg = AMDGPU::SP_REG;

  AMDGPUFunctionArgInfo ArgInfo;

  // Graphics info.
  unsigned PSInputAddr = 0;
  unsigned PSInputEnable = 0;

  /// Number of bytes of arguments this function has on the stack. If the
  /// callee is expected to restore the argument stack this should be a
  /// multiple of 16, all usable during a tail call.
  ///
  /// The alternative would forbid tail call optimisation in some cases: if we
  /// want to transfer control from a function with 8-bytes of stack-argument
  /// space to a function with 16-bytes then misalignment of this value would
  /// make a stack adjustment necessary, which could not be undone by the
  /// callee.
  unsigned BytesInStackArgArea = 0;

  bool ReturnsVoid = true;

  // A pair of default/requested minimum/maximum flat work group sizes.
  // Minimum - first, maximum - second.
  std::pair<unsigned, unsigned> FlatWorkGroupSizes = {0, 0};

  // A pair of default/requested minimum/maximum number of waves per execution
  // unit. Minimum - first, maximum - second.
  std::pair<unsigned, unsigned> WavesPerEU = {0, 0};

  const AMDGPUImagePseudoSourceValue ImagePSV;
  const AMDGPUGWSResourcePseudoSourceValue GWSResourcePSV;

private:
  unsigned NumUserSGPRs = 0;
  unsigned NumSystemSGPRs = 0;

  bool HasSpilledSGPRs = false;
  bool HasSpilledVGPRs = false;
  bool HasNonSpillStackObjects = false;
  bool IsStackRealigned = false;

  unsigned NumSpilledSGPRs = 0;
  unsigned NumSpilledVGPRs = 0;

  // Feature bits required for inputs passed in user SGPRs.
  bool PrivateSegmentBuffer : 1;
  bool DispatchPtr : 1;
  bool QueuePtr : 1;
  bool KernargSegmentPtr : 1;
  bool DispatchID : 1;
  bool FlatScratchInit : 1;

  // Feature bits required for inputs passed in system SGPRs.
  bool WorkGroupIDX : 1; // Always initialized.
  bool WorkGroupIDY : 1;
  bool WorkGroupIDZ : 1;
  bool WorkGroupInfo : 1;
  bool LDSKernelId : 1;
  bool PrivateSegmentWaveByteOffset : 1;

  bool WorkItemIDX : 1; // Always initialized.
  bool WorkItemIDY : 1;
  bool WorkItemIDZ : 1;

  // Private memory buffer
  // Compute directly in sgpr[0:1]
  // Other shaders indirect 64-bits at sgpr[0:1]
  bool ImplicitBufferPtr : 1;

  // Pointer to where the ABI inserts special kernel arguments separate from
  // the user arguments. This is an offset from the KernargSegmentPtr.
  bool ImplicitArgPtr : 1;

  bool MayNeedAGPRs : 1;

  // The hard-wired high half of the address of the global information table
  // for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since
  // current hardware only allows a 16 bit value.
  unsigned GITPtrHigh;

  unsigned HighBitsOf32BitAddress;

  // Current recorded maximum possible occupancy.
  unsigned Occupancy;

  mutable Optional<bool> UsesAGPRs;

  MCPhysReg getNextUserSGPR() const;

  MCPhysReg getNextSystemSGPR() const;

public:
  struct SGPRSpillVGPR {
    // VGPR used for SGPR spills
    Register VGPR;

    // If the VGPR is used for SGPR spills in a non-entrypoint function, the
    // stack slot used to save/restore it in the prolog/epilog.
    Optional<int> FI;

    SGPRSpillVGPR(Register V, Optional<int> F) : VGPR(V), FI(F) {}
  };
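
  // Note: each spilled SGPR occupies a single lane of its carrier VGPR, so
  // one VGPR can hold up to wavefront-size (32 or 64) SGPR spill lanes.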

  struct VGPRSpillToAGPR {
    SmallVector<MCPhysReg, 32> Lanes;
    bool FullyAllocated = false;
    bool IsDead = false;
  };

  // Track VGPRs reserved for WWM.
  SmallSetVector<Register, 8> WWMReservedRegs;

  /// Track stack slots used for save/restore of reserved WWM VGPRs in the
  /// prolog/epilog.
  ///
  /// FIXME: This is temporary state only needed in PrologEpilogInserter, and
  /// doesn't really belong here. It does not require serialization.
  SmallVector<int, 8> WWMReservedFrameIndexes;

  void allocateWWMReservedSpillSlots(MachineFrameInfo &MFI,
                                     const SIRegisterInfo &TRI);

  auto wwmAllocation() const {
    assert(WWMReservedRegs.size() == WWMReservedFrameIndexes.size());
    return zip(WWMReservedRegs, WWMReservedFrameIndexes);
  }

private:
  // Track VGPR + wave index for each subregister of the SGPR spilled to
  // frameindex key.
  DenseMap<int, std::vector<SIRegisterInfo::SpilledReg>> SGPRToVGPRSpills;
  unsigned NumVGPRSpillLanes = 0;
  SmallVector<SGPRSpillVGPR, 2> SpillVGPRs;

  DenseMap<int, VGPRSpillToAGPR> VGPRToAGPRSpills;

  // AGPRs used for VGPR spills.
  SmallVector<MCPhysReg, 32> SpillAGPR;

  // VGPRs used for AGPR spills.
  SmallVector<MCPhysReg, 32> SpillVGPR;

  // Emergency stack slot. Sometimes, we create this before finalizing the
  // stack frame, so save it here and add it to the RegScavenger later.
  Optional<int> ScavengeFI;

private:
  Register VGPRForAGPRCopy;

public:
  Register getVGPRForAGPRCopy() const {
    return VGPRForAGPRCopy;
  }

  void setVGPRForAGPRCopy(Register NewVGPRForAGPRCopy) {
    VGPRForAGPRCopy = NewVGPRForAGPRCopy;
  }

public: // FIXME
  /// If this is set, an SGPR used for save/restore of the register used for
  /// the frame pointer.
  Register SGPRForFPSaveRestoreCopy;
  Optional<int> FramePointerSaveIndex;

  /// If this is set, an SGPR used for save/restore of the register used for
  /// the base pointer.
  Register SGPRForBPSaveRestoreCopy;
  Optional<int> BasePointerSaveIndex;

  bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg);

public:
  SIMachineFunctionInfo(const MachineFunction &MF);
  SIMachineFunctionInfo(const SIMachineFunctionInfo &MFI) = default;

  MachineFunctionInfo *
  clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF,
        const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
      const override;

  bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI,
                                const MachineFunction &MF,
                                PerFunctionMIParsingState &PFS,
                                SMDiagnostic &Error, SMRange &SourceRange);

  void reserveWWMRegister(Register Reg) {
    WWMReservedRegs.insert(Reg);
  }

  AMDGPU::SIModeRegisterDefaults getMode() const {
    return Mode;
  }

  ArrayRef<SIRegisterInfo::SpilledReg>
  getSGPRToVGPRSpills(int FrameIndex) const {
    auto I = SGPRToVGPRSpills.find(FrameIndex);
    return (I == SGPRToVGPRSpills.end())
               ? ArrayRef<SIRegisterInfo::SpilledReg>()
               : makeArrayRef(I->second);
  }
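
  // Illustrative use (hypothetical locals): a frame-lowering pass can expand
  // an SGPR spill slot into per-lane VGPR writes, e.g.
  //   for (const SIRegisterInfo::SpilledReg &Spill :
  //        MFI.getSGPRToVGPRSpills(FI))
  //     ... write the SGPR into lane Spill.Lane of Spill.VGPR ...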

  ArrayRef<SGPRSpillVGPR> getSGPRSpillVGPRs() const { return SpillVGPRs; }

  ArrayRef<MCPhysReg> getAGPRSpillVGPRs() const {
    return SpillAGPR;
  }

  ArrayRef<MCPhysReg> getVGPRSpillAGPRs() const {
    return SpillVGPR;
  }

  MCPhysReg getVGPRToAGPRSpill(int FrameIndex, unsigned Lane) const {
    auto I = VGPRToAGPRSpills.find(FrameIndex);
    return (I == VGPRToAGPRSpills.end()) ? (MCPhysReg)AMDGPU::NoRegister
                                         : I->second.Lanes[Lane];
  }

  void setVGPRToAGPRSpillDead(int FrameIndex) {
    auto I = VGPRToAGPRSpills.find(FrameIndex);
    if (I != VGPRToAGPRSpills.end())
      I->second.IsDead = true;
  }

  bool haveFreeLanesForSGPRSpill(const MachineFunction &MF,
                                 unsigned NumLane) const;
  bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
  bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR);

  /// If \p ResetSGPRSpillStackIDs is true, reset the stack ID from sgpr-spill
  /// to the default stack.
  bool removeDeadFrameIndices(MachineFrameInfo &MFI,
                              bool ResetSGPRSpillStackIDs);

  int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI);
  Optional<int> getOptionalScavengeFI() const { return ScavengeFI; }

  unsigned getBytesInStackArgArea() const {
    return BytesInStackArgArea;
  }

  void setBytesInStackArgArea(unsigned Bytes) {
    BytesInStackArgArea = Bytes;
  }

  // Add user SGPRs.
  Register addPrivateSegmentBuffer(const SIRegisterInfo &TRI);
  Register addDispatchPtr(const SIRegisterInfo &TRI);
  Register addQueuePtr(const SIRegisterInfo &TRI);
  Register addKernargSegmentPtr(const SIRegisterInfo &TRI);
  Register addDispatchID(const SIRegisterInfo &TRI);
  Register addFlatScratchInit(const SIRegisterInfo &TRI);
  Register addImplicitBufferPtr(const SIRegisterInfo &TRI);
  Register addLDSKernelId();

  /// Increment user SGPRs used for padding the argument list only.
  Register addReservedUserSGPR() {
    Register Next = getNextUserSGPR();
    ++NumUserSGPRs;
    return Next;
  }
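
  // The add* helpers above hand out user SGPRs in call order during argument
  // lowering, so a kernel requesting every input typically sees a layout
  // along the lines of (hypothetical assignment) s[0:3] = private segment
  // buffer, s[4:5] = dispatch ptr, s[6:7] = queue ptr, s[8:9] = kernarg
  // segment ptr, and so on.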

  // Add system SGPRs.
  Register addWorkGroupIDX() {
    ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupIDX.getRegister();
  }

  Register addWorkGroupIDY() {
    ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupIDY.getRegister();
  }

  Register addWorkGroupIDZ() {
    ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupIDZ.getRegister();
  }

  Register addWorkGroupInfo() {
    ArgInfo.WorkGroupInfo = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupInfo.getRegister();
  }

  // Add special VGPR inputs
  void setWorkItemIDX(ArgDescriptor Arg) {
    ArgInfo.WorkItemIDX = Arg;
  }

  void setWorkItemIDY(ArgDescriptor Arg) {
    ArgInfo.WorkItemIDY = Arg;
  }

  void setWorkItemIDZ(ArgDescriptor Arg) {
    ArgInfo.WorkItemIDZ = Arg;
  }

  Register addPrivateSegmentWaveByteOffset() {
    ArgInfo.PrivateSegmentWaveByteOffset =
        ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
  }

  void setPrivateSegmentWaveByteOffset(Register Reg) {
    ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(Reg);
  }

  bool hasPrivateSegmentBuffer() const {
    return PrivateSegmentBuffer;
  }

  bool hasDispatchPtr() const {
    return DispatchPtr;
  }

  bool hasQueuePtr() const {
    return QueuePtr;
  }

  bool hasKernargSegmentPtr() const {
    return KernargSegmentPtr;
  }

  bool hasDispatchID() const {
    return DispatchID;
  }

  bool hasFlatScratchInit() const {
    return FlatScratchInit;
  }

  bool hasWorkGroupIDX() const {
    return WorkGroupIDX;
  }

  bool hasWorkGroupIDY() const {
    return WorkGroupIDY;
  }

  bool hasWorkGroupIDZ() const {
    return WorkGroupIDZ;
  }

  bool hasWorkGroupInfo() const {
    return WorkGroupInfo;
  }

  bool hasLDSKernelId() const { return LDSKernelId; }

  bool hasPrivateSegmentWaveByteOffset() const {
    return PrivateSegmentWaveByteOffset;
  }

  bool hasWorkItemIDX() const {
    return WorkItemIDX;
  }

  bool hasWorkItemIDY() const {
    return WorkItemIDY;
  }

  bool hasWorkItemIDZ() const {
    return WorkItemIDZ;
  }

  bool hasImplicitArgPtr() const {
    return ImplicitArgPtr;
  }

  bool hasImplicitBufferPtr() const {
    return ImplicitBufferPtr;
  }

  AMDGPUFunctionArgInfo &getArgInfo() {
    return ArgInfo;
  }

  const AMDGPUFunctionArgInfo &getArgInfo() const {
    return ArgInfo;
  }

  std::tuple<const ArgDescriptor *, const TargetRegisterClass *, LLT>
  getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
    return ArgInfo.getPreloadedValue(Value);
  }

  MCRegister getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
    auto Arg = std::get<0>(ArgInfo.getPreloadedValue(Value));
    return Arg ? Arg->getRegister() : MCRegister();
  }
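
  // Illustrative use: callers typically query a preloaded ABI value by its
  // enum, e.g. (hypothetical)
  //   MCRegister KernArgReg =
  //       MFI.getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);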

  unsigned getGITPtrHigh() const {
    return GITPtrHigh;
  }

  Register getGITPtrLoReg(const MachineFunction &MF) const;

  uint32_t get32BitAddressHighBits() const {
    return HighBitsOf32BitAddress;
  }

  unsigned getNumUserSGPRs() const {
    return NumUserSGPRs;
  }

  unsigned getNumPreloadedSGPRs() const {
    return NumUserSGPRs + NumSystemSGPRs;
  }

  Register getPrivateSegmentWaveByteOffsetSystemSGPR() const {
    return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
  }

  /// Returns the physical register reserved for use as the resource
  /// descriptor for scratch accesses.
  Register getScratchRSrcReg() const {
    return ScratchRSrcReg;
  }

  void setScratchRSrcReg(Register Reg) {
    assert(Reg != 0 && "Should never be unset");
    ScratchRSrcReg = Reg;
  }

  Register getFrameOffsetReg() const {
    return FrameOffsetReg;
  }

  void setFrameOffsetReg(Register Reg) {
    assert(Reg != 0 && "Should never be unset");
    FrameOffsetReg = Reg;
  }

  void setStackPtrOffsetReg(Register Reg) {
    assert(Reg != 0 && "Should never be unset");
    StackPtrOffsetReg = Reg;
  }

  // Note the unset value for this is AMDGPU::SP_REG rather than
  // NoRegister. This is mostly a workaround for MIR tests where state that
  // can't be directly computed from the function is not preserved in
  // serialized MIR.
  Register getStackPtrOffsetReg() const {
    return StackPtrOffsetReg;
  }

  Register getQueuePtrUserSGPR() const {
    return ArgInfo.QueuePtr.getRegister();
  }

  Register getImplicitBufferPtrUserSGPR() const {
    return ArgInfo.ImplicitBufferPtr.getRegister();
  }

  bool hasSpilledSGPRs() const {
    return HasSpilledSGPRs;
  }

  void setHasSpilledSGPRs(bool Spill = true) {
    HasSpilledSGPRs = Spill;
  }

  bool hasSpilledVGPRs() const {
    return HasSpilledVGPRs;
  }

  void setHasSpilledVGPRs(bool Spill = true) {
    HasSpilledVGPRs = Spill;
  }

  bool hasNonSpillStackObjects() const {
    return HasNonSpillStackObjects;
  }

  void setHasNonSpillStackObjects(bool StackObject = true) {
    HasNonSpillStackObjects = StackObject;
  }

  bool isStackRealigned() const {
    return IsStackRealigned;
  }

  void setIsStackRealigned(bool Realigned = true) {
    IsStackRealigned = Realigned;
  }

  unsigned getNumSpilledSGPRs() const {
    return NumSpilledSGPRs;
  }

  unsigned getNumSpilledVGPRs() const {
    return NumSpilledVGPRs;
  }

  void addToSpilledSGPRs(unsigned num) {
    NumSpilledSGPRs += num;
  }

  void addToSpilledVGPRs(unsigned num) {
    NumSpilledVGPRs += num;
  }

  unsigned getPSInputAddr() const {
    return PSInputAddr;
  }

  unsigned getPSInputEnable() const {
    return PSInputEnable;
  }

  bool isPSInputAllocated(unsigned Index) const {
    return PSInputAddr & (1 << Index);
  }

  void markPSInputAllocated(unsigned Index) {
    PSInputAddr |= 1 << Index;
  }

  void markPSInputEnabled(unsigned Index) {
    PSInputEnable |= 1 << Index;
  }

  bool returnsVoid() const {
    return ReturnsVoid;
  }

  void setIfReturnsVoid(bool Value) {
    ReturnsVoid = Value;
  }
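
  // For the pairs below, the defaults come from the subtarget and may be
  // overridden by the "amdgpu-flat-work-group-sizes" and
  // "amdgpu-waves-per-eu" function attributes.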

  /// \returns A pair of default/requested minimum/maximum flat work group
  /// sizes for this function.
  std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const {
    return FlatWorkGroupSizes;
  }

  /// \returns Default/requested minimum flat work group size for this
  /// function.
  unsigned getMinFlatWorkGroupSize() const {
    return FlatWorkGroupSizes.first;
  }

  /// \returns Default/requested maximum flat work group size for this
  /// function.
  unsigned getMaxFlatWorkGroupSize() const {
    return FlatWorkGroupSizes.second;
  }

  /// \returns A pair of default/requested minimum/maximum number of waves per
  /// execution unit.
  std::pair<unsigned, unsigned> getWavesPerEU() const {
    return WavesPerEU;
  }

  /// \returns Default/requested minimum number of waves per execution unit.
  unsigned getMinWavesPerEU() const {
    return WavesPerEU.first;
  }

  /// \returns Default/requested maximum number of waves per execution unit.
  unsigned getMaxWavesPerEU() const {
    return WavesPerEU.second;
  }

  /// \returns SGPR used for \p Dim's work group ID.
  Register getWorkGroupIDSGPR(unsigned Dim) const {
    switch (Dim) {
    case 0:
      assert(hasWorkGroupIDX());
      return ArgInfo.WorkGroupIDX.getRegister();
    case 1:
      assert(hasWorkGroupIDY());
      return ArgInfo.WorkGroupIDY.getRegister();
    case 2:
      assert(hasWorkGroupIDZ());
      return ArgInfo.WorkGroupIDZ.getRegister();
    }
    llvm_unreachable("unexpected dimension");
  }

  const AMDGPUImagePseudoSourceValue *
  getImagePSV(const AMDGPUTargetMachine &TM) {
    return &ImagePSV;
  }

  const AMDGPUGWSResourcePseudoSourceValue *
  getGWSPSV(const AMDGPUTargetMachine &TM) {
    return &GWSResourcePSV;
  }

  unsigned getOccupancy() const {
    return Occupancy;
  }

  unsigned getMinAllowedOccupancy() const {
    if (!isMemoryBound() && !needsWaveLimiter())
      return Occupancy;
    return (Occupancy < 4) ? Occupancy : 4;
  }

  void limitOccupancy(const MachineFunction &MF);

  void limitOccupancy(unsigned Limit) {
    if (Occupancy > Limit)
      Occupancy = Limit;
  }

  void increaseOccupancy(const MachineFunction &MF, unsigned Limit) {
    if (Occupancy < Limit)
      Occupancy = Limit;
    limitOccupancy(MF);
  }

  bool mayNeedAGPRs() const {
    return MayNeedAGPRs;
  }

  /// \returns true if the function has a use of AGPRs via inline asm or has
  /// a call which may use them.
  bool mayUseAGPRs(const Function &F) const;

  /// \returns true if the function needs or may need AGPRs.
  bool usesAGPRs(const MachineFunction &MF) const;
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H