1 //==- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface --*- C++ -*-==// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 // 11 //===----------------------------------------------------------------------===// 12 13 #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H 14 #define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H 15 16 #include "AMDGPUArgumentUsageInfo.h" 17 #include "AMDGPUMachineFunction.h" 18 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 19 #include "SIInstrInfo.h" 20 #include "SIRegisterInfo.h" 21 #include "llvm/ADT/ArrayRef.h" 22 #include "llvm/ADT/DenseMap.h" 23 #include "llvm/ADT/Optional.h" 24 #include "llvm/ADT/SmallVector.h" 25 #include "llvm/ADT/SparseBitVector.h" 26 #include "llvm/CodeGen/MIRYamlMapping.h" 27 #include "llvm/CodeGen/PseudoSourceValue.h" 28 #include "llvm/CodeGen/TargetInstrInfo.h" 29 #include "llvm/MC/MCRegisterInfo.h" 30 #include "llvm/Support/ErrorHandling.h" 31 #include <array> 32 #include <cassert> 33 #include <utility> 34 #include <vector> 35 36 namespace llvm { 37 38 class MachineFrameInfo; 39 class MachineFunction; 40 class TargetRegisterClass; 41 42 class AMDGPUImagePseudoSourceValue : public PseudoSourceValue { 43 public: 44 // TODO: Is the img rsrc useful? 45 explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII) : 46 PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) {} 47 48 bool isConstant(const MachineFrameInfo *) const override { 49 // This should probably be true for most images, but we will start by being 50 // conservative. 51 return false; 52 } 53 54 bool isAliased(const MachineFrameInfo *) const override { 55 return true; 56 } 57 58 bool mayAlias(const MachineFrameInfo *) const override { 59 return true; 60 } 61 }; 62 63 class AMDGPUBufferPseudoSourceValue : public PseudoSourceValue { 64 public: 65 explicit AMDGPUBufferPseudoSourceValue(const TargetInstrInfo &TII) : 66 PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) { } 67 68 bool isConstant(const MachineFrameInfo *) const override { 69 // This should probably be true for most images, but we will start by being 70 // conservative. 71 return false; 72 } 73 74 bool isAliased(const MachineFrameInfo *) const override { 75 return true; 76 } 77 78 bool mayAlias(const MachineFrameInfo *) const override { 79 return true; 80 } 81 }; 82 83 namespace yaml { 84 85 struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo { 86 uint64_t ExplicitKernArgSize = 0; 87 unsigned MaxKernArgAlign = 0; 88 unsigned LDSSize = 0; 89 bool IsEntryFunction = false; 90 bool NoSignedZerosFPMath = false; 91 bool MemoryBound = false; 92 bool WaveLimiter = false; 93 94 StringValue ScratchRSrcReg = "$private_rsrc_reg"; 95 StringValue ScratchWaveOffsetReg = "$scratch_wave_offset_reg"; 96 StringValue FrameOffsetReg = "$fp_reg"; 97 StringValue StackPtrOffsetReg = "$sp_reg"; 98 99 SIMachineFunctionInfo() = default; 100 SIMachineFunctionInfo(const llvm::SIMachineFunctionInfo &, 101 const TargetRegisterInfo &TRI); 102 103 void mappingImpl(yaml::IO &YamlIO) override; 104 ~SIMachineFunctionInfo() = default; 105 }; 106 107 template <> struct MappingTraits<SIMachineFunctionInfo> { 108 static void mapping(IO &YamlIO, SIMachineFunctionInfo &MFI) { 109 YamlIO.mapOptional("explicitKernArgSize", MFI.ExplicitKernArgSize, 110 UINT64_C(0)); 111 YamlIO.mapOptional("maxKernArgAlign", MFI.MaxKernArgAlign, 0u); 112 YamlIO.mapOptional("ldsSize", MFI.LDSSize, 0u); 113 YamlIO.mapOptional("isEntryFunction", MFI.IsEntryFunction, false); 114 YamlIO.mapOptional("noSignedZerosFPMath", MFI.NoSignedZerosFPMath, false); 115 YamlIO.mapOptional("memoryBound", MFI.MemoryBound, false); 116 YamlIO.mapOptional("waveLimiter", MFI.WaveLimiter, false); 117 YamlIO.mapOptional("scratchRSrcReg", MFI.ScratchRSrcReg, 118 StringValue("$private_rsrc_reg")); 119 YamlIO.mapOptional("scratchWaveOffsetReg", MFI.ScratchWaveOffsetReg, 120 StringValue("$scratch_wave_offset_reg")); 121 YamlIO.mapOptional("frameOffsetReg", MFI.FrameOffsetReg, 122 StringValue("$fp_reg")); 123 YamlIO.mapOptional("stackPtrOffsetReg", MFI.StackPtrOffsetReg, 124 StringValue("$sp_reg")); 125 } 126 }; 127 128 } // end namespace yaml 129 130 /// This class keeps track of the SPI_SP_INPUT_ADDR config register, which 131 /// tells the hardware which interpolation parameters to load. 132 class SIMachineFunctionInfo final : public AMDGPUMachineFunction { 133 friend class GCNTargetMachine; 134 135 unsigned TIDReg = AMDGPU::NoRegister; 136 137 // Registers that may be reserved for spilling purposes. These may be the same 138 // as the input registers. 139 unsigned ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG; 140 unsigned ScratchWaveOffsetReg = AMDGPU::SCRATCH_WAVE_OFFSET_REG; 141 142 // This is the current function's incremented size from the kernel's scratch 143 // wave offset register. For an entry function, this is exactly the same as 144 // the ScratchWaveOffsetReg. 145 unsigned FrameOffsetReg = AMDGPU::FP_REG; 146 147 // Top of the stack SGPR offset derived from the ScratchWaveOffsetReg. 148 unsigned StackPtrOffsetReg = AMDGPU::SP_REG; 149 150 AMDGPUFunctionArgInfo ArgInfo; 151 152 // State of MODE register, assumed FP mode. 153 AMDGPU::SIModeRegisterDefaults Mode; 154 155 // Graphics info. 156 unsigned PSInputAddr = 0; 157 unsigned PSInputEnable = 0; 158 159 /// Number of bytes of arguments this function has on the stack. If the callee 160 /// is expected to restore the argument stack this should be a multiple of 16, 161 /// all usable during a tail call. 162 /// 163 /// The alternative would forbid tail call optimisation in some cases: if we 164 /// want to transfer control from a function with 8-bytes of stack-argument 165 /// space to a function with 16-bytes then misalignment of this value would 166 /// make a stack adjustment necessary, which could not be undone by the 167 /// callee. 168 unsigned BytesInStackArgArea = 0; 169 170 bool ReturnsVoid = true; 171 172 // A pair of default/requested minimum/maximum flat work group sizes. 173 // Minimum - first, maximum - second. 174 std::pair<unsigned, unsigned> FlatWorkGroupSizes = {0, 0}; 175 176 // A pair of default/requested minimum/maximum number of waves per execution 177 // unit. Minimum - first, maximum - second. 178 std::pair<unsigned, unsigned> WavesPerEU = {0, 0}; 179 180 DenseMap<const Value *, 181 std::unique_ptr<const AMDGPUBufferPseudoSourceValue>> BufferPSVs; 182 DenseMap<const Value *, 183 std::unique_ptr<const AMDGPUImagePseudoSourceValue>> ImagePSVs; 184 185 private: 186 unsigned LDSWaveSpillSize = 0; 187 unsigned NumUserSGPRs = 0; 188 unsigned NumSystemSGPRs = 0; 189 190 bool HasSpilledSGPRs = false; 191 bool HasSpilledVGPRs = false; 192 bool HasNonSpillStackObjects = false; 193 bool IsStackRealigned = false; 194 195 unsigned NumSpilledSGPRs = 0; 196 unsigned NumSpilledVGPRs = 0; 197 198 // Feature bits required for inputs passed in user SGPRs. 199 bool PrivateSegmentBuffer : 1; 200 bool DispatchPtr : 1; 201 bool QueuePtr : 1; 202 bool KernargSegmentPtr : 1; 203 bool DispatchID : 1; 204 bool FlatScratchInit : 1; 205 206 // Feature bits required for inputs passed in system SGPRs. 207 bool WorkGroupIDX : 1; // Always initialized. 208 bool WorkGroupIDY : 1; 209 bool WorkGroupIDZ : 1; 210 bool WorkGroupInfo : 1; 211 bool PrivateSegmentWaveByteOffset : 1; 212 213 bool WorkItemIDX : 1; // Always initialized. 214 bool WorkItemIDY : 1; 215 bool WorkItemIDZ : 1; 216 217 // Private memory buffer 218 // Compute directly in sgpr[0:1] 219 // Other shaders indirect 64-bits at sgpr[0:1] 220 bool ImplicitBufferPtr : 1; 221 222 // Pointer to where the ABI inserts special kernel arguments separate from the 223 // user arguments. This is an offset from the KernargSegmentPtr. 224 bool ImplicitArgPtr : 1; 225 226 // The hard-wired high half of the address of the global information table 227 // for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since 228 // current hardware only allows a 16 bit value. 229 unsigned GITPtrHigh; 230 231 unsigned HighBitsOf32BitAddress; 232 233 // Current recorded maximum possible occupancy. 234 unsigned Occupancy; 235 236 MCPhysReg getNextUserSGPR() const; 237 238 MCPhysReg getNextSystemSGPR() const; 239 240 public: 241 struct SpilledReg { 242 unsigned VGPR = 0; 243 int Lane = -1; 244 245 SpilledReg() = default; 246 SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) {} 247 248 bool hasLane() { return Lane != -1;} 249 bool hasReg() { return VGPR != 0;} 250 }; 251 252 struct SGPRSpillVGPRCSR { 253 // VGPR used for SGPR spills 254 unsigned VGPR; 255 256 // If the VGPR is a CSR, the stack slot used to save/restore it in the 257 // prolog/epilog. 258 Optional<int> FI; 259 260 SGPRSpillVGPRCSR(unsigned V, Optional<int> F) : VGPR(V), FI(F) {} 261 }; 262 263 SparseBitVector<> WWMReservedRegs; 264 265 void ReserveWWMRegister(unsigned reg) { WWMReservedRegs.set(reg); } 266 267 private: 268 // SGPR->VGPR spilling support. 269 using SpillRegMask = std::pair<unsigned, unsigned>; 270 271 // Track VGPR + wave index for each subregister of the SGPR spilled to 272 // frameindex key. 273 DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills; 274 unsigned NumVGPRSpillLanes = 0; 275 SmallVector<SGPRSpillVGPRCSR, 2> SpillVGPRs; 276 277 public: 278 SIMachineFunctionInfo(const MachineFunction &MF); 279 280 bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI); 281 282 ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const { 283 auto I = SGPRToVGPRSpills.find(FrameIndex); 284 return (I == SGPRToVGPRSpills.end()) ? 285 ArrayRef<SpilledReg>() : makeArrayRef(I->second); 286 } 287 288 ArrayRef<SGPRSpillVGPRCSR> getSGPRSpillVGPRs() const { 289 return SpillVGPRs; 290 } 291 292 AMDGPU::SIModeRegisterDefaults getMode() const { 293 return Mode; 294 } 295 296 bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI); 297 void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI); 298 299 bool hasCalculatedTID() const { return TIDReg != 0; }; 300 unsigned getTIDReg() const { return TIDReg; }; 301 void setTIDReg(unsigned Reg) { TIDReg = Reg; } 302 303 unsigned getBytesInStackArgArea() const { 304 return BytesInStackArgArea; 305 } 306 307 void setBytesInStackArgArea(unsigned Bytes) { 308 BytesInStackArgArea = Bytes; 309 } 310 311 // Add user SGPRs. 312 unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI); 313 unsigned addDispatchPtr(const SIRegisterInfo &TRI); 314 unsigned addQueuePtr(const SIRegisterInfo &TRI); 315 unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI); 316 unsigned addDispatchID(const SIRegisterInfo &TRI); 317 unsigned addFlatScratchInit(const SIRegisterInfo &TRI); 318 unsigned addImplicitBufferPtr(const SIRegisterInfo &TRI); 319 320 // Add system SGPRs. 321 unsigned addWorkGroupIDX() { 322 ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR()); 323 NumSystemSGPRs += 1; 324 return ArgInfo.WorkGroupIDX.getRegister(); 325 } 326 327 unsigned addWorkGroupIDY() { 328 ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(getNextSystemSGPR()); 329 NumSystemSGPRs += 1; 330 return ArgInfo.WorkGroupIDY.getRegister(); 331 } 332 333 unsigned addWorkGroupIDZ() { 334 ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(getNextSystemSGPR()); 335 NumSystemSGPRs += 1; 336 return ArgInfo.WorkGroupIDZ.getRegister(); 337 } 338 339 unsigned addWorkGroupInfo() { 340 ArgInfo.WorkGroupInfo = ArgDescriptor::createRegister(getNextSystemSGPR()); 341 NumSystemSGPRs += 1; 342 return ArgInfo.WorkGroupInfo.getRegister(); 343 } 344 345 // Add special VGPR inputs 346 void setWorkItemIDX(ArgDescriptor Arg) { 347 ArgInfo.WorkItemIDX = Arg; 348 } 349 350 void setWorkItemIDY(ArgDescriptor Arg) { 351 ArgInfo.WorkItemIDY = Arg; 352 } 353 354 void setWorkItemIDZ(ArgDescriptor Arg) { 355 ArgInfo.WorkItemIDZ = Arg; 356 } 357 358 unsigned addPrivateSegmentWaveByteOffset() { 359 ArgInfo.PrivateSegmentWaveByteOffset 360 = ArgDescriptor::createRegister(getNextSystemSGPR()); 361 NumSystemSGPRs += 1; 362 return ArgInfo.PrivateSegmentWaveByteOffset.getRegister(); 363 } 364 365 void setPrivateSegmentWaveByteOffset(unsigned Reg) { 366 ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(Reg); 367 } 368 369 bool hasPrivateSegmentBuffer() const { 370 return PrivateSegmentBuffer; 371 } 372 373 bool hasDispatchPtr() const { 374 return DispatchPtr; 375 } 376 377 bool hasQueuePtr() const { 378 return QueuePtr; 379 } 380 381 bool hasKernargSegmentPtr() const { 382 return KernargSegmentPtr; 383 } 384 385 bool hasDispatchID() const { 386 return DispatchID; 387 } 388 389 bool hasFlatScratchInit() const { 390 return FlatScratchInit; 391 } 392 393 bool hasWorkGroupIDX() const { 394 return WorkGroupIDX; 395 } 396 397 bool hasWorkGroupIDY() const { 398 return WorkGroupIDY; 399 } 400 401 bool hasWorkGroupIDZ() const { 402 return WorkGroupIDZ; 403 } 404 405 bool hasWorkGroupInfo() const { 406 return WorkGroupInfo; 407 } 408 409 bool hasPrivateSegmentWaveByteOffset() const { 410 return PrivateSegmentWaveByteOffset; 411 } 412 413 bool hasWorkItemIDX() const { 414 return WorkItemIDX; 415 } 416 417 bool hasWorkItemIDY() const { 418 return WorkItemIDY; 419 } 420 421 bool hasWorkItemIDZ() const { 422 return WorkItemIDZ; 423 } 424 425 bool hasImplicitArgPtr() const { 426 return ImplicitArgPtr; 427 } 428 429 bool hasImplicitBufferPtr() const { 430 return ImplicitBufferPtr; 431 } 432 433 AMDGPUFunctionArgInfo &getArgInfo() { 434 return ArgInfo; 435 } 436 437 const AMDGPUFunctionArgInfo &getArgInfo() const { 438 return ArgInfo; 439 } 440 441 std::pair<const ArgDescriptor *, const TargetRegisterClass *> 442 getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const { 443 return ArgInfo.getPreloadedValue(Value); 444 } 445 446 unsigned getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const { 447 auto Arg = ArgInfo.getPreloadedValue(Value).first; 448 return Arg ? Arg->getRegister() : 0; 449 } 450 451 unsigned getGITPtrHigh() const { 452 return GITPtrHigh; 453 } 454 455 unsigned get32BitAddressHighBits() const { 456 return HighBitsOf32BitAddress; 457 } 458 459 unsigned getNumUserSGPRs() const { 460 return NumUserSGPRs; 461 } 462 463 unsigned getNumPreloadedSGPRs() const { 464 return NumUserSGPRs + NumSystemSGPRs; 465 } 466 467 unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const { 468 return ArgInfo.PrivateSegmentWaveByteOffset.getRegister(); 469 } 470 471 /// Returns the physical register reserved for use as the resource 472 /// descriptor for scratch accesses. 473 unsigned getScratchRSrcReg() const { 474 return ScratchRSrcReg; 475 } 476 477 void setScratchRSrcReg(unsigned Reg) { 478 assert(Reg != 0 && "Should never be unset"); 479 ScratchRSrcReg = Reg; 480 } 481 482 unsigned getScratchWaveOffsetReg() const { 483 return ScratchWaveOffsetReg; 484 } 485 486 unsigned getFrameOffsetReg() const { 487 return FrameOffsetReg; 488 } 489 490 void setFrameOffsetReg(unsigned Reg) { 491 assert(Reg != 0 && "Should never be unset"); 492 FrameOffsetReg = Reg; 493 } 494 495 void setStackPtrOffsetReg(unsigned Reg) { 496 assert(Reg != 0 && "Should never be unset"); 497 StackPtrOffsetReg = Reg; 498 } 499 500 // Note the unset value for this is AMDGPU::SP_REG rather than 501 // NoRegister. This is mostly a workaround for MIR tests where state that 502 // can't be directly computed from the function is not preserved in serialized 503 // MIR. 504 unsigned getStackPtrOffsetReg() const { 505 return StackPtrOffsetReg; 506 } 507 508 void setScratchWaveOffsetReg(unsigned Reg) { 509 assert(Reg != 0 && "Should never be unset"); 510 ScratchWaveOffsetReg = Reg; 511 } 512 513 unsigned getQueuePtrUserSGPR() const { 514 return ArgInfo.QueuePtr.getRegister(); 515 } 516 517 unsigned getImplicitBufferPtrUserSGPR() const { 518 return ArgInfo.ImplicitBufferPtr.getRegister(); 519 } 520 521 bool hasSpilledSGPRs() const { 522 return HasSpilledSGPRs; 523 } 524 525 void setHasSpilledSGPRs(bool Spill = true) { 526 HasSpilledSGPRs = Spill; 527 } 528 529 bool hasSpilledVGPRs() const { 530 return HasSpilledVGPRs; 531 } 532 533 void setHasSpilledVGPRs(bool Spill = true) { 534 HasSpilledVGPRs = Spill; 535 } 536 537 bool hasNonSpillStackObjects() const { 538 return HasNonSpillStackObjects; 539 } 540 541 void setHasNonSpillStackObjects(bool StackObject = true) { 542 HasNonSpillStackObjects = StackObject; 543 } 544 545 bool isStackRealigned() const { 546 return IsStackRealigned; 547 } 548 549 void setIsStackRealigned(bool Realigned = true) { 550 IsStackRealigned = Realigned; 551 } 552 553 unsigned getNumSpilledSGPRs() const { 554 return NumSpilledSGPRs; 555 } 556 557 unsigned getNumSpilledVGPRs() const { 558 return NumSpilledVGPRs; 559 } 560 561 void addToSpilledSGPRs(unsigned num) { 562 NumSpilledSGPRs += num; 563 } 564 565 void addToSpilledVGPRs(unsigned num) { 566 NumSpilledVGPRs += num; 567 } 568 569 unsigned getPSInputAddr() const { 570 return PSInputAddr; 571 } 572 573 unsigned getPSInputEnable() const { 574 return PSInputEnable; 575 } 576 577 bool isPSInputAllocated(unsigned Index) const { 578 return PSInputAddr & (1 << Index); 579 } 580 581 void markPSInputAllocated(unsigned Index) { 582 PSInputAddr |= 1 << Index; 583 } 584 585 void markPSInputEnabled(unsigned Index) { 586 PSInputEnable |= 1 << Index; 587 } 588 589 bool returnsVoid() const { 590 return ReturnsVoid; 591 } 592 593 void setIfReturnsVoid(bool Value) { 594 ReturnsVoid = Value; 595 } 596 597 /// \returns A pair of default/requested minimum/maximum flat work group sizes 598 /// for this function. 599 std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const { 600 return FlatWorkGroupSizes; 601 } 602 603 /// \returns Default/requested minimum flat work group size for this function. 604 unsigned getMinFlatWorkGroupSize() const { 605 return FlatWorkGroupSizes.first; 606 } 607 608 /// \returns Default/requested maximum flat work group size for this function. 609 unsigned getMaxFlatWorkGroupSize() const { 610 return FlatWorkGroupSizes.second; 611 } 612 613 /// \returns A pair of default/requested minimum/maximum number of waves per 614 /// execution unit. 615 std::pair<unsigned, unsigned> getWavesPerEU() const { 616 return WavesPerEU; 617 } 618 619 /// \returns Default/requested minimum number of waves per execution unit. 620 unsigned getMinWavesPerEU() const { 621 return WavesPerEU.first; 622 } 623 624 /// \returns Default/requested maximum number of waves per execution unit. 625 unsigned getMaxWavesPerEU() const { 626 return WavesPerEU.second; 627 } 628 629 /// \returns SGPR used for \p Dim's work group ID. 630 unsigned getWorkGroupIDSGPR(unsigned Dim) const { 631 switch (Dim) { 632 case 0: 633 assert(hasWorkGroupIDX()); 634 return ArgInfo.WorkGroupIDX.getRegister(); 635 case 1: 636 assert(hasWorkGroupIDY()); 637 return ArgInfo.WorkGroupIDY.getRegister(); 638 case 2: 639 assert(hasWorkGroupIDZ()); 640 return ArgInfo.WorkGroupIDZ.getRegister(); 641 } 642 llvm_unreachable("unexpected dimension"); 643 } 644 645 /// \returns VGPR used for \p Dim' work item ID. 646 unsigned getWorkItemIDVGPR(unsigned Dim) const; 647 648 unsigned getLDSWaveSpillSize() const { 649 return LDSWaveSpillSize; 650 } 651 652 const AMDGPUBufferPseudoSourceValue *getBufferPSV(const SIInstrInfo &TII, 653 const Value *BufferRsrc) { 654 assert(BufferRsrc); 655 auto PSV = BufferPSVs.try_emplace( 656 BufferRsrc, 657 llvm::make_unique<AMDGPUBufferPseudoSourceValue>(TII)); 658 return PSV.first->second.get(); 659 } 660 661 const AMDGPUImagePseudoSourceValue *getImagePSV(const SIInstrInfo &TII, 662 const Value *ImgRsrc) { 663 assert(ImgRsrc); 664 auto PSV = ImagePSVs.try_emplace( 665 ImgRsrc, 666 llvm::make_unique<AMDGPUImagePseudoSourceValue>(TII)); 667 return PSV.first->second.get(); 668 } 669 670 unsigned getOccupancy() const { 671 return Occupancy; 672 } 673 674 unsigned getMinAllowedOccupancy() const { 675 if (!isMemoryBound() && !needsWaveLimiter()) 676 return Occupancy; 677 return (Occupancy < 4) ? Occupancy : 4; 678 } 679 680 void limitOccupancy(const MachineFunction &MF); 681 682 void limitOccupancy(unsigned Limit) { 683 if (Occupancy > Limit) 684 Occupancy = Limit; 685 } 686 687 void increaseOccupancy(const MachineFunction &MF, unsigned Limit) { 688 if (Occupancy < Limit) 689 Occupancy = Limit; 690 limitOccupancy(MF); 691 } 692 }; 693 694 } // end namespace llvm 695 696 #endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H 697