//==- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface --*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// AMDGPU/SI per-function machine state: which special inputs are preloaded
/// into user/system SGPRs, scratch and stack register assignments,
/// SGPR-to-VGPR spill bookkeeping, graphics PS input state, and the YAML
/// (MIR) serialization of a subset of that state.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
#define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H

#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUMachineFunction.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SparseBitVector.h"
#include "llvm/CodeGen/MIRYamlMapping.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include <array>
#include <cassert>
#include <utility>
#include <vector>

namespace llvm {

class MachineFrameInfo;
class MachineFunction;
class TargetRegisterClass;

/// PseudoSourceValue standing in for memory accessed through an image
/// resource descriptor. Conservatively treated as non-constant and aliased.
class AMDGPUImagePseudoSourceValue : public PseudoSourceValue {
public:
  // TODO: Is the img rsrc useful?
  explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII) :
    PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) {}

  bool isConstant(const MachineFrameInfo *) const override {
    // This should probably be true for most images, but we will start by being
    // conservative.
    return false;
  }

  bool isAliased(const MachineFrameInfo *) const override {
    return true;
  }

  bool mayAlias(const MachineFrameInfo *) const override {
    return true;
  }
};

/// PseudoSourceValue standing in for memory accessed through a buffer
/// resource descriptor. Conservatively treated as non-constant and aliased.
class AMDGPUBufferPseudoSourceValue : public PseudoSourceValue {
public:
  explicit AMDGPUBufferPseudoSourceValue(const TargetInstrInfo &TII) :
    PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) { }

  bool isConstant(const MachineFrameInfo *) const override {
    // This should probably be true for most buffers, but we will start by
    // being conservative.
    return false;
  }

  bool isAliased(const MachineFrameInfo *) const override {
    return true;
  }

  bool mayAlias(const MachineFrameInfo *) const override {
    return true;
  }
};

namespace yaml {

/// Serializable (MIR/YAML) mirror of the llvm::SIMachineFunctionInfo fields
/// that cannot be recomputed from the IR when parsing a .mir file.
struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
  uint64_t ExplicitKernArgSize = 0;
  unsigned MaxKernArgAlign = 0;
  unsigned LDSSize = 0;
  bool IsEntryFunction = false;
  bool NoSignedZerosFPMath = false;
  bool MemoryBound = false;
  bool WaveLimiter = false;

  // Register fields default to the placeholder register names so that a
  // round-trip through YAML omits them when they were never assigned.
  StringValue ScratchRSrcReg = "$private_rsrc_reg";
  StringValue ScratchWaveOffsetReg = "$scratch_wave_offset_reg";
  StringValue FrameOffsetReg = "$fp_reg";
  StringValue StackPtrOffsetReg = "$sp_reg";

  SIMachineFunctionInfo() = default;
  SIMachineFunctionInfo(const llvm::SIMachineFunctionInfo &,
                        const TargetRegisterInfo &TRI);

  void mappingImpl(yaml::IO &YamlIO) override;
  ~SIMachineFunctionInfo() = default;
};

/// YAML I/O mapping for yaml::SIMachineFunctionInfo. Every field is optional;
/// the defaults here must match the member initializers above so omitted keys
/// round-trip losslessly.
template <> struct MappingTraits<SIMachineFunctionInfo> {
  static void mapping(IO &YamlIO, SIMachineFunctionInfo &MFI) {
    YamlIO.mapOptional("explicitKernArgSize", MFI.ExplicitKernArgSize,
                       UINT64_C(0));
    YamlIO.mapOptional("maxKernArgAlign", MFI.MaxKernArgAlign, 0u);
    YamlIO.mapOptional("ldsSize", MFI.LDSSize, 0u);
    YamlIO.mapOptional("isEntryFunction", MFI.IsEntryFunction, false);
    YamlIO.mapOptional("noSignedZerosFPMath", MFI.NoSignedZerosFPMath, false);
    YamlIO.mapOptional("memoryBound", MFI.MemoryBound, false);
    YamlIO.mapOptional("waveLimiter", MFI.WaveLimiter, false);
    YamlIO.mapOptional("scratchRSrcReg", MFI.ScratchRSrcReg,
                       StringValue("$private_rsrc_reg"));
    YamlIO.mapOptional("scratchWaveOffsetReg", MFI.ScratchWaveOffsetReg,
                       StringValue("$scratch_wave_offset_reg"));
    YamlIO.mapOptional("frameOffsetReg", MFI.FrameOffsetReg,
                       StringValue("$fp_reg"));
    YamlIO.mapOptional("stackPtrOffsetReg", MFI.StackPtrOffsetReg,
                       StringValue("$sp_reg"));
  }
};

} // end namespace yaml

/// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
/// tells the hardware which interpolation parameters to load.
//
// NOTE(review): the doc comment above is narrower than the class has become —
// it also tracks argument SGPR/VGPR assignments, spill state, stack registers
// and occupancy; consider updating it upstream.
class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
  friend class GCNTargetMachine;

  // Register holding the computed thread ID, if one has been calculated
  // (see hasCalculatedTID()/setTIDReg()).
  unsigned TIDReg = AMDGPU::NoRegister;

  // Registers that may be reserved for spilling purposes. These may be the same
  // as the input registers.
  unsigned ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG;
  unsigned ScratchWaveOffsetReg = AMDGPU::SCRATCH_WAVE_OFFSET_REG;

  // This is the current function's incremented size from the kernel's scratch
  // wave offset register. For an entry function, this is exactly the same as
  // the ScratchWaveOffsetReg.
  unsigned FrameOffsetReg = AMDGPU::FP_REG;

  // Top of the stack SGPR offset derived from the ScratchWaveOffsetReg.
  unsigned StackPtrOffsetReg = AMDGPU::SP_REG;

  // Which special inputs are passed in registers, and in which registers.
  AMDGPUFunctionArgInfo ArgInfo;

  // State of MODE register, assumed FP mode.
  AMDGPU::SIModeRegisterDefaults Mode;

  // Graphics info: bitmasks of pixel-shader inputs allocated/enabled
  // (see isPSInputAllocated()/markPSInputAllocated()/markPSInputEnabled()).
  unsigned PSInputAddr = 0;
  unsigned PSInputEnable = 0;

  /// Number of bytes of arguments this function has on the stack. If the callee
  /// is expected to restore the argument stack this should be a multiple of 16,
  /// all usable during a tail call.
  ///
  /// The alternative would forbid tail call optimisation in some cases: if we
  /// want to transfer control from a function with 8-bytes of stack-argument
  /// space to a function with 16-bytes then misalignment of this value would
  /// make a stack adjustment necessary, which could not be undone by the
  /// callee.
  unsigned BytesInStackArgArea = 0;

  bool ReturnsVoid = true;

  // A pair of default/requested minimum/maximum flat work group sizes.
  // Minimum - first, maximum - second.
  std::pair<unsigned, unsigned> FlatWorkGroupSizes = {0, 0};

  // A pair of default/requested minimum/maximum number of waves per execution
  // unit. Minimum - first, maximum - second.
  std::pair<unsigned, unsigned> WavesPerEU = {0, 0};

  // Lazily-created pseudo source values, keyed by the resource-descriptor
  // Value (see getBufferPSV()/getImagePSV()).
  DenseMap<const Value *,
           std::unique_ptr<const AMDGPUBufferPseudoSourceValue>> BufferPSVs;
  DenseMap<const Value *,
           std::unique_ptr<const AMDGPUImagePseudoSourceValue>> ImagePSVs;

private:
  unsigned LDSWaveSpillSize = 0;
  unsigned NumUserSGPRs = 0;
  unsigned NumSystemSGPRs = 0;

  bool HasSpilledSGPRs = false;
  bool HasSpilledVGPRs = false;
  bool HasNonSpillStackObjects = false;
  bool IsStackRealigned = false;

  unsigned NumSpilledSGPRs = 0;
  unsigned NumSpilledVGPRs = 0;

  // Feature bits required for inputs passed in user SGPRs.
  // NOTE(review): these bitfields have no in-class initializers; presumably
  // the constructor in the .cpp initializes them — verify before relying on
  // them pre-construction.
  bool PrivateSegmentBuffer : 1;
  bool DispatchPtr : 1;
  bool QueuePtr : 1;
  bool KernargSegmentPtr : 1;
  bool DispatchID : 1;
  bool FlatScratchInit : 1;

  // Feature bits required for inputs passed in system SGPRs.
  bool WorkGroupIDX : 1; // Always initialized.
  bool WorkGroupIDY : 1;
  bool WorkGroupIDZ : 1;
  bool WorkGroupInfo : 1;
  bool PrivateSegmentWaveByteOffset : 1;

  bool WorkItemIDX : 1; // Always initialized.
  bool WorkItemIDY : 1;
  bool WorkItemIDZ : 1;

  // Private memory buffer
  // Compute directly in sgpr[0:1]
  // Other shaders indirect 64-bits at sgpr[0:1]
  bool ImplicitBufferPtr : 1;

  // Pointer to where the ABI inserts special kernel arguments separate from the
  // user arguments. This is an offset from the KernargSegmentPtr.
  bool ImplicitArgPtr : 1;

  // The hard-wired high half of the address of the global information table
  // for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since
  // current hardware only allows a 16 bit value.
  unsigned GITPtrHigh;

  // High bits substituted into 32-bit addresses (see get32BitAddressHighBits).
  unsigned HighBitsOf32BitAddress;

  // Current recorded maximum possible occupancy.
  unsigned Occupancy;

  // Next free user/system SGPR, derived from NumUserSGPRs/NumSystemSGPRs
  // (defined in the .cpp).
  MCPhysReg getNextUserSGPR() const;

  MCPhysReg getNextSystemSGPR() const;

public:
  /// One lane of a VGPR used to hold a spilled SGPR subregister.
  struct SpilledReg {
    unsigned VGPR = 0;
    int Lane = -1;

    SpilledReg() = default;
    SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) {}

    bool hasLane() { return Lane != -1;}
    bool hasReg() { return VGPR != 0;}
  };

  struct SGPRSpillVGPRCSR {
    // VGPR used for SGPR spills
    unsigned VGPR;

    // If the VGPR is a CSR, the stack slot used to save/restore it in the
    // prolog/epilog.
    Optional<int> FI;

    SGPRSpillVGPRCSR(unsigned V, Optional<int> F) : VGPR(V), FI(F) {}
  };

  // Registers reserved for whole-wave-mode (WWM) execution.
  SparseBitVector<> WWMReservedRegs;

  void ReserveWWMRegister(unsigned reg) { WWMReservedRegs.set(reg); }

private:
  // SGPR->VGPR spilling support.
  using SpillRegMask = std::pair<unsigned, unsigned>;

  // Track VGPR + wave index for each subregister of the SGPR spilled to
  // frameindex key.
  DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills;
  unsigned NumVGPRSpillLanes = 0;
  SmallVector<SGPRSpillVGPRCSR, 2> SpillVGPRs;

public:
  SIMachineFunctionInfo(const MachineFunction &MF);

  /// Copy state parsed from MIR YAML into this object (defined in the .cpp).
  bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI);

  /// \returns the VGPR lanes holding the SGPR spilled to \p FrameIndex, or an
  /// empty ArrayRef if that frame index has no SGPR spill recorded.
  ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const {
    auto I = SGPRToVGPRSpills.find(FrameIndex);
    return (I == SGPRToVGPRSpills.end()) ?
      ArrayRef<SpilledReg>() : makeArrayRef(I->second);
  }

  ArrayRef<SGPRSpillVGPRCSR> getSGPRSpillVGPRs() const {
    return SpillVGPRs;
  }

  AMDGPU::SIModeRegisterDefaults getMode() const {
    return Mode;
  }

  bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
  void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI);

  // True once setTIDReg() assigned a real register (AMDGPU::NoRegister is 0).
  bool hasCalculatedTID() const { return TIDReg != 0; };
  unsigned getTIDReg() const { return TIDReg; };
  void setTIDReg(unsigned Reg) { TIDReg = Reg; }

  unsigned getBytesInStackArgArea() const {
    return BytesInStackArgArea;
  }

  void setBytesInStackArgArea(unsigned Bytes) {
    BytesInStackArgArea = Bytes;
  }

  // Add user SGPRs. Each returns the register allocated for that input
  // (defined in the .cpp).
  unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI);
  unsigned addDispatchPtr(const SIRegisterInfo &TRI);
  unsigned addQueuePtr(const SIRegisterInfo &TRI);
  unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI);
  unsigned addDispatchID(const SIRegisterInfo &TRI);
  unsigned addFlatScratchInit(const SIRegisterInfo &TRI);
  unsigned addImplicitBufferPtr(const SIRegisterInfo &TRI);

  // Add system SGPRs. Each claims the next free system SGPR, records it in
  // ArgInfo, and returns it.
  unsigned addWorkGroupIDX() {
    ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupIDX.getRegister();
  }

  unsigned addWorkGroupIDY() {
    ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupIDY.getRegister();
  }

  unsigned addWorkGroupIDZ() {
    ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupIDZ.getRegister();
  }

  unsigned addWorkGroupInfo() {
    ArgInfo.WorkGroupInfo = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupInfo.getRegister();
  }

  // Add special VGPR inputs
  void setWorkItemIDX(ArgDescriptor Arg) {
    ArgInfo.WorkItemIDX = Arg;
  }

  void setWorkItemIDY(ArgDescriptor Arg) {
    ArgInfo.WorkItemIDY = Arg;
  }

  void setWorkItemIDZ(ArgDescriptor Arg) {
    ArgInfo.WorkItemIDZ = Arg;
  }

  unsigned addPrivateSegmentWaveByteOffset() {
    ArgInfo.PrivateSegmentWaveByteOffset
      = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
  }

  // Unlike addPrivateSegmentWaveByteOffset(), does not consume a system SGPR;
  // it just records an externally-chosen register.
  void setPrivateSegmentWaveByteOffset(unsigned Reg) {
    ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(Reg);
  }

  // Queries for the input feature bits declared above.
  bool hasPrivateSegmentBuffer() const {
    return PrivateSegmentBuffer;
  }

  bool hasDispatchPtr() const {
    return DispatchPtr;
  }

  bool hasQueuePtr() const {
    return QueuePtr;
  }

  bool hasKernargSegmentPtr() const {
    return KernargSegmentPtr;
  }

  bool hasDispatchID() const {
    return DispatchID;
  }

  bool hasFlatScratchInit() const {
    return FlatScratchInit;
  }

  bool hasWorkGroupIDX() const {
    return WorkGroupIDX;
  }

  bool hasWorkGroupIDY() const {
    return WorkGroupIDY;
  }

  bool hasWorkGroupIDZ() const {
    return WorkGroupIDZ;
  }

  bool hasWorkGroupInfo() const {
    return WorkGroupInfo;
  }

  bool hasPrivateSegmentWaveByteOffset() const {
    return PrivateSegmentWaveByteOffset;
  }

  bool hasWorkItemIDX() const {
    return WorkItemIDX;
  }

  bool hasWorkItemIDY() const {
    return WorkItemIDY;
  }

  bool hasWorkItemIDZ() const {
    return WorkItemIDZ;
  }

  bool hasImplicitArgPtr() const {
    return ImplicitArgPtr;
  }

  bool hasImplicitBufferPtr() const {
    return ImplicitBufferPtr;
  }

  AMDGPUFunctionArgInfo &getArgInfo() {
    return ArgInfo;
  }

  const AMDGPUFunctionArgInfo &getArgInfo() const {
    return ArgInfo;
  }

  std::pair<const ArgDescriptor *, const TargetRegisterClass *>
  getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
    return ArgInfo.getPreloadedValue(Value);
  }

  // NOTE(review): dereferences the ArgDescriptor pointer without a null
  // check — callers are expected to only query values that were preloaded.
  unsigned getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
    return ArgInfo.getPreloadedValue(Value).first->getRegister();
  }

  unsigned getGITPtrHigh() const {
    return GITPtrHigh;
  }

  unsigned get32BitAddressHighBits() const {
    return HighBitsOf32BitAddress;
  }

  unsigned getNumUserSGPRs() const {
    return NumUserSGPRs;
  }

  unsigned getNumPreloadedSGPRs() const {
    return NumUserSGPRs + NumSystemSGPRs;
  }

  unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const {
    return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
  }

  /// Returns the physical register reserved for use as the resource
  /// descriptor for scratch accesses.
  unsigned getScratchRSrcReg() const {
    return ScratchRSrcReg;
  }

  void setScratchRSrcReg(unsigned Reg) {
    assert(Reg != 0 && "Should never be unset");
    ScratchRSrcReg = Reg;
  }

  unsigned getScratchWaveOffsetReg() const {
    return ScratchWaveOffsetReg;
  }

  unsigned getFrameOffsetReg() const {
    return FrameOffsetReg;
  }

  void setStackPtrOffsetReg(unsigned Reg) {
    assert(Reg != 0 && "Should never be unset");
    StackPtrOffsetReg = Reg;
  }

  // Note the unset value for this is AMDGPU::SP_REG rather than
  // NoRegister. This is mostly a workaround for MIR tests where state that
  // can't be directly computed from the function is not preserved in serialized
  // MIR.
  unsigned getStackPtrOffsetReg() const {
    return StackPtrOffsetReg;
  }

  // For entry functions the frame pointer is the scratch wave offset, so keep
  // FrameOffsetReg in sync when the latter changes.
  void setScratchWaveOffsetReg(unsigned Reg) {
    assert(Reg != 0 && "Should never be unset");
    ScratchWaveOffsetReg = Reg;
    if (isEntryFunction())
      FrameOffsetReg = ScratchWaveOffsetReg;
  }

  unsigned getQueuePtrUserSGPR() const {
    return ArgInfo.QueuePtr.getRegister();
  }

  unsigned getImplicitBufferPtrUserSGPR() const {
    return ArgInfo.ImplicitBufferPtr.getRegister();
  }

  bool hasSpilledSGPRs() const {
    return HasSpilledSGPRs;
  }

  void setHasSpilledSGPRs(bool Spill = true) {
    HasSpilledSGPRs = Spill;
  }

  bool hasSpilledVGPRs() const {
    return HasSpilledVGPRs;
  }

  void setHasSpilledVGPRs(bool Spill = true) {
    HasSpilledVGPRs = Spill;
  }

  bool hasNonSpillStackObjects() const {
    return HasNonSpillStackObjects;
  }

  void setHasNonSpillStackObjects(bool StackObject = true) {
    HasNonSpillStackObjects = StackObject;
  }

  bool isStackRealigned() const {
    return IsStackRealigned;
  }

  void setIsStackRealigned(bool Realigned = true) {
    IsStackRealigned = Realigned;
  }

  unsigned getNumSpilledSGPRs() const {
    return NumSpilledSGPRs;
  }

  unsigned getNumSpilledVGPRs() const {
    return NumSpilledVGPRs;
  }

  void addToSpilledSGPRs(unsigned num) {
    NumSpilledSGPRs += num;
  }

  void addToSpilledVGPRs(unsigned num) {
    NumSpilledVGPRs += num;
  }

  unsigned getPSInputAddr() const {
    return PSInputAddr;
  }

  unsigned getPSInputEnable() const {
    return PSInputEnable;
  }

  bool isPSInputAllocated(unsigned Index) const {
    return PSInputAddr & (1 << Index);
  }

  void markPSInputAllocated(unsigned Index) {
    PSInputAddr |= 1 << Index;
  }

  void markPSInputEnabled(unsigned Index) {
    PSInputEnable |= 1 << Index;
  }

  bool returnsVoid() const {
    return ReturnsVoid;
  }

  void setIfReturnsVoid(bool Value) {
    ReturnsVoid = Value;
  }

  /// \returns A pair of default/requested minimum/maximum flat work group sizes
  /// for this function.
  std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const {
    return FlatWorkGroupSizes;
  }

  /// \returns Default/requested minimum flat work group size for this function.
  unsigned getMinFlatWorkGroupSize() const {
    return FlatWorkGroupSizes.first;
  }

  /// \returns Default/requested maximum flat work group size for this function.
  unsigned getMaxFlatWorkGroupSize() const {
    return FlatWorkGroupSizes.second;
  }

  /// \returns A pair of default/requested minimum/maximum number of waves per
  /// execution unit.
  std::pair<unsigned, unsigned> getWavesPerEU() const {
    return WavesPerEU;
  }

  /// \returns Default/requested minimum number of waves per execution unit.
  unsigned getMinWavesPerEU() const {
    return WavesPerEU.first;
  }

  /// \returns Default/requested maximum number of waves per execution unit.
  unsigned getMaxWavesPerEU() const {
    return WavesPerEU.second;
  }

  /// \returns SGPR used for \p Dim's work group ID.
  unsigned getWorkGroupIDSGPR(unsigned Dim) const {
    switch (Dim) {
    case 0:
      assert(hasWorkGroupIDX());
      return ArgInfo.WorkGroupIDX.getRegister();
    case 1:
      assert(hasWorkGroupIDY());
      return ArgInfo.WorkGroupIDY.getRegister();
    case 2:
      assert(hasWorkGroupIDZ());
      return ArgInfo.WorkGroupIDZ.getRegister();
    }
    llvm_unreachable("unexpected dimension");
  }

  /// \returns VGPR used for \p Dim' work item ID.
  unsigned getWorkItemIDVGPR(unsigned Dim) const;

  unsigned getLDSWaveSpillSize() const {
    return LDSWaveSpillSize;
  }

  /// Get (creating on first use) the pseudo source value for accesses through
  /// the buffer resource \p BufferRsrc.
  const AMDGPUBufferPseudoSourceValue *getBufferPSV(const SIInstrInfo &TII,
                                                    const Value *BufferRsrc) {
    assert(BufferRsrc);
    auto PSV = BufferPSVs.try_emplace(
      BufferRsrc,
      llvm::make_unique<AMDGPUBufferPseudoSourceValue>(TII));
    return PSV.first->second.get();
  }

  /// Get (creating on first use) the pseudo source value for accesses through
  /// the image resource \p ImgRsrc.
  const AMDGPUImagePseudoSourceValue *getImagePSV(const SIInstrInfo &TII,
                                                  const Value *ImgRsrc) {
    assert(ImgRsrc);
    auto PSV = ImagePSVs.try_emplace(
      ImgRsrc,
      llvm::make_unique<AMDGPUImagePseudoSourceValue>(TII));
    return PSV.first->second.get();
  }

  unsigned getOccupancy() const {
    return Occupancy;
  }

  // Memory-bound or wave-limited functions are capped at 4 waves.
  unsigned getMinAllowedOccupancy() const {
    if (!isMemoryBound() && !needsWaveLimiter())
      return Occupancy;
    return (Occupancy < 4) ? Occupancy : 4;
  }

  void limitOccupancy(const MachineFunction &MF);

  void limitOccupancy(unsigned Limit) {
    if (Occupancy > Limit)
      Occupancy = Limit;
  }

  // Raise occupancy to \p Limit, then re-clamp against the function's own
  // limits via limitOccupancy(MF).
  void increaseOccupancy(const MachineFunction &MF, unsigned Limit) {
    if (Occupancy < Limit)
      Occupancy = Limit;
    limitOccupancy(MF);
  }
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H