1 //==- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface --*- C++ -*-==// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 // 11 //===----------------------------------------------------------------------===// 12 13 #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H 14 #define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H 15 16 #include "AMDGPUArgumentUsageInfo.h" 17 #include "AMDGPUMachineFunction.h" 18 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 19 #include "SIInstrInfo.h" 20 #include "SIRegisterInfo.h" 21 #include "llvm/ADT/ArrayRef.h" 22 #include "llvm/ADT/DenseMap.h" 23 #include "llvm/ADT/Optional.h" 24 #include "llvm/ADT/SmallVector.h" 25 #include "llvm/ADT/SparseBitVector.h" 26 #include "llvm/CodeGen/MIRYamlMapping.h" 27 #include "llvm/CodeGen/PseudoSourceValue.h" 28 #include "llvm/CodeGen/TargetInstrInfo.h" 29 #include "llvm/MC/MCRegisterInfo.h" 30 #include "llvm/Support/ErrorHandling.h" 31 #include <array> 32 #include <cassert> 33 #include <utility> 34 #include <vector> 35 36 namespace llvm { 37 38 class MachineFrameInfo; 39 class MachineFunction; 40 class TargetRegisterClass; 41 42 class AMDGPUPseudoSourceValue : public PseudoSourceValue { 43 public: 44 enum AMDGPUPSVKind : unsigned { 45 PSVBuffer = PseudoSourceValue::TargetCustom, 46 PSVImage 47 }; 48 49 protected: 50 AMDGPUPseudoSourceValue(unsigned Kind, const TargetInstrInfo &TII) 51 : PseudoSourceValue(Kind, TII) {} 52 53 public: 54 bool isConstant(const MachineFrameInfo *) const override { 55 // This should probably be true for most images, but we will start by being 56 // conservative. 57 return false; 58 } 59 60 bool isAliased(const MachineFrameInfo *) const override { 61 return true; 62 } 63 64 bool mayAlias(const MachineFrameInfo *) const override { 65 return true; 66 } 67 }; 68 69 class AMDGPUBufferPseudoSourceValue final : public AMDGPUPseudoSourceValue { 70 public: 71 explicit AMDGPUBufferPseudoSourceValue(const TargetInstrInfo &TII) 72 : AMDGPUPseudoSourceValue(PSVBuffer, TII) {} 73 74 static bool classof(const PseudoSourceValue *V) { 75 return V->kind() == PSVBuffer; 76 } 77 }; 78 79 class AMDGPUImagePseudoSourceValue final : public AMDGPUPseudoSourceValue { 80 public: 81 // TODO: Is the img rsrc useful? 82 explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII) 83 : AMDGPUPseudoSourceValue(PSVImage, TII) {} 84 85 static bool classof(const PseudoSourceValue *V) { 86 return V->kind() == PSVImage; 87 } 88 }; 89 90 namespace yaml { 91 92 struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo { 93 uint64_t ExplicitKernArgSize = 0; 94 unsigned MaxKernArgAlign = 0; 95 unsigned LDSSize = 0; 96 bool IsEntryFunction = false; 97 bool NoSignedZerosFPMath = false; 98 bool MemoryBound = false; 99 bool WaveLimiter = false; 100 101 StringValue ScratchRSrcReg = "$private_rsrc_reg"; 102 StringValue ScratchWaveOffsetReg = "$scratch_wave_offset_reg"; 103 StringValue FrameOffsetReg = "$fp_reg"; 104 StringValue StackPtrOffsetReg = "$sp_reg"; 105 106 SIMachineFunctionInfo() = default; 107 SIMachineFunctionInfo(const llvm::SIMachineFunctionInfo &, 108 const TargetRegisterInfo &TRI); 109 110 void mappingImpl(yaml::IO &YamlIO) override; 111 ~SIMachineFunctionInfo() = default; 112 }; 113 114 template <> struct MappingTraits<SIMachineFunctionInfo> { 115 static void mapping(IO &YamlIO, SIMachineFunctionInfo &MFI) { 116 YamlIO.mapOptional("explicitKernArgSize", MFI.ExplicitKernArgSize, 117 UINT64_C(0)); 118 YamlIO.mapOptional("maxKernArgAlign", MFI.MaxKernArgAlign, 0u); 119 YamlIO.mapOptional("ldsSize", MFI.LDSSize, 0u); 120 YamlIO.mapOptional("isEntryFunction", MFI.IsEntryFunction, false); 121 YamlIO.mapOptional("noSignedZerosFPMath", MFI.NoSignedZerosFPMath, false); 122 YamlIO.mapOptional("memoryBound", MFI.MemoryBound, false); 123 YamlIO.mapOptional("waveLimiter", MFI.WaveLimiter, false); 124 YamlIO.mapOptional("scratchRSrcReg", MFI.ScratchRSrcReg, 125 StringValue("$private_rsrc_reg")); 126 YamlIO.mapOptional("scratchWaveOffsetReg", MFI.ScratchWaveOffsetReg, 127 StringValue("$scratch_wave_offset_reg")); 128 YamlIO.mapOptional("frameOffsetReg", MFI.FrameOffsetReg, 129 StringValue("$fp_reg")); 130 YamlIO.mapOptional("stackPtrOffsetReg", MFI.StackPtrOffsetReg, 131 StringValue("$sp_reg")); 132 } 133 }; 134 135 } // end namespace yaml 136 137 /// This class keeps track of the SPI_SP_INPUT_ADDR config register, which 138 /// tells the hardware which interpolation parameters to load. 139 class SIMachineFunctionInfo final : public AMDGPUMachineFunction { 140 friend class GCNTargetMachine; 141 142 unsigned TIDReg = AMDGPU::NoRegister; 143 144 // Registers that may be reserved for spilling purposes. These may be the same 145 // as the input registers. 146 unsigned ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG; 147 unsigned ScratchWaveOffsetReg = AMDGPU::SCRATCH_WAVE_OFFSET_REG; 148 149 // This is the current function's incremented size from the kernel's scratch 150 // wave offset register. For an entry function, this is exactly the same as 151 // the ScratchWaveOffsetReg. 152 unsigned FrameOffsetReg = AMDGPU::FP_REG; 153 154 // Top of the stack SGPR offset derived from the ScratchWaveOffsetReg. 155 unsigned StackPtrOffsetReg = AMDGPU::SP_REG; 156 157 AMDGPUFunctionArgInfo ArgInfo; 158 159 // State of MODE register, assumed FP mode. 160 AMDGPU::SIModeRegisterDefaults Mode; 161 162 // Graphics info. 163 unsigned PSInputAddr = 0; 164 unsigned PSInputEnable = 0; 165 166 /// Number of bytes of arguments this function has on the stack. If the callee 167 /// is expected to restore the argument stack this should be a multiple of 16, 168 /// all usable during a tail call. 169 /// 170 /// The alternative would forbid tail call optimisation in some cases: if we 171 /// want to transfer control from a function with 8-bytes of stack-argument 172 /// space to a function with 16-bytes then misalignment of this value would 173 /// make a stack adjustment necessary, which could not be undone by the 174 /// callee. 175 unsigned BytesInStackArgArea = 0; 176 177 bool ReturnsVoid = true; 178 179 // A pair of default/requested minimum/maximum flat work group sizes. 180 // Minimum - first, maximum - second. 181 std::pair<unsigned, unsigned> FlatWorkGroupSizes = {0, 0}; 182 183 // A pair of default/requested minimum/maximum number of waves per execution 184 // unit. Minimum - first, maximum - second. 185 std::pair<unsigned, unsigned> WavesPerEU = {0, 0}; 186 187 DenseMap<const Value *, 188 std::unique_ptr<const AMDGPUBufferPseudoSourceValue>> BufferPSVs; 189 DenseMap<const Value *, 190 std::unique_ptr<const AMDGPUImagePseudoSourceValue>> ImagePSVs; 191 192 private: 193 unsigned LDSWaveSpillSize = 0; 194 unsigned NumUserSGPRs = 0; 195 unsigned NumSystemSGPRs = 0; 196 197 bool HasSpilledSGPRs = false; 198 bool HasSpilledVGPRs = false; 199 bool HasNonSpillStackObjects = false; 200 bool IsStackRealigned = false; 201 202 unsigned NumSpilledSGPRs = 0; 203 unsigned NumSpilledVGPRs = 0; 204 205 // Feature bits required for inputs passed in user SGPRs. 206 bool PrivateSegmentBuffer : 1; 207 bool DispatchPtr : 1; 208 bool QueuePtr : 1; 209 bool KernargSegmentPtr : 1; 210 bool DispatchID : 1; 211 bool FlatScratchInit : 1; 212 213 // Feature bits required for inputs passed in system SGPRs. 214 bool WorkGroupIDX : 1; // Always initialized. 215 bool WorkGroupIDY : 1; 216 bool WorkGroupIDZ : 1; 217 bool WorkGroupInfo : 1; 218 bool PrivateSegmentWaveByteOffset : 1; 219 220 bool WorkItemIDX : 1; // Always initialized. 221 bool WorkItemIDY : 1; 222 bool WorkItemIDZ : 1; 223 224 // Private memory buffer 225 // Compute directly in sgpr[0:1] 226 // Other shaders indirect 64-bits at sgpr[0:1] 227 bool ImplicitBufferPtr : 1; 228 229 // Pointer to where the ABI inserts special kernel arguments separate from the 230 // user arguments. This is an offset from the KernargSegmentPtr. 231 bool ImplicitArgPtr : 1; 232 233 // The hard-wired high half of the address of the global information table 234 // for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since 235 // current hardware only allows a 16 bit value. 236 unsigned GITPtrHigh; 237 238 unsigned HighBitsOf32BitAddress; 239 240 // Current recorded maximum possible occupancy. 241 unsigned Occupancy; 242 243 MCPhysReg getNextUserSGPR() const; 244 245 MCPhysReg getNextSystemSGPR() const; 246 247 public: 248 struct SpilledReg { 249 unsigned VGPR = 0; 250 int Lane = -1; 251 252 SpilledReg() = default; 253 SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) {} 254 255 bool hasLane() { return Lane != -1;} 256 bool hasReg() { return VGPR != 0;} 257 }; 258 259 struct SGPRSpillVGPRCSR { 260 // VGPR used for SGPR spills 261 unsigned VGPR; 262 263 // If the VGPR is a CSR, the stack slot used to save/restore it in the 264 // prolog/epilog. 265 Optional<int> FI; 266 267 SGPRSpillVGPRCSR(unsigned V, Optional<int> F) : VGPR(V), FI(F) {} 268 }; 269 270 SparseBitVector<> WWMReservedRegs; 271 272 void ReserveWWMRegister(unsigned reg) { WWMReservedRegs.set(reg); } 273 274 private: 275 // SGPR->VGPR spilling support. 276 using SpillRegMask = std::pair<unsigned, unsigned>; 277 278 // Track VGPR + wave index for each subregister of the SGPR spilled to 279 // frameindex key. 280 DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills; 281 unsigned NumVGPRSpillLanes = 0; 282 SmallVector<SGPRSpillVGPRCSR, 2> SpillVGPRs; 283 284 public: 285 SIMachineFunctionInfo(const MachineFunction &MF); 286 287 bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI); 288 289 ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const { 290 auto I = SGPRToVGPRSpills.find(FrameIndex); 291 return (I == SGPRToVGPRSpills.end()) ? 292 ArrayRef<SpilledReg>() : makeArrayRef(I->second); 293 } 294 295 ArrayRef<SGPRSpillVGPRCSR> getSGPRSpillVGPRs() const { 296 return SpillVGPRs; 297 } 298 299 AMDGPU::SIModeRegisterDefaults getMode() const { 300 return Mode; 301 } 302 303 bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI); 304 void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI); 305 306 bool hasCalculatedTID() const { return TIDReg != 0; }; 307 unsigned getTIDReg() const { return TIDReg; }; 308 void setTIDReg(unsigned Reg) { TIDReg = Reg; } 309 310 unsigned getBytesInStackArgArea() const { 311 return BytesInStackArgArea; 312 } 313 314 void setBytesInStackArgArea(unsigned Bytes) { 315 BytesInStackArgArea = Bytes; 316 } 317 318 // Add user SGPRs. 319 unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI); 320 unsigned addDispatchPtr(const SIRegisterInfo &TRI); 321 unsigned addQueuePtr(const SIRegisterInfo &TRI); 322 unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI); 323 unsigned addDispatchID(const SIRegisterInfo &TRI); 324 unsigned addFlatScratchInit(const SIRegisterInfo &TRI); 325 unsigned addImplicitBufferPtr(const SIRegisterInfo &TRI); 326 327 // Add system SGPRs. 328 unsigned addWorkGroupIDX() { 329 ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR()); 330 NumSystemSGPRs += 1; 331 return ArgInfo.WorkGroupIDX.getRegister(); 332 } 333 334 unsigned addWorkGroupIDY() { 335 ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(getNextSystemSGPR()); 336 NumSystemSGPRs += 1; 337 return ArgInfo.WorkGroupIDY.getRegister(); 338 } 339 340 unsigned addWorkGroupIDZ() { 341 ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(getNextSystemSGPR()); 342 NumSystemSGPRs += 1; 343 return ArgInfo.WorkGroupIDZ.getRegister(); 344 } 345 346 unsigned addWorkGroupInfo() { 347 ArgInfo.WorkGroupInfo = ArgDescriptor::createRegister(getNextSystemSGPR()); 348 NumSystemSGPRs += 1; 349 return ArgInfo.WorkGroupInfo.getRegister(); 350 } 351 352 // Add special VGPR inputs 353 void setWorkItemIDX(ArgDescriptor Arg) { 354 ArgInfo.WorkItemIDX = Arg; 355 } 356 357 void setWorkItemIDY(ArgDescriptor Arg) { 358 ArgInfo.WorkItemIDY = Arg; 359 } 360 361 void setWorkItemIDZ(ArgDescriptor Arg) { 362 ArgInfo.WorkItemIDZ = Arg; 363 } 364 365 unsigned addPrivateSegmentWaveByteOffset() { 366 ArgInfo.PrivateSegmentWaveByteOffset 367 = ArgDescriptor::createRegister(getNextSystemSGPR()); 368 NumSystemSGPRs += 1; 369 return ArgInfo.PrivateSegmentWaveByteOffset.getRegister(); 370 } 371 372 void setPrivateSegmentWaveByteOffset(unsigned Reg) { 373 ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(Reg); 374 } 375 376 bool hasPrivateSegmentBuffer() const { 377 return PrivateSegmentBuffer; 378 } 379 380 bool hasDispatchPtr() const { 381 return DispatchPtr; 382 } 383 384 bool hasQueuePtr() const { 385 return QueuePtr; 386 } 387 388 bool hasKernargSegmentPtr() const { 389 return KernargSegmentPtr; 390 } 391 392 bool hasDispatchID() const { 393 return DispatchID; 394 } 395 396 bool hasFlatScratchInit() const { 397 return FlatScratchInit; 398 } 399 400 bool hasWorkGroupIDX() const { 401 return WorkGroupIDX; 402 } 403 404 bool hasWorkGroupIDY() const { 405 return WorkGroupIDY; 406 } 407 408 bool hasWorkGroupIDZ() const { 409 return WorkGroupIDZ; 410 } 411 412 bool hasWorkGroupInfo() const { 413 return WorkGroupInfo; 414 } 415 416 bool hasPrivateSegmentWaveByteOffset() const { 417 return PrivateSegmentWaveByteOffset; 418 } 419 420 bool hasWorkItemIDX() const { 421 return WorkItemIDX; 422 } 423 424 bool hasWorkItemIDY() const { 425 return WorkItemIDY; 426 } 427 428 bool hasWorkItemIDZ() const { 429 return WorkItemIDZ; 430 } 431 432 bool hasImplicitArgPtr() const { 433 return ImplicitArgPtr; 434 } 435 436 bool hasImplicitBufferPtr() const { 437 return ImplicitBufferPtr; 438 } 439 440 AMDGPUFunctionArgInfo &getArgInfo() { 441 return ArgInfo; 442 } 443 444 const AMDGPUFunctionArgInfo &getArgInfo() const { 445 return ArgInfo; 446 } 447 448 std::pair<const ArgDescriptor *, const TargetRegisterClass *> 449 getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const { 450 return ArgInfo.getPreloadedValue(Value); 451 } 452 453 unsigned getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const { 454 auto Arg = ArgInfo.getPreloadedValue(Value).first; 455 return Arg ? Arg->getRegister() : 0; 456 } 457 458 unsigned getGITPtrHigh() const { 459 return GITPtrHigh; 460 } 461 462 unsigned get32BitAddressHighBits() const { 463 return HighBitsOf32BitAddress; 464 } 465 466 unsigned getNumUserSGPRs() const { 467 return NumUserSGPRs; 468 } 469 470 unsigned getNumPreloadedSGPRs() const { 471 return NumUserSGPRs + NumSystemSGPRs; 472 } 473 474 unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const { 475 return ArgInfo.PrivateSegmentWaveByteOffset.getRegister(); 476 } 477 478 /// Returns the physical register reserved for use as the resource 479 /// descriptor for scratch accesses. 480 unsigned getScratchRSrcReg() const { 481 return ScratchRSrcReg; 482 } 483 484 void setScratchRSrcReg(unsigned Reg) { 485 assert(Reg != 0 && "Should never be unset"); 486 ScratchRSrcReg = Reg; 487 } 488 489 unsigned getScratchWaveOffsetReg() const { 490 return ScratchWaveOffsetReg; 491 } 492 493 unsigned getFrameOffsetReg() const { 494 return FrameOffsetReg; 495 } 496 497 void setFrameOffsetReg(unsigned Reg) { 498 assert(Reg != 0 && "Should never be unset"); 499 FrameOffsetReg = Reg; 500 } 501 502 void setStackPtrOffsetReg(unsigned Reg) { 503 assert(Reg != 0 && "Should never be unset"); 504 StackPtrOffsetReg = Reg; 505 } 506 507 // Note the unset value for this is AMDGPU::SP_REG rather than 508 // NoRegister. This is mostly a workaround for MIR tests where state that 509 // can't be directly computed from the function is not preserved in serialized 510 // MIR. 511 unsigned getStackPtrOffsetReg() const { 512 return StackPtrOffsetReg; 513 } 514 515 void setScratchWaveOffsetReg(unsigned Reg) { 516 assert(Reg != 0 && "Should never be unset"); 517 ScratchWaveOffsetReg = Reg; 518 } 519 520 unsigned getQueuePtrUserSGPR() const { 521 return ArgInfo.QueuePtr.getRegister(); 522 } 523 524 unsigned getImplicitBufferPtrUserSGPR() const { 525 return ArgInfo.ImplicitBufferPtr.getRegister(); 526 } 527 528 bool hasSpilledSGPRs() const { 529 return HasSpilledSGPRs; 530 } 531 532 void setHasSpilledSGPRs(bool Spill = true) { 533 HasSpilledSGPRs = Spill; 534 } 535 536 bool hasSpilledVGPRs() const { 537 return HasSpilledVGPRs; 538 } 539 540 void setHasSpilledVGPRs(bool Spill = true) { 541 HasSpilledVGPRs = Spill; 542 } 543 544 bool hasNonSpillStackObjects() const { 545 return HasNonSpillStackObjects; 546 } 547 548 void setHasNonSpillStackObjects(bool StackObject = true) { 549 HasNonSpillStackObjects = StackObject; 550 } 551 552 bool isStackRealigned() const { 553 return IsStackRealigned; 554 } 555 556 void setIsStackRealigned(bool Realigned = true) { 557 IsStackRealigned = Realigned; 558 } 559 560 unsigned getNumSpilledSGPRs() const { 561 return NumSpilledSGPRs; 562 } 563 564 unsigned getNumSpilledVGPRs() const { 565 return NumSpilledVGPRs; 566 } 567 568 void addToSpilledSGPRs(unsigned num) { 569 NumSpilledSGPRs += num; 570 } 571 572 void addToSpilledVGPRs(unsigned num) { 573 NumSpilledVGPRs += num; 574 } 575 576 unsigned getPSInputAddr() const { 577 return PSInputAddr; 578 } 579 580 unsigned getPSInputEnable() const { 581 return PSInputEnable; 582 } 583 584 bool isPSInputAllocated(unsigned Index) const { 585 return PSInputAddr & (1 << Index); 586 } 587 588 void markPSInputAllocated(unsigned Index) { 589 PSInputAddr |= 1 << Index; 590 } 591 592 void markPSInputEnabled(unsigned Index) { 593 PSInputEnable |= 1 << Index; 594 } 595 596 bool returnsVoid() const { 597 return ReturnsVoid; 598 } 599 600 void setIfReturnsVoid(bool Value) { 601 ReturnsVoid = Value; 602 } 603 604 /// \returns A pair of default/requested minimum/maximum flat work group sizes 605 /// for this function. 606 std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const { 607 return FlatWorkGroupSizes; 608 } 609 610 /// \returns Default/requested minimum flat work group size for this function. 611 unsigned getMinFlatWorkGroupSize() const { 612 return FlatWorkGroupSizes.first; 613 } 614 615 /// \returns Default/requested maximum flat work group size for this function. 616 unsigned getMaxFlatWorkGroupSize() const { 617 return FlatWorkGroupSizes.second; 618 } 619 620 /// \returns A pair of default/requested minimum/maximum number of waves per 621 /// execution unit. 622 std::pair<unsigned, unsigned> getWavesPerEU() const { 623 return WavesPerEU; 624 } 625 626 /// \returns Default/requested minimum number of waves per execution unit. 627 unsigned getMinWavesPerEU() const { 628 return WavesPerEU.first; 629 } 630 631 /// \returns Default/requested maximum number of waves per execution unit. 632 unsigned getMaxWavesPerEU() const { 633 return WavesPerEU.second; 634 } 635 636 /// \returns SGPR used for \p Dim's work group ID. 637 unsigned getWorkGroupIDSGPR(unsigned Dim) const { 638 switch (Dim) { 639 case 0: 640 assert(hasWorkGroupIDX()); 641 return ArgInfo.WorkGroupIDX.getRegister(); 642 case 1: 643 assert(hasWorkGroupIDY()); 644 return ArgInfo.WorkGroupIDY.getRegister(); 645 case 2: 646 assert(hasWorkGroupIDZ()); 647 return ArgInfo.WorkGroupIDZ.getRegister(); 648 } 649 llvm_unreachable("unexpected dimension"); 650 } 651 652 /// \returns VGPR used for \p Dim' work item ID. 653 unsigned getWorkItemIDVGPR(unsigned Dim) const; 654 655 unsigned getLDSWaveSpillSize() const { 656 return LDSWaveSpillSize; 657 } 658 659 const AMDGPUBufferPseudoSourceValue *getBufferPSV(const SIInstrInfo &TII, 660 const Value *BufferRsrc) { 661 assert(BufferRsrc); 662 auto PSV = BufferPSVs.try_emplace( 663 BufferRsrc, 664 llvm::make_unique<AMDGPUBufferPseudoSourceValue>(TII)); 665 return PSV.first->second.get(); 666 } 667 668 const AMDGPUImagePseudoSourceValue *getImagePSV(const SIInstrInfo &TII, 669 const Value *ImgRsrc) { 670 assert(ImgRsrc); 671 auto PSV = ImagePSVs.try_emplace( 672 ImgRsrc, 673 llvm::make_unique<AMDGPUImagePseudoSourceValue>(TII)); 674 return PSV.first->second.get(); 675 } 676 677 unsigned getOccupancy() const { 678 return Occupancy; 679 } 680 681 unsigned getMinAllowedOccupancy() const { 682 if (!isMemoryBound() && !needsWaveLimiter()) 683 return Occupancy; 684 return (Occupancy < 4) ? Occupancy : 4; 685 } 686 687 void limitOccupancy(const MachineFunction &MF); 688 689 void limitOccupancy(unsigned Limit) { 690 if (Occupancy > Limit) 691 Occupancy = Limit; 692 } 693 694 void increaseOccupancy(const MachineFunction &MF, unsigned Limit) { 695 if (Occupancy < Limit) 696 Occupancy = Limit; 697 limitOccupancy(MF); 698 } 699 }; 700 701 } // end namespace llvm 702 703 #endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H 704