//==- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface --*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// AMDGPU/SI per-MachineFunction state: preloaded user/system SGPR argument
/// bookkeeping, scratch/stack setup registers, SGPR-to-VGPR spill tracking,
/// pseudo source values for buffer/image resources, and occupancy limits.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
#define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H

#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUMachineFunction.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MIRYamlMapping.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include <array>
#include <cassert>
#include <utility>
#include <vector>

namespace llvm {

class MachineFrameInfo;
class MachineFunction;
class TargetRegisterClass;

/// PseudoSourceValue standing in for memory accessed through an image
/// resource descriptor. Treated as mutable and aliasing everything (see the
/// conservative overrides below).
class AMDGPUImagePseudoSourceValue : public PseudoSourceValue {
public:
  // TODO: Is the img rsrc useful?
  explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII) :
    PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) {}

  bool isConstant(const MachineFrameInfo *) const override {
    // This should probably be true for most images, but we will start by being
    // conservative.
    return false;
  }

  bool isAliased(const MachineFrameInfo *) const override {
    return true;
  }

  bool mayAlias(const MachineFrameInfo *) const override {
    return true;
  }
};

/// PseudoSourceValue standing in for memory accessed through a buffer
/// resource descriptor. Same conservative aliasing model as the image PSV.
class AMDGPUBufferPseudoSourceValue : public PseudoSourceValue {
public:
  explicit AMDGPUBufferPseudoSourceValue(const TargetInstrInfo &TII) :
    PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) { }

  bool isConstant(const MachineFrameInfo *) const override {
    // This should probably be true for most buffers, but we will start by
    // being conservative.
    return false;
  }

  bool isAliased(const MachineFrameInfo *) const override {
    return true;
  }

  bool mayAlias(const MachineFrameInfo *) const override {
    return true;
  }
};

namespace yaml {

/// Serialized form of llvm::SIMachineFunctionInfo used by MIR round-tripping.
/// Field defaults here must stay in sync with the mapOptional defaults in the
/// MappingTraits specialization below, so unset fields are omitted from YAML.
struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
  uint64_t ExplicitKernArgSize = 0;
  unsigned MaxKernArgAlign = 0;
  unsigned LDSSize = 0;
  bool IsEntryFunction = false;
  bool NoSignedZerosFPMath = false;
  bool MemoryBound = false;
  bool WaveLimiter = false;

  // Register names serialized as strings; defaults are the placeholder
  // pseudo-registers used before real registers are assigned.
  StringValue ScratchRSrcReg = "$private_rsrc_reg";
  StringValue ScratchWaveOffsetReg = "$scratch_wave_offset_reg";
  StringValue FrameOffsetReg = "$fp_reg";
  StringValue StackPtrOffsetReg = "$sp_reg";

  SIMachineFunctionInfo() = default;
  SIMachineFunctionInfo(const llvm::SIMachineFunctionInfo &,
                        const TargetRegisterInfo &TRI);

  void mappingImpl(yaml::IO &YamlIO) override;
  ~SIMachineFunctionInfo() = default;
};

/// YAML mapping for yaml::SIMachineFunctionInfo. Every field is optional so
/// a value equal to its default is not emitted.
template <> struct MappingTraits<SIMachineFunctionInfo> {
  static void mapping(IO &YamlIO, SIMachineFunctionInfo &MFI) {
    YamlIO.mapOptional("explicitKernArgSize", MFI.ExplicitKernArgSize,
                       UINT64_C(0));
    YamlIO.mapOptional("maxKernArgAlign", MFI.MaxKernArgAlign, 0u);
    YamlIO.mapOptional("ldsSize", MFI.LDSSize, 0u);
    YamlIO.mapOptional("isEntryFunction", MFI.IsEntryFunction, false);
    YamlIO.mapOptional("noSignedZerosFPMath", MFI.NoSignedZerosFPMath, false);
    YamlIO.mapOptional("memoryBound", MFI.MemoryBound, false);
    YamlIO.mapOptional("waveLimiter", MFI.WaveLimiter, false);
    YamlIO.mapOptional("scratchRSrcReg", MFI.ScratchRSrcReg,
                       StringValue("$private_rsrc_reg"));
    YamlIO.mapOptional("scratchWaveOffsetReg", MFI.ScratchWaveOffsetReg,
                       StringValue("$scratch_wave_offset_reg"));
    YamlIO.mapOptional("frameOffsetReg", MFI.FrameOffsetReg,
                       StringValue("$fp_reg"));
    YamlIO.mapOptional("stackPtrOffsetReg", MFI.StackPtrOffsetReg,
                       StringValue("$sp_reg"));
  }
};

} // end namespace yaml

/// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
/// tells the hardware which interpolation parameters to load.
class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
  friend class GCNTargetMachine;

  // Register holding the computed thread ID, if any; NoRegister until set via
  // setTIDReg.
  unsigned TIDReg = AMDGPU::NoRegister;

  // Registers that may be reserved for spilling purposes. These may be the same
  // as the input registers.
  unsigned ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG;
  unsigned ScratchWaveOffsetReg = AMDGPU::SCRATCH_WAVE_OFFSET_REG;

  // This is the current function's incremented size from the kernel's scratch
  // wave offset register. For an entry function, this is exactly the same as
  // the ScratchWaveOffsetReg.
  unsigned FrameOffsetReg = AMDGPU::FP_REG;

  // Top of the stack SGPR offset derived from the ScratchWaveOffsetReg.
  unsigned StackPtrOffsetReg = AMDGPU::SP_REG;

  // Which function arguments are preloaded, and into which registers.
  AMDGPUFunctionArgInfo ArgInfo;

  // State of MODE register, assumed FP mode.
  AMDGPU::SIModeRegisterDefaults Mode;

  // Graphics info.
  unsigned PSInputAddr = 0;
  unsigned PSInputEnable = 0;

  /// Number of bytes of arguments this function has on the stack. If the callee
  /// is expected to restore the argument stack this should be a multiple of 16,
  /// all usable during a tail call.
  ///
  /// The alternative would forbid tail call optimisation in some cases: if we
  /// want to transfer control from a function with 8-bytes of stack-argument
  /// space to a function with 16-bytes then misalignment of this value would
  /// make a stack adjustment necessary, which could not be undone by the
  /// callee.
  unsigned BytesInStackArgArea = 0;

  bool ReturnsVoid = true;

  // A pair of default/requested minimum/maximum flat work group sizes.
  // Minimum - first, maximum - second.
  std::pair<unsigned, unsigned> FlatWorkGroupSizes = {0, 0};

  // A pair of default/requested minimum/maximum number of waves per execution
  // unit. Minimum - first, maximum - second.
  std::pair<unsigned, unsigned> WavesPerEU = {0, 0};

  // Per-resource-Value caches of pseudo source values, created lazily by
  // getBufferPSV/getImagePSV below; ownership stays with this object.
  DenseMap<const Value *,
           std::unique_ptr<const AMDGPUBufferPseudoSourceValue>> BufferPSVs;
  DenseMap<const Value *,
           std::unique_ptr<const AMDGPUImagePseudoSourceValue>> ImagePSVs;

private:
  unsigned LDSWaveSpillSize = 0;
  unsigned NumUserSGPRs = 0;
  unsigned NumSystemSGPRs = 0;

  bool HasSpilledSGPRs = false;
  bool HasSpilledVGPRs = false;
  bool HasNonSpillStackObjects = false;
  bool IsStackRealigned = false;

  unsigned NumSpilledSGPRs = 0;
  unsigned NumSpilledVGPRs = 0;

  // Feature bits required for inputs passed in user SGPRs.
  // NOTE(review): these bitfields have no in-class initializers; presumably
  // the out-of-line constructor sets them all -- confirm in the .cpp.
  bool PrivateSegmentBuffer : 1;
  bool DispatchPtr : 1;
  bool QueuePtr : 1;
  bool KernargSegmentPtr : 1;
  bool DispatchID : 1;
  bool FlatScratchInit : 1;

  // Feature bits required for inputs passed in system SGPRs.
  bool WorkGroupIDX : 1; // Always initialized.
  bool WorkGroupIDY : 1;
  bool WorkGroupIDZ : 1;
  bool WorkGroupInfo : 1;
  bool PrivateSegmentWaveByteOffset : 1;

  bool WorkItemIDX : 1; // Always initialized.
  bool WorkItemIDY : 1;
  bool WorkItemIDZ : 1;

  // Private memory buffer
  // Compute directly in sgpr[0:1]
  // Other shaders indirect 64-bits at sgpr[0:1]
  bool ImplicitBufferPtr : 1;

  // Pointer to where the ABI inserts special kernel arguments separate from the
  // user arguments. This is an offset from the KernargSegmentPtr.
  bool ImplicitArgPtr : 1;

  // The hard-wired high half of the address of the global information table
  // for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since
  // current hardware only allows a 16 bit value.
  unsigned GITPtrHigh;

  unsigned HighBitsOf32BitAddress;

  // Current recorded maximum possible occupancy.
  unsigned Occupancy;

  MCPhysReg getNextUserSGPR() const;

  MCPhysReg getNextSystemSGPR() const;

public:
  /// A single SGPR lane spilled into a VGPR: which VGPR and which lane.
  /// hasReg()/hasLane() distinguish an unassigned (default) SpilledReg.
  struct SpilledReg {
    unsigned VGPR = 0;
    int Lane = -1;

    SpilledReg() = default;
    SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) {}

    bool hasLane() { return Lane != -1;}
    bool hasReg() { return VGPR != 0;}
  };

  struct SGPRSpillVGPRCSR {
    // VGPR used for SGPR spills
    unsigned VGPR;

    // If the VGPR is a CSR, the stack slot used to save/restore it in the
    // prolog/epilog.
    Optional<int> FI;

    SGPRSpillVGPRCSR(unsigned V, Optional<int> F) : VGPR(V), FI(F) {}
  };

private:
  // SGPR->VGPR spilling support.
  using SpillRegMask = std::pair<unsigned, unsigned>;

  // Track VGPR + wave index for each subregister of the SGPR spilled to
  // frameindex key.
  DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills;
  // Next free lane in the current spill VGPR; wraps to a new VGPR when full
  // (allocation itself happens in allocateSGPRSpillToVGPR).
  unsigned NumVGPRSpillLanes = 0;
  SmallVector<SGPRSpillVGPRCSR, 2> SpillVGPRs;

public:
  SIMachineFunctionInfo(const MachineFunction &MF);

  /// Populate base fields from parsed MIR YAML; \returns true on error
  /// (convention assumed -- confirm against the .cpp implementation).
  bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI);

  /// \returns the spill lanes recorded for \p FrameIndex, or an empty array
  /// if that frame index has no SGPR->VGPR spill assignment.
  ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const {
    auto I = SGPRToVGPRSpills.find(FrameIndex);
    return (I == SGPRToVGPRSpills.end()) ?
      ArrayRef<SpilledReg>() : makeArrayRef(I->second);
  }

  ArrayRef<SGPRSpillVGPRCSR> getSGPRSpillVGPRs() const {
    return SpillVGPRs;
  }

  AMDGPU::SIModeRegisterDefaults getMode() const {
    return Mode;
  }

  bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
  void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI);

  // TIDReg is NoRegister (0) until setTIDReg is called, so a nonzero value
  // means the thread ID has been computed.
  bool hasCalculatedTID() const { return TIDReg != 0; };
  unsigned getTIDReg() const { return TIDReg; };
  void setTIDReg(unsigned Reg) { TIDReg = Reg; }

  unsigned getBytesInStackArgArea() const {
    return BytesInStackArgArea;
  }

  void setBytesInStackArgArea(unsigned Bytes) {
    BytesInStackArgArea = Bytes;
  }

  // Add user SGPRs.
  unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI);
  unsigned addDispatchPtr(const SIRegisterInfo &TRI);
  unsigned addQueuePtr(const SIRegisterInfo &TRI);
  unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI);
  unsigned addDispatchID(const SIRegisterInfo &TRI);
  unsigned addFlatScratchInit(const SIRegisterInfo &TRI);
  unsigned addImplicitBufferPtr(const SIRegisterInfo &TRI);

  // Add system SGPRs. Each allocates the next system SGPR, records it in
  // ArgInfo, and returns the assigned register.
  unsigned addWorkGroupIDX() {
    ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupIDX.getRegister();
  }

  unsigned addWorkGroupIDY() {
    ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupIDY.getRegister();
  }

  unsigned addWorkGroupIDZ() {
    ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupIDZ.getRegister();
  }

  unsigned addWorkGroupInfo() {
    ArgInfo.WorkGroupInfo = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupInfo.getRegister();
  }

  // Add special VGPR inputs
  void setWorkItemIDX(ArgDescriptor Arg) {
    ArgInfo.WorkItemIDX = Arg;
  }

  void setWorkItemIDY(ArgDescriptor Arg) {
    ArgInfo.WorkItemIDY = Arg;
  }

  void setWorkItemIDZ(ArgDescriptor Arg) {
    ArgInfo.WorkItemIDZ = Arg;
  }

  unsigned addPrivateSegmentWaveByteOffset() {
    ArgInfo.PrivateSegmentWaveByteOffset
      = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
  }

  void setPrivateSegmentWaveByteOffset(unsigned Reg) {
    ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(Reg);
  }

  bool hasPrivateSegmentBuffer() const {
    return PrivateSegmentBuffer;
  }

  bool hasDispatchPtr() const {
    return DispatchPtr;
  }

  bool hasQueuePtr() const {
    return QueuePtr;
  }

  bool hasKernargSegmentPtr() const {
    return KernargSegmentPtr;
  }

  bool hasDispatchID() const {
    return DispatchID;
  }

  bool hasFlatScratchInit() const {
    return FlatScratchInit;
  }

  bool hasWorkGroupIDX() const {
    return WorkGroupIDX;
  }

  bool hasWorkGroupIDY() const {
    return WorkGroupIDY;
  }

  bool hasWorkGroupIDZ() const {
    return WorkGroupIDZ;
  }

  bool hasWorkGroupInfo() const {
    return WorkGroupInfo;
  }

  bool hasPrivateSegmentWaveByteOffset() const {
    return PrivateSegmentWaveByteOffset;
  }

  bool hasWorkItemIDX() const {
    return WorkItemIDX;
  }

  bool hasWorkItemIDY() const {
    return WorkItemIDY;
  }

  bool hasWorkItemIDZ() const {
    return WorkItemIDZ;
  }

  bool hasImplicitArgPtr() const {
    return ImplicitArgPtr;
  }

  bool hasImplicitBufferPtr() const {
    return ImplicitBufferPtr;
  }

  AMDGPUFunctionArgInfo &getArgInfo() {
    return ArgInfo;
  }

  const AMDGPUFunctionArgInfo &getArgInfo() const {
    return ArgInfo;
  }

  std::pair<const ArgDescriptor *, const TargetRegisterClass *>
  getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
    return ArgInfo.getPreloadedValue(Value);
  }

  // NOTE(review): dereferences the ArgDescriptor pointer without a null
  // check; callers must only query values that were actually preloaded.
  unsigned getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
    return ArgInfo.getPreloadedValue(Value).first->getRegister();
  }

  unsigned getGITPtrHigh() const {
    return GITPtrHigh;
  }

  unsigned get32BitAddressHighBits() const {
    return HighBitsOf32BitAddress;
  }

  unsigned getNumUserSGPRs() const {
    return NumUserSGPRs;
  }

  unsigned getNumPreloadedSGPRs() const {
    return NumUserSGPRs + NumSystemSGPRs;
  }

  unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const {
    return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
  }

  /// Returns the physical register reserved for use as the resource
  /// descriptor for scratch accesses.
  unsigned getScratchRSrcReg() const {
    return ScratchRSrcReg;
  }

  void setScratchRSrcReg(unsigned Reg) {
    assert(Reg != 0 && "Should never be unset");
    ScratchRSrcReg = Reg;
  }

  unsigned getScratchWaveOffsetReg() const {
    return ScratchWaveOffsetReg;
  }

  unsigned getFrameOffsetReg() const {
    return FrameOffsetReg;
  }

  void setStackPtrOffsetReg(unsigned Reg) {
    assert(Reg != 0 && "Should never be unset");
    StackPtrOffsetReg = Reg;
  }

  // Note the unset value for this is AMDGPU::SP_REG rather than
  // NoRegister. This is mostly a workaround for MIR tests where state that
  // can't be directly computed from the function is not preserved in serialized
  // MIR.
  unsigned getStackPtrOffsetReg() const {
    return StackPtrOffsetReg;
  }

  // For entry functions the frame offset register tracks the scratch wave
  // offset register (see the FrameOffsetReg member comment).
  void setScratchWaveOffsetReg(unsigned Reg) {
    assert(Reg != 0 && "Should never be unset");
    ScratchWaveOffsetReg = Reg;
    if (isEntryFunction())
      FrameOffsetReg = ScratchWaveOffsetReg;
  }

  unsigned getQueuePtrUserSGPR() const {
    return ArgInfo.QueuePtr.getRegister();
  }

  unsigned getImplicitBufferPtrUserSGPR() const {
    return ArgInfo.ImplicitBufferPtr.getRegister();
  }

  bool hasSpilledSGPRs() const {
    return HasSpilledSGPRs;
  }

  void setHasSpilledSGPRs(bool Spill = true) {
    HasSpilledSGPRs = Spill;
  }

  bool hasSpilledVGPRs() const {
    return HasSpilledVGPRs;
  }

  void setHasSpilledVGPRs(bool Spill = true) {
    HasSpilledVGPRs = Spill;
  }

  bool hasNonSpillStackObjects() const {
    return HasNonSpillStackObjects;
  }

  void setHasNonSpillStackObjects(bool StackObject = true) {
    HasNonSpillStackObjects = StackObject;
  }

  bool isStackRealigned() const {
    return IsStackRealigned;
  }

  void setIsStackRealigned(bool Realigned = true) {
    IsStackRealigned = Realigned;
  }

  unsigned getNumSpilledSGPRs() const {
    return NumSpilledSGPRs;
  }

  unsigned getNumSpilledVGPRs() const {
    return NumSpilledVGPRs;
  }

  void addToSpilledSGPRs(unsigned num) {
    NumSpilledSGPRs += num;
  }

  void addToSpilledVGPRs(unsigned num) {
    NumSpilledVGPRs += num;
  }

  unsigned getPSInputAddr() const {
    return PSInputAddr;
  }

  unsigned getPSInputEnable() const {
    return PSInputEnable;
  }

  // PSInputAddr/PSInputEnable are bitmasks indexed by PS input slot.
  bool isPSInputAllocated(unsigned Index) const {
    return PSInputAddr & (1 << Index);
  }

  void markPSInputAllocated(unsigned Index) {
    PSInputAddr |= 1 << Index;
  }

  void markPSInputEnabled(unsigned Index) {
    PSInputEnable |= 1 << Index;
  }

  bool returnsVoid() const {
    return ReturnsVoid;
  }

  void setIfReturnsVoid(bool Value) {
    ReturnsVoid = Value;
  }

  /// \returns A pair of default/requested minimum/maximum flat work group sizes
  /// for this function.
  std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const {
    return FlatWorkGroupSizes;
  }

  /// \returns Default/requested minimum flat work group size for this function.
  unsigned getMinFlatWorkGroupSize() const {
    return FlatWorkGroupSizes.first;
  }

  /// \returns Default/requested maximum flat work group size for this function.
  unsigned getMaxFlatWorkGroupSize() const {
    return FlatWorkGroupSizes.second;
  }

  /// \returns A pair of default/requested minimum/maximum number of waves per
  /// execution unit.
  std::pair<unsigned, unsigned> getWavesPerEU() const {
    return WavesPerEU;
  }

  /// \returns Default/requested minimum number of waves per execution unit.
  unsigned getMinWavesPerEU() const {
    return WavesPerEU.first;
  }

  /// \returns Default/requested maximum number of waves per execution unit.
  unsigned getMaxWavesPerEU() const {
    return WavesPerEU.second;
  }

  /// \returns SGPR used for \p Dim's work group ID.
  unsigned getWorkGroupIDSGPR(unsigned Dim) const {
    switch (Dim) {
    case 0:
      assert(hasWorkGroupIDX());
      return ArgInfo.WorkGroupIDX.getRegister();
    case 1:
      assert(hasWorkGroupIDY());
      return ArgInfo.WorkGroupIDY.getRegister();
    case 2:
      assert(hasWorkGroupIDZ());
      return ArgInfo.WorkGroupIDZ.getRegister();
    }
    llvm_unreachable("unexpected dimension");
  }

  /// \returns VGPR used for \p Dim' work item ID.
  unsigned getWorkItemIDVGPR(unsigned Dim) const;

  unsigned getLDSWaveSpillSize() const {
    return LDSWaveSpillSize;
  }

  /// \returns the cached pseudo source value for \p BufferRsrc, creating it
  /// on first use. The returned pointer is owned by this object.
  const AMDGPUBufferPseudoSourceValue *getBufferPSV(const SIInstrInfo &TII,
                                                    const Value *BufferRsrc) {
    assert(BufferRsrc);
    auto PSV = BufferPSVs.try_emplace(
      BufferRsrc,
      llvm::make_unique<AMDGPUBufferPseudoSourceValue>(TII));
    return PSV.first->second.get();
  }

  /// \returns the cached pseudo source value for \p ImgRsrc, creating it on
  /// first use. The returned pointer is owned by this object.
  const AMDGPUImagePseudoSourceValue *getImagePSV(const SIInstrInfo &TII,
                                                  const Value *ImgRsrc) {
    assert(ImgRsrc);
    auto PSV = ImagePSVs.try_emplace(
      ImgRsrc,
      llvm::make_unique<AMDGPUImagePseudoSourceValue>(TII));
    return PSV.first->second.get();
  }

  unsigned getOccupancy() const {
    return Occupancy;
  }

  // Memory-bound or wave-limited functions are capped at 4 waves (unless the
  // recorded occupancy is already lower).
  unsigned getMinAllowedOccupancy() const {
    if (!isMemoryBound() && !needsWaveLimiter())
      return Occupancy;
    return (Occupancy < 4) ? Occupancy : 4;
  }

  void limitOccupancy(const MachineFunction &MF);

  // Clamp the recorded occupancy down to \p Limit.
  void limitOccupancy(unsigned Limit) {
    if (Occupancy > Limit)
      Occupancy = Limit;
  }

  // Raise the recorded occupancy to \p Limit, then re-apply any
  // function-wide limits via limitOccupancy(MF).
  void increaseOccupancy(const MachineFunction &MF, unsigned Limit) {
    if (Occupancy < Limit)
      Occupancy = Limit;
    limitOccupancy(MF);
  }
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H