1 //==- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface --*- C++ -*-==// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H 15 #define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H 16 17 #include "AMDGPUArgumentUsageInfo.h" 18 #include "AMDGPUMachineFunction.h" 19 #include "SIInstrInfo.h" 20 #include "SIRegisterInfo.h" 21 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 22 #include "llvm/ADT/ArrayRef.h" 23 #include "llvm/ADT/DenseMap.h" 24 #include "llvm/ADT/Optional.h" 25 #include "llvm/ADT/SmallVector.h" 26 #include "llvm/CodeGen/PseudoSourceValue.h" 27 #include "llvm/CodeGen/TargetInstrInfo.h" 28 #include "llvm/MC/MCRegisterInfo.h" 29 #include "llvm/Support/ErrorHandling.h" 30 #include <array> 31 #include <cassert> 32 #include <utility> 33 #include <vector> 34 35 namespace llvm { 36 37 class MachineFrameInfo; 38 class MachineFunction; 39 class TargetRegisterClass; 40 41 class AMDGPUImagePseudoSourceValue : public PseudoSourceValue { 42 public: 43 // TODO: Is the img rsrc useful? 44 explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII) : 45 PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) {} 46 47 bool isConstant(const MachineFrameInfo *) const override { 48 // This should probably be true for most images, but we will start by being 49 // conservative. 50 return false; 51 } 52 53 bool isAliased(const MachineFrameInfo *) const override { 54 return true; 55 } 56 57 bool mayAlias(const MachineFrameInfo *) const override { 58 return true; 59 } 60 }; 61 62 class AMDGPUBufferPseudoSourceValue : public PseudoSourceValue { 63 public: 64 explicit AMDGPUBufferPseudoSourceValue(const TargetInstrInfo &TII) : 65 PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) { } 66 67 bool isConstant(const MachineFrameInfo *) const override { 68 // This should probably be true for most images, but we will start by being 69 // conservative. 70 return false; 71 } 72 73 bool isAliased(const MachineFrameInfo *) const override { 74 return true; 75 } 76 77 bool mayAlias(const MachineFrameInfo *) const override { 78 return true; 79 } 80 }; 81 82 /// This class keeps track of the SPI_SP_INPUT_ADDR config register, which 83 /// tells the hardware which interpolation parameters to load. 84 class SIMachineFunctionInfo final : public AMDGPUMachineFunction { 85 unsigned TIDReg = AMDGPU::NoRegister; 86 87 // Registers that may be reserved for spilling purposes. These may be the same 88 // as the input registers. 89 unsigned ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG; 90 unsigned ScratchWaveOffsetReg = AMDGPU::SCRATCH_WAVE_OFFSET_REG; 91 92 // This is the current function's incremented size from the kernel's scratch 93 // wave offset register. For an entry function, this is exactly the same as 94 // the ScratchWaveOffsetReg. 95 unsigned FrameOffsetReg = AMDGPU::FP_REG; 96 97 // Top of the stack SGPR offset derived from the ScratchWaveOffsetReg. 98 unsigned StackPtrOffsetReg = AMDGPU::SP_REG; 99 100 AMDGPUFunctionArgInfo ArgInfo; 101 102 // Graphics info. 103 unsigned PSInputAddr = 0; 104 unsigned PSInputEnable = 0; 105 106 /// Number of bytes of arguments this function has on the stack. If the callee 107 /// is expected to restore the argument stack this should be a multiple of 16, 108 /// all usable during a tail call. 109 /// 110 /// The alternative would forbid tail call optimisation in some cases: if we 111 /// want to transfer control from a function with 8-bytes of stack-argument 112 /// space to a function with 16-bytes then misalignment of this value would 113 /// make a stack adjustment necessary, which could not be undone by the 114 /// callee. 115 unsigned BytesInStackArgArea = 0; 116 117 bool ReturnsVoid = true; 118 119 // A pair of default/requested minimum/maximum flat work group sizes. 120 // Minimum - first, maximum - second. 121 std::pair<unsigned, unsigned> FlatWorkGroupSizes = {0, 0}; 122 123 // A pair of default/requested minimum/maximum number of waves per execution 124 // unit. Minimum - first, maximum - second. 125 std::pair<unsigned, unsigned> WavesPerEU = {0, 0}; 126 127 // Stack object indices for work group IDs. 128 std::array<int, 3> DebuggerWorkGroupIDStackObjectIndices = {{0, 0, 0}}; 129 130 // Stack object indices for work item IDs. 131 std::array<int, 3> DebuggerWorkItemIDStackObjectIndices = {{0, 0, 0}}; 132 133 DenseMap<const Value *, 134 std::unique_ptr<const AMDGPUBufferPseudoSourceValue>> BufferPSVs; 135 DenseMap<const Value *, 136 std::unique_ptr<const AMDGPUImagePseudoSourceValue>> ImagePSVs; 137 138 private: 139 unsigned LDSWaveSpillSize = 0; 140 unsigned NumUserSGPRs = 0; 141 unsigned NumSystemSGPRs = 0; 142 143 bool HasSpilledSGPRs = false; 144 bool HasSpilledVGPRs = false; 145 bool HasNonSpillStackObjects = false; 146 bool IsStackRealigned = false; 147 148 unsigned NumSpilledSGPRs = 0; 149 unsigned NumSpilledVGPRs = 0; 150 151 // Feature bits required for inputs passed in user SGPRs. 152 bool PrivateSegmentBuffer : 1; 153 bool DispatchPtr : 1; 154 bool QueuePtr : 1; 155 bool KernargSegmentPtr : 1; 156 bool DispatchID : 1; 157 bool FlatScratchInit : 1; 158 bool GridWorkgroupCountX : 1; 159 bool GridWorkgroupCountY : 1; 160 bool GridWorkgroupCountZ : 1; 161 162 // Feature bits required for inputs passed in system SGPRs. 163 bool WorkGroupIDX : 1; // Always initialized. 164 bool WorkGroupIDY : 1; 165 bool WorkGroupIDZ : 1; 166 bool WorkGroupInfo : 1; 167 bool PrivateSegmentWaveByteOffset : 1; 168 169 bool WorkItemIDX : 1; // Always initialized. 170 bool WorkItemIDY : 1; 171 bool WorkItemIDZ : 1; 172 173 // Private memory buffer 174 // Compute directly in sgpr[0:1] 175 // Other shaders indirect 64-bits at sgpr[0:1] 176 bool ImplicitBufferPtr : 1; 177 178 // Pointer to where the ABI inserts special kernel arguments separate from the 179 // user arguments. This is an offset from the KernargSegmentPtr. 180 bool ImplicitArgPtr : 1; 181 182 // The hard-wired high half of the address of the global information table 183 // for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since 184 // current hardware only allows a 16 bit value. 185 unsigned GITPtrHigh; 186 187 unsigned HighBitsOf32BitAddress; 188 189 // Current recorded maximum possible occupancy. 190 unsigned Occupancy; 191 192 MCPhysReg getNextUserSGPR() const; 193 194 MCPhysReg getNextSystemSGPR() const; 195 196 public: 197 struct SpilledReg { 198 unsigned VGPR = 0; 199 int Lane = -1; 200 201 SpilledReg() = default; 202 SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) {} 203 204 bool hasLane() { return Lane != -1;} 205 bool hasReg() { return VGPR != 0;} 206 }; 207 208 struct SGPRSpillVGPRCSR { 209 // VGPR used for SGPR spills 210 unsigned VGPR; 211 212 // If the VGPR is a CSR, the stack slot used to save/restore it in the 213 // prolog/epilog. 214 Optional<int> FI; 215 216 SGPRSpillVGPRCSR(unsigned V, Optional<int> F) : VGPR(V), FI(F) {} 217 }; 218 219 private: 220 // SGPR->VGPR spilling support. 221 using SpillRegMask = std::pair<unsigned, unsigned>; 222 223 // Track VGPR + wave index for each subregister of the SGPR spilled to 224 // frameindex key. 225 DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills; 226 unsigned NumVGPRSpillLanes = 0; 227 SmallVector<SGPRSpillVGPRCSR, 2> SpillVGPRs; 228 229 public: 230 SIMachineFunctionInfo(const MachineFunction &MF); 231 232 ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const { 233 auto I = SGPRToVGPRSpills.find(FrameIndex); 234 return (I == SGPRToVGPRSpills.end()) ? 235 ArrayRef<SpilledReg>() : makeArrayRef(I->second); 236 } 237 238 ArrayRef<SGPRSpillVGPRCSR> getSGPRSpillVGPRs() const { 239 return SpillVGPRs; 240 } 241 242 bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI); 243 void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI); 244 245 bool hasCalculatedTID() const { return TIDReg != 0; }; 246 unsigned getTIDReg() const { return TIDReg; }; 247 void setTIDReg(unsigned Reg) { TIDReg = Reg; } 248 249 unsigned getBytesInStackArgArea() const { 250 return BytesInStackArgArea; 251 } 252 253 void setBytesInStackArgArea(unsigned Bytes) { 254 BytesInStackArgArea = Bytes; 255 } 256 257 // Add user SGPRs. 258 unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI); 259 unsigned addDispatchPtr(const SIRegisterInfo &TRI); 260 unsigned addQueuePtr(const SIRegisterInfo &TRI); 261 unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI); 262 unsigned addDispatchID(const SIRegisterInfo &TRI); 263 unsigned addFlatScratchInit(const SIRegisterInfo &TRI); 264 unsigned addImplicitBufferPtr(const SIRegisterInfo &TRI); 265 266 // Add system SGPRs. 267 unsigned addWorkGroupIDX() { 268 ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR()); 269 NumSystemSGPRs += 1; 270 return ArgInfo.WorkGroupIDX.getRegister(); 271 } 272 273 unsigned addWorkGroupIDY() { 274 ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(getNextSystemSGPR()); 275 NumSystemSGPRs += 1; 276 return ArgInfo.WorkGroupIDY.getRegister(); 277 } 278 279 unsigned addWorkGroupIDZ() { 280 ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(getNextSystemSGPR()); 281 NumSystemSGPRs += 1; 282 return ArgInfo.WorkGroupIDZ.getRegister(); 283 } 284 285 unsigned addWorkGroupInfo() { 286 ArgInfo.WorkGroupInfo = ArgDescriptor::createRegister(getNextSystemSGPR()); 287 NumSystemSGPRs += 1; 288 return ArgInfo.WorkGroupInfo.getRegister(); 289 } 290 291 // Add special VGPR inputs 292 void setWorkItemIDX(ArgDescriptor Arg) { 293 ArgInfo.WorkItemIDX = Arg; 294 } 295 296 void setWorkItemIDY(ArgDescriptor Arg) { 297 ArgInfo.WorkItemIDY = Arg; 298 } 299 300 void setWorkItemIDZ(ArgDescriptor Arg) { 301 ArgInfo.WorkItemIDZ = Arg; 302 } 303 304 unsigned addPrivateSegmentWaveByteOffset() { 305 ArgInfo.PrivateSegmentWaveByteOffset 306 = ArgDescriptor::createRegister(getNextSystemSGPR()); 307 NumSystemSGPRs += 1; 308 return ArgInfo.PrivateSegmentWaveByteOffset.getRegister(); 309 } 310 311 void setPrivateSegmentWaveByteOffset(unsigned Reg) { 312 ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(Reg); 313 } 314 315 bool hasPrivateSegmentBuffer() const { 316 return PrivateSegmentBuffer; 317 } 318 319 bool hasDispatchPtr() const { 320 return DispatchPtr; 321 } 322 323 bool hasQueuePtr() const { 324 return QueuePtr; 325 } 326 327 bool hasKernargSegmentPtr() const { 328 return KernargSegmentPtr; 329 } 330 331 bool hasDispatchID() const { 332 return DispatchID; 333 } 334 335 bool hasFlatScratchInit() const { 336 return FlatScratchInit; 337 } 338 339 bool hasGridWorkgroupCountX() const { 340 return GridWorkgroupCountX; 341 } 342 343 bool hasGridWorkgroupCountY() const { 344 return GridWorkgroupCountY; 345 } 346 347 bool hasGridWorkgroupCountZ() const { 348 return GridWorkgroupCountZ; 349 } 350 351 bool hasWorkGroupIDX() const { 352 return WorkGroupIDX; 353 } 354 355 bool hasWorkGroupIDY() const { 356 return WorkGroupIDY; 357 } 358 359 bool hasWorkGroupIDZ() const { 360 return WorkGroupIDZ; 361 } 362 363 bool hasWorkGroupInfo() const { 364 return WorkGroupInfo; 365 } 366 367 bool hasPrivateSegmentWaveByteOffset() const { 368 return PrivateSegmentWaveByteOffset; 369 } 370 371 bool hasWorkItemIDX() const { 372 return WorkItemIDX; 373 } 374 375 bool hasWorkItemIDY() const { 376 return WorkItemIDY; 377 } 378 379 bool hasWorkItemIDZ() const { 380 return WorkItemIDZ; 381 } 382 383 bool hasImplicitArgPtr() const { 384 return ImplicitArgPtr; 385 } 386 387 bool hasImplicitBufferPtr() const { 388 return ImplicitBufferPtr; 389 } 390 391 AMDGPUFunctionArgInfo &getArgInfo() { 392 return ArgInfo; 393 } 394 395 const AMDGPUFunctionArgInfo &getArgInfo() const { 396 return ArgInfo; 397 } 398 399 std::pair<const ArgDescriptor *, const TargetRegisterClass *> 400 getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const { 401 return ArgInfo.getPreloadedValue(Value); 402 } 403 404 unsigned getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const { 405 return ArgInfo.getPreloadedValue(Value).first->getRegister(); 406 } 407 408 unsigned getGITPtrHigh() const { 409 return GITPtrHigh; 410 } 411 412 unsigned get32BitAddressHighBits() const { 413 return HighBitsOf32BitAddress; 414 } 415 416 unsigned getNumUserSGPRs() const { 417 return NumUserSGPRs; 418 } 419 420 unsigned getNumPreloadedSGPRs() const { 421 return NumUserSGPRs + NumSystemSGPRs; 422 } 423 424 unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const { 425 return ArgInfo.PrivateSegmentWaveByteOffset.getRegister(); 426 } 427 428 /// Returns the physical register reserved for use as the resource 429 /// descriptor for scratch accesses. 430 unsigned getScratchRSrcReg() const { 431 return ScratchRSrcReg; 432 } 433 434 void setScratchRSrcReg(unsigned Reg) { 435 assert(Reg != 0 && "Should never be unset"); 436 ScratchRSrcReg = Reg; 437 } 438 439 unsigned getScratchWaveOffsetReg() const { 440 return ScratchWaveOffsetReg; 441 } 442 443 unsigned getFrameOffsetReg() const { 444 return FrameOffsetReg; 445 } 446 447 void setStackPtrOffsetReg(unsigned Reg) { 448 assert(Reg != 0 && "Should never be unset"); 449 StackPtrOffsetReg = Reg; 450 } 451 452 // Note the unset value for this is AMDGPU::SP_REG rather than 453 // NoRegister. This is mostly a workaround for MIR tests where state that 454 // can't be directly computed from the function is not preserved in serialized 455 // MIR. 456 unsigned getStackPtrOffsetReg() const { 457 return StackPtrOffsetReg; 458 } 459 460 void setScratchWaveOffsetReg(unsigned Reg) { 461 assert(Reg != 0 && "Should never be unset"); 462 ScratchWaveOffsetReg = Reg; 463 if (isEntryFunction()) 464 FrameOffsetReg = ScratchWaveOffsetReg; 465 } 466 467 unsigned getQueuePtrUserSGPR() const { 468 return ArgInfo.QueuePtr.getRegister(); 469 } 470 471 unsigned getImplicitBufferPtrUserSGPR() const { 472 return ArgInfo.ImplicitBufferPtr.getRegister(); 473 } 474 475 bool hasSpilledSGPRs() const { 476 return HasSpilledSGPRs; 477 } 478 479 void setHasSpilledSGPRs(bool Spill = true) { 480 HasSpilledSGPRs = Spill; 481 } 482 483 bool hasSpilledVGPRs() const { 484 return HasSpilledVGPRs; 485 } 486 487 void setHasSpilledVGPRs(bool Spill = true) { 488 HasSpilledVGPRs = Spill; 489 } 490 491 bool hasNonSpillStackObjects() const { 492 return HasNonSpillStackObjects; 493 } 494 495 void setHasNonSpillStackObjects(bool StackObject = true) { 496 HasNonSpillStackObjects = StackObject; 497 } 498 499 bool isStackRealigned() const { 500 return IsStackRealigned; 501 } 502 503 void setIsStackRealigned(bool Realigned = true) { 504 IsStackRealigned = Realigned; 505 } 506 507 unsigned getNumSpilledSGPRs() const { 508 return NumSpilledSGPRs; 509 } 510 511 unsigned getNumSpilledVGPRs() const { 512 return NumSpilledVGPRs; 513 } 514 515 void addToSpilledSGPRs(unsigned num) { 516 NumSpilledSGPRs += num; 517 } 518 519 void addToSpilledVGPRs(unsigned num) { 520 NumSpilledVGPRs += num; 521 } 522 523 unsigned getPSInputAddr() const { 524 return PSInputAddr; 525 } 526 527 unsigned getPSInputEnable() const { 528 return PSInputEnable; 529 } 530 531 bool isPSInputAllocated(unsigned Index) const { 532 return PSInputAddr & (1 << Index); 533 } 534 535 void markPSInputAllocated(unsigned Index) { 536 PSInputAddr |= 1 << Index; 537 } 538 539 void markPSInputEnabled(unsigned Index) { 540 PSInputEnable |= 1 << Index; 541 } 542 543 bool returnsVoid() const { 544 return ReturnsVoid; 545 } 546 547 void setIfReturnsVoid(bool Value) { 548 ReturnsVoid = Value; 549 } 550 551 /// \returns A pair of default/requested minimum/maximum flat work group sizes 552 /// for this function. 553 std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const { 554 return FlatWorkGroupSizes; 555 } 556 557 /// \returns Default/requested minimum flat work group size for this function. 558 unsigned getMinFlatWorkGroupSize() const { 559 return FlatWorkGroupSizes.first; 560 } 561 562 /// \returns Default/requested maximum flat work group size for this function. 563 unsigned getMaxFlatWorkGroupSize() const { 564 return FlatWorkGroupSizes.second; 565 } 566 567 /// \returns A pair of default/requested minimum/maximum number of waves per 568 /// execution unit. 569 std::pair<unsigned, unsigned> getWavesPerEU() const { 570 return WavesPerEU; 571 } 572 573 /// \returns Default/requested minimum number of waves per execution unit. 574 unsigned getMinWavesPerEU() const { 575 return WavesPerEU.first; 576 } 577 578 /// \returns Default/requested maximum number of waves per execution unit. 579 unsigned getMaxWavesPerEU() const { 580 return WavesPerEU.second; 581 } 582 583 /// \returns Stack object index for \p Dim's work group ID. 584 int getDebuggerWorkGroupIDStackObjectIndex(unsigned Dim) const { 585 assert(Dim < 3); 586 return DebuggerWorkGroupIDStackObjectIndices[Dim]; 587 } 588 589 /// Sets stack object index for \p Dim's work group ID to \p ObjectIdx. 590 void setDebuggerWorkGroupIDStackObjectIndex(unsigned Dim, int ObjectIdx) { 591 assert(Dim < 3); 592 DebuggerWorkGroupIDStackObjectIndices[Dim] = ObjectIdx; 593 } 594 595 /// \returns Stack object index for \p Dim's work item ID. 596 int getDebuggerWorkItemIDStackObjectIndex(unsigned Dim) const { 597 assert(Dim < 3); 598 return DebuggerWorkItemIDStackObjectIndices[Dim]; 599 } 600 601 /// Sets stack object index for \p Dim's work item ID to \p ObjectIdx. 602 void setDebuggerWorkItemIDStackObjectIndex(unsigned Dim, int ObjectIdx) { 603 assert(Dim < 3); 604 DebuggerWorkItemIDStackObjectIndices[Dim] = ObjectIdx; 605 } 606 607 /// \returns SGPR used for \p Dim's work group ID. 608 unsigned getWorkGroupIDSGPR(unsigned Dim) const { 609 switch (Dim) { 610 case 0: 611 assert(hasWorkGroupIDX()); 612 return ArgInfo.WorkGroupIDX.getRegister(); 613 case 1: 614 assert(hasWorkGroupIDY()); 615 return ArgInfo.WorkGroupIDY.getRegister(); 616 case 2: 617 assert(hasWorkGroupIDZ()); 618 return ArgInfo.WorkGroupIDZ.getRegister(); 619 } 620 llvm_unreachable("unexpected dimension"); 621 } 622 623 /// \returns VGPR used for \p Dim' work item ID. 624 unsigned getWorkItemIDVGPR(unsigned Dim) const; 625 626 unsigned getLDSWaveSpillSize() const { 627 return LDSWaveSpillSize; 628 } 629 630 const AMDGPUBufferPseudoSourceValue *getBufferPSV(const SIInstrInfo &TII, 631 const Value *BufferRsrc) { 632 assert(BufferRsrc); 633 auto PSV = BufferPSVs.try_emplace( 634 BufferRsrc, 635 llvm::make_unique<AMDGPUBufferPseudoSourceValue>(TII)); 636 return PSV.first->second.get(); 637 } 638 639 const AMDGPUImagePseudoSourceValue *getImagePSV(const SIInstrInfo &TII, 640 const Value *ImgRsrc) { 641 assert(ImgRsrc); 642 auto PSV = ImagePSVs.try_emplace( 643 ImgRsrc, 644 llvm::make_unique<AMDGPUImagePseudoSourceValue>(TII)); 645 return PSV.first->second.get(); 646 } 647 648 unsigned getOccupancy() const { 649 return Occupancy; 650 } 651 652 unsigned getMinAllowedOccupancy() const { 653 if (!isMemoryBound() && !needsWaveLimiter()) 654 return Occupancy; 655 return (Occupancy < 4) ? Occupancy : 4; 656 } 657 658 void limitOccupancy(const MachineFunction &MF); 659 660 void limitOccupancy(unsigned Limit) { 661 if (Occupancy > Limit) 662 Occupancy = Limit; 663 } 664 665 void increaseOccupancy(const MachineFunction &MF, unsigned Limit) { 666 if (Occupancy < Limit) 667 Occupancy = Limit; 668 limitOccupancy(MF); 669 } 670 }; 671 672 } // end namespace llvm 673 674 #endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H 675