//==- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface --*- C++ -*-==//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
#define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H

#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUMachineFunction.h"
#include "SIRegisterInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include <array>
#include <cassert>
#include <utility>
#include <vector>

namespace llvm {

class MachineFrameInfo;
class MachineFunction;
class SIInstrInfo;
class TargetRegisterClass;

class AMDGPUImagePseudoSourceValue : public PseudoSourceValue {
public:
  // TODO: Is the img rsrc useful?
  explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII) :
    PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) {}

  bool isConstant(const MachineFrameInfo *) const override {
    // This should probably be true for most images, but we will start by being
    // conservative.
    return false;
  }

  bool isAliased(const MachineFrameInfo *) const override {
    // FIXME: If we ever change image intrinsics to accept fat pointers, then
    // this could be true for some cases.
    return false;
  }

  bool mayAlias(const MachineFrameInfo *) const override {
    // FIXME: If we ever change image intrinsics to accept fat pointers, then
    // this could be true for some cases.
    return false;
  }
};

class AMDGPUBufferPseudoSourceValue : public PseudoSourceValue {
public:
  explicit AMDGPUBufferPseudoSourceValue(const TargetInstrInfo &TII) :
    PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) {}

  bool isConstant(const MachineFrameInfo *) const override {
    // This should probably be true for most buffers, but we will start by
    // being conservative.
    return false;
  }

  bool isAliased(const MachineFrameInfo *) const override {
    // FIXME: If we ever change buffer intrinsics to accept fat pointers, then
    // this could be true for some cases.
    return false;
  }

  bool mayAlias(const MachineFrameInfo *) const override {
    // FIXME: If we ever change buffer intrinsics to accept fat pointers, then
    // this could be true for some cases.
    return false;
  }
};
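// Illustrative sketch, not part of this header: a pseudo source value like the
// two above is typically attached to a MachineMemOperand so that machine-level
// alias analysis can reason about buffer/image accesses that carry no IR
// pointer. Assuming a MachineFunction MF, the SIInstrInfo TII, a resource
// Value *Rsrc, and a load of Size bytes with alignment Align:
//
//   SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
//   const AMDGPUBufferPseudoSourceValue *PSV = Info->getBufferPSV(TII, Rsrc);
//   MachineMemOperand *MMO = MF.getMachineMemOperand(
//       MachinePointerInfo(PSV), MachineMemOperand::MOLoad, Size, Align);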
/// This class keeps track of the SPI_PS_INPUT_ADDR config register, which
/// tells the hardware which interpolation parameters to load.
class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
  unsigned TIDReg = AMDGPU::NoRegister;

  // Registers that may be reserved for spilling purposes. These may be the
  // same as the input registers.
  unsigned ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG;
  unsigned ScratchWaveOffsetReg = AMDGPU::SCRATCH_WAVE_OFFSET_REG;

  // This is the current function's incremented size from the kernel's scratch
  // wave offset register. For an entry function, this is exactly the same as
  // the ScratchWaveOffsetReg.
  unsigned FrameOffsetReg = AMDGPU::FP_REG;

  // Top of the stack SGPR offset derived from the ScratchWaveOffsetReg.
  unsigned StackPtrOffsetReg = AMDGPU::SP_REG;

  AMDGPUFunctionArgInfo ArgInfo;

  // Graphics info.
  unsigned PSInputAddr = 0;
  unsigned PSInputEnable = 0;

  /// Number of bytes of arguments this function has on the stack. If the
  /// callee is expected to restore the argument stack this should be a
  /// multiple of 16, all usable during a tail call.
  ///
  /// The alternative would forbid tail call optimisation in some cases: if we
  /// want to transfer control from a function with 8 bytes of stack-argument
  /// space to a function with 16 bytes, then misalignment of this value would
  /// make a stack adjustment necessary, which could not be undone by the
  /// callee.
  unsigned BytesInStackArgArea = 0;

  bool ReturnsVoid = true;

  // A pair of default/requested minimum/maximum flat work group sizes.
  // Minimum - first, maximum - second.
  std::pair<unsigned, unsigned> FlatWorkGroupSizes = {0, 0};

  // A pair of default/requested minimum/maximum number of waves per execution
  // unit. Minimum - first, maximum - second.
  std::pair<unsigned, unsigned> WavesPerEU = {0, 0};

  // Stack object indices for work group IDs.
  std::array<int, 3> DebuggerWorkGroupIDStackObjectIndices = {{0, 0, 0}};

  // Stack object indices for work item IDs.
  std::array<int, 3> DebuggerWorkItemIDStackObjectIndices = {{0, 0, 0}};

  DenseMap<const Value *,
           std::unique_ptr<const AMDGPUBufferPseudoSourceValue>> BufferPSVs;
  DenseMap<const Value *,
           std::unique_ptr<const AMDGPUImagePseudoSourceValue>> ImagePSVs;

private:
  unsigned LDSWaveSpillSize = 0;
  unsigned NumUserSGPRs = 0;
  unsigned NumSystemSGPRs = 0;

  bool HasSpilledSGPRs = false;
  bool HasSpilledVGPRs = false;
  bool HasNonSpillStackObjects = false;

  unsigned NumSpilledSGPRs = 0;
  unsigned NumSpilledVGPRs = 0;

  // Feature bits required for inputs passed in user SGPRs.
  bool PrivateSegmentBuffer : 1;
  bool DispatchPtr : 1;
  bool QueuePtr : 1;
  bool KernargSegmentPtr : 1;
  bool DispatchID : 1;
  bool FlatScratchInit : 1;
  bool GridWorkgroupCountX : 1;
  bool GridWorkgroupCountY : 1;
  bool GridWorkgroupCountZ : 1;

  // Feature bits required for inputs passed in system SGPRs.
  bool WorkGroupIDX : 1; // Always initialized.
  bool WorkGroupIDY : 1;
  bool WorkGroupIDZ : 1;
  bool WorkGroupInfo : 1;
  bool PrivateSegmentWaveByteOffset : 1;

  bool WorkItemIDX : 1; // Always initialized.
  bool WorkItemIDY : 1;
  bool WorkItemIDZ : 1;

  // Private memory buffer
  // Compute directly in sgpr[0:1]
  // Other shaders indirect 64-bits at sgpr[0:1]
  bool ImplicitBufferPtr : 1;

  // Pointer to where the ABI inserts special kernel arguments separate from
  // the user arguments. This is an offset from the KernargSegmentPtr.
  bool ImplicitArgPtr : 1;

  // The hard-wired high half of the address of the global information table
  // for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since
  // current hardware only allows a 16-bit value.
  unsigned GITPtrHigh;
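  // Note on the two helpers below (explanatory, derived from their bodies):
  // user SGPRs and system SGPRs are handed out sequentially from SGPR0, with
  // system SGPRs placed immediately after the user SGPRs. For example, once 4
  // user SGPRs have been added, getNextUserSGPR() returns SGPR4; the first
  // system SGPR then also starts at SGPR4, and later system SGPRs follow at
  // SGPR5, SGPR6, and so on.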
  MCPhysReg getNextUserSGPR() const {
    assert(NumSystemSGPRs == 0 &&
           "System SGPRs must be added after user SGPRs");
    return AMDGPU::SGPR0 + NumUserSGPRs;
  }

  MCPhysReg getNextSystemSGPR() const {
    return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
  }

public:
  struct SpilledReg {
    unsigned VGPR = AMDGPU::NoRegister;
    int Lane = -1;

    SpilledReg() = default;
    SpilledReg(unsigned R, int L) : VGPR(R), Lane(L) {}

    bool hasLane() { return Lane != -1; }
    bool hasReg() { return VGPR != AMDGPU::NoRegister; }
  };

  struct SGPRSpillVGPRCSR {
    // VGPR used for SGPR spills
    unsigned VGPR;

    // If the VGPR is a CSR, the stack slot used to save/restore it in the
    // prolog/epilog.
    Optional<int> FI;

    SGPRSpillVGPRCSR(unsigned V, Optional<int> F) : VGPR(V), FI(F) {}
  };

private:
  // SGPR->VGPR spilling support.
  using SpillRegMask = std::pair<unsigned, unsigned>;

  // Track VGPR + wave index for each subregister of the SGPR spilled to
  // frame index key.
  DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills;
  unsigned NumVGPRSpillLanes = 0;
  SmallVector<SGPRSpillVGPRCSR, 2> SpillVGPRs;

public:
  SIMachineFunctionInfo(const MachineFunction &MF);

  ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const {
    auto I = SGPRToVGPRSpills.find(FrameIndex);
    return (I == SGPRToVGPRSpills.end()) ?
      ArrayRef<SpilledReg>() : makeArrayRef(I->second);
  }

  ArrayRef<SGPRSpillVGPRCSR> getSGPRSpillVGPRs() const {
    return SpillVGPRs;
  }

  bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
  void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI);

  bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; }
  unsigned getTIDReg() const { return TIDReg; }
  void setTIDReg(unsigned Reg) { TIDReg = Reg; }

  unsigned getBytesInStackArgArea() const {
    return BytesInStackArgArea;
  }

  void setBytesInStackArgArea(unsigned Bytes) {
    BytesInStackArgArea = Bytes;
  }

  // Add user SGPRs.
  unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI);
  unsigned addDispatchPtr(const SIRegisterInfo &TRI);
  unsigned addQueuePtr(const SIRegisterInfo &TRI);
  unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI);
  unsigned addDispatchID(const SIRegisterInfo &TRI);
  unsigned addFlatScratchInit(const SIRegisterInfo &TRI);
  unsigned addImplicitBufferPtr(const SIRegisterInfo &TRI);
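  // Illustrative sketch, not part of this interface: calling-convention
  // lowering typically pairs the has*() predicates further below with the
  // add*() calls above to reserve user SGPRs in ABI order, roughly:
  //
  //   if (Info.hasPrivateSegmentBuffer()) {
  //     unsigned Reg = Info.addPrivateSegmentBuffer(TRI);
  //     MF.addLiveIn(Reg, &AMDGPU::SGPR_128RegClass);
  //     CCInfo.AllocateReg(Reg);
  //   }
  //   if (Info.hasDispatchPtr()) {
  //     unsigned Reg = Info.addDispatchPtr(TRI);
  //     MF.addLiveIn(Reg, &AMDGPU::SGPR_64RegClass);
  //     CCInfo.AllocateReg(Reg);
  //   }
  //
  // (The register class names and the CCState variable are placeholders.)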
  // Add system SGPRs.
  unsigned addWorkGroupIDX() {
    ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupIDX.getRegister();
  }

  unsigned addWorkGroupIDY() {
    ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupIDY.getRegister();
  }

  unsigned addWorkGroupIDZ() {
    ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupIDZ.getRegister();
  }

  unsigned addWorkGroupInfo() {
    ArgInfo.WorkGroupInfo = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupInfo.getRegister();
  }

  // Add special VGPR inputs.
  void setWorkItemIDX(ArgDescriptor Arg) {
    ArgInfo.WorkItemIDX = Arg;
  }

  void setWorkItemIDY(ArgDescriptor Arg) {
    ArgInfo.WorkItemIDY = Arg;
  }

  void setWorkItemIDZ(ArgDescriptor Arg) {
    ArgInfo.WorkItemIDZ = Arg;
  }

  unsigned addPrivateSegmentWaveByteOffset() {
    ArgInfo.PrivateSegmentWaveByteOffset
      = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
  }

  void setPrivateSegmentWaveByteOffset(unsigned Reg) {
    ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(Reg);
  }

  bool hasPrivateSegmentBuffer() const {
    return PrivateSegmentBuffer;
  }

  bool hasDispatchPtr() const {
    return DispatchPtr;
  }

  bool hasQueuePtr() const {
    return QueuePtr;
  }

  bool hasKernargSegmentPtr() const {
    return KernargSegmentPtr;
  }

  bool hasDispatchID() const {
    return DispatchID;
  }

  bool hasFlatScratchInit() const {
    return FlatScratchInit;
  }

  bool hasGridWorkgroupCountX() const {
    return GridWorkgroupCountX;
  }

  bool hasGridWorkgroupCountY() const {
    return GridWorkgroupCountY;
  }

  bool hasGridWorkgroupCountZ() const {
    return GridWorkgroupCountZ;
  }

  bool hasWorkGroupIDX() const {
    return WorkGroupIDX;
  }

  bool hasWorkGroupIDY() const {
    return WorkGroupIDY;
  }

  bool hasWorkGroupIDZ() const {
    return WorkGroupIDZ;
  }

  bool hasWorkGroupInfo() const {
    return WorkGroupInfo;
  }

  bool hasPrivateSegmentWaveByteOffset() const {
    return PrivateSegmentWaveByteOffset;
  }

  bool hasWorkItemIDX() const {
    return WorkItemIDX;
  }

  bool hasWorkItemIDY() const {
    return WorkItemIDY;
  }

  bool hasWorkItemIDZ() const {
    return WorkItemIDZ;
  }

  bool hasImplicitArgPtr() const {
    return ImplicitArgPtr;
  }

  bool hasImplicitBufferPtr() const {
    return ImplicitBufferPtr;
  }

  AMDGPUFunctionArgInfo &getArgInfo() {
    return ArgInfo;
  }

  const AMDGPUFunctionArgInfo &getArgInfo() const {
    return ArgInfo;
  }

  std::pair<const ArgDescriptor *, const TargetRegisterClass *>
  getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
    return ArgInfo.getPreloadedValue(Value);
  }

  unsigned getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
    return ArgInfo.getPreloadedValue(Value).first->getRegister();
  }

  unsigned getGITPtrHigh() const {
    return GITPtrHigh;
  }

  unsigned getNumUserSGPRs() const {
    return NumUserSGPRs;
  }
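  // Illustrative sketch, not part of this interface: passes that need to know
  // where a preloaded value lives can use getPreloadedReg() above, e.g. for
  // the kernarg segment pointer:
  //
  //   unsigned KernargPtrReg =
  //       Info.getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
  //
  // getPreloadedReg() expects the value to actually be in a register; when
  // that is not guaranteed, use getPreloadedValue() and inspect the returned
  // descriptor first.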
  unsigned getNumPreloadedSGPRs() const {
    return NumUserSGPRs + NumSystemSGPRs;
  }

  unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const {
    return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
  }

  /// \brief Returns the physical register reserved for use as the resource
  /// descriptor for scratch accesses.
  unsigned getScratchRSrcReg() const {
    return ScratchRSrcReg;
  }

  void setScratchRSrcReg(unsigned Reg) {
    assert(Reg != AMDGPU::NoRegister && "Should never be unset");
    ScratchRSrcReg = Reg;
  }

  unsigned getScratchWaveOffsetReg() const {
    return ScratchWaveOffsetReg;
  }

  unsigned getFrameOffsetReg() const {
    return FrameOffsetReg;
  }

  void setStackPtrOffsetReg(unsigned Reg) {
    StackPtrOffsetReg = Reg;
  }

  // Note the unset value for this is AMDGPU::SP_REG rather than
  // NoRegister. This is mostly a workaround for MIR tests where state that
  // can't be directly computed from the function is not preserved in
  // serialized MIR.
  unsigned getStackPtrOffsetReg() const {
    return StackPtrOffsetReg;
  }

  void setScratchWaveOffsetReg(unsigned Reg) {
    assert(Reg != AMDGPU::NoRegister && "Should never be unset");
    ScratchWaveOffsetReg = Reg;
    if (isEntryFunction())
      FrameOffsetReg = ScratchWaveOffsetReg;
  }

  unsigned getQueuePtrUserSGPR() const {
    return ArgInfo.QueuePtr.getRegister();
  }

  unsigned getImplicitBufferPtrUserSGPR() const {
    return ArgInfo.ImplicitBufferPtr.getRegister();
  }

  bool hasSpilledSGPRs() const {
    return HasSpilledSGPRs;
  }

  void setHasSpilledSGPRs(bool Spill = true) {
    HasSpilledSGPRs = Spill;
  }

  bool hasSpilledVGPRs() const {
    return HasSpilledVGPRs;
  }

  void setHasSpilledVGPRs(bool Spill = true) {
    HasSpilledVGPRs = Spill;
  }

  bool hasNonSpillStackObjects() const {
    return HasNonSpillStackObjects;
  }

  void setHasNonSpillStackObjects(bool StackObject = true) {
    HasNonSpillStackObjects = StackObject;
  }

  unsigned getNumSpilledSGPRs() const {
    return NumSpilledSGPRs;
  }

  unsigned getNumSpilledVGPRs() const {
    return NumSpilledVGPRs;
  }

  void addToSpilledSGPRs(unsigned num) {
    NumSpilledSGPRs += num;
  }

  void addToSpilledVGPRs(unsigned num) {
    NumSpilledVGPRs += num;
  }

  unsigned getPSInputAddr() const {
    return PSInputAddr;
  }

  unsigned getPSInputEnable() const {
    return PSInputEnable;
  }

  bool isPSInputAllocated(unsigned Index) const {
    return PSInputAddr & (1 << Index);
  }

  void markPSInputAllocated(unsigned Index) {
    PSInputAddr |= 1 << Index;
  }

  void markPSInputEnabled(unsigned Index) {
    PSInputEnable |= 1 << Index;
  }

  bool returnsVoid() const {
    return ReturnsVoid;
  }

  void setIfReturnsVoid(bool Value) {
    ReturnsVoid = Value;
  }
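  // Explanatory note (an assumption about typical use, not a guarantee made by
  // this header): the flat work group size and waves-per-EU pairs returned
  // below are normally derived from the "amdgpu-flat-work-group-size" and
  // "amdgpu-waves-per-eu" function attributes, with subtarget defaults filled
  // in when an attribute is absent; e.g. an attribute value of "128,256"
  // yields the pair {128, 256}.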
  /// \returns A pair of default/requested minimum/maximum flat work group
  /// sizes for this function.
  std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const {
    return FlatWorkGroupSizes;
  }

  /// \returns Default/requested minimum flat work group size for this
  /// function.
  unsigned getMinFlatWorkGroupSize() const {
    return FlatWorkGroupSizes.first;
  }

  /// \returns Default/requested maximum flat work group size for this
  /// function.
  unsigned getMaxFlatWorkGroupSize() const {
    return FlatWorkGroupSizes.second;
  }

  /// \returns A pair of default/requested minimum/maximum number of waves per
  /// execution unit.
  std::pair<unsigned, unsigned> getWavesPerEU() const {
    return WavesPerEU;
  }

  /// \returns Default/requested minimum number of waves per execution unit.
  unsigned getMinWavesPerEU() const {
    return WavesPerEU.first;
  }

  /// \returns Default/requested maximum number of waves per execution unit.
  unsigned getMaxWavesPerEU() const {
    return WavesPerEU.second;
  }

  /// \returns Stack object index for \p Dim's work group ID.
  int getDebuggerWorkGroupIDStackObjectIndex(unsigned Dim) const {
    assert(Dim < 3);
    return DebuggerWorkGroupIDStackObjectIndices[Dim];
  }

  /// \brief Sets stack object index for \p Dim's work group ID to \p ObjectIdx.
  void setDebuggerWorkGroupIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
    assert(Dim < 3);
    DebuggerWorkGroupIDStackObjectIndices[Dim] = ObjectIdx;
  }

  /// \returns Stack object index for \p Dim's work item ID.
  int getDebuggerWorkItemIDStackObjectIndex(unsigned Dim) const {
    assert(Dim < 3);
    return DebuggerWorkItemIDStackObjectIndices[Dim];
  }

  /// \brief Sets stack object index for \p Dim's work item ID to \p ObjectIdx.
  void setDebuggerWorkItemIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
    assert(Dim < 3);
    DebuggerWorkItemIDStackObjectIndices[Dim] = ObjectIdx;
  }

  /// \returns SGPR used for \p Dim's work group ID.
  unsigned getWorkGroupIDSGPR(unsigned Dim) const {
    switch (Dim) {
    case 0:
      assert(hasWorkGroupIDX());
      return ArgInfo.WorkGroupIDX.getRegister();
    case 1:
      assert(hasWorkGroupIDY());
      return ArgInfo.WorkGroupIDY.getRegister();
    case 2:
      assert(hasWorkGroupIDZ());
      return ArgInfo.WorkGroupIDZ.getRegister();
    }
    llvm_unreachable("unexpected dimension");
  }

  /// \returns VGPR used for \p Dim's work item ID.
  unsigned getWorkItemIDVGPR(unsigned Dim) const {
    switch (Dim) {
    case 0:
      assert(hasWorkItemIDX());
      return AMDGPU::VGPR0;
    case 1:
      assert(hasWorkItemIDY());
      return AMDGPU::VGPR1;
    case 2:
      assert(hasWorkItemIDZ());
      return AMDGPU::VGPR2;
    }
    llvm_unreachable("unexpected dimension");
  }

  unsigned getLDSWaveSpillSize() const {
    return LDSWaveSpillSize;
  }

  const AMDGPUBufferPseudoSourceValue *getBufferPSV(const SIInstrInfo &TII,
                                                    const Value *BufferRsrc) {
    assert(BufferRsrc);
    auto PSV = BufferPSVs.try_emplace(
      BufferRsrc,
      llvm::make_unique<AMDGPUBufferPseudoSourceValue>(TII));
    return PSV.first->second.get();
  }

  const AMDGPUImagePseudoSourceValue *getImagePSV(const SIInstrInfo &TII,
                                                  const Value *ImgRsrc) {
    assert(ImgRsrc);
    auto PSV = ImagePSVs.try_emplace(
      ImgRsrc,
      llvm::make_unique<AMDGPUImagePseudoSourceValue>(TII));
    return PSV.first->second.get();
  }
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H