1 //==- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface --*- C++ -*-==// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H 15 #define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H 16 17 #include "AMDGPUArgumentUsageInfo.h" 18 #include "AMDGPUMachineFunction.h" 19 #include "SIRegisterInfo.h" 20 #include "llvm/ADT/ArrayRef.h" 21 #include "llvm/ADT/DenseMap.h" 22 #include "llvm/ADT/Optional.h" 23 #include "llvm/ADT/SmallVector.h" 24 #include "llvm/CodeGen/PseudoSourceValue.h" 25 #include "llvm/CodeGen/TargetInstrInfo.h" 26 #include "llvm/MC/MCRegisterInfo.h" 27 #include "llvm/Support/ErrorHandling.h" 28 #include <array> 29 #include <cassert> 30 #include <utility> 31 #include <vector> 32 33 namespace llvm { 34 35 class MachineFrameInfo; 36 class MachineFunction; 37 class SIInstrInfo; 38 class TargetRegisterClass; 39 40 class AMDGPUImagePseudoSourceValue : public PseudoSourceValue { 41 public: 42 // TODO: Is the img rsrc useful? 43 explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII) : 44 PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) {} 45 46 bool isConstant(const MachineFrameInfo *) const override { 47 // This should probably be true for most images, but we will start by being 48 // conservative. 49 return false; 50 } 51 52 bool isAliased(const MachineFrameInfo *) const override { 53 return true; 54 } 55 56 bool mayAlias(const MachineFrameInfo *) const override { 57 return true; 58 } 59 }; 60 61 class AMDGPUBufferPseudoSourceValue : public PseudoSourceValue { 62 public: 63 explicit AMDGPUBufferPseudoSourceValue(const TargetInstrInfo &TII) : 64 PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) { } 65 66 bool isConstant(const MachineFrameInfo *) const override { 67 // This should probably be true for most images, but we will start by being 68 // conservative. 69 return false; 70 } 71 72 bool isAliased(const MachineFrameInfo *) const override { 73 // FIXME: If we ever change image intrinsics to accept fat pointers, then 74 // this could be true for some cases. 75 return false; 76 } 77 78 bool mayAlias(const MachineFrameInfo *) const override { 79 // FIXME: If we ever change image intrinsics to accept fat pointers, then 80 // this could be true for some cases. 81 return false; 82 } 83 }; 84 85 /// This class keeps track of the SPI_SP_INPUT_ADDR config register, which 86 /// tells the hardware which interpolation parameters to load. 87 class SIMachineFunctionInfo final : public AMDGPUMachineFunction { 88 unsigned TIDReg = AMDGPU::NoRegister; 89 90 // Registers that may be reserved for spilling purposes. These may be the same 91 // as the input registers. 92 unsigned ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG; 93 unsigned ScratchWaveOffsetReg = AMDGPU::SCRATCH_WAVE_OFFSET_REG; 94 95 // This is the current function's incremented size from the kernel's scratch 96 // wave offset register. For an entry function, this is exactly the same as 97 // the ScratchWaveOffsetReg. 98 unsigned FrameOffsetReg = AMDGPU::FP_REG; 99 100 // Top of the stack SGPR offset derived from the ScratchWaveOffsetReg. 101 unsigned StackPtrOffsetReg = AMDGPU::SP_REG; 102 103 AMDGPUFunctionArgInfo ArgInfo; 104 105 // Graphics info. 106 unsigned PSInputAddr = 0; 107 unsigned PSInputEnable = 0; 108 109 /// Number of bytes of arguments this function has on the stack. If the callee 110 /// is expected to restore the argument stack this should be a multiple of 16, 111 /// all usable during a tail call. 112 /// 113 /// The alternative would forbid tail call optimisation in some cases: if we 114 /// want to transfer control from a function with 8-bytes of stack-argument 115 /// space to a function with 16-bytes then misalignment of this value would 116 /// make a stack adjustment necessary, which could not be undone by the 117 /// callee. 118 unsigned BytesInStackArgArea = 0; 119 120 bool ReturnsVoid = true; 121 122 // A pair of default/requested minimum/maximum flat work group sizes. 123 // Minimum - first, maximum - second. 124 std::pair<unsigned, unsigned> FlatWorkGroupSizes = {0, 0}; 125 126 // A pair of default/requested minimum/maximum number of waves per execution 127 // unit. Minimum - first, maximum - second. 128 std::pair<unsigned, unsigned> WavesPerEU = {0, 0}; 129 130 // Stack object indices for work group IDs. 131 std::array<int, 3> DebuggerWorkGroupIDStackObjectIndices = {{0, 0, 0}}; 132 133 // Stack object indices for work item IDs. 134 std::array<int, 3> DebuggerWorkItemIDStackObjectIndices = {{0, 0, 0}}; 135 136 DenseMap<const Value *, 137 std::unique_ptr<const AMDGPUBufferPseudoSourceValue>> BufferPSVs; 138 DenseMap<const Value *, 139 std::unique_ptr<const AMDGPUImagePseudoSourceValue>> ImagePSVs; 140 141 private: 142 unsigned LDSWaveSpillSize = 0; 143 unsigned NumUserSGPRs = 0; 144 unsigned NumSystemSGPRs = 0; 145 146 bool HasSpilledSGPRs = false; 147 bool HasSpilledVGPRs = false; 148 bool HasNonSpillStackObjects = false; 149 150 unsigned NumSpilledSGPRs = 0; 151 unsigned NumSpilledVGPRs = 0; 152 153 // Feature bits required for inputs passed in user SGPRs. 154 bool PrivateSegmentBuffer : 1; 155 bool DispatchPtr : 1; 156 bool QueuePtr : 1; 157 bool KernargSegmentPtr : 1; 158 bool DispatchID : 1; 159 bool FlatScratchInit : 1; 160 bool GridWorkgroupCountX : 1; 161 bool GridWorkgroupCountY : 1; 162 bool GridWorkgroupCountZ : 1; 163 164 // Feature bits required for inputs passed in system SGPRs. 165 bool WorkGroupIDX : 1; // Always initialized. 166 bool WorkGroupIDY : 1; 167 bool WorkGroupIDZ : 1; 168 bool WorkGroupInfo : 1; 169 bool PrivateSegmentWaveByteOffset : 1; 170 171 bool WorkItemIDX : 1; // Always initialized. 172 bool WorkItemIDY : 1; 173 bool WorkItemIDZ : 1; 174 175 // Private memory buffer 176 // Compute directly in sgpr[0:1] 177 // Other shaders indirect 64-bits at sgpr[0:1] 178 bool ImplicitBufferPtr : 1; 179 180 // Pointer to where the ABI inserts special kernel arguments separate from the 181 // user arguments. This is an offset from the KernargSegmentPtr. 182 bool ImplicitArgPtr : 1; 183 184 // The hard-wired high half of the address of the global information table 185 // for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since 186 // current hardware only allows a 16 bit value. 187 unsigned GITPtrHigh; 188 189 MCPhysReg getNextUserSGPR() const { 190 assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs"); 191 return AMDGPU::SGPR0 + NumUserSGPRs; 192 } 193 194 MCPhysReg getNextSystemSGPR() const { 195 return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs; 196 } 197 198 public: 199 struct SpilledReg { 200 unsigned VGPR = AMDGPU::NoRegister; 201 int Lane = -1; 202 203 SpilledReg() = default; 204 SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) {} 205 206 bool hasLane() { return Lane != -1;} 207 bool hasReg() { return VGPR != AMDGPU::NoRegister;} 208 }; 209 210 struct SGPRSpillVGPRCSR { 211 // VGPR used for SGPR spills 212 unsigned VGPR; 213 214 // If the VGPR is a CSR, the stack slot used to save/restore it in the 215 // prolog/epilog. 216 Optional<int> FI; 217 218 SGPRSpillVGPRCSR(unsigned V, Optional<int> F) : VGPR(V), FI(F) {} 219 }; 220 221 private: 222 // SGPR->VGPR spilling support. 223 using SpillRegMask = std::pair<unsigned, unsigned>; 224 225 // Track VGPR + wave index for each subregister of the SGPR spilled to 226 // frameindex key. 227 DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills; 228 unsigned NumVGPRSpillLanes = 0; 229 SmallVector<SGPRSpillVGPRCSR, 2> SpillVGPRs; 230 231 public: 232 SIMachineFunctionInfo(const MachineFunction &MF); 233 234 ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const { 235 auto I = SGPRToVGPRSpills.find(FrameIndex); 236 return (I == SGPRToVGPRSpills.end()) ? 237 ArrayRef<SpilledReg>() : makeArrayRef(I->second); 238 } 239 240 ArrayRef<SGPRSpillVGPRCSR> getSGPRSpillVGPRs() const { 241 return SpillVGPRs; 242 } 243 244 bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI); 245 void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI); 246 247 bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; } 248 unsigned getTIDReg() const { return TIDReg; } 249 void setTIDReg(unsigned Reg) { TIDReg = Reg; } 250 251 unsigned getBytesInStackArgArea() const { 252 return BytesInStackArgArea; 253 } 254 255 void setBytesInStackArgArea(unsigned Bytes) { 256 BytesInStackArgArea = Bytes; 257 } 258 259 // Add user SGPRs. 260 unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI); 261 unsigned addDispatchPtr(const SIRegisterInfo &TRI); 262 unsigned addQueuePtr(const SIRegisterInfo &TRI); 263 unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI); 264 unsigned addDispatchID(const SIRegisterInfo &TRI); 265 unsigned addFlatScratchInit(const SIRegisterInfo &TRI); 266 unsigned addImplicitBufferPtr(const SIRegisterInfo &TRI); 267 268 // Add system SGPRs. 269 unsigned addWorkGroupIDX() { 270 ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR()); 271 NumSystemSGPRs += 1; 272 return ArgInfo.WorkGroupIDX.getRegister(); 273 } 274 275 unsigned addWorkGroupIDY() { 276 ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(getNextSystemSGPR()); 277 NumSystemSGPRs += 1; 278 return ArgInfo.WorkGroupIDY.getRegister(); 279 } 280 281 unsigned addWorkGroupIDZ() { 282 ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(getNextSystemSGPR()); 283 NumSystemSGPRs += 1; 284 return ArgInfo.WorkGroupIDZ.getRegister(); 285 } 286 287 unsigned addWorkGroupInfo() { 288 ArgInfo.WorkGroupInfo = ArgDescriptor::createRegister(getNextSystemSGPR()); 289 NumSystemSGPRs += 1; 290 return ArgInfo.WorkGroupInfo.getRegister(); 291 } 292 293 // Add special VGPR inputs 294 void setWorkItemIDX(ArgDescriptor Arg) { 295 ArgInfo.WorkItemIDX = Arg; 296 } 297 298 void setWorkItemIDY(ArgDescriptor Arg) { 299 ArgInfo.WorkItemIDY = Arg; 300 } 301 302 void setWorkItemIDZ(ArgDescriptor Arg) { 303 ArgInfo.WorkItemIDZ = Arg; 304 } 305 306 unsigned addPrivateSegmentWaveByteOffset() { 307 ArgInfo.PrivateSegmentWaveByteOffset 308 = ArgDescriptor::createRegister(getNextSystemSGPR()); 309 NumSystemSGPRs += 1; 310 return ArgInfo.PrivateSegmentWaveByteOffset.getRegister(); 311 } 312 313 void setPrivateSegmentWaveByteOffset(unsigned Reg) { 314 ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(Reg); 315 } 316 317 bool hasPrivateSegmentBuffer() const { 318 return PrivateSegmentBuffer; 319 } 320 321 bool hasDispatchPtr() const { 322 return DispatchPtr; 323 } 324 325 bool hasQueuePtr() const { 326 return QueuePtr; 327 } 328 329 bool hasKernargSegmentPtr() const { 330 return KernargSegmentPtr; 331 } 332 333 bool hasDispatchID() const { 334 return DispatchID; 335 } 336 337 bool hasFlatScratchInit() const { 338 return FlatScratchInit; 339 } 340 341 bool hasGridWorkgroupCountX() const { 342 return GridWorkgroupCountX; 343 } 344 345 bool hasGridWorkgroupCountY() const { 346 return GridWorkgroupCountY; 347 } 348 349 bool hasGridWorkgroupCountZ() const { 350 return GridWorkgroupCountZ; 351 } 352 353 bool hasWorkGroupIDX() const { 354 return WorkGroupIDX; 355 } 356 357 bool hasWorkGroupIDY() const { 358 return WorkGroupIDY; 359 } 360 361 bool hasWorkGroupIDZ() const { 362 return WorkGroupIDZ; 363 } 364 365 bool hasWorkGroupInfo() const { 366 return WorkGroupInfo; 367 } 368 369 bool hasPrivateSegmentWaveByteOffset() const { 370 return PrivateSegmentWaveByteOffset; 371 } 372 373 bool hasWorkItemIDX() const { 374 return WorkItemIDX; 375 } 376 377 bool hasWorkItemIDY() const { 378 return WorkItemIDY; 379 } 380 381 bool hasWorkItemIDZ() const { 382 return WorkItemIDZ; 383 } 384 385 bool hasImplicitArgPtr() const { 386 return ImplicitArgPtr; 387 } 388 389 bool hasImplicitBufferPtr() const { 390 return ImplicitBufferPtr; 391 } 392 393 AMDGPUFunctionArgInfo &getArgInfo() { 394 return ArgInfo; 395 } 396 397 const AMDGPUFunctionArgInfo &getArgInfo() const { 398 return ArgInfo; 399 } 400 401 std::pair<const ArgDescriptor *, const TargetRegisterClass *> 402 getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const { 403 return ArgInfo.getPreloadedValue(Value); 404 } 405 406 unsigned getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const { 407 return ArgInfo.getPreloadedValue(Value).first->getRegister(); 408 } 409 410 unsigned getGITPtrHigh() const { 411 return GITPtrHigh; 412 } 413 414 unsigned getNumUserSGPRs() const { 415 return NumUserSGPRs; 416 } 417 418 unsigned getNumPreloadedSGPRs() const { 419 return NumUserSGPRs + NumSystemSGPRs; 420 } 421 422 unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const { 423 return ArgInfo.PrivateSegmentWaveByteOffset.getRegister(); 424 } 425 426 /// \brief Returns the physical register reserved for use as the resource 427 /// descriptor for scratch accesses. 428 unsigned getScratchRSrcReg() const { 429 return ScratchRSrcReg; 430 } 431 432 void setScratchRSrcReg(unsigned Reg) { 433 assert(Reg != AMDGPU::NoRegister && "Should never be unset"); 434 ScratchRSrcReg = Reg; 435 } 436 437 unsigned getScratchWaveOffsetReg() const { 438 return ScratchWaveOffsetReg; 439 } 440 441 unsigned getFrameOffsetReg() const { 442 return FrameOffsetReg; 443 } 444 445 void setStackPtrOffsetReg(unsigned Reg) { 446 StackPtrOffsetReg = Reg; 447 } 448 449 // Note the unset value for this is AMDGPU::SP_REG rather than 450 // NoRegister. This is mostly a workaround for MIR tests where state that 451 // can't be directly computed from the function is not preserved in serialized 452 // MIR. 453 unsigned getStackPtrOffsetReg() const { 454 return StackPtrOffsetReg; 455 } 456 457 void setScratchWaveOffsetReg(unsigned Reg) { 458 assert(Reg != AMDGPU::NoRegister && "Should never be unset"); 459 ScratchWaveOffsetReg = Reg; 460 if (isEntryFunction()) 461 FrameOffsetReg = ScratchWaveOffsetReg; 462 } 463 464 unsigned getQueuePtrUserSGPR() const { 465 return ArgInfo.QueuePtr.getRegister(); 466 } 467 468 unsigned getImplicitBufferPtrUserSGPR() const { 469 return ArgInfo.ImplicitBufferPtr.getRegister(); 470 } 471 472 bool hasSpilledSGPRs() const { 473 return HasSpilledSGPRs; 474 } 475 476 void setHasSpilledSGPRs(bool Spill = true) { 477 HasSpilledSGPRs = Spill; 478 } 479 480 bool hasSpilledVGPRs() const { 481 return HasSpilledVGPRs; 482 } 483 484 void setHasSpilledVGPRs(bool Spill = true) { 485 HasSpilledVGPRs = Spill; 486 } 487 488 bool hasNonSpillStackObjects() const { 489 return HasNonSpillStackObjects; 490 } 491 492 void setHasNonSpillStackObjects(bool StackObject = true) { 493 HasNonSpillStackObjects = StackObject; 494 } 495 496 unsigned getNumSpilledSGPRs() const { 497 return NumSpilledSGPRs; 498 } 499 500 unsigned getNumSpilledVGPRs() const { 501 return NumSpilledVGPRs; 502 } 503 504 void addToSpilledSGPRs(unsigned num) { 505 NumSpilledSGPRs += num; 506 } 507 508 void addToSpilledVGPRs(unsigned num) { 509 NumSpilledVGPRs += num; 510 } 511 512 unsigned getPSInputAddr() const { 513 return PSInputAddr; 514 } 515 516 unsigned getPSInputEnable() const { 517 return PSInputEnable; 518 } 519 520 bool isPSInputAllocated(unsigned Index) const { 521 return PSInputAddr & (1 << Index); 522 } 523 524 void markPSInputAllocated(unsigned Index) { 525 PSInputAddr |= 1 << Index; 526 } 527 528 void markPSInputEnabled(unsigned Index) { 529 PSInputEnable |= 1 << Index; 530 } 531 532 bool returnsVoid() const { 533 return ReturnsVoid; 534 } 535 536 void setIfReturnsVoid(bool Value) { 537 ReturnsVoid = Value; 538 } 539 540 /// \returns A pair of default/requested minimum/maximum flat work group sizes 541 /// for this function. 542 std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const { 543 return FlatWorkGroupSizes; 544 } 545 546 /// \returns Default/requested minimum flat work group size for this function. 547 unsigned getMinFlatWorkGroupSize() const { 548 return FlatWorkGroupSizes.first; 549 } 550 551 /// \returns Default/requested maximum flat work group size for this function. 552 unsigned getMaxFlatWorkGroupSize() const { 553 return FlatWorkGroupSizes.second; 554 } 555 556 /// \returns A pair of default/requested minimum/maximum number of waves per 557 /// execution unit. 558 std::pair<unsigned, unsigned> getWavesPerEU() const { 559 return WavesPerEU; 560 } 561 562 /// \returns Default/requested minimum number of waves per execution unit. 563 unsigned getMinWavesPerEU() const { 564 return WavesPerEU.first; 565 } 566 567 /// \returns Default/requested maximum number of waves per execution unit. 568 unsigned getMaxWavesPerEU() const { 569 return WavesPerEU.second; 570 } 571 572 /// \returns Stack object index for \p Dim's work group ID. 573 int getDebuggerWorkGroupIDStackObjectIndex(unsigned Dim) const { 574 assert(Dim < 3); 575 return DebuggerWorkGroupIDStackObjectIndices[Dim]; 576 } 577 578 /// \brief Sets stack object index for \p Dim's work group ID to \p ObjectIdx. 579 void setDebuggerWorkGroupIDStackObjectIndex(unsigned Dim, int ObjectIdx) { 580 assert(Dim < 3); 581 DebuggerWorkGroupIDStackObjectIndices[Dim] = ObjectIdx; 582 } 583 584 /// \returns Stack object index for \p Dim's work item ID. 585 int getDebuggerWorkItemIDStackObjectIndex(unsigned Dim) const { 586 assert(Dim < 3); 587 return DebuggerWorkItemIDStackObjectIndices[Dim]; 588 } 589 590 /// \brief Sets stack object index for \p Dim's work item ID to \p ObjectIdx. 591 void setDebuggerWorkItemIDStackObjectIndex(unsigned Dim, int ObjectIdx) { 592 assert(Dim < 3); 593 DebuggerWorkItemIDStackObjectIndices[Dim] = ObjectIdx; 594 } 595 596 /// \returns SGPR used for \p Dim's work group ID. 597 unsigned getWorkGroupIDSGPR(unsigned Dim) const { 598 switch (Dim) { 599 case 0: 600 assert(hasWorkGroupIDX()); 601 return ArgInfo.WorkGroupIDX.getRegister(); 602 case 1: 603 assert(hasWorkGroupIDY()); 604 return ArgInfo.WorkGroupIDY.getRegister(); 605 case 2: 606 assert(hasWorkGroupIDZ()); 607 return ArgInfo.WorkGroupIDZ.getRegister(); 608 } 609 llvm_unreachable("unexpected dimension"); 610 } 611 612 /// \returns VGPR used for \p Dim' work item ID. 613 unsigned getWorkItemIDVGPR(unsigned Dim) const { 614 switch (Dim) { 615 case 0: 616 assert(hasWorkItemIDX()); 617 return AMDGPU::VGPR0; 618 case 1: 619 assert(hasWorkItemIDY()); 620 return AMDGPU::VGPR1; 621 case 2: 622 assert(hasWorkItemIDZ()); 623 return AMDGPU::VGPR2; 624 } 625 llvm_unreachable("unexpected dimension"); 626 } 627 628 unsigned getLDSWaveSpillSize() const { 629 return LDSWaveSpillSize; 630 } 631 632 const AMDGPUBufferPseudoSourceValue *getBufferPSV(const SIInstrInfo &TII, 633 const Value *BufferRsrc) { 634 assert(BufferRsrc); 635 auto PSV = BufferPSVs.try_emplace( 636 BufferRsrc, 637 llvm::make_unique<AMDGPUBufferPseudoSourceValue>(TII)); 638 return PSV.first->second.get(); 639 } 640 641 const AMDGPUImagePseudoSourceValue *getImagePSV(const SIInstrInfo &TII, 642 const Value *ImgRsrc) { 643 assert(ImgRsrc); 644 auto PSV = ImagePSVs.try_emplace( 645 ImgRsrc, 646 llvm::make_unique<AMDGPUImagePseudoSourceValue>(TII)); 647 return PSV.first->second.get(); 648 } 649 }; 650 651 } // end namespace llvm 652 653 #endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H 654