//==- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface --*- C++ -*-==//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
#define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H

#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUMachineFunction.h"
#include "SIRegisterInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include <array>
#include <cassert>
#include <utility>
#include <vector>

namespace llvm {

class MachineFrameInfo;
class MachineFunction;
class TargetRegisterClass;

class AMDGPUImagePseudoSourceValue : public PseudoSourceValue {
public:
  explicit AMDGPUImagePseudoSourceValue() :
    PseudoSourceValue(PseudoSourceValue::TargetCustom) {}

  bool isConstant(const MachineFrameInfo *) const override {
    // This should probably be true for most images, but we will start by being
    // conservative.
    return false;
  }

  bool isAliased(const MachineFrameInfo *) const override {
    // FIXME: If we ever change image intrinsics to accept fat pointers, then
    // this could be true for some cases.
    return false;
  }

  bool mayAlias(const MachineFrameInfo *) const override {
    // FIXME: If we ever change image intrinsics to accept fat pointers, then
    // this could be true for some cases.
    return false;
  }
};

class AMDGPUBufferPseudoSourceValue : public PseudoSourceValue {
public:
  explicit AMDGPUBufferPseudoSourceValue() :
    PseudoSourceValue(PseudoSourceValue::TargetCustom) {}

  bool isConstant(const MachineFrameInfo *) const override {
    // This should probably be true for most buffers, but we will start by
    // being conservative.
    return false;
  }

  bool isAliased(const MachineFrameInfo *) const override {
    // FIXME: If we ever change buffer intrinsics to accept fat pointers, then
    // this could be true for some cases.
    return false;
  }

  bool mayAlias(const MachineFrameInfo *) const override {
    // FIXME: If we ever change buffer intrinsics to accept fat pointers, then
    // this could be true for some cases.
    return false;
  }
};
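
// Illustrative sketch (not part of the original header): code that lowers a
// buffer or image intrinsic would typically tag the MachineMemOperand it
// creates with one of these pseudo source values, roughly like the following.
// MF, MFI, Size and Align are placeholder names assumed to be in scope, and
// the exact getMachineMemOperand overload may differ between LLVM versions.
//
//   MachineMemOperand *MMO = MF.getMachineMemOperand(
//       MachinePointerInfo(MFI->getBufferPSV()),
//       MachineMemOperand::MOLoad, Size, Align);
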
/// This class keeps track of the SPI_PS_INPUT_ADDR config register, which
/// tells the hardware which interpolation parameters to load.
class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
  // FIXME: This should be removed and getPreloadedValue moved here.
  friend class SIRegisterInfo;

  unsigned TIDReg = AMDGPU::NoRegister;

  // Registers that may be reserved for spilling purposes. These may be the
  // same as the input registers.
  unsigned ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG;
  unsigned ScratchWaveOffsetReg = AMDGPU::SCRATCH_WAVE_OFFSET_REG;

  // This is the current function's incremented size from the kernel's scratch
  // wave offset register. For an entry function, this is exactly the same as
  // the ScratchWaveOffsetReg.
  unsigned FrameOffsetReg = AMDGPU::FP_REG;

  // Top of the stack SGPR offset derived from the ScratchWaveOffsetReg.
  unsigned StackPtrOffsetReg = AMDGPU::SP_REG;

  AMDGPUFunctionArgInfo ArgInfo;

  // Graphics info.
  unsigned PSInputAddr = 0;
  unsigned PSInputEnable = 0;

  bool ReturnsVoid = true;

  // A pair of default/requested minimum/maximum flat work group sizes.
  // Minimum - first, maximum - second.
  std::pair<unsigned, unsigned> FlatWorkGroupSizes = {0, 0};

  // A pair of default/requested minimum/maximum number of waves per execution
  // unit. Minimum - first, maximum - second.
  std::pair<unsigned, unsigned> WavesPerEU = {0, 0};

  // Stack object indices for work group IDs.
  std::array<int, 3> DebuggerWorkGroupIDStackObjectIndices = {{0, 0, 0}};

  // Stack object indices for work item IDs.
  std::array<int, 3> DebuggerWorkItemIDStackObjectIndices = {{0, 0, 0}};

  AMDGPUBufferPseudoSourceValue BufferPSV;
  AMDGPUImagePseudoSourceValue ImagePSV;

private:
  unsigned LDSWaveSpillSize = 0;
  unsigned ScratchOffsetReg;
  unsigned NumUserSGPRs = 0;
  unsigned NumSystemSGPRs = 0;

  bool HasSpilledSGPRs = false;
  bool HasSpilledVGPRs = false;
  bool HasNonSpillStackObjects = false;

  unsigned NumSpilledSGPRs = 0;
  unsigned NumSpilledVGPRs = 0;

  // Feature bits required for inputs passed in user SGPRs.
  bool PrivateSegmentBuffer : 1;
  bool DispatchPtr : 1;
  bool QueuePtr : 1;
  bool KernargSegmentPtr : 1;
  bool DispatchID : 1;
  bool FlatScratchInit : 1;
  bool GridWorkgroupCountX : 1;
  bool GridWorkgroupCountY : 1;
  bool GridWorkgroupCountZ : 1;

  // Feature bits required for inputs passed in system SGPRs.
  bool WorkGroupIDX : 1; // Always initialized.
  bool WorkGroupIDY : 1;
  bool WorkGroupIDZ : 1;
  bool WorkGroupInfo : 1;
  bool PrivateSegmentWaveByteOffset : 1;

  bool WorkItemIDX : 1; // Always initialized.
  bool WorkItemIDY : 1;
  bool WorkItemIDZ : 1;

  // Private memory buffer
  // Compute directly in sgpr[0:1]
  // Other shaders indirect 64-bits at sgpr[0:1]
  bool ImplicitBufferPtr : 1;

  // Pointer to where the ABI inserts special kernel arguments separate from
  // the user arguments. This is an offset from the KernargSegmentPtr.
  bool ImplicitArgPtr : 1;

  MCPhysReg getNextUserSGPR() const {
    assert(NumSystemSGPRs == 0 &&
           "System SGPRs must be added after user SGPRs");
    return AMDGPU::SGPR0 + NumUserSGPRs;
  }

  MCPhysReg getNextSystemSGPR() const {
    return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
  }

public:
  struct SpilledReg {
    unsigned VGPR = AMDGPU::NoRegister;
    int Lane = -1;

    SpilledReg() = default;
    SpilledReg(unsigned R, int L) : VGPR(R), Lane(L) {}

    bool hasLane() { return Lane != -1; }
    bool hasReg() { return VGPR != AMDGPU::NoRegister; }
  };

  struct SGPRSpillVGPRCSR {
    // VGPR used for SGPR spills
    unsigned VGPR;

    // If the VGPR is a CSR, the stack slot used to save/restore it in the
    // prolog/epilog.
    Optional<int> FI;

    SGPRSpillVGPRCSR(unsigned V, Optional<int> F) : VGPR(V), FI(F) {}
  };
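
  // Illustrative sketch (not part of the original header): spilling a 4-dword
  // SGPR tuple to frame index FI records one SpilledReg per 32-bit
  // subregister. Assuming enough lanes remain free in the current spill VGPR
  // V, the bookkeeping conceptually ends up as
  //
  //   SGPRToVGPRSpills[FI] == { {V, 0}, {V, 1}, {V, 2}, {V, 3} }
  //
  // and is later read back with getSGPRToVGPRSpills(FI). FI and V are
  // placeholder names, not values defined by this header.
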
private:
  // SGPR->VGPR spilling support.
  using SpillRegMask = std::pair<unsigned, unsigned>;

  // Track VGPR + lane index for each subregister of the SGPR spilled to
  // the frame index key.
  DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills;
  unsigned NumVGPRSpillLanes = 0;
  SmallVector<SGPRSpillVGPRCSR, 2> SpillVGPRs;

public:
  SIMachineFunctionInfo(const MachineFunction &MF);

  ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const {
    auto I = SGPRToVGPRSpills.find(FrameIndex);
    return (I == SGPRToVGPRSpills.end()) ?
      ArrayRef<SpilledReg>() : makeArrayRef(I->second);
  }

  ArrayRef<SGPRSpillVGPRCSR> getSGPRSpillVGPRs() const {
    return SpillVGPRs;
  }

  bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
  void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI);

  bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; }
  unsigned getTIDReg() const { return TIDReg; }
  void setTIDReg(unsigned Reg) { TIDReg = Reg; }

  // Add user SGPRs.
  unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI);
  unsigned addDispatchPtr(const SIRegisterInfo &TRI);
  unsigned addQueuePtr(const SIRegisterInfo &TRI);
  unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI);
  unsigned addDispatchID(const SIRegisterInfo &TRI);
  unsigned addFlatScratchInit(const SIRegisterInfo &TRI);
  unsigned addImplicitBufferPtr(const SIRegisterInfo &TRI);

  // Add system SGPRs.
  unsigned addWorkGroupIDX() {
    ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupIDX.getRegister();
  }

  unsigned addWorkGroupIDY() {
    ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupIDY.getRegister();
  }

  unsigned addWorkGroupIDZ() {
    ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupIDZ.getRegister();
  }

  unsigned addWorkGroupInfo() {
    ArgInfo.WorkGroupInfo = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupInfo.getRegister();
  }

  // Add special VGPR inputs.
  void setWorkItemIDX(ArgDescriptor Arg) {
    ArgInfo.WorkItemIDX = Arg;
  }

  void setWorkItemIDY(ArgDescriptor Arg) {
    ArgInfo.WorkItemIDY = Arg;
  }

  void setWorkItemIDZ(ArgDescriptor Arg) {
    ArgInfo.WorkItemIDZ = Arg;
  }

  unsigned addPrivateSegmentWaveByteOffset() {
    ArgInfo.PrivateSegmentWaveByteOffset
      = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
  }

  void setPrivateSegmentWaveByteOffset(unsigned Reg) {
    ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(Reg);
  }
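
  // Illustrative sketch (not part of the original header): system SGPRs are
  // handed out sequentially right after the user SGPRs, so with a
  // hypothetical NumUserSGPRs of 6 the calls above behave as
  //
  //   addWorkGroupIDX();                 // returns AMDGPU::SGPR6
  //   addWorkGroupIDY();                 // returns AMDGPU::SGPR7
  //   addPrivateSegmentWaveByteOffset(); // returns AMDGPU::SGPR8
  //
  // The actual register numbers depend on which user SGPR inputs were added
  // for the function.
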
  bool hasPrivateSegmentBuffer() const {
    return PrivateSegmentBuffer;
  }

  bool hasDispatchPtr() const {
    return DispatchPtr;
  }

  bool hasQueuePtr() const {
    return QueuePtr;
  }

  bool hasKernargSegmentPtr() const {
    return KernargSegmentPtr;
  }

  bool hasDispatchID() const {
    return DispatchID;
  }

  bool hasFlatScratchInit() const {
    return FlatScratchInit;
  }

  bool hasGridWorkgroupCountX() const {
    return GridWorkgroupCountX;
  }

  bool hasGridWorkgroupCountY() const {
    return GridWorkgroupCountY;
  }

  bool hasGridWorkgroupCountZ() const {
    return GridWorkgroupCountZ;
  }

  bool hasWorkGroupIDX() const {
    return WorkGroupIDX;
  }

  bool hasWorkGroupIDY() const {
    return WorkGroupIDY;
  }

  bool hasWorkGroupIDZ() const {
    return WorkGroupIDZ;
  }

  bool hasWorkGroupInfo() const {
    return WorkGroupInfo;
  }

  bool hasPrivateSegmentWaveByteOffset() const {
    return PrivateSegmentWaveByteOffset;
  }

  bool hasWorkItemIDX() const {
    return WorkItemIDX;
  }

  bool hasWorkItemIDY() const {
    return WorkItemIDY;
  }

  bool hasWorkItemIDZ() const {
    return WorkItemIDZ;
  }

  bool hasImplicitArgPtr() const {
    return ImplicitArgPtr;
  }

  bool hasImplicitBufferPtr() const {
    return ImplicitBufferPtr;
  }

  AMDGPUFunctionArgInfo &getArgInfo() {
    return ArgInfo;
  }

  const AMDGPUFunctionArgInfo &getArgInfo() const {
    return ArgInfo;
  }

  std::pair<const ArgDescriptor *, const TargetRegisterClass *>
  getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
    return ArgInfo.getPreloadedValue(Value);
  }

  unsigned getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
    return ArgInfo.getPreloadedValue(Value).first->getRegister();
  }
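
  // Illustrative sketch (not part of the original header): callers usually
  // unpack the pair returned by getPreloadedValue, e.g. (QUEUE_PTR is used
  // here only as an example value; Arg is null if it was not preloaded, and
  // getRegister() assumes the value, if present, lives in a register):
  //
  //   const ArgDescriptor *Arg;
  //   const TargetRegisterClass *RC;
  //   std::tie(Arg, RC) =
  //       MFI.getPreloadedValue(AMDGPUFunctionArgInfo::QUEUE_PTR);
  //   unsigned Reg = Arg ? Arg->getRegister() : AMDGPU::NoRegister;
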
  unsigned getNumUserSGPRs() const {
    return NumUserSGPRs;
  }

  unsigned getNumPreloadedSGPRs() const {
    return NumUserSGPRs + NumSystemSGPRs;
  }

  unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const {
    return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
  }

  /// \brief Returns the physical register reserved for use as the resource
  /// descriptor for scratch accesses.
  unsigned getScratchRSrcReg() const {
    return ScratchRSrcReg;
  }

  void setScratchRSrcReg(unsigned Reg) {
    assert(Reg != AMDGPU::NoRegister && "Should never be unset");
    ScratchRSrcReg = Reg;
  }

  unsigned getScratchWaveOffsetReg() const {
    return ScratchWaveOffsetReg;
  }

  unsigned getFrameOffsetReg() const {
    return FrameOffsetReg;
  }

  void setStackPtrOffsetReg(unsigned Reg) {
    StackPtrOffsetReg = Reg;
  }

  // Note the unset value for this is AMDGPU::SP_REG rather than NoRegister.
  // This is mostly a workaround for MIR tests where state that can't be
  // directly computed from the function is not preserved in serialized MIR.
  unsigned getStackPtrOffsetReg() const {
    return StackPtrOffsetReg;
  }

  void setScratchWaveOffsetReg(unsigned Reg) {
    assert(Reg != AMDGPU::NoRegister && "Should never be unset");
    ScratchWaveOffsetReg = Reg;
    if (isEntryFunction())
      FrameOffsetReg = ScratchWaveOffsetReg;
  }

  unsigned getQueuePtrUserSGPR() const {
    return ArgInfo.QueuePtr.getRegister();
  }

  unsigned getImplicitBufferPtrUserSGPR() const {
    return ArgInfo.ImplicitBufferPtr.getRegister();
  }

  bool hasSpilledSGPRs() const {
    return HasSpilledSGPRs;
  }

  void setHasSpilledSGPRs(bool Spill = true) {
    HasSpilledSGPRs = Spill;
  }

  bool hasSpilledVGPRs() const {
    return HasSpilledVGPRs;
  }

  void setHasSpilledVGPRs(bool Spill = true) {
    HasSpilledVGPRs = Spill;
  }

  bool hasNonSpillStackObjects() const {
    return HasNonSpillStackObjects;
  }

  void setHasNonSpillStackObjects(bool StackObject = true) {
    HasNonSpillStackObjects = StackObject;
  }

  unsigned getNumSpilledSGPRs() const {
    return NumSpilledSGPRs;
  }

  unsigned getNumSpilledVGPRs() const {
    return NumSpilledVGPRs;
  }

  void addToSpilledSGPRs(unsigned num) {
    NumSpilledSGPRs += num;
  }

  void addToSpilledVGPRs(unsigned num) {
    NumSpilledVGPRs += num;
  }

  unsigned getPSInputAddr() const {
    return PSInputAddr;
  }

  unsigned getPSInputEnable() const {
    return PSInputEnable;
  }

  bool isPSInputAllocated(unsigned Index) const {
    return PSInputAddr & (1 << Index);
  }

  void markPSInputAllocated(unsigned Index) {
    PSInputAddr |= 1 << Index;
  }

  void markPSInputEnabled(unsigned Index) {
    PSInputEnable |= 1 << Index;
  }
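
  // Illustrative sketch (not part of the original header): PSInputAddr and
  // PSInputEnable are bit masks indexed by pixel shader input. For a shader
  // using inputs 0 and 2:
  //
  //   markPSInputAllocated(0); // PSInputAddr   == 0b001
  //   markPSInputEnabled(0);   // PSInputEnable == 0b001
  //   markPSInputAllocated(2); // PSInputAddr   == 0b101
  //   markPSInputEnabled(2);   // PSInputEnable == 0b101
  //   isPSInputAllocated(1);   // false
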
  bool returnsVoid() const {
    return ReturnsVoid;
  }

  void setIfReturnsVoid(bool Value) {
    ReturnsVoid = Value;
  }

  /// \returns A pair of default/requested minimum/maximum flat work group
  /// sizes for this function.
  std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const {
    return FlatWorkGroupSizes;
  }

  /// \returns Default/requested minimum flat work group size for this function.
  unsigned getMinFlatWorkGroupSize() const {
    return FlatWorkGroupSizes.first;
  }

  /// \returns Default/requested maximum flat work group size for this function.
  unsigned getMaxFlatWorkGroupSize() const {
    return FlatWorkGroupSizes.second;
  }

  /// \returns A pair of default/requested minimum/maximum number of waves per
  /// execution unit.
  std::pair<unsigned, unsigned> getWavesPerEU() const {
    return WavesPerEU;
  }

  /// \returns Default/requested minimum number of waves per execution unit.
  unsigned getMinWavesPerEU() const {
    return WavesPerEU.first;
  }

  /// \returns Default/requested maximum number of waves per execution unit.
  unsigned getMaxWavesPerEU() const {
    return WavesPerEU.second;
  }

  /// \returns Stack object index for \p Dim's work group ID.
  int getDebuggerWorkGroupIDStackObjectIndex(unsigned Dim) const {
    assert(Dim < 3);
    return DebuggerWorkGroupIDStackObjectIndices[Dim];
  }

  /// \brief Sets stack object index for \p Dim's work group ID to \p ObjectIdx.
  void setDebuggerWorkGroupIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
    assert(Dim < 3);
    DebuggerWorkGroupIDStackObjectIndices[Dim] = ObjectIdx;
  }

  /// \returns Stack object index for \p Dim's work item ID.
  int getDebuggerWorkItemIDStackObjectIndex(unsigned Dim) const {
    assert(Dim < 3);
    return DebuggerWorkItemIDStackObjectIndices[Dim];
  }

  /// \brief Sets stack object index for \p Dim's work item ID to \p ObjectIdx.
  void setDebuggerWorkItemIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
    assert(Dim < 3);
    DebuggerWorkItemIDStackObjectIndices[Dim] = ObjectIdx;
  }

  /// \returns SGPR used for \p Dim's work group ID.
  unsigned getWorkGroupIDSGPR(unsigned Dim) const {
    switch (Dim) {
    case 0:
      assert(hasWorkGroupIDX());
      return ArgInfo.WorkGroupIDX.getRegister();
    case 1:
      assert(hasWorkGroupIDY());
      return ArgInfo.WorkGroupIDY.getRegister();
    case 2:
      assert(hasWorkGroupIDZ());
      return ArgInfo.WorkGroupIDZ.getRegister();
    }
    llvm_unreachable("unexpected dimension");
  }

  /// \returns VGPR used for \p Dim's work item ID.
  unsigned getWorkItemIDVGPR(unsigned Dim) const {
    switch (Dim) {
    case 0:
      assert(hasWorkItemIDX());
      return AMDGPU::VGPR0;
    case 1:
      assert(hasWorkItemIDY());
      return AMDGPU::VGPR1;
    case 2:
      assert(hasWorkItemIDZ());
      return AMDGPU::VGPR2;
    }
    llvm_unreachable("unexpected dimension");
  }

  unsigned getLDSWaveSpillSize() const {
    return LDSWaveSpillSize;
  }

  const AMDGPUBufferPseudoSourceValue *getBufferPSV() const {
    return &BufferPSV;
  }

  const AMDGPUImagePseudoSourceValue *getImagePSV() const {
    return &ImagePSV;
  }
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H