1 //===- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface -*- C++ -*-==// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H 15 #define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H 16 17 #include "AMDGPUMachineFunction.h" 18 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 19 #include "SIRegisterInfo.h" 20 #include "llvm/CodeGen/PseudoSourceValue.h" 21 #include "llvm/MC/MCRegisterInfo.h" 22 #include "llvm/Support/ErrorHandling.h" 23 #include <array> 24 #include <cassert> 25 #include <map> 26 #include <utility> 27 28 namespace llvm { 29 30 class AMDGPUImagePseudoSourceValue : public PseudoSourceValue { 31 public: 32 explicit AMDGPUImagePseudoSourceValue() : 33 PseudoSourceValue(PseudoSourceValue::TargetCustom) { } 34 35 bool isConstant(const MachineFrameInfo *) const override { 36 // This should probably be true for most images, but we will start by being 37 // conservative. 38 return false; 39 } 40 41 bool isAliased(const MachineFrameInfo *) const override { 42 // FIXME: If we ever change image intrinsics to accept fat pointers, then 43 // this could be true for some cases. 44 return false; 45 } 46 47 bool mayAlias(const MachineFrameInfo*) const override { 48 // FIXME: If we ever change image intrinsics to accept fat pointers, then 49 // this could be true for some cases. 50 return false; 51 } 52 }; 53 54 class AMDGPUBufferPseudoSourceValue : public PseudoSourceValue { 55 public: 56 explicit AMDGPUBufferPseudoSourceValue() : 57 PseudoSourceValue(PseudoSourceValue::TargetCustom) { } 58 59 bool isConstant(const MachineFrameInfo *) const override { 60 // This should probably be true for most images, but we will start by being 61 // conservative. 62 return false; 63 } 64 65 bool isAliased(const MachineFrameInfo *) const override { 66 // FIXME: If we ever change image intrinsics to accept fat pointers, then 67 // this could be true for some cases. 68 return false; 69 } 70 71 bool mayAlias(const MachineFrameInfo*) const override { 72 // FIXME: If we ever change image intrinsics to accept fat pointers, then 73 // this could be true for some cases. 74 return false; 75 } 76 }; 77 78 /// This class keeps track of the SPI_SP_INPUT_ADDR config register, which 79 /// tells the hardware which interpolation parameters to load. 80 class SIMachineFunctionInfo final : public AMDGPUMachineFunction { 81 // FIXME: This should be removed and getPreloadedValue moved here. 82 friend class SIRegisterInfo; 83 84 unsigned TIDReg; 85 86 // Registers that may be reserved for spilling purposes. These may be the same 87 // as the input registers. 88 unsigned ScratchRSrcReg; 89 unsigned ScratchWaveOffsetReg; 90 91 // This is the current function's incremented size from the kernel's scratch 92 // wave offset register. For an entry function, this is exactly the same as 93 // the ScratchWaveOffsetReg. 94 unsigned FrameOffsetReg; 95 96 // Top of the stack SGPR offset derived from the ScratchWaveOffsetReg. 97 unsigned StackPtrOffsetReg; 98 99 // Input registers for non-HSA ABI 100 unsigned ImplicitBufferPtrUserSGPR; 101 102 // Input registers setup for the HSA ABI. 103 // User SGPRs in allocation order. 104 unsigned PrivateSegmentBufferUserSGPR; 105 unsigned DispatchPtrUserSGPR; 106 unsigned QueuePtrUserSGPR; 107 unsigned KernargSegmentPtrUserSGPR; 108 unsigned DispatchIDUserSGPR; 109 unsigned FlatScratchInitUserSGPR; 110 unsigned PrivateSegmentSizeUserSGPR; 111 unsigned GridWorkGroupCountXUserSGPR; 112 unsigned GridWorkGroupCountYUserSGPR; 113 unsigned GridWorkGroupCountZUserSGPR; 114 115 // System SGPRs in allocation order. 116 unsigned WorkGroupIDXSystemSGPR; 117 unsigned WorkGroupIDYSystemSGPR; 118 unsigned WorkGroupIDZSystemSGPR; 119 unsigned WorkGroupInfoSystemSGPR; 120 unsigned PrivateSegmentWaveByteOffsetSystemSGPR; 121 122 // VGPR inputs. These are always v0, v1 and v2 for entry functions. 123 unsigned WorkItemIDXVGPR; 124 unsigned WorkItemIDYVGPR; 125 unsigned WorkItemIDZVGPR; 126 127 // Graphics info. 128 unsigned PSInputAddr; 129 unsigned PSInputEnable; 130 131 bool ReturnsVoid; 132 133 // A pair of default/requested minimum/maximum flat work group sizes. 134 // Minimum - first, maximum - second. 135 std::pair<unsigned, unsigned> FlatWorkGroupSizes; 136 137 // A pair of default/requested minimum/maximum number of waves per execution 138 // unit. Minimum - first, maximum - second. 139 std::pair<unsigned, unsigned> WavesPerEU; 140 141 // Stack object indices for work group IDs. 142 std::array<int, 3> DebuggerWorkGroupIDStackObjectIndices; 143 // Stack object indices for work item IDs. 144 std::array<int, 3> DebuggerWorkItemIDStackObjectIndices; 145 146 AMDGPUBufferPseudoSourceValue BufferPSV; 147 AMDGPUImagePseudoSourceValue ImagePSV; 148 149 private: 150 unsigned LDSWaveSpillSize; 151 unsigned ScratchOffsetReg; 152 unsigned NumUserSGPRs; 153 unsigned NumSystemSGPRs; 154 155 bool HasSpilledSGPRs; 156 bool HasSpilledVGPRs; 157 bool HasNonSpillStackObjects; 158 159 unsigned NumSpilledSGPRs; 160 unsigned NumSpilledVGPRs; 161 162 // Feature bits required for inputs passed in user SGPRs. 163 bool PrivateSegmentBuffer : 1; 164 bool DispatchPtr : 1; 165 bool QueuePtr : 1; 166 bool KernargSegmentPtr : 1; 167 bool DispatchID : 1; 168 bool FlatScratchInit : 1; 169 bool GridWorkgroupCountX : 1; 170 bool GridWorkgroupCountY : 1; 171 bool GridWorkgroupCountZ : 1; 172 173 // Feature bits required for inputs passed in system SGPRs. 174 bool WorkGroupIDX : 1; // Always initialized. 175 bool WorkGroupIDY : 1; 176 bool WorkGroupIDZ : 1; 177 bool WorkGroupInfo : 1; 178 bool PrivateSegmentWaveByteOffset : 1; 179 180 bool WorkItemIDX : 1; // Always initialized. 181 bool WorkItemIDY : 1; 182 bool WorkItemIDZ : 1; 183 184 // Private memory buffer 185 // Compute directly in sgpr[0:1] 186 // Other shaders indirect 64-bits at sgpr[0:1] 187 bool ImplicitBufferPtr : 1; 188 189 // Pointer to where the ABI inserts special kernel arguments separate from the 190 // user arguments. This is an offset from the KernargSegmentPtr. 191 bool ImplicitArgPtr : 1; 192 193 MCPhysReg getNextUserSGPR() const { 194 assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs"); 195 return AMDGPU::SGPR0 + NumUserSGPRs; 196 } 197 198 MCPhysReg getNextSystemSGPR() const { 199 return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs; 200 } 201 202 public: 203 struct SpilledReg { 204 unsigned VGPR = AMDGPU::NoRegister; 205 int Lane = -1; 206 207 SpilledReg() = default; 208 SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) { } 209 210 bool hasLane() { return Lane != -1;} 211 bool hasReg() { return VGPR != AMDGPU::NoRegister;} 212 }; 213 214 struct SGPRSpillVGPRCSR { 215 // VGPR used for SGPR spills 216 unsigned VGPR; 217 218 // If the VGPR is a CSR, the stack slot used to save/restore it in the 219 // prolog/epilog. 220 Optional<int> FI; 221 222 SGPRSpillVGPRCSR(unsigned V, Optional<int> F) : 223 VGPR(V), 224 FI(F) {} 225 }; 226 227 private: 228 // SGPR->VGPR spilling support. 229 typedef std::pair<unsigned, unsigned> SpillRegMask; 230 231 // Track VGPR + wave index for each subregister of the SGPR spilled to 232 // frameindex key. 233 DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills; 234 unsigned NumVGPRSpillLanes = 0; 235 SmallVector<SGPRSpillVGPRCSR, 2> SpillVGPRs; 236 237 public: 238 239 SIMachineFunctionInfo(const MachineFunction &MF); 240 241 ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const { 242 auto I = SGPRToVGPRSpills.find(FrameIndex); 243 return (I == SGPRToVGPRSpills.end()) ? 244 ArrayRef<SpilledReg>() : makeArrayRef(I->second); 245 } 246 247 ArrayRef<SGPRSpillVGPRCSR> getSGPRSpillVGPRs() const { 248 return SpillVGPRs; 249 } 250 251 bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI); 252 void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI); 253 254 bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; }; 255 unsigned getTIDReg() const { return TIDReg; }; 256 void setTIDReg(unsigned Reg) { TIDReg = Reg; } 257 258 // Add user SGPRs. 259 unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI); 260 unsigned addDispatchPtr(const SIRegisterInfo &TRI); 261 unsigned addQueuePtr(const SIRegisterInfo &TRI); 262 unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI); 263 unsigned addDispatchID(const SIRegisterInfo &TRI); 264 unsigned addFlatScratchInit(const SIRegisterInfo &TRI); 265 unsigned addImplicitBufferPtr(const SIRegisterInfo &TRI); 266 267 // Add system SGPRs. 268 unsigned addWorkGroupIDX() { 269 WorkGroupIDXSystemSGPR = getNextSystemSGPR(); 270 NumSystemSGPRs += 1; 271 return WorkGroupIDXSystemSGPR; 272 } 273 274 unsigned addWorkGroupIDY() { 275 WorkGroupIDYSystemSGPR = getNextSystemSGPR(); 276 NumSystemSGPRs += 1; 277 return WorkGroupIDYSystemSGPR; 278 } 279 280 unsigned addWorkGroupIDZ() { 281 WorkGroupIDZSystemSGPR = getNextSystemSGPR(); 282 NumSystemSGPRs += 1; 283 return WorkGroupIDZSystemSGPR; 284 } 285 286 unsigned addWorkGroupInfo() { 287 WorkGroupInfoSystemSGPR = getNextSystemSGPR(); 288 NumSystemSGPRs += 1; 289 return WorkGroupInfoSystemSGPR; 290 } 291 292 unsigned addPrivateSegmentWaveByteOffset() { 293 PrivateSegmentWaveByteOffsetSystemSGPR = getNextSystemSGPR(); 294 NumSystemSGPRs += 1; 295 return PrivateSegmentWaveByteOffsetSystemSGPR; 296 } 297 298 void setPrivateSegmentWaveByteOffset(unsigned Reg) { 299 PrivateSegmentWaveByteOffsetSystemSGPR = Reg; 300 } 301 302 bool hasPrivateSegmentBuffer() const { 303 return PrivateSegmentBuffer; 304 } 305 306 bool hasDispatchPtr() const { 307 return DispatchPtr; 308 } 309 310 bool hasQueuePtr() const { 311 return QueuePtr; 312 } 313 314 bool hasKernargSegmentPtr() const { 315 return KernargSegmentPtr; 316 } 317 318 bool hasDispatchID() const { 319 return DispatchID; 320 } 321 322 bool hasFlatScratchInit() const { 323 return FlatScratchInit; 324 } 325 326 bool hasGridWorkgroupCountX() const { 327 return GridWorkgroupCountX; 328 } 329 330 bool hasGridWorkgroupCountY() const { 331 return GridWorkgroupCountY; 332 } 333 334 bool hasGridWorkgroupCountZ() const { 335 return GridWorkgroupCountZ; 336 } 337 338 bool hasWorkGroupIDX() const { 339 return WorkGroupIDX; 340 } 341 342 bool hasWorkGroupIDY() const { 343 return WorkGroupIDY; 344 } 345 346 bool hasWorkGroupIDZ() const { 347 return WorkGroupIDZ; 348 } 349 350 bool hasWorkGroupInfo() const { 351 return WorkGroupInfo; 352 } 353 354 bool hasPrivateSegmentWaveByteOffset() const { 355 return PrivateSegmentWaveByteOffset; 356 } 357 358 bool hasWorkItemIDX() const { 359 return WorkItemIDX; 360 } 361 362 bool hasWorkItemIDY() const { 363 return WorkItemIDY; 364 } 365 366 bool hasWorkItemIDZ() const { 367 return WorkItemIDZ; 368 } 369 370 bool hasImplicitArgPtr() const { 371 return ImplicitArgPtr; 372 } 373 374 bool hasImplicitBufferPtr() const { 375 return ImplicitBufferPtr; 376 } 377 378 unsigned getNumUserSGPRs() const { 379 return NumUserSGPRs; 380 } 381 382 unsigned getNumPreloadedSGPRs() const { 383 return NumUserSGPRs + NumSystemSGPRs; 384 } 385 386 unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const { 387 return PrivateSegmentWaveByteOffsetSystemSGPR; 388 } 389 390 /// \brief Returns the physical register reserved for use as the resource 391 /// descriptor for scratch accesses. 392 unsigned getScratchRSrcReg() const { 393 return ScratchRSrcReg; 394 } 395 396 void setScratchRSrcReg(unsigned Reg) { 397 assert(Reg != AMDGPU::NoRegister && "Should never be unset"); 398 ScratchRSrcReg = Reg; 399 } 400 401 unsigned getScratchWaveOffsetReg() const { 402 return ScratchWaveOffsetReg; 403 } 404 405 unsigned getFrameOffsetReg() const { 406 return FrameOffsetReg; 407 } 408 409 void setStackPtrOffsetReg(unsigned Reg) { 410 StackPtrOffsetReg = Reg; 411 } 412 413 // Note the unset value for this is AMDGPU::SP_REG rather than 414 // NoRegister. This is mostly a workaround for MIR tests where state that 415 // can't be directly computed from the function is not preserved in serialized 416 // MIR. 417 unsigned getStackPtrOffsetReg() const { 418 return StackPtrOffsetReg; 419 } 420 421 void setScratchWaveOffsetReg(unsigned Reg) { 422 assert(Reg != AMDGPU::NoRegister && "Should never be unset"); 423 ScratchWaveOffsetReg = Reg; 424 if (isEntryFunction()) 425 FrameOffsetReg = ScratchWaveOffsetReg; 426 } 427 428 unsigned getQueuePtrUserSGPR() const { 429 return QueuePtrUserSGPR; 430 } 431 432 unsigned getImplicitBufferPtrUserSGPR() const { 433 return ImplicitBufferPtrUserSGPR; 434 } 435 436 bool hasSpilledSGPRs() const { 437 return HasSpilledSGPRs; 438 } 439 440 void setHasSpilledSGPRs(bool Spill = true) { 441 HasSpilledSGPRs = Spill; 442 } 443 444 bool hasSpilledVGPRs() const { 445 return HasSpilledVGPRs; 446 } 447 448 void setHasSpilledVGPRs(bool Spill = true) { 449 HasSpilledVGPRs = Spill; 450 } 451 452 bool hasNonSpillStackObjects() const { 453 return HasNonSpillStackObjects; 454 } 455 456 void setHasNonSpillStackObjects(bool StackObject = true) { 457 HasNonSpillStackObjects = StackObject; 458 } 459 460 unsigned getNumSpilledSGPRs() const { 461 return NumSpilledSGPRs; 462 } 463 464 unsigned getNumSpilledVGPRs() const { 465 return NumSpilledVGPRs; 466 } 467 468 void addToSpilledSGPRs(unsigned num) { 469 NumSpilledSGPRs += num; 470 } 471 472 void addToSpilledVGPRs(unsigned num) { 473 NumSpilledVGPRs += num; 474 } 475 476 unsigned getPSInputAddr() const { 477 return PSInputAddr; 478 } 479 480 unsigned getPSInputEnable() const { 481 return PSInputEnable; 482 } 483 484 bool isPSInputAllocated(unsigned Index) const { 485 return PSInputAddr & (1 << Index); 486 } 487 488 void markPSInputAllocated(unsigned Index) { 489 PSInputAddr |= 1 << Index; 490 } 491 492 void markPSInputEnabled(unsigned Index) { 493 PSInputEnable |= 1 << Index; 494 } 495 496 bool returnsVoid() const { 497 return ReturnsVoid; 498 } 499 500 void setIfReturnsVoid(bool Value) { 501 ReturnsVoid = Value; 502 } 503 504 /// \returns A pair of default/requested minimum/maximum flat work group sizes 505 /// for this function. 506 std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const { 507 return FlatWorkGroupSizes; 508 } 509 510 /// \returns Default/requested minimum flat work group size for this function. 511 unsigned getMinFlatWorkGroupSize() const { 512 return FlatWorkGroupSizes.first; 513 } 514 515 /// \returns Default/requested maximum flat work group size for this function. 516 unsigned getMaxFlatWorkGroupSize() const { 517 return FlatWorkGroupSizes.second; 518 } 519 520 /// \returns A pair of default/requested minimum/maximum number of waves per 521 /// execution unit. 522 std::pair<unsigned, unsigned> getWavesPerEU() const { 523 return WavesPerEU; 524 } 525 526 /// \returns Default/requested minimum number of waves per execution unit. 527 unsigned getMinWavesPerEU() const { 528 return WavesPerEU.first; 529 } 530 531 /// \returns Default/requested maximum number of waves per execution unit. 532 unsigned getMaxWavesPerEU() const { 533 return WavesPerEU.second; 534 } 535 536 /// \returns Stack object index for \p Dim's work group ID. 537 int getDebuggerWorkGroupIDStackObjectIndex(unsigned Dim) const { 538 assert(Dim < 3); 539 return DebuggerWorkGroupIDStackObjectIndices[Dim]; 540 } 541 542 /// \brief Sets stack object index for \p Dim's work group ID to \p ObjectIdx. 543 void setDebuggerWorkGroupIDStackObjectIndex(unsigned Dim, int ObjectIdx) { 544 assert(Dim < 3); 545 DebuggerWorkGroupIDStackObjectIndices[Dim] = ObjectIdx; 546 } 547 548 /// \returns Stack object index for \p Dim's work item ID. 549 int getDebuggerWorkItemIDStackObjectIndex(unsigned Dim) const { 550 assert(Dim < 3); 551 return DebuggerWorkItemIDStackObjectIndices[Dim]; 552 } 553 554 /// \brief Sets stack object index for \p Dim's work item ID to \p ObjectIdx. 555 void setDebuggerWorkItemIDStackObjectIndex(unsigned Dim, int ObjectIdx) { 556 assert(Dim < 3); 557 DebuggerWorkItemIDStackObjectIndices[Dim] = ObjectIdx; 558 } 559 560 /// \returns SGPR used for \p Dim's work group ID. 561 unsigned getWorkGroupIDSGPR(unsigned Dim) const { 562 switch (Dim) { 563 case 0: 564 assert(hasWorkGroupIDX()); 565 return WorkGroupIDXSystemSGPR; 566 case 1: 567 assert(hasWorkGroupIDY()); 568 return WorkGroupIDYSystemSGPR; 569 case 2: 570 assert(hasWorkGroupIDZ()); 571 return WorkGroupIDZSystemSGPR; 572 } 573 llvm_unreachable("unexpected dimension"); 574 } 575 576 /// \returns VGPR used for \p Dim' work item ID. 577 unsigned getWorkItemIDVGPR(unsigned Dim) const { 578 switch (Dim) { 579 case 0: 580 assert(hasWorkItemIDX()); 581 return AMDGPU::VGPR0; 582 case 1: 583 assert(hasWorkItemIDY()); 584 return AMDGPU::VGPR1; 585 case 2: 586 assert(hasWorkItemIDZ()); 587 return AMDGPU::VGPR2; 588 } 589 llvm_unreachable("unexpected dimension"); 590 } 591 592 unsigned getLDSWaveSpillSize() const { 593 return LDSWaveSpillSize; 594 } 595 596 const AMDGPUBufferPseudoSourceValue *getBufferPSV() const { 597 return &BufferPSV; 598 } 599 600 const AMDGPUImagePseudoSourceValue *getImagePSV() const { 601 return &ImagePSV; 602 } 603 }; 604 605 } // end namespace llvm 606 607 #endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H 608