1 //===- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface -*- C++ -*-==// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H 15 #define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H 16 17 #include "AMDGPUMachineFunction.h" 18 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 19 #include "SIRegisterInfo.h" 20 #include "llvm/CodeGen/PseudoSourceValue.h" 21 #include "llvm/MC/MCRegisterInfo.h" 22 #include "llvm/Support/ErrorHandling.h" 23 #include <array> 24 #include <cassert> 25 #include <map> 26 #include <utility> 27 28 namespace llvm { 29 30 class AMDGPUImagePseudoSourceValue : public PseudoSourceValue { 31 public: 32 explicit AMDGPUImagePseudoSourceValue() : 33 PseudoSourceValue(PseudoSourceValue::TargetCustom) { } 34 35 bool isConstant(const MachineFrameInfo *) const override { 36 // This should probably be true for most images, but we will start by being 37 // conservative. 38 return false; 39 } 40 41 bool isAliased(const MachineFrameInfo *) const override { 42 // FIXME: If we ever change image intrinsics to accept fat pointers, then 43 // this could be true for some cases. 44 return false; 45 } 46 47 bool mayAlias(const MachineFrameInfo*) const override { 48 // FIXME: If we ever change image intrinsics to accept fat pointers, then 49 // this could be true for some cases. 50 return false; 51 } 52 }; 53 54 class AMDGPUBufferPseudoSourceValue : public PseudoSourceValue { 55 public: 56 explicit AMDGPUBufferPseudoSourceValue() : 57 PseudoSourceValue(PseudoSourceValue::TargetCustom) { } 58 59 bool isConstant(const MachineFrameInfo *) const override { 60 // This should probably be true for most images, but we will start by being 61 // conservative. 62 return false; 63 } 64 65 bool isAliased(const MachineFrameInfo *) const override { 66 // FIXME: If we ever change image intrinsics to accept fat pointers, then 67 // this could be true for some cases. 68 return false; 69 } 70 71 bool mayAlias(const MachineFrameInfo*) const override { 72 // FIXME: If we ever change image intrinsics to accept fat pointers, then 73 // this could be true for some cases. 74 return false; 75 } 76 }; 77 78 /// This class keeps track of the SPI_SP_INPUT_ADDR config register, which 79 /// tells the hardware which interpolation parameters to load. 80 class SIMachineFunctionInfo final : public AMDGPUMachineFunction { 81 // FIXME: This should be removed and getPreloadedValue moved here. 82 friend class SIRegisterInfo; 83 84 unsigned TIDReg; 85 86 // Registers that may be reserved for spilling purposes. These may be the same 87 // as the input registers. 88 unsigned ScratchRSrcReg; 89 unsigned ScratchWaveOffsetReg; 90 91 // This is the current function's incremented size from the kernel's scratch 92 // wave offset register. For an entry function, this is exactly the same as 93 // the ScratchWaveOffsetReg. 94 unsigned FrameOffsetReg; 95 96 // Top of the stack SGPR offset derived from the ScratchWaveOffsetReg. 97 unsigned StackPtrOffsetReg; 98 99 // Input registers for non-HSA ABI 100 unsigned ImplicitBufferPtrUserSGPR; 101 102 // Input registers setup for the HSA ABI. 103 // User SGPRs in allocation order. 104 unsigned PrivateSegmentBufferUserSGPR; 105 unsigned DispatchPtrUserSGPR; 106 unsigned QueuePtrUserSGPR; 107 unsigned KernargSegmentPtrUserSGPR; 108 unsigned DispatchIDUserSGPR; 109 unsigned FlatScratchInitUserSGPR; 110 unsigned PrivateSegmentSizeUserSGPR; 111 unsigned GridWorkGroupCountXUserSGPR; 112 unsigned GridWorkGroupCountYUserSGPR; 113 unsigned GridWorkGroupCountZUserSGPR; 114 115 // System SGPRs in allocation order. 116 unsigned WorkGroupIDXSystemSGPR; 117 unsigned WorkGroupIDYSystemSGPR; 118 unsigned WorkGroupIDZSystemSGPR; 119 unsigned WorkGroupInfoSystemSGPR; 120 unsigned PrivateSegmentWaveByteOffsetSystemSGPR; 121 122 // VGPR inputs. These are always v0, v1 and v2 for entry functions. 123 unsigned WorkItemIDXVGPR; 124 unsigned WorkItemIDYVGPR; 125 unsigned WorkItemIDZVGPR; 126 127 // Graphics info. 128 unsigned PSInputAddr; 129 unsigned PSInputEnable; 130 131 bool ReturnsVoid; 132 133 // A pair of default/requested minimum/maximum flat work group sizes. 134 // Minimum - first, maximum - second. 135 std::pair<unsigned, unsigned> FlatWorkGroupSizes; 136 137 // A pair of default/requested minimum/maximum number of waves per execution 138 // unit. Minimum - first, maximum - second. 139 std::pair<unsigned, unsigned> WavesPerEU; 140 141 // Stack object indices for work group IDs. 142 std::array<int, 3> DebuggerWorkGroupIDStackObjectIndices; 143 // Stack object indices for work item IDs. 144 std::array<int, 3> DebuggerWorkItemIDStackObjectIndices; 145 146 AMDGPUBufferPseudoSourceValue BufferPSV; 147 AMDGPUImagePseudoSourceValue ImagePSV; 148 149 private: 150 unsigned LDSWaveSpillSize; 151 unsigned ScratchOffsetReg; 152 unsigned NumUserSGPRs; 153 unsigned NumSystemSGPRs; 154 155 bool HasSpilledSGPRs; 156 bool HasSpilledVGPRs; 157 bool HasNonSpillStackObjects; 158 159 unsigned NumSpilledSGPRs; 160 unsigned NumSpilledVGPRs; 161 162 // Feature bits required for inputs passed in user SGPRs. 163 bool PrivateSegmentBuffer : 1; 164 bool DispatchPtr : 1; 165 bool QueuePtr : 1; 166 bool KernargSegmentPtr : 1; 167 bool DispatchID : 1; 168 bool FlatScratchInit : 1; 169 bool GridWorkgroupCountX : 1; 170 bool GridWorkgroupCountY : 1; 171 bool GridWorkgroupCountZ : 1; 172 173 // Feature bits required for inputs passed in system SGPRs. 174 bool WorkGroupIDX : 1; // Always initialized. 175 bool WorkGroupIDY : 1; 176 bool WorkGroupIDZ : 1; 177 bool WorkGroupInfo : 1; 178 bool PrivateSegmentWaveByteOffset : 1; 179 180 bool WorkItemIDX : 1; // Always initialized. 181 bool WorkItemIDY : 1; 182 bool WorkItemIDZ : 1; 183 184 // Private memory buffer 185 // Compute directly in sgpr[0:1] 186 // Other shaders indirect 64-bits at sgpr[0:1] 187 bool ImplicitBufferPtr : 1; 188 189 MCPhysReg getNextUserSGPR() const { 190 assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs"); 191 return AMDGPU::SGPR0 + NumUserSGPRs; 192 } 193 194 MCPhysReg getNextSystemSGPR() const { 195 return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs; 196 } 197 198 public: 199 struct SpilledReg { 200 unsigned VGPR = AMDGPU::NoRegister; 201 int Lane = -1; 202 203 SpilledReg() = default; 204 SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) { } 205 206 bool hasLane() { return Lane != -1;} 207 bool hasReg() { return VGPR != AMDGPU::NoRegister;} 208 }; 209 210 private: 211 // SGPR->VGPR spilling support. 212 typedef std::pair<unsigned, unsigned> SpillRegMask; 213 214 // Track VGPR + wave index for each subregister of the SGPR spilled to 215 // frameindex key. 216 DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills; 217 unsigned NumVGPRSpillLanes = 0; 218 SmallVector<unsigned, 2> SpillVGPRs; 219 220 public: 221 222 SIMachineFunctionInfo(const MachineFunction &MF); 223 224 ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const { 225 auto I = SGPRToVGPRSpills.find(FrameIndex); 226 return (I == SGPRToVGPRSpills.end()) ? 227 ArrayRef<SpilledReg>() : makeArrayRef(I->second); 228 } 229 230 bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI); 231 void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI); 232 233 bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; }; 234 unsigned getTIDReg() const { return TIDReg; }; 235 void setTIDReg(unsigned Reg) { TIDReg = Reg; } 236 237 // Add user SGPRs. 238 unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI); 239 unsigned addDispatchPtr(const SIRegisterInfo &TRI); 240 unsigned addQueuePtr(const SIRegisterInfo &TRI); 241 unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI); 242 unsigned addDispatchID(const SIRegisterInfo &TRI); 243 unsigned addFlatScratchInit(const SIRegisterInfo &TRI); 244 unsigned addImplicitBufferPtr(const SIRegisterInfo &TRI); 245 246 // Add system SGPRs. 247 unsigned addWorkGroupIDX() { 248 WorkGroupIDXSystemSGPR = getNextSystemSGPR(); 249 NumSystemSGPRs += 1; 250 return WorkGroupIDXSystemSGPR; 251 } 252 253 unsigned addWorkGroupIDY() { 254 WorkGroupIDYSystemSGPR = getNextSystemSGPR(); 255 NumSystemSGPRs += 1; 256 return WorkGroupIDYSystemSGPR; 257 } 258 259 unsigned addWorkGroupIDZ() { 260 WorkGroupIDZSystemSGPR = getNextSystemSGPR(); 261 NumSystemSGPRs += 1; 262 return WorkGroupIDZSystemSGPR; 263 } 264 265 unsigned addWorkGroupInfo() { 266 WorkGroupInfoSystemSGPR = getNextSystemSGPR(); 267 NumSystemSGPRs += 1; 268 return WorkGroupInfoSystemSGPR; 269 } 270 271 unsigned addPrivateSegmentWaveByteOffset() { 272 PrivateSegmentWaveByteOffsetSystemSGPR = getNextSystemSGPR(); 273 NumSystemSGPRs += 1; 274 return PrivateSegmentWaveByteOffsetSystemSGPR; 275 } 276 277 void setPrivateSegmentWaveByteOffset(unsigned Reg) { 278 PrivateSegmentWaveByteOffsetSystemSGPR = Reg; 279 } 280 281 bool hasPrivateSegmentBuffer() const { 282 return PrivateSegmentBuffer; 283 } 284 285 bool hasDispatchPtr() const { 286 return DispatchPtr; 287 } 288 289 bool hasQueuePtr() const { 290 return QueuePtr; 291 } 292 293 bool hasKernargSegmentPtr() const { 294 return KernargSegmentPtr; 295 } 296 297 bool hasDispatchID() const { 298 return DispatchID; 299 } 300 301 bool hasFlatScratchInit() const { 302 return FlatScratchInit; 303 } 304 305 bool hasGridWorkgroupCountX() const { 306 return GridWorkgroupCountX; 307 } 308 309 bool hasGridWorkgroupCountY() const { 310 return GridWorkgroupCountY; 311 } 312 313 bool hasGridWorkgroupCountZ() const { 314 return GridWorkgroupCountZ; 315 } 316 317 bool hasWorkGroupIDX() const { 318 return WorkGroupIDX; 319 } 320 321 bool hasWorkGroupIDY() const { 322 return WorkGroupIDY; 323 } 324 325 bool hasWorkGroupIDZ() const { 326 return WorkGroupIDZ; 327 } 328 329 bool hasWorkGroupInfo() const { 330 return WorkGroupInfo; 331 } 332 333 bool hasPrivateSegmentWaveByteOffset() const { 334 return PrivateSegmentWaveByteOffset; 335 } 336 337 bool hasWorkItemIDX() const { 338 return WorkItemIDX; 339 } 340 341 bool hasWorkItemIDY() const { 342 return WorkItemIDY; 343 } 344 345 bool hasWorkItemIDZ() const { 346 return WorkItemIDZ; 347 } 348 349 bool hasImplicitBufferPtr() const { 350 return ImplicitBufferPtr; 351 } 352 353 unsigned getNumUserSGPRs() const { 354 return NumUserSGPRs; 355 } 356 357 unsigned getNumPreloadedSGPRs() const { 358 return NumUserSGPRs + NumSystemSGPRs; 359 } 360 361 unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const { 362 return PrivateSegmentWaveByteOffsetSystemSGPR; 363 } 364 365 /// \brief Returns the physical register reserved for use as the resource 366 /// descriptor for scratch accesses. 367 unsigned getScratchRSrcReg() const { 368 return ScratchRSrcReg; 369 } 370 371 void setScratchRSrcReg(unsigned Reg) { 372 assert(Reg != AMDGPU::NoRegister && "Should never be unset"); 373 ScratchRSrcReg = Reg; 374 } 375 376 unsigned getScratchWaveOffsetReg() const { 377 return ScratchWaveOffsetReg; 378 } 379 380 unsigned getFrameOffsetReg() const { 381 return FrameOffsetReg; 382 } 383 384 void setStackPtrOffsetReg(unsigned Reg) { 385 assert(Reg != AMDGPU::NoRegister && "Should never be unset"); 386 StackPtrOffsetReg = Reg; 387 } 388 389 unsigned getStackPtrOffsetReg() const { 390 return StackPtrOffsetReg; 391 } 392 393 void setScratchWaveOffsetReg(unsigned Reg) { 394 assert(Reg != AMDGPU::NoRegister && "Should never be unset"); 395 ScratchWaveOffsetReg = Reg; 396 if (isEntryFunction()) 397 FrameOffsetReg = ScratchWaveOffsetReg; 398 } 399 400 unsigned getQueuePtrUserSGPR() const { 401 return QueuePtrUserSGPR; 402 } 403 404 unsigned getImplicitBufferPtrUserSGPR() const { 405 return ImplicitBufferPtrUserSGPR; 406 } 407 408 bool hasSpilledSGPRs() const { 409 return HasSpilledSGPRs; 410 } 411 412 void setHasSpilledSGPRs(bool Spill = true) { 413 HasSpilledSGPRs = Spill; 414 } 415 416 bool hasSpilledVGPRs() const { 417 return HasSpilledVGPRs; 418 } 419 420 void setHasSpilledVGPRs(bool Spill = true) { 421 HasSpilledVGPRs = Spill; 422 } 423 424 bool hasNonSpillStackObjects() const { 425 return HasNonSpillStackObjects; 426 } 427 428 void setHasNonSpillStackObjects(bool StackObject = true) { 429 HasNonSpillStackObjects = StackObject; 430 } 431 432 unsigned getNumSpilledSGPRs() const { 433 return NumSpilledSGPRs; 434 } 435 436 unsigned getNumSpilledVGPRs() const { 437 return NumSpilledVGPRs; 438 } 439 440 void addToSpilledSGPRs(unsigned num) { 441 NumSpilledSGPRs += num; 442 } 443 444 void addToSpilledVGPRs(unsigned num) { 445 NumSpilledVGPRs += num; 446 } 447 448 unsigned getPSInputAddr() const { 449 return PSInputAddr; 450 } 451 452 unsigned getPSInputEnable() const { 453 return PSInputEnable; 454 } 455 456 bool isPSInputAllocated(unsigned Index) const { 457 return PSInputAddr & (1 << Index); 458 } 459 460 void markPSInputAllocated(unsigned Index) { 461 PSInputAddr |= 1 << Index; 462 } 463 464 void markPSInputEnabled(unsigned Index) { 465 PSInputEnable |= 1 << Index; 466 } 467 468 bool returnsVoid() const { 469 return ReturnsVoid; 470 } 471 472 void setIfReturnsVoid(bool Value) { 473 ReturnsVoid = Value; 474 } 475 476 /// \returns A pair of default/requested minimum/maximum flat work group sizes 477 /// for this function. 478 std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const { 479 return FlatWorkGroupSizes; 480 } 481 482 /// \returns Default/requested minimum flat work group size for this function. 483 unsigned getMinFlatWorkGroupSize() const { 484 return FlatWorkGroupSizes.first; 485 } 486 487 /// \returns Default/requested maximum flat work group size for this function. 488 unsigned getMaxFlatWorkGroupSize() const { 489 return FlatWorkGroupSizes.second; 490 } 491 492 /// \returns A pair of default/requested minimum/maximum number of waves per 493 /// execution unit. 494 std::pair<unsigned, unsigned> getWavesPerEU() const { 495 return WavesPerEU; 496 } 497 498 /// \returns Default/requested minimum number of waves per execution unit. 499 unsigned getMinWavesPerEU() const { 500 return WavesPerEU.first; 501 } 502 503 /// \returns Default/requested maximum number of waves per execution unit. 504 unsigned getMaxWavesPerEU() const { 505 return WavesPerEU.second; 506 } 507 508 /// \returns Stack object index for \p Dim's work group ID. 509 int getDebuggerWorkGroupIDStackObjectIndex(unsigned Dim) const { 510 assert(Dim < 3); 511 return DebuggerWorkGroupIDStackObjectIndices[Dim]; 512 } 513 514 /// \brief Sets stack object index for \p Dim's work group ID to \p ObjectIdx. 515 void setDebuggerWorkGroupIDStackObjectIndex(unsigned Dim, int ObjectIdx) { 516 assert(Dim < 3); 517 DebuggerWorkGroupIDStackObjectIndices[Dim] = ObjectIdx; 518 } 519 520 /// \returns Stack object index for \p Dim's work item ID. 521 int getDebuggerWorkItemIDStackObjectIndex(unsigned Dim) const { 522 assert(Dim < 3); 523 return DebuggerWorkItemIDStackObjectIndices[Dim]; 524 } 525 526 /// \brief Sets stack object index for \p Dim's work item ID to \p ObjectIdx. 527 void setDebuggerWorkItemIDStackObjectIndex(unsigned Dim, int ObjectIdx) { 528 assert(Dim < 3); 529 DebuggerWorkItemIDStackObjectIndices[Dim] = ObjectIdx; 530 } 531 532 /// \returns SGPR used for \p Dim's work group ID. 533 unsigned getWorkGroupIDSGPR(unsigned Dim) const { 534 switch (Dim) { 535 case 0: 536 assert(hasWorkGroupIDX()); 537 return WorkGroupIDXSystemSGPR; 538 case 1: 539 assert(hasWorkGroupIDY()); 540 return WorkGroupIDYSystemSGPR; 541 case 2: 542 assert(hasWorkGroupIDZ()); 543 return WorkGroupIDZSystemSGPR; 544 } 545 llvm_unreachable("unexpected dimension"); 546 } 547 548 /// \returns VGPR used for \p Dim' work item ID. 549 unsigned getWorkItemIDVGPR(unsigned Dim) const { 550 switch (Dim) { 551 case 0: 552 assert(hasWorkItemIDX()); 553 return AMDGPU::VGPR0; 554 case 1: 555 assert(hasWorkItemIDY()); 556 return AMDGPU::VGPR1; 557 case 2: 558 assert(hasWorkItemIDZ()); 559 return AMDGPU::VGPR2; 560 } 561 llvm_unreachable("unexpected dimension"); 562 } 563 564 unsigned getLDSWaveSpillSize() const { 565 return LDSWaveSpillSize; 566 } 567 568 const AMDGPUBufferPseudoSourceValue *getBufferPSV() const { 569 return &BufferPSV; 570 } 571 572 const AMDGPUImagePseudoSourceValue *getImagePSV() const { 573 return &ImagePSV; 574 } 575 }; 576 577 } // end namespace llvm 578 579 #endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H 580