1 //===- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface -*- C++ -*-==// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H 15 #define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H 16 17 #include "AMDGPUMachineFunction.h" 18 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 19 #include "SIRegisterInfo.h" 20 #include "llvm/CodeGen/PseudoSourceValue.h" 21 #include "llvm/MC/MCRegisterInfo.h" 22 #include "llvm/Support/ErrorHandling.h" 23 #include <array> 24 #include <cassert> 25 #include <map> 26 #include <utility> 27 28 namespace llvm { 29 30 class AMDGPUImagePseudoSourceValue : public PseudoSourceValue { 31 public: 32 explicit AMDGPUImagePseudoSourceValue() : 33 PseudoSourceValue(PseudoSourceValue::TargetCustom) { } 34 35 bool isConstant(const MachineFrameInfo *) const override { 36 // This should probably be true for most images, but we will start by being 37 // conservative. 38 return false; 39 } 40 41 bool isAliased(const MachineFrameInfo *) const override { 42 // FIXME: If we ever change image intrinsics to accept fat pointers, then 43 // this could be true for some cases. 44 return false; 45 } 46 47 bool mayAlias(const MachineFrameInfo*) const override { 48 // FIXME: If we ever change image intrinsics to accept fat pointers, then 49 // this could be true for some cases. 50 return false; 51 } 52 }; 53 54 class AMDGPUBufferPseudoSourceValue : public PseudoSourceValue { 55 public: 56 explicit AMDGPUBufferPseudoSourceValue() : 57 PseudoSourceValue(PseudoSourceValue::TargetCustom) { } 58 59 bool isConstant(const MachineFrameInfo *) const override { 60 // This should probably be true for most images, but we will start by being 61 // conservative. 62 return false; 63 } 64 65 bool isAliased(const MachineFrameInfo *) const override { 66 // FIXME: If we ever change image intrinsics to accept fat pointers, then 67 // this could be true for some cases. 68 return false; 69 } 70 71 bool mayAlias(const MachineFrameInfo*) const override { 72 // FIXME: If we ever change image intrinsics to accept fat pointers, then 73 // this could be true for some cases. 74 return false; 75 } 76 }; 77 78 /// This class keeps track of the SPI_SP_INPUT_ADDR config register, which 79 /// tells the hardware which interpolation parameters to load. 80 class SIMachineFunctionInfo final : public AMDGPUMachineFunction { 81 // FIXME: This should be removed and getPreloadedValue moved here. 82 friend class SIRegisterInfo; 83 84 unsigned TIDReg; 85 86 // Registers that may be reserved for spilling purposes. These may be the same 87 // as the input registers. 88 unsigned ScratchRSrcReg; 89 unsigned ScratchWaveOffsetReg; 90 91 // This is the current function's incremented size from the kernel's scratch 92 // wave offset register. For an entry function, this is exactly the same as 93 // the ScratchWaveOffsetReg. 94 unsigned FrameOffsetReg; 95 96 // Top of the stack SGPR offset derived from the ScratchWaveOffsetReg. 97 unsigned StackPtrOffsetReg; 98 99 // Input registers for non-HSA ABI 100 unsigned ImplicitBufferPtrUserSGPR; 101 102 // Input registers setup for the HSA ABI. 103 // User SGPRs in allocation order. 104 unsigned PrivateSegmentBufferUserSGPR; 105 unsigned DispatchPtrUserSGPR; 106 unsigned QueuePtrUserSGPR; 107 unsigned KernargSegmentPtrUserSGPR; 108 unsigned DispatchIDUserSGPR; 109 unsigned FlatScratchInitUserSGPR; 110 unsigned PrivateSegmentSizeUserSGPR; 111 unsigned GridWorkGroupCountXUserSGPR; 112 unsigned GridWorkGroupCountYUserSGPR; 113 unsigned GridWorkGroupCountZUserSGPR; 114 115 // System SGPRs in allocation order. 116 unsigned WorkGroupIDXSystemSGPR; 117 unsigned WorkGroupIDYSystemSGPR; 118 unsigned WorkGroupIDZSystemSGPR; 119 unsigned WorkGroupInfoSystemSGPR; 120 unsigned PrivateSegmentWaveByteOffsetSystemSGPR; 121 122 // VGPR inputs. These are always v0, v1 and v2 for entry functions. 123 unsigned WorkItemIDXVGPR; 124 unsigned WorkItemIDYVGPR; 125 unsigned WorkItemIDZVGPR; 126 127 // Graphics info. 128 unsigned PSInputAddr; 129 unsigned PSInputEnable; 130 131 bool ReturnsVoid; 132 133 // A pair of default/requested minimum/maximum flat work group sizes. 134 // Minimum - first, maximum - second. 135 std::pair<unsigned, unsigned> FlatWorkGroupSizes; 136 137 // A pair of default/requested minimum/maximum number of waves per execution 138 // unit. Minimum - first, maximum - second. 139 std::pair<unsigned, unsigned> WavesPerEU; 140 141 // Stack object indices for work group IDs. 142 std::array<int, 3> DebuggerWorkGroupIDStackObjectIndices; 143 // Stack object indices for work item IDs. 144 std::array<int, 3> DebuggerWorkItemIDStackObjectIndices; 145 146 AMDGPUBufferPseudoSourceValue BufferPSV; 147 AMDGPUImagePseudoSourceValue ImagePSV; 148 149 private: 150 unsigned LDSWaveSpillSize; 151 unsigned ScratchOffsetReg; 152 unsigned NumUserSGPRs; 153 unsigned NumSystemSGPRs; 154 155 bool HasSpilledSGPRs; 156 bool HasSpilledVGPRs; 157 bool HasNonSpillStackObjects; 158 159 unsigned NumSpilledSGPRs; 160 unsigned NumSpilledVGPRs; 161 162 // Feature bits required for inputs passed in user SGPRs. 163 bool PrivateSegmentBuffer : 1; 164 bool DispatchPtr : 1; 165 bool QueuePtr : 1; 166 bool KernargSegmentPtr : 1; 167 bool DispatchID : 1; 168 bool FlatScratchInit : 1; 169 bool GridWorkgroupCountX : 1; 170 bool GridWorkgroupCountY : 1; 171 bool GridWorkgroupCountZ : 1; 172 173 // Feature bits required for inputs passed in system SGPRs. 174 bool WorkGroupIDX : 1; // Always initialized. 175 bool WorkGroupIDY : 1; 176 bool WorkGroupIDZ : 1; 177 bool WorkGroupInfo : 1; 178 bool PrivateSegmentWaveByteOffset : 1; 179 180 bool WorkItemIDX : 1; // Always initialized. 181 bool WorkItemIDY : 1; 182 bool WorkItemIDZ : 1; 183 184 // Private memory buffer 185 // Compute directly in sgpr[0:1] 186 // Other shaders indirect 64-bits at sgpr[0:1] 187 bool ImplicitBufferPtr : 1; 188 189 MCPhysReg getNextUserSGPR() const { 190 assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs"); 191 return AMDGPU::SGPR0 + NumUserSGPRs; 192 } 193 194 MCPhysReg getNextSystemSGPR() const { 195 return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs; 196 } 197 198 public: 199 struct SpilledReg { 200 unsigned VGPR = AMDGPU::NoRegister; 201 int Lane = -1; 202 203 SpilledReg() = default; 204 SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) { } 205 206 bool hasLane() { return Lane != -1;} 207 bool hasReg() { return VGPR != AMDGPU::NoRegister;} 208 }; 209 210 private: 211 // SGPR->VGPR spilling support. 212 typedef std::pair<unsigned, unsigned> SpillRegMask; 213 214 // Track VGPR + wave index for each subregister of the SGPR spilled to 215 // frameindex key. 216 DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills; 217 unsigned NumVGPRSpillLanes = 0; 218 SmallVector<unsigned, 2> SpillVGPRs; 219 220 public: 221 222 SIMachineFunctionInfo(const MachineFunction &MF); 223 224 ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const { 225 auto I = SGPRToVGPRSpills.find(FrameIndex); 226 return (I == SGPRToVGPRSpills.end()) ? 227 ArrayRef<SpilledReg>() : makeArrayRef(I->second); 228 } 229 230 bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI); 231 void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI); 232 233 bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; }; 234 unsigned getTIDReg() const { return TIDReg; }; 235 void setTIDReg(unsigned Reg) { TIDReg = Reg; } 236 237 // Add user SGPRs. 238 unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI); 239 unsigned addDispatchPtr(const SIRegisterInfo &TRI); 240 unsigned addQueuePtr(const SIRegisterInfo &TRI); 241 unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI); 242 unsigned addDispatchID(const SIRegisterInfo &TRI); 243 unsigned addFlatScratchInit(const SIRegisterInfo &TRI); 244 unsigned addImplicitBufferPtr(const SIRegisterInfo &TRI); 245 246 // Add system SGPRs. 247 unsigned addWorkGroupIDX() { 248 WorkGroupIDXSystemSGPR = getNextSystemSGPR(); 249 NumSystemSGPRs += 1; 250 return WorkGroupIDXSystemSGPR; 251 } 252 253 unsigned addWorkGroupIDY() { 254 WorkGroupIDYSystemSGPR = getNextSystemSGPR(); 255 NumSystemSGPRs += 1; 256 return WorkGroupIDYSystemSGPR; 257 } 258 259 unsigned addWorkGroupIDZ() { 260 WorkGroupIDZSystemSGPR = getNextSystemSGPR(); 261 NumSystemSGPRs += 1; 262 return WorkGroupIDZSystemSGPR; 263 } 264 265 unsigned addWorkGroupInfo() { 266 WorkGroupInfoSystemSGPR = getNextSystemSGPR(); 267 NumSystemSGPRs += 1; 268 return WorkGroupInfoSystemSGPR; 269 } 270 271 unsigned addPrivateSegmentWaveByteOffset() { 272 PrivateSegmentWaveByteOffsetSystemSGPR = getNextSystemSGPR(); 273 NumSystemSGPRs += 1; 274 return PrivateSegmentWaveByteOffsetSystemSGPR; 275 } 276 277 void setPrivateSegmentWaveByteOffset(unsigned Reg) { 278 PrivateSegmentWaveByteOffsetSystemSGPR = Reg; 279 } 280 281 bool hasPrivateSegmentBuffer() const { 282 return PrivateSegmentBuffer; 283 } 284 285 bool hasDispatchPtr() const { 286 return DispatchPtr; 287 } 288 289 bool hasQueuePtr() const { 290 return QueuePtr; 291 } 292 293 bool hasKernargSegmentPtr() const { 294 return KernargSegmentPtr; 295 } 296 297 bool hasDispatchID() const { 298 return DispatchID; 299 } 300 301 bool hasFlatScratchInit() const { 302 return FlatScratchInit; 303 } 304 305 bool hasGridWorkgroupCountX() const { 306 return GridWorkgroupCountX; 307 } 308 309 bool hasGridWorkgroupCountY() const { 310 return GridWorkgroupCountY; 311 } 312 313 bool hasGridWorkgroupCountZ() const { 314 return GridWorkgroupCountZ; 315 } 316 317 bool hasWorkGroupIDX() const { 318 return WorkGroupIDX; 319 } 320 321 bool hasWorkGroupIDY() const { 322 return WorkGroupIDY; 323 } 324 325 bool hasWorkGroupIDZ() const { 326 return WorkGroupIDZ; 327 } 328 329 bool hasWorkGroupInfo() const { 330 return WorkGroupInfo; 331 } 332 333 bool hasPrivateSegmentWaveByteOffset() const { 334 return PrivateSegmentWaveByteOffset; 335 } 336 337 bool hasWorkItemIDX() const { 338 return WorkItemIDX; 339 } 340 341 bool hasWorkItemIDY() const { 342 return WorkItemIDY; 343 } 344 345 bool hasWorkItemIDZ() const { 346 return WorkItemIDZ; 347 } 348 349 bool hasImplicitBufferPtr() const { 350 return ImplicitBufferPtr; 351 } 352 353 unsigned getNumUserSGPRs() const { 354 return NumUserSGPRs; 355 } 356 357 unsigned getNumPreloadedSGPRs() const { 358 return NumUserSGPRs + NumSystemSGPRs; 359 } 360 361 unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const { 362 return PrivateSegmentWaveByteOffsetSystemSGPR; 363 } 364 365 /// \brief Returns the physical register reserved for use as the resource 366 /// descriptor for scratch accesses. 367 unsigned getScratchRSrcReg() const { 368 return ScratchRSrcReg; 369 } 370 371 void setScratchRSrcReg(unsigned Reg) { 372 assert(Reg != AMDGPU::NoRegister && "Should never be unset"); 373 ScratchRSrcReg = Reg; 374 } 375 376 unsigned getScratchWaveOffsetReg() const { 377 return ScratchWaveOffsetReg; 378 } 379 380 unsigned getFrameOffsetReg() const { 381 return FrameOffsetReg; 382 } 383 384 void setStackPtrOffsetReg(unsigned Reg) { 385 StackPtrOffsetReg = Reg; 386 } 387 388 // Note the unset value for this is AMDGPU::SP_REG rather than 389 // NoRegister. This is mostly a workaround for MIR tests where state that 390 // can't be directly computed from the function is not preserved in serialized 391 // MIR. 392 unsigned getStackPtrOffsetReg() const { 393 return StackPtrOffsetReg; 394 } 395 396 void setScratchWaveOffsetReg(unsigned Reg) { 397 assert(Reg != AMDGPU::NoRegister && "Should never be unset"); 398 ScratchWaveOffsetReg = Reg; 399 if (isEntryFunction()) 400 FrameOffsetReg = ScratchWaveOffsetReg; 401 } 402 403 unsigned getQueuePtrUserSGPR() const { 404 return QueuePtrUserSGPR; 405 } 406 407 unsigned getImplicitBufferPtrUserSGPR() const { 408 return ImplicitBufferPtrUserSGPR; 409 } 410 411 bool hasSpilledSGPRs() const { 412 return HasSpilledSGPRs; 413 } 414 415 void setHasSpilledSGPRs(bool Spill = true) { 416 HasSpilledSGPRs = Spill; 417 } 418 419 bool hasSpilledVGPRs() const { 420 return HasSpilledVGPRs; 421 } 422 423 void setHasSpilledVGPRs(bool Spill = true) { 424 HasSpilledVGPRs = Spill; 425 } 426 427 bool hasNonSpillStackObjects() const { 428 return HasNonSpillStackObjects; 429 } 430 431 void setHasNonSpillStackObjects(bool StackObject = true) { 432 HasNonSpillStackObjects = StackObject; 433 } 434 435 unsigned getNumSpilledSGPRs() const { 436 return NumSpilledSGPRs; 437 } 438 439 unsigned getNumSpilledVGPRs() const { 440 return NumSpilledVGPRs; 441 } 442 443 void addToSpilledSGPRs(unsigned num) { 444 NumSpilledSGPRs += num; 445 } 446 447 void addToSpilledVGPRs(unsigned num) { 448 NumSpilledVGPRs += num; 449 } 450 451 unsigned getPSInputAddr() const { 452 return PSInputAddr; 453 } 454 455 unsigned getPSInputEnable() const { 456 return PSInputEnable; 457 } 458 459 bool isPSInputAllocated(unsigned Index) const { 460 return PSInputAddr & (1 << Index); 461 } 462 463 void markPSInputAllocated(unsigned Index) { 464 PSInputAddr |= 1 << Index; 465 } 466 467 void markPSInputEnabled(unsigned Index) { 468 PSInputEnable |= 1 << Index; 469 } 470 471 bool returnsVoid() const { 472 return ReturnsVoid; 473 } 474 475 void setIfReturnsVoid(bool Value) { 476 ReturnsVoid = Value; 477 } 478 479 /// \returns A pair of default/requested minimum/maximum flat work group sizes 480 /// for this function. 481 std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const { 482 return FlatWorkGroupSizes; 483 } 484 485 /// \returns Default/requested minimum flat work group size for this function. 486 unsigned getMinFlatWorkGroupSize() const { 487 return FlatWorkGroupSizes.first; 488 } 489 490 /// \returns Default/requested maximum flat work group size for this function. 491 unsigned getMaxFlatWorkGroupSize() const { 492 return FlatWorkGroupSizes.second; 493 } 494 495 /// \returns A pair of default/requested minimum/maximum number of waves per 496 /// execution unit. 497 std::pair<unsigned, unsigned> getWavesPerEU() const { 498 return WavesPerEU; 499 } 500 501 /// \returns Default/requested minimum number of waves per execution unit. 502 unsigned getMinWavesPerEU() const { 503 return WavesPerEU.first; 504 } 505 506 /// \returns Default/requested maximum number of waves per execution unit. 507 unsigned getMaxWavesPerEU() const { 508 return WavesPerEU.second; 509 } 510 511 /// \returns Stack object index for \p Dim's work group ID. 512 int getDebuggerWorkGroupIDStackObjectIndex(unsigned Dim) const { 513 assert(Dim < 3); 514 return DebuggerWorkGroupIDStackObjectIndices[Dim]; 515 } 516 517 /// \brief Sets stack object index for \p Dim's work group ID to \p ObjectIdx. 518 void setDebuggerWorkGroupIDStackObjectIndex(unsigned Dim, int ObjectIdx) { 519 assert(Dim < 3); 520 DebuggerWorkGroupIDStackObjectIndices[Dim] = ObjectIdx; 521 } 522 523 /// \returns Stack object index for \p Dim's work item ID. 524 int getDebuggerWorkItemIDStackObjectIndex(unsigned Dim) const { 525 assert(Dim < 3); 526 return DebuggerWorkItemIDStackObjectIndices[Dim]; 527 } 528 529 /// \brief Sets stack object index for \p Dim's work item ID to \p ObjectIdx. 530 void setDebuggerWorkItemIDStackObjectIndex(unsigned Dim, int ObjectIdx) { 531 assert(Dim < 3); 532 DebuggerWorkItemIDStackObjectIndices[Dim] = ObjectIdx; 533 } 534 535 /// \returns SGPR used for \p Dim's work group ID. 536 unsigned getWorkGroupIDSGPR(unsigned Dim) const { 537 switch (Dim) { 538 case 0: 539 assert(hasWorkGroupIDX()); 540 return WorkGroupIDXSystemSGPR; 541 case 1: 542 assert(hasWorkGroupIDY()); 543 return WorkGroupIDYSystemSGPR; 544 case 2: 545 assert(hasWorkGroupIDZ()); 546 return WorkGroupIDZSystemSGPR; 547 } 548 llvm_unreachable("unexpected dimension"); 549 } 550 551 /// \returns VGPR used for \p Dim' work item ID. 552 unsigned getWorkItemIDVGPR(unsigned Dim) const { 553 switch (Dim) { 554 case 0: 555 assert(hasWorkItemIDX()); 556 return AMDGPU::VGPR0; 557 case 1: 558 assert(hasWorkItemIDY()); 559 return AMDGPU::VGPR1; 560 case 2: 561 assert(hasWorkItemIDZ()); 562 return AMDGPU::VGPR2; 563 } 564 llvm_unreachable("unexpected dimension"); 565 } 566 567 unsigned getLDSWaveSpillSize() const { 568 return LDSWaveSpillSize; 569 } 570 571 const AMDGPUBufferPseudoSourceValue *getBufferPSV() const { 572 return &BufferPSV; 573 } 574 575 const AMDGPUImagePseudoSourceValue *getImagePSV() const { 576 return &ImagePSV; 577 } 578 }; 579 580 } // end namespace llvm 581 582 #endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H 583