1 //===- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface -*- C++ -*-==// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H 15 #define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H 16 17 #include "AMDGPUMachineFunction.h" 18 #include "SIRegisterInfo.h" 19 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 20 #include "llvm/CodeGen/PseudoSourceValue.h" 21 #include "llvm/MC/MCRegisterInfo.h" 22 #include "llvm/Support/ErrorHandling.h" 23 #include <array> 24 #include <cassert> 25 #include <map> 26 #include <utility> 27 28 namespace llvm { 29 30 class AMDGPUImagePseudoSourceValue : public PseudoSourceValue { 31 public: 32 explicit AMDGPUImagePseudoSourceValue() : 33 PseudoSourceValue(PseudoSourceValue::TargetCustom) { } 34 35 bool isConstant(const MachineFrameInfo *) const override { 36 // This should probably be true for most images, but we will start by being 37 // conservative. 38 return false; 39 } 40 41 bool isAliased(const MachineFrameInfo *) const override { 42 // FIXME: If we ever change image intrinsics to accept fat pointers, then 43 // this could be true for some cases. 44 return false; 45 } 46 47 bool mayAlias(const MachineFrameInfo*) const override { 48 // FIXME: If we ever change image intrinsics to accept fat pointers, then 49 // this could be true for some cases. 50 return false; 51 } 52 }; 53 54 class AMDGPUBufferPseudoSourceValue : public PseudoSourceValue { 55 public: 56 explicit AMDGPUBufferPseudoSourceValue() : 57 PseudoSourceValue(PseudoSourceValue::TargetCustom) { } 58 59 bool isConstant(const MachineFrameInfo *) const override { 60 // This should probably be true for most images, but we will start by being 61 // conservative. 62 return false; 63 } 64 65 bool isAliased(const MachineFrameInfo *) const override { 66 // FIXME: If we ever change image intrinsics to accept fat pointers, then 67 // this could be true for some cases. 68 return false; 69 } 70 71 bool mayAlias(const MachineFrameInfo*) const override { 72 // FIXME: If we ever change image intrinsics to accept fat pointers, then 73 // this could be true for some cases. 74 return false; 75 } 76 }; 77 78 /// This class keeps track of the SPI_SP_INPUT_ADDR config register, which 79 /// tells the hardware which interpolation parameters to load. 80 class SIMachineFunctionInfo final : public AMDGPUMachineFunction { 81 // FIXME: This should be removed and getPreloadedValue moved here. 82 friend class SIRegisterInfo; 83 84 unsigned TIDReg; 85 86 // Registers that may be reserved for spilling purposes. These may be the same 87 // as the input registers. 88 unsigned ScratchRSrcReg; 89 unsigned ScratchWaveOffsetReg; 90 91 // Input registers for non-HSA ABI 92 unsigned PrivateMemoryPtrUserSGPR; 93 94 // Input registers setup for the HSA ABI. 95 // User SGPRs in allocation order. 96 unsigned PrivateSegmentBufferUserSGPR; 97 unsigned DispatchPtrUserSGPR; 98 unsigned QueuePtrUserSGPR; 99 unsigned KernargSegmentPtrUserSGPR; 100 unsigned DispatchIDUserSGPR; 101 unsigned FlatScratchInitUserSGPR; 102 unsigned PrivateSegmentSizeUserSGPR; 103 unsigned GridWorkGroupCountXUserSGPR; 104 unsigned GridWorkGroupCountYUserSGPR; 105 unsigned GridWorkGroupCountZUserSGPR; 106 107 // System SGPRs in allocation order. 108 unsigned WorkGroupIDXSystemSGPR; 109 unsigned WorkGroupIDYSystemSGPR; 110 unsigned WorkGroupIDZSystemSGPR; 111 unsigned WorkGroupInfoSystemSGPR; 112 unsigned PrivateSegmentWaveByteOffsetSystemSGPR; 113 114 // Graphics info. 115 unsigned PSInputAddr; 116 bool ReturnsVoid; 117 118 // A pair of default/requested minimum/maximum flat work group sizes. 119 // Minimum - first, maximum - second. 120 std::pair<unsigned, unsigned> FlatWorkGroupSizes; 121 122 // A pair of default/requested minimum/maximum number of waves per execution 123 // unit. Minimum - first, maximum - second. 124 std::pair<unsigned, unsigned> WavesPerEU; 125 126 // Stack object indices for work group IDs. 127 std::array<int, 3> DebuggerWorkGroupIDStackObjectIndices; 128 // Stack object indices for work item IDs. 129 std::array<int, 3> DebuggerWorkItemIDStackObjectIndices; 130 131 AMDGPUBufferPseudoSourceValue BufferPSV; 132 AMDGPUImagePseudoSourceValue ImagePSV; 133 134 public: 135 // FIXME: Make private 136 unsigned LDSWaveSpillSize; 137 unsigned PSInputEna; 138 139 140 unsigned ScratchOffsetReg; 141 unsigned NumUserSGPRs; 142 unsigned NumSystemSGPRs; 143 144 private: 145 bool HasSpilledSGPRs; 146 bool HasSpilledVGPRs; 147 bool HasNonSpillStackObjects; 148 149 unsigned NumSpilledSGPRs; 150 unsigned NumSpilledVGPRs; 151 152 // Feature bits required for inputs passed in user SGPRs. 153 bool PrivateSegmentBuffer : 1; 154 bool DispatchPtr : 1; 155 bool QueuePtr : 1; 156 bool KernargSegmentPtr : 1; 157 bool DispatchID : 1; 158 bool FlatScratchInit : 1; 159 bool GridWorkgroupCountX : 1; 160 bool GridWorkgroupCountY : 1; 161 bool GridWorkgroupCountZ : 1; 162 163 // Feature bits required for inputs passed in system SGPRs. 164 bool WorkGroupIDX : 1; // Always initialized. 165 bool WorkGroupIDY : 1; 166 bool WorkGroupIDZ : 1; 167 bool WorkGroupInfo : 1; 168 bool PrivateSegmentWaveByteOffset : 1; 169 170 bool WorkItemIDX : 1; // Always initialized. 171 bool WorkItemIDY : 1; 172 bool WorkItemIDZ : 1; 173 174 // Private memory buffer 175 // Compute directly in sgpr[0:1] 176 // Other shaders indirect 64-bits at sgpr[0:1] 177 bool PrivateMemoryInputPtr : 1; 178 179 MCPhysReg getNextUserSGPR() const { 180 assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs"); 181 return AMDGPU::SGPR0 + NumUserSGPRs; 182 } 183 184 MCPhysReg getNextSystemSGPR() const { 185 return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs; 186 } 187 188 public: 189 struct SpilledReg { 190 unsigned VGPR = AMDGPU::NoRegister; 191 int Lane = -1; 192 193 SpilledReg() = default; 194 SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) { } 195 196 bool hasLane() { return Lane != -1;} 197 bool hasReg() { return VGPR != AMDGPU::NoRegister;} 198 }; 199 200 private: 201 // SGPR->VGPR spilling support. 202 typedef std::pair<unsigned, unsigned> SpillRegMask; 203 204 // Track VGPR + wave index for each subregister of the SGPR spilled to 205 // frameindex key. 206 DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills; 207 unsigned NumVGPRSpillLanes = 0; 208 SmallVector<unsigned, 2> SpillVGPRs; 209 210 public: 211 212 SIMachineFunctionInfo(const MachineFunction &MF); 213 214 ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const { 215 auto I = SGPRToVGPRSpills.find(FrameIndex); 216 return (I == SGPRToVGPRSpills.end()) ? 217 ArrayRef<SpilledReg>() : makeArrayRef(I->second); 218 } 219 220 bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI); 221 void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI); 222 223 bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; }; 224 unsigned getTIDReg() const { return TIDReg; }; 225 void setTIDReg(unsigned Reg) { TIDReg = Reg; } 226 227 // Add user SGPRs. 228 unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI); 229 unsigned addDispatchPtr(const SIRegisterInfo &TRI); 230 unsigned addQueuePtr(const SIRegisterInfo &TRI); 231 unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI); 232 unsigned addDispatchID(const SIRegisterInfo &TRI); 233 unsigned addFlatScratchInit(const SIRegisterInfo &TRI); 234 unsigned addPrivateMemoryPtr(const SIRegisterInfo &TRI); 235 236 // Add system SGPRs. 237 unsigned addWorkGroupIDX() { 238 WorkGroupIDXSystemSGPR = getNextSystemSGPR(); 239 NumSystemSGPRs += 1; 240 return WorkGroupIDXSystemSGPR; 241 } 242 243 unsigned addWorkGroupIDY() { 244 WorkGroupIDYSystemSGPR = getNextSystemSGPR(); 245 NumSystemSGPRs += 1; 246 return WorkGroupIDYSystemSGPR; 247 } 248 249 unsigned addWorkGroupIDZ() { 250 WorkGroupIDZSystemSGPR = getNextSystemSGPR(); 251 NumSystemSGPRs += 1; 252 return WorkGroupIDZSystemSGPR; 253 } 254 255 unsigned addWorkGroupInfo() { 256 WorkGroupInfoSystemSGPR = getNextSystemSGPR(); 257 NumSystemSGPRs += 1; 258 return WorkGroupInfoSystemSGPR; 259 } 260 261 unsigned addPrivateSegmentWaveByteOffset() { 262 PrivateSegmentWaveByteOffsetSystemSGPR = getNextSystemSGPR(); 263 NumSystemSGPRs += 1; 264 return PrivateSegmentWaveByteOffsetSystemSGPR; 265 } 266 267 void setPrivateSegmentWaveByteOffset(unsigned Reg) { 268 PrivateSegmentWaveByteOffsetSystemSGPR = Reg; 269 } 270 271 bool hasPrivateSegmentBuffer() const { 272 return PrivateSegmentBuffer; 273 } 274 275 bool hasDispatchPtr() const { 276 return DispatchPtr; 277 } 278 279 bool hasQueuePtr() const { 280 return QueuePtr; 281 } 282 283 bool hasKernargSegmentPtr() const { 284 return KernargSegmentPtr; 285 } 286 287 bool hasDispatchID() const { 288 return DispatchID; 289 } 290 291 bool hasFlatScratchInit() const { 292 return FlatScratchInit; 293 } 294 295 bool hasGridWorkgroupCountX() const { 296 return GridWorkgroupCountX; 297 } 298 299 bool hasGridWorkgroupCountY() const { 300 return GridWorkgroupCountY; 301 } 302 303 bool hasGridWorkgroupCountZ() const { 304 return GridWorkgroupCountZ; 305 } 306 307 bool hasWorkGroupIDX() const { 308 return WorkGroupIDX; 309 } 310 311 bool hasWorkGroupIDY() const { 312 return WorkGroupIDY; 313 } 314 315 bool hasWorkGroupIDZ() const { 316 return WorkGroupIDZ; 317 } 318 319 bool hasWorkGroupInfo() const { 320 return WorkGroupInfo; 321 } 322 323 bool hasPrivateSegmentWaveByteOffset() const { 324 return PrivateSegmentWaveByteOffset; 325 } 326 327 bool hasWorkItemIDX() const { 328 return WorkItemIDX; 329 } 330 331 bool hasWorkItemIDY() const { 332 return WorkItemIDY; 333 } 334 335 bool hasWorkItemIDZ() const { 336 return WorkItemIDZ; 337 } 338 339 bool hasPrivateMemoryInputPtr() const { 340 return PrivateMemoryInputPtr; 341 } 342 343 unsigned getNumUserSGPRs() const { 344 return NumUserSGPRs; 345 } 346 347 unsigned getNumPreloadedSGPRs() const { 348 return NumUserSGPRs + NumSystemSGPRs; 349 } 350 351 unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const { 352 return PrivateSegmentWaveByteOffsetSystemSGPR; 353 } 354 355 /// \brief Returns the physical register reserved for use as the resource 356 /// descriptor for scratch accesses. 357 unsigned getScratchRSrcReg() const { 358 return ScratchRSrcReg; 359 } 360 361 void setScratchRSrcReg(unsigned Reg) { 362 assert(Reg != AMDGPU::NoRegister && "Should never be unset"); 363 ScratchRSrcReg = Reg; 364 } 365 366 unsigned getScratchWaveOffsetReg() const { 367 return ScratchWaveOffsetReg; 368 } 369 370 void setScratchWaveOffsetReg(unsigned Reg) { 371 assert(Reg != AMDGPU::NoRegister && "Should never be unset"); 372 ScratchWaveOffsetReg = Reg; 373 } 374 375 unsigned getQueuePtrUserSGPR() const { 376 return QueuePtrUserSGPR; 377 } 378 379 unsigned getPrivateMemoryPtrUserSGPR() const { 380 return PrivateMemoryPtrUserSGPR; 381 } 382 383 bool hasSpilledSGPRs() const { 384 return HasSpilledSGPRs; 385 } 386 387 void setHasSpilledSGPRs(bool Spill = true) { 388 HasSpilledSGPRs = Spill; 389 } 390 391 bool hasSpilledVGPRs() const { 392 return HasSpilledVGPRs; 393 } 394 395 void setHasSpilledVGPRs(bool Spill = true) { 396 HasSpilledVGPRs = Spill; 397 } 398 399 bool hasNonSpillStackObjects() const { 400 return HasNonSpillStackObjects; 401 } 402 403 void setHasNonSpillStackObjects(bool StackObject = true) { 404 HasNonSpillStackObjects = StackObject; 405 } 406 407 unsigned getNumSpilledSGPRs() const { 408 return NumSpilledSGPRs; 409 } 410 411 unsigned getNumSpilledVGPRs() const { 412 return NumSpilledVGPRs; 413 } 414 415 void addToSpilledSGPRs(unsigned num) { 416 NumSpilledSGPRs += num; 417 } 418 419 void addToSpilledVGPRs(unsigned num) { 420 NumSpilledVGPRs += num; 421 } 422 423 unsigned getPSInputAddr() const { 424 return PSInputAddr; 425 } 426 427 bool isPSInputAllocated(unsigned Index) const { 428 return PSInputAddr & (1 << Index); 429 } 430 431 void markPSInputAllocated(unsigned Index) { 432 PSInputAddr |= 1 << Index; 433 } 434 435 bool returnsVoid() const { 436 return ReturnsVoid; 437 } 438 439 void setIfReturnsVoid(bool Value) { 440 ReturnsVoid = Value; 441 } 442 443 /// \returns A pair of default/requested minimum/maximum flat work group sizes 444 /// for this function. 445 std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const { 446 return FlatWorkGroupSizes; 447 } 448 449 /// \returns Default/requested minimum flat work group size for this function. 450 unsigned getMinFlatWorkGroupSize() const { 451 return FlatWorkGroupSizes.first; 452 } 453 454 /// \returns Default/requested maximum flat work group size for this function. 455 unsigned getMaxFlatWorkGroupSize() const { 456 return FlatWorkGroupSizes.second; 457 } 458 459 /// \returns A pair of default/requested minimum/maximum number of waves per 460 /// execution unit. 461 std::pair<unsigned, unsigned> getWavesPerEU() const { 462 return WavesPerEU; 463 } 464 465 /// \returns Default/requested minimum number of waves per execution unit. 466 unsigned getMinWavesPerEU() const { 467 return WavesPerEU.first; 468 } 469 470 /// \returns Default/requested maximum number of waves per execution unit. 471 unsigned getMaxWavesPerEU() const { 472 return WavesPerEU.second; 473 } 474 475 /// \returns Stack object index for \p Dim's work group ID. 476 int getDebuggerWorkGroupIDStackObjectIndex(unsigned Dim) const { 477 assert(Dim < 3); 478 return DebuggerWorkGroupIDStackObjectIndices[Dim]; 479 } 480 481 /// \brief Sets stack object index for \p Dim's work group ID to \p ObjectIdx. 482 void setDebuggerWorkGroupIDStackObjectIndex(unsigned Dim, int ObjectIdx) { 483 assert(Dim < 3); 484 DebuggerWorkGroupIDStackObjectIndices[Dim] = ObjectIdx; 485 } 486 487 /// \returns Stack object index for \p Dim's work item ID. 488 int getDebuggerWorkItemIDStackObjectIndex(unsigned Dim) const { 489 assert(Dim < 3); 490 return DebuggerWorkItemIDStackObjectIndices[Dim]; 491 } 492 493 /// \brief Sets stack object index for \p Dim's work item ID to \p ObjectIdx. 494 void setDebuggerWorkItemIDStackObjectIndex(unsigned Dim, int ObjectIdx) { 495 assert(Dim < 3); 496 DebuggerWorkItemIDStackObjectIndices[Dim] = ObjectIdx; 497 } 498 499 /// \returns SGPR used for \p Dim's work group ID. 500 unsigned getWorkGroupIDSGPR(unsigned Dim) const { 501 switch (Dim) { 502 case 0: 503 assert(hasWorkGroupIDX()); 504 return WorkGroupIDXSystemSGPR; 505 case 1: 506 assert(hasWorkGroupIDY()); 507 return WorkGroupIDYSystemSGPR; 508 case 2: 509 assert(hasWorkGroupIDZ()); 510 return WorkGroupIDZSystemSGPR; 511 } 512 llvm_unreachable("unexpected dimension"); 513 } 514 515 /// \returns VGPR used for \p Dim' work item ID. 516 unsigned getWorkItemIDVGPR(unsigned Dim) const { 517 switch (Dim) { 518 case 0: 519 assert(hasWorkItemIDX()); 520 return AMDGPU::VGPR0; 521 case 1: 522 assert(hasWorkItemIDY()); 523 return AMDGPU::VGPR1; 524 case 2: 525 assert(hasWorkItemIDZ()); 526 return AMDGPU::VGPR2; 527 } 528 llvm_unreachable("unexpected dimension"); 529 } 530 531 const AMDGPUBufferPseudoSourceValue *getBufferPSV() const { 532 return &BufferPSV; 533 } 534 535 const AMDGPUImagePseudoSourceValue *getImagePSV() const { 536 return &ImagePSV; 537 } 538 }; 539 540 } // end namespace llvm 541 542 #endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H 543