//==- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface --*- C++ -*-==//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Declares SIMachineFunctionInfo, the per-function machine state the AMDGPU
/// backend tracks for SI+ targets (reserved scratch registers, preloaded
/// SGPR/VGPR argument bookkeeping, SGPR->VGPR spill state), together with the
/// target-custom pseudo source values used to tag buffer and image resource
/// accesses.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
#define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H

#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUMachineFunction.h"
#include "SIRegisterInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include <array>
#include <cassert>
#include <utility>
#include <vector>

namespace llvm {

class MachineFrameInfo;
class MachineFunction;
class SIInstrInfo;
class TargetRegisterClass;

/// Target-custom pseudo source value used to tag memory operands of image
/// resource accesses. Deliberately conservative: reported as non-constant and
/// as aliasing everything.
class AMDGPUImagePseudoSourceValue : public PseudoSourceValue {
public:
  // TODO: Is the img rsrc useful?
  explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII) :
    PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) {}

  bool isConstant(const MachineFrameInfo *) const override {
    // This should probably be true for most images, but we will start by being
    // conservative.
    return false;
  }

  bool isAliased(const MachineFrameInfo *) const override {
    return true;
  }

  bool mayAlias(const MachineFrameInfo *) const override {
    return true;
  }
};

/// Target-custom pseudo source value used to tag memory operands of buffer
/// resource accesses. Same conservative aliasing treatment as the image PSV.
class AMDGPUBufferPseudoSourceValue : public PseudoSourceValue {
public:
  explicit AMDGPUBufferPseudoSourceValue(const TargetInstrInfo &TII) :
    PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) { }

  bool isConstant(const MachineFrameInfo *) const override {
    // This should probably be true for most buffers, but we will start by
    // being conservative.
    return false;
  }

  bool isAliased(const MachineFrameInfo *) const override {
    return true;
  }

  bool mayAlias(const MachineFrameInfo *) const override {
    return true;
  }
};

/// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
/// tells the hardware which interpolation parameters to load.
class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
  // Register holding the thread ID once computed; NoRegister until
  // hasCalculatedTID() becomes true.
  unsigned TIDReg = AMDGPU::NoRegister;

  // Registers that may be reserved for spilling purposes. These may be the same
  // as the input registers.
  unsigned ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG;
  unsigned ScratchWaveOffsetReg = AMDGPU::SCRATCH_WAVE_OFFSET_REG;

  // This is the current function's incremented size from the kernel's scratch
  // wave offset register. For an entry function, this is exactly the same as
  // the ScratchWaveOffsetReg.
  unsigned FrameOffsetReg = AMDGPU::FP_REG;

  // Top of the stack SGPR offset derived from the ScratchWaveOffsetReg.
  unsigned StackPtrOffsetReg = AMDGPU::SP_REG;

  // Which registers/stack slots hold each preloaded calling-convention value.
  AMDGPUFunctionArgInfo ArgInfo;

  // Graphics info.
  unsigned PSInputAddr = 0;
  unsigned PSInputEnable = 0;

  /// Number of bytes of arguments this function has on the stack. If the callee
  /// is expected to restore the argument stack this should be a multiple of 16,
  /// all usable during a tail call.
  ///
  /// The alternative would forbid tail call optimisation in some cases: if we
  /// want to transfer control from a function with 8-bytes of stack-argument
  /// space to a function with 16-bytes then misalignment of this value would
  /// make a stack adjustment necessary, which could not be undone by the
  /// callee.
  unsigned BytesInStackArgArea = 0;

  bool ReturnsVoid = true;

  // A pair of default/requested minimum/maximum flat work group sizes.
  // Minimum - first, maximum - second.
  std::pair<unsigned, unsigned> FlatWorkGroupSizes = {0, 0};

  // A pair of default/requested minimum/maximum number of waves per execution
  // unit. Minimum - first, maximum - second.
  std::pair<unsigned, unsigned> WavesPerEU = {0, 0};

  // Stack object indices for work group IDs.
  std::array<int, 3> DebuggerWorkGroupIDStackObjectIndices = {{0, 0, 0}};

  // Stack object indices for work item IDs.
  std::array<int, 3> DebuggerWorkItemIDStackObjectIndices = {{0, 0, 0}};

  // Lazily-created pseudo source values, keyed by resource descriptor value.
  // See getBufferPSV() / getImagePSV().
  DenseMap<const Value *,
           std::unique_ptr<const AMDGPUBufferPseudoSourceValue>> BufferPSVs;
  DenseMap<const Value *,
           std::unique_ptr<const AMDGPUImagePseudoSourceValue>> ImagePSVs;

private:
  unsigned LDSWaveSpillSize = 0;
  // Counts of SGPRs preloaded with user vs. system values; together they
  // determine the next free SGPR (see getNextUserSGPR/getNextSystemSGPR).
  unsigned NumUserSGPRs = 0;
  unsigned NumSystemSGPRs = 0;

  bool HasSpilledSGPRs = false;
  bool HasSpilledVGPRs = false;
  bool HasNonSpillStackObjects = false;

  unsigned NumSpilledSGPRs = 0;
  unsigned NumSpilledVGPRs = 0;

  // Feature bits required for inputs passed in user SGPRs.
  // NOTE(review): these bitfields have no in-class initializers; presumably
  // the constructor in the .cpp initializes them — confirm there.
  bool PrivateSegmentBuffer : 1;
  bool DispatchPtr : 1;
  bool QueuePtr : 1;
  bool KernargSegmentPtr : 1;
  bool DispatchID : 1;
  bool FlatScratchInit : 1;
  bool GridWorkgroupCountX : 1;
  bool GridWorkgroupCountY : 1;
  bool GridWorkgroupCountZ : 1;

  // Feature bits required for inputs passed in system SGPRs.
  bool WorkGroupIDX : 1; // Always initialized.
  bool WorkGroupIDY : 1;
  bool WorkGroupIDZ : 1;
  bool WorkGroupInfo : 1;
  bool PrivateSegmentWaveByteOffset : 1;

  bool WorkItemIDX : 1; // Always initialized.
  bool WorkItemIDY : 1;
  bool WorkItemIDZ : 1;

  // Private memory buffer
  // Compute directly in sgpr[0:1]
  // Other shaders indirect 64-bits at sgpr[0:1]
  bool ImplicitBufferPtr : 1;

  // Pointer to where the ABI inserts special kernel arguments separate from the
  // user arguments. This is an offset from the KernargSegmentPtr.
  bool ImplicitArgPtr : 1;

  // The hard-wired high half of the address of the global information table
  // for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since
  // current hardware only allows a 16 bit value.
  unsigned GITPtrHigh;

  unsigned HighBitsOf32BitAddress;

  /// \returns The next SGPR available for a user argument. User SGPRs are
  /// allocated starting at SGPR0, before any system SGPRs.
  MCPhysReg getNextUserSGPR() const {
    assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
    return AMDGPU::SGPR0 + NumUserSGPRs;
  }

  /// \returns The next SGPR available for a system argument (follows all user
  /// SGPRs).
  MCPhysReg getNextSystemSGPR() const {
    return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
  }

public:
  /// One lane of a VGPR used to hold a spilled SGPR. hasLane()/hasReg() report
  /// whether the fields have been assigned.
  struct SpilledReg {
    unsigned VGPR = AMDGPU::NoRegister;
    int Lane = -1;

    SpilledReg() = default;
    SpilledReg(unsigned R, int L) : VGPR(R), Lane(L) {}

    bool hasLane() { return Lane != -1; }
    bool hasReg() { return VGPR != AMDGPU::NoRegister; }
  };

  struct SGPRSpillVGPRCSR {
    // VGPR used for SGPR spills
    unsigned VGPR;

    // If the VGPR is a CSR, the stack slot used to save/restore it in the
    // prolog/epilog.
    Optional<int> FI;

    SGPRSpillVGPRCSR(unsigned V, Optional<int> F) : VGPR(V), FI(F) {}
  };

private:
  // SGPR->VGPR spilling support.
  using SpillRegMask = std::pair<unsigned, unsigned>;

  // Track VGPR + wave index for each subregister of the SGPR spilled to
  // frameindex key.
  DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills;
  unsigned NumVGPRSpillLanes = 0;
  SmallVector<SGPRSpillVGPRCSR, 2> SpillVGPRs;

public:
  SIMachineFunctionInfo(const MachineFunction &MF);

  /// \returns The VGPR/lane assignments for the SGPR spilled to \p FrameIndex,
  /// or an empty ArrayRef if no spill was allocated for it.
  ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const {
    auto I = SGPRToVGPRSpills.find(FrameIndex);
    return (I == SGPRToVGPRSpills.end()) ?
      ArrayRef<SpilledReg>() : makeArrayRef(I->second);
  }

  ArrayRef<SGPRSpillVGPRCSR> getSGPRSpillVGPRs() const {
    return SpillVGPRs;
  }

  bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
  void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI);

  bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; }
  unsigned getTIDReg() const { return TIDReg; }
  void setTIDReg(unsigned Reg) { TIDReg = Reg; }

  unsigned getBytesInStackArgArea() const {
    return BytesInStackArgArea;
  }

  void setBytesInStackArgArea(unsigned Bytes) {
    BytesInStackArgArea = Bytes;
  }

  // Add user SGPRs. Each returns the first register allocated for the value.
  unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI);
  unsigned addDispatchPtr(const SIRegisterInfo &TRI);
  unsigned addQueuePtr(const SIRegisterInfo &TRI);
  unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI);
  unsigned addDispatchID(const SIRegisterInfo &TRI);
  unsigned addFlatScratchInit(const SIRegisterInfo &TRI);
  unsigned addImplicitBufferPtr(const SIRegisterInfo &TRI);

  // Add system SGPRs. Each allocates the next system SGPR, records it in
  // ArgInfo, and returns the allocated register.
  unsigned addWorkGroupIDX() {
    ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupIDX.getRegister();
  }

  unsigned addWorkGroupIDY() {
    ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupIDY.getRegister();
  }

  unsigned addWorkGroupIDZ() {
    ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupIDZ.getRegister();
  }

  unsigned addWorkGroupInfo() {
    ArgInfo.WorkGroupInfo = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupInfo.getRegister();
  }

  // Add special VGPR inputs
  void setWorkItemIDX(ArgDescriptor Arg) {
    ArgInfo.WorkItemIDX = Arg;
  }

  void setWorkItemIDY(ArgDescriptor Arg) {
    ArgInfo.WorkItemIDY = Arg;
  }

  void setWorkItemIDZ(ArgDescriptor Arg) {
    ArgInfo.WorkItemIDZ = Arg;
  }

  unsigned addPrivateSegmentWaveByteOffset() {
    ArgInfo.PrivateSegmentWaveByteOffset
      = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
  }

  // Unlike addPrivateSegmentWaveByteOffset(), assigns a caller-chosen register
  // without consuming a system SGPR slot.
  void setPrivateSegmentWaveByteOffset(unsigned Reg) {
    ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(Reg);
  }

  bool hasPrivateSegmentBuffer() const {
    return PrivateSegmentBuffer;
  }

  bool hasDispatchPtr() const {
    return DispatchPtr;
  }

  bool hasQueuePtr() const {
    return QueuePtr;
  }

  bool hasKernargSegmentPtr() const {
    return KernargSegmentPtr;
  }

  bool hasDispatchID() const {
    return DispatchID;
  }

  bool hasFlatScratchInit() const {
    return FlatScratchInit;
  }

  bool hasGridWorkgroupCountX() const {
    return GridWorkgroupCountX;
  }

  bool hasGridWorkgroupCountY() const {
    return GridWorkgroupCountY;
  }

  bool hasGridWorkgroupCountZ() const {
    return GridWorkgroupCountZ;
  }

  bool hasWorkGroupIDX() const {
    return WorkGroupIDX;
  }

  bool hasWorkGroupIDY() const {
    return WorkGroupIDY;
  }

  bool hasWorkGroupIDZ() const {
    return WorkGroupIDZ;
  }

  bool hasWorkGroupInfo() const {
    return WorkGroupInfo;
  }

  bool hasPrivateSegmentWaveByteOffset() const {
    return PrivateSegmentWaveByteOffset;
  }

  bool hasWorkItemIDX() const {
    return WorkItemIDX;
  }

  bool hasWorkItemIDY() const {
    return WorkItemIDY;
  }

  bool hasWorkItemIDZ() const {
    return WorkItemIDZ;
  }

  bool hasImplicitArgPtr() const {
    return ImplicitArgPtr;
  }

  bool hasImplicitBufferPtr() const {
    return ImplicitBufferPtr;
  }

  AMDGPUFunctionArgInfo &getArgInfo() {
    return ArgInfo;
  }

  const AMDGPUFunctionArgInfo &getArgInfo() const {
    return ArgInfo;
  }

  std::pair<const ArgDescriptor *, const TargetRegisterClass *>
  getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
    return ArgInfo.getPreloadedValue(Value);
  }

  // NOTE(review): dereferences the descriptor unconditionally — callers must
  // only ask for values that were actually preloaded.
  unsigned getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
    return ArgInfo.getPreloadedValue(Value).first->getRegister();
  }

  unsigned getGITPtrHigh() const {
    return GITPtrHigh;
  }

  unsigned get32BitAddressHighBits() const {
    return HighBitsOf32BitAddress;
  }

  unsigned getNumUserSGPRs() const {
    return NumUserSGPRs;
  }

  unsigned getNumPreloadedSGPRs() const {
    return NumUserSGPRs + NumSystemSGPRs;
  }

  unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const {
    return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
  }

  /// \brief Returns the physical register reserved for use as the resource
  /// descriptor for scratch accesses.
  unsigned getScratchRSrcReg() const {
    return ScratchRSrcReg;
  }

  void setScratchRSrcReg(unsigned Reg) {
    assert(Reg != AMDGPU::NoRegister && "Should never be unset");
    ScratchRSrcReg = Reg;
  }

  unsigned getScratchWaveOffsetReg() const {
    return ScratchWaveOffsetReg;
  }

  unsigned getFrameOffsetReg() const {
    return FrameOffsetReg;
  }

  void setStackPtrOffsetReg(unsigned Reg) {
    StackPtrOffsetReg = Reg;
  }

  // Note the unset value for this is AMDGPU::SP_REG rather than
  // NoRegister. This is mostly a workaround for MIR tests where state that
  // can't be directly computed from the function is not preserved in serialized
  // MIR.
  unsigned getStackPtrOffsetReg() const {
    return StackPtrOffsetReg;
  }

  // For entry functions this also updates FrameOffsetReg, which is defined to
  // equal the scratch wave offset in that case (see the member comments).
  void setScratchWaveOffsetReg(unsigned Reg) {
    assert(Reg != AMDGPU::NoRegister && "Should never be unset");
    ScratchWaveOffsetReg = Reg;
    if (isEntryFunction())
      FrameOffsetReg = ScratchWaveOffsetReg;
  }

  unsigned getQueuePtrUserSGPR() const {
    return ArgInfo.QueuePtr.getRegister();
  }

  unsigned getImplicitBufferPtrUserSGPR() const {
    return ArgInfo.ImplicitBufferPtr.getRegister();
  }

  bool hasSpilledSGPRs() const {
    return HasSpilledSGPRs;
  }

  void setHasSpilledSGPRs(bool Spill = true) {
    HasSpilledSGPRs = Spill;
  }

  bool hasSpilledVGPRs() const {
    return HasSpilledVGPRs;
  }

  void setHasSpilledVGPRs(bool Spill = true) {
    HasSpilledVGPRs = Spill;
  }

  bool hasNonSpillStackObjects() const {
    return HasNonSpillStackObjects;
  }

  void setHasNonSpillStackObjects(bool StackObject = true) {
    HasNonSpillStackObjects = StackObject;
  }

  unsigned getNumSpilledSGPRs() const {
    return NumSpilledSGPRs;
  }

  unsigned getNumSpilledVGPRs() const {
    return NumSpilledVGPRs;
  }

  void addToSpilledSGPRs(unsigned num) {
    NumSpilledSGPRs += num;
  }

  void addToSpilledVGPRs(unsigned num) {
    NumSpilledVGPRs += num;
  }

  unsigned getPSInputAddr() const {
    return PSInputAddr;
  }

  unsigned getPSInputEnable() const {
    return PSInputEnable;
  }

  // PSInputAddr/PSInputEnable are bit masks indexed by PS input slot.
  bool isPSInputAllocated(unsigned Index) const {
    return PSInputAddr & (1 << Index);
  }

  void markPSInputAllocated(unsigned Index) {
    PSInputAddr |= 1 << Index;
  }

  void markPSInputEnabled(unsigned Index) {
    PSInputEnable |= 1 << Index;
  }

  bool returnsVoid() const {
    return ReturnsVoid;
  }

  void setIfReturnsVoid(bool Value) {
    ReturnsVoid = Value;
  }

  /// \returns A pair of default/requested minimum/maximum flat work group sizes
  /// for this function.
  std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const {
    return FlatWorkGroupSizes;
  }

  /// \returns Default/requested minimum flat work group size for this function.
  unsigned getMinFlatWorkGroupSize() const {
    return FlatWorkGroupSizes.first;
  }

  /// \returns Default/requested maximum flat work group size for this function.
  unsigned getMaxFlatWorkGroupSize() const {
    return FlatWorkGroupSizes.second;
  }

  /// \returns A pair of default/requested minimum/maximum number of waves per
  /// execution unit.
  std::pair<unsigned, unsigned> getWavesPerEU() const {
    return WavesPerEU;
  }

  /// \returns Default/requested minimum number of waves per execution unit.
  unsigned getMinWavesPerEU() const {
    return WavesPerEU.first;
  }

  /// \returns Default/requested maximum number of waves per execution unit.
  unsigned getMaxWavesPerEU() const {
    return WavesPerEU.second;
  }

  /// \returns Stack object index for \p Dim's work group ID.
  int getDebuggerWorkGroupIDStackObjectIndex(unsigned Dim) const {
    assert(Dim < 3);
    return DebuggerWorkGroupIDStackObjectIndices[Dim];
  }

  /// \brief Sets stack object index for \p Dim's work group ID to \p ObjectIdx.
  void setDebuggerWorkGroupIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
    assert(Dim < 3);
    DebuggerWorkGroupIDStackObjectIndices[Dim] = ObjectIdx;
  }

  /// \returns Stack object index for \p Dim's work item ID.
  int getDebuggerWorkItemIDStackObjectIndex(unsigned Dim) const {
    assert(Dim < 3);
    return DebuggerWorkItemIDStackObjectIndices[Dim];
  }

  /// \brief Sets stack object index for \p Dim's work item ID to \p ObjectIdx.
  void setDebuggerWorkItemIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
    assert(Dim < 3);
    DebuggerWorkItemIDStackObjectIndices[Dim] = ObjectIdx;
  }

  /// \returns SGPR used for \p Dim's work group ID.
  unsigned getWorkGroupIDSGPR(unsigned Dim) const {
    switch (Dim) {
    case 0:
      assert(hasWorkGroupIDX());
      return ArgInfo.WorkGroupIDX.getRegister();
    case 1:
      assert(hasWorkGroupIDY());
      return ArgInfo.WorkGroupIDY.getRegister();
    case 2:
      assert(hasWorkGroupIDZ());
      return ArgInfo.WorkGroupIDZ.getRegister();
    }
    llvm_unreachable("unexpected dimension");
  }

  /// \returns VGPR used for \p Dim' work item ID.
  unsigned getWorkItemIDVGPR(unsigned Dim) const {
    switch (Dim) {
    case 0:
      assert(hasWorkItemIDX());
      return AMDGPU::VGPR0;
    case 1:
      assert(hasWorkItemIDY());
      return AMDGPU::VGPR1;
    case 2:
      assert(hasWorkItemIDZ());
      return AMDGPU::VGPR2;
    }
    llvm_unreachable("unexpected dimension");
  }

  unsigned getLDSWaveSpillSize() const {
    return LDSWaveSpillSize;
  }

  /// \returns The pseudo source value for the buffer resource \p BufferRsrc,
  /// creating and caching one on first use.
  const AMDGPUBufferPseudoSourceValue *getBufferPSV(const SIInstrInfo &TII,
                                                    const Value *BufferRsrc) {
    assert(BufferRsrc);
    auto PSV = BufferPSVs.try_emplace(
      BufferRsrc,
      llvm::make_unique<AMDGPUBufferPseudoSourceValue>(TII));
    return PSV.first->second.get();
  }

  /// \returns The pseudo source value for the image resource \p ImgRsrc,
  /// creating and caching one on first use.
  const AMDGPUImagePseudoSourceValue *getImagePSV(const SIInstrInfo &TII,
                                                  const Value *ImgRsrc) {
    assert(ImgRsrc);
    auto PSV = ImagePSVs.try_emplace(
      ImgRsrc,
      llvm::make_unique<AMDGPUImagePseudoSourceValue>(TII));
    return PSV.first->second.get();
  }
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H