1 //==- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface --*- C++ -*-==// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H 15 #define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H 16 17 #include "AMDGPUArgumentUsageInfo.h" 18 #include "AMDGPUMachineFunction.h" 19 #include "SIRegisterInfo.h" 20 #include "llvm/ADT/ArrayRef.h" 21 #include "llvm/ADT/DenseMap.h" 22 #include "llvm/ADT/Optional.h" 23 #include "llvm/ADT/SmallVector.h" 24 #include "llvm/CodeGen/PseudoSourceValue.h" 25 #include "llvm/CodeGen/TargetInstrInfo.h" 26 #include "llvm/MC/MCRegisterInfo.h" 27 #include "llvm/Support/ErrorHandling.h" 28 #include <array> 29 #include <cassert> 30 #include <utility> 31 #include <vector> 32 33 namespace llvm { 34 35 class MachineFrameInfo; 36 class MachineFunction; 37 class SIInstrInfo; 38 class TargetRegisterClass; 39 40 class AMDGPUImagePseudoSourceValue : public PseudoSourceValue { 41 public: 42 // TODO: Is the img rsrc useful? 43 explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII) : 44 PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) {} 45 46 bool isConstant(const MachineFrameInfo *) const override { 47 // This should probably be true for most images, but we will start by being 48 // conservative. 49 return false; 50 } 51 52 bool isAliased(const MachineFrameInfo *) const override { 53 return true; 54 } 55 56 bool mayAlias(const MachineFrameInfo *) const override { 57 return true; 58 } 59 }; 60 61 class AMDGPUBufferPseudoSourceValue : public PseudoSourceValue { 62 public: 63 explicit AMDGPUBufferPseudoSourceValue(const TargetInstrInfo &TII) : 64 PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) { } 65 66 bool isConstant(const MachineFrameInfo *) const override { 67 // This should probably be true for most images, but we will start by being 68 // conservative. 69 return false; 70 } 71 72 bool isAliased(const MachineFrameInfo *) const override { 73 return true; 74 } 75 76 bool mayAlias(const MachineFrameInfo *) const override { 77 return true; 78 } 79 }; 80 81 /// This class keeps track of the SPI_SP_INPUT_ADDR config register, which 82 /// tells the hardware which interpolation parameters to load. 83 class SIMachineFunctionInfo final : public AMDGPUMachineFunction { 84 unsigned TIDReg = AMDGPU::NoRegister; 85 86 // Registers that may be reserved for spilling purposes. These may be the same 87 // as the input registers. 88 unsigned ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG; 89 unsigned ScratchWaveOffsetReg = AMDGPU::SCRATCH_WAVE_OFFSET_REG; 90 91 // This is the current function's incremented size from the kernel's scratch 92 // wave offset register. For an entry function, this is exactly the same as 93 // the ScratchWaveOffsetReg. 94 unsigned FrameOffsetReg = AMDGPU::FP_REG; 95 96 // Top of the stack SGPR offset derived from the ScratchWaveOffsetReg. 97 unsigned StackPtrOffsetReg = AMDGPU::SP_REG; 98 99 AMDGPUFunctionArgInfo ArgInfo; 100 101 // Graphics info. 102 unsigned PSInputAddr = 0; 103 unsigned PSInputEnable = 0; 104 105 /// Number of bytes of arguments this function has on the stack. If the callee 106 /// is expected to restore the argument stack this should be a multiple of 16, 107 /// all usable during a tail call. 108 /// 109 /// The alternative would forbid tail call optimisation in some cases: if we 110 /// want to transfer control from a function with 8-bytes of stack-argument 111 /// space to a function with 16-bytes then misalignment of this value would 112 /// make a stack adjustment necessary, which could not be undone by the 113 /// callee. 114 unsigned BytesInStackArgArea = 0; 115 116 bool ReturnsVoid = true; 117 118 // A pair of default/requested minimum/maximum flat work group sizes. 119 // Minimum - first, maximum - second. 120 std::pair<unsigned, unsigned> FlatWorkGroupSizes = {0, 0}; 121 122 // A pair of default/requested minimum/maximum number of waves per execution 123 // unit. Minimum - first, maximum - second. 124 std::pair<unsigned, unsigned> WavesPerEU = {0, 0}; 125 126 // Stack object indices for work group IDs. 127 std::array<int, 3> DebuggerWorkGroupIDStackObjectIndices = {{0, 0, 0}}; 128 129 // Stack object indices for work item IDs. 130 std::array<int, 3> DebuggerWorkItemIDStackObjectIndices = {{0, 0, 0}}; 131 132 DenseMap<const Value *, 133 std::unique_ptr<const AMDGPUBufferPseudoSourceValue>> BufferPSVs; 134 DenseMap<const Value *, 135 std::unique_ptr<const AMDGPUImagePseudoSourceValue>> ImagePSVs; 136 137 private: 138 unsigned LDSWaveSpillSize = 0; 139 unsigned NumUserSGPRs = 0; 140 unsigned NumSystemSGPRs = 0; 141 142 bool HasSpilledSGPRs = false; 143 bool HasSpilledVGPRs = false; 144 bool HasNonSpillStackObjects = false; 145 bool IsStackRealigned = false; 146 147 unsigned NumSpilledSGPRs = 0; 148 unsigned NumSpilledVGPRs = 0; 149 150 // Feature bits required for inputs passed in user SGPRs. 151 bool PrivateSegmentBuffer : 1; 152 bool DispatchPtr : 1; 153 bool QueuePtr : 1; 154 bool KernargSegmentPtr : 1; 155 bool DispatchID : 1; 156 bool FlatScratchInit : 1; 157 bool GridWorkgroupCountX : 1; 158 bool GridWorkgroupCountY : 1; 159 bool GridWorkgroupCountZ : 1; 160 161 // Feature bits required for inputs passed in system SGPRs. 162 bool WorkGroupIDX : 1; // Always initialized. 163 bool WorkGroupIDY : 1; 164 bool WorkGroupIDZ : 1; 165 bool WorkGroupInfo : 1; 166 bool PrivateSegmentWaveByteOffset : 1; 167 168 bool WorkItemIDX : 1; // Always initialized. 169 bool WorkItemIDY : 1; 170 bool WorkItemIDZ : 1; 171 172 // Private memory buffer 173 // Compute directly in sgpr[0:1] 174 // Other shaders indirect 64-bits at sgpr[0:1] 175 bool ImplicitBufferPtr : 1; 176 177 // Pointer to where the ABI inserts special kernel arguments separate from the 178 // user arguments. This is an offset from the KernargSegmentPtr. 179 bool ImplicitArgPtr : 1; 180 181 // The hard-wired high half of the address of the global information table 182 // for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since 183 // current hardware only allows a 16 bit value. 184 unsigned GITPtrHigh; 185 186 unsigned HighBitsOf32BitAddress; 187 188 MCPhysReg getNextUserSGPR() const { 189 assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs"); 190 return AMDGPU::SGPR0 + NumUserSGPRs; 191 } 192 193 MCPhysReg getNextSystemSGPR() const { 194 return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs; 195 } 196 197 public: 198 struct SpilledReg { 199 unsigned VGPR = AMDGPU::NoRegister; 200 int Lane = -1; 201 202 SpilledReg() = default; 203 SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) {} 204 205 bool hasLane() { return Lane != -1;} 206 bool hasReg() { return VGPR != AMDGPU::NoRegister;} 207 }; 208 209 struct SGPRSpillVGPRCSR { 210 // VGPR used for SGPR spills 211 unsigned VGPR; 212 213 // If the VGPR is a CSR, the stack slot used to save/restore it in the 214 // prolog/epilog. 215 Optional<int> FI; 216 217 SGPRSpillVGPRCSR(unsigned V, Optional<int> F) : VGPR(V), FI(F) {} 218 }; 219 220 private: 221 // SGPR->VGPR spilling support. 222 using SpillRegMask = std::pair<unsigned, unsigned>; 223 224 // Track VGPR + wave index for each subregister of the SGPR spilled to 225 // frameindex key. 226 DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills; 227 unsigned NumVGPRSpillLanes = 0; 228 SmallVector<SGPRSpillVGPRCSR, 2> SpillVGPRs; 229 230 public: 231 SIMachineFunctionInfo(const MachineFunction &MF); 232 233 ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const { 234 auto I = SGPRToVGPRSpills.find(FrameIndex); 235 return (I == SGPRToVGPRSpills.end()) ? 236 ArrayRef<SpilledReg>() : makeArrayRef(I->second); 237 } 238 239 ArrayRef<SGPRSpillVGPRCSR> getSGPRSpillVGPRs() const { 240 return SpillVGPRs; 241 } 242 243 bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI); 244 void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI); 245 246 bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; } 247 unsigned getTIDReg() const { return TIDReg; } 248 void setTIDReg(unsigned Reg) { TIDReg = Reg; } 249 250 unsigned getBytesInStackArgArea() const { 251 return BytesInStackArgArea; 252 } 253 254 void setBytesInStackArgArea(unsigned Bytes) { 255 BytesInStackArgArea = Bytes; 256 } 257 258 // Add user SGPRs. 259 unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI); 260 unsigned addDispatchPtr(const SIRegisterInfo &TRI); 261 unsigned addQueuePtr(const SIRegisterInfo &TRI); 262 unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI); 263 unsigned addDispatchID(const SIRegisterInfo &TRI); 264 unsigned addFlatScratchInit(const SIRegisterInfo &TRI); 265 unsigned addImplicitBufferPtr(const SIRegisterInfo &TRI); 266 267 // Add system SGPRs. 268 unsigned addWorkGroupIDX() { 269 ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR()); 270 NumSystemSGPRs += 1; 271 return ArgInfo.WorkGroupIDX.getRegister(); 272 } 273 274 unsigned addWorkGroupIDY() { 275 ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(getNextSystemSGPR()); 276 NumSystemSGPRs += 1; 277 return ArgInfo.WorkGroupIDY.getRegister(); 278 } 279 280 unsigned addWorkGroupIDZ() { 281 ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(getNextSystemSGPR()); 282 NumSystemSGPRs += 1; 283 return ArgInfo.WorkGroupIDZ.getRegister(); 284 } 285 286 unsigned addWorkGroupInfo() { 287 ArgInfo.WorkGroupInfo = ArgDescriptor::createRegister(getNextSystemSGPR()); 288 NumSystemSGPRs += 1; 289 return ArgInfo.WorkGroupInfo.getRegister(); 290 } 291 292 // Add special VGPR inputs 293 void setWorkItemIDX(ArgDescriptor Arg) { 294 ArgInfo.WorkItemIDX = Arg; 295 } 296 297 void setWorkItemIDY(ArgDescriptor Arg) { 298 ArgInfo.WorkItemIDY = Arg; 299 } 300 301 void setWorkItemIDZ(ArgDescriptor Arg) { 302 ArgInfo.WorkItemIDZ = Arg; 303 } 304 305 unsigned addPrivateSegmentWaveByteOffset() { 306 ArgInfo.PrivateSegmentWaveByteOffset 307 = ArgDescriptor::createRegister(getNextSystemSGPR()); 308 NumSystemSGPRs += 1; 309 return ArgInfo.PrivateSegmentWaveByteOffset.getRegister(); 310 } 311 312 void setPrivateSegmentWaveByteOffset(unsigned Reg) { 313 ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(Reg); 314 } 315 316 bool hasPrivateSegmentBuffer() const { 317 return PrivateSegmentBuffer; 318 } 319 320 bool hasDispatchPtr() const { 321 return DispatchPtr; 322 } 323 324 bool hasQueuePtr() const { 325 return QueuePtr; 326 } 327 328 bool hasKernargSegmentPtr() const { 329 return KernargSegmentPtr; 330 } 331 332 bool hasDispatchID() const { 333 return DispatchID; 334 } 335 336 bool hasFlatScratchInit() const { 337 return FlatScratchInit; 338 } 339 340 bool hasGridWorkgroupCountX() const { 341 return GridWorkgroupCountX; 342 } 343 344 bool hasGridWorkgroupCountY() const { 345 return GridWorkgroupCountY; 346 } 347 348 bool hasGridWorkgroupCountZ() const { 349 return GridWorkgroupCountZ; 350 } 351 352 bool hasWorkGroupIDX() const { 353 return WorkGroupIDX; 354 } 355 356 bool hasWorkGroupIDY() const { 357 return WorkGroupIDY; 358 } 359 360 bool hasWorkGroupIDZ() const { 361 return WorkGroupIDZ; 362 } 363 364 bool hasWorkGroupInfo() const { 365 return WorkGroupInfo; 366 } 367 368 bool hasPrivateSegmentWaveByteOffset() const { 369 return PrivateSegmentWaveByteOffset; 370 } 371 372 bool hasWorkItemIDX() const { 373 return WorkItemIDX; 374 } 375 376 bool hasWorkItemIDY() const { 377 return WorkItemIDY; 378 } 379 380 bool hasWorkItemIDZ() const { 381 return WorkItemIDZ; 382 } 383 384 bool hasImplicitArgPtr() const { 385 return ImplicitArgPtr; 386 } 387 388 bool hasImplicitBufferPtr() const { 389 return ImplicitBufferPtr; 390 } 391 392 AMDGPUFunctionArgInfo &getArgInfo() { 393 return ArgInfo; 394 } 395 396 const AMDGPUFunctionArgInfo &getArgInfo() const { 397 return ArgInfo; 398 } 399 400 std::pair<const ArgDescriptor *, const TargetRegisterClass *> 401 getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const { 402 return ArgInfo.getPreloadedValue(Value); 403 } 404 405 unsigned getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const { 406 return ArgInfo.getPreloadedValue(Value).first->getRegister(); 407 } 408 409 unsigned getGITPtrHigh() const { 410 return GITPtrHigh; 411 } 412 413 unsigned get32BitAddressHighBits() const { 414 return HighBitsOf32BitAddress; 415 } 416 417 unsigned getNumUserSGPRs() const { 418 return NumUserSGPRs; 419 } 420 421 unsigned getNumPreloadedSGPRs() const { 422 return NumUserSGPRs + NumSystemSGPRs; 423 } 424 425 unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const { 426 return ArgInfo.PrivateSegmentWaveByteOffset.getRegister(); 427 } 428 429 /// Returns the physical register reserved for use as the resource 430 /// descriptor for scratch accesses. 431 unsigned getScratchRSrcReg() const { 432 return ScratchRSrcReg; 433 } 434 435 void setScratchRSrcReg(unsigned Reg) { 436 assert(Reg != AMDGPU::NoRegister && "Should never be unset"); 437 ScratchRSrcReg = Reg; 438 } 439 440 unsigned getScratchWaveOffsetReg() const { 441 return ScratchWaveOffsetReg; 442 } 443 444 unsigned getFrameOffsetReg() const { 445 return FrameOffsetReg; 446 } 447 448 void setStackPtrOffsetReg(unsigned Reg) { 449 StackPtrOffsetReg = Reg; 450 } 451 452 // Note the unset value for this is AMDGPU::SP_REG rather than 453 // NoRegister. This is mostly a workaround for MIR tests where state that 454 // can't be directly computed from the function is not preserved in serialized 455 // MIR. 456 unsigned getStackPtrOffsetReg() const { 457 return StackPtrOffsetReg; 458 } 459 460 void setScratchWaveOffsetReg(unsigned Reg) { 461 assert(Reg != AMDGPU::NoRegister && "Should never be unset"); 462 ScratchWaveOffsetReg = Reg; 463 if (isEntryFunction()) 464 FrameOffsetReg = ScratchWaveOffsetReg; 465 } 466 467 unsigned getQueuePtrUserSGPR() const { 468 return ArgInfo.QueuePtr.getRegister(); 469 } 470 471 unsigned getImplicitBufferPtrUserSGPR() const { 472 return ArgInfo.ImplicitBufferPtr.getRegister(); 473 } 474 475 bool hasSpilledSGPRs() const { 476 return HasSpilledSGPRs; 477 } 478 479 void setHasSpilledSGPRs(bool Spill = true) { 480 HasSpilledSGPRs = Spill; 481 } 482 483 bool hasSpilledVGPRs() const { 484 return HasSpilledVGPRs; 485 } 486 487 void setHasSpilledVGPRs(bool Spill = true) { 488 HasSpilledVGPRs = Spill; 489 } 490 491 bool hasNonSpillStackObjects() const { 492 return HasNonSpillStackObjects; 493 } 494 495 void setHasNonSpillStackObjects(bool StackObject = true) { 496 HasNonSpillStackObjects = StackObject; 497 } 498 499 bool isStackRealigned() const { 500 return IsStackRealigned; 501 } 502 503 void setIsStackRealigned(bool Realigned = true) { 504 IsStackRealigned = Realigned; 505 } 506 507 unsigned getNumSpilledSGPRs() const { 508 return NumSpilledSGPRs; 509 } 510 511 unsigned getNumSpilledVGPRs() const { 512 return NumSpilledVGPRs; 513 } 514 515 void addToSpilledSGPRs(unsigned num) { 516 NumSpilledSGPRs += num; 517 } 518 519 void addToSpilledVGPRs(unsigned num) { 520 NumSpilledVGPRs += num; 521 } 522 523 unsigned getPSInputAddr() const { 524 return PSInputAddr; 525 } 526 527 unsigned getPSInputEnable() const { 528 return PSInputEnable; 529 } 530 531 bool isPSInputAllocated(unsigned Index) const { 532 return PSInputAddr & (1 << Index); 533 } 534 535 void markPSInputAllocated(unsigned Index) { 536 PSInputAddr |= 1 << Index; 537 } 538 539 void markPSInputEnabled(unsigned Index) { 540 PSInputEnable |= 1 << Index; 541 } 542 543 bool returnsVoid() const { 544 return ReturnsVoid; 545 } 546 547 void setIfReturnsVoid(bool Value) { 548 ReturnsVoid = Value; 549 } 550 551 /// \returns A pair of default/requested minimum/maximum flat work group sizes 552 /// for this function. 553 std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const { 554 return FlatWorkGroupSizes; 555 } 556 557 /// \returns Default/requested minimum flat work group size for this function. 558 unsigned getMinFlatWorkGroupSize() const { 559 return FlatWorkGroupSizes.first; 560 } 561 562 /// \returns Default/requested maximum flat work group size for this function. 563 unsigned getMaxFlatWorkGroupSize() const { 564 return FlatWorkGroupSizes.second; 565 } 566 567 /// \returns A pair of default/requested minimum/maximum number of waves per 568 /// execution unit. 569 std::pair<unsigned, unsigned> getWavesPerEU() const { 570 return WavesPerEU; 571 } 572 573 /// \returns Default/requested minimum number of waves per execution unit. 574 unsigned getMinWavesPerEU() const { 575 return WavesPerEU.first; 576 } 577 578 /// \returns Default/requested maximum number of waves per execution unit. 579 unsigned getMaxWavesPerEU() const { 580 return WavesPerEU.second; 581 } 582 583 /// \returns Stack object index for \p Dim's work group ID. 584 int getDebuggerWorkGroupIDStackObjectIndex(unsigned Dim) const { 585 assert(Dim < 3); 586 return DebuggerWorkGroupIDStackObjectIndices[Dim]; 587 } 588 589 /// Sets stack object index for \p Dim's work group ID to \p ObjectIdx. 590 void setDebuggerWorkGroupIDStackObjectIndex(unsigned Dim, int ObjectIdx) { 591 assert(Dim < 3); 592 DebuggerWorkGroupIDStackObjectIndices[Dim] = ObjectIdx; 593 } 594 595 /// \returns Stack object index for \p Dim's work item ID. 596 int getDebuggerWorkItemIDStackObjectIndex(unsigned Dim) const { 597 assert(Dim < 3); 598 return DebuggerWorkItemIDStackObjectIndices[Dim]; 599 } 600 601 /// Sets stack object index for \p Dim's work item ID to \p ObjectIdx. 602 void setDebuggerWorkItemIDStackObjectIndex(unsigned Dim, int ObjectIdx) { 603 assert(Dim < 3); 604 DebuggerWorkItemIDStackObjectIndices[Dim] = ObjectIdx; 605 } 606 607 /// \returns SGPR used for \p Dim's work group ID. 608 unsigned getWorkGroupIDSGPR(unsigned Dim) const { 609 switch (Dim) { 610 case 0: 611 assert(hasWorkGroupIDX()); 612 return ArgInfo.WorkGroupIDX.getRegister(); 613 case 1: 614 assert(hasWorkGroupIDY()); 615 return ArgInfo.WorkGroupIDY.getRegister(); 616 case 2: 617 assert(hasWorkGroupIDZ()); 618 return ArgInfo.WorkGroupIDZ.getRegister(); 619 } 620 llvm_unreachable("unexpected dimension"); 621 } 622 623 /// \returns VGPR used for \p Dim' work item ID. 624 unsigned getWorkItemIDVGPR(unsigned Dim) const { 625 switch (Dim) { 626 case 0: 627 assert(hasWorkItemIDX()); 628 return AMDGPU::VGPR0; 629 case 1: 630 assert(hasWorkItemIDY()); 631 return AMDGPU::VGPR1; 632 case 2: 633 assert(hasWorkItemIDZ()); 634 return AMDGPU::VGPR2; 635 } 636 llvm_unreachable("unexpected dimension"); 637 } 638 639 unsigned getLDSWaveSpillSize() const { 640 return LDSWaveSpillSize; 641 } 642 643 const AMDGPUBufferPseudoSourceValue *getBufferPSV(const SIInstrInfo &TII, 644 const Value *BufferRsrc) { 645 assert(BufferRsrc); 646 auto PSV = BufferPSVs.try_emplace( 647 BufferRsrc, 648 llvm::make_unique<AMDGPUBufferPseudoSourceValue>(TII)); 649 return PSV.first->second.get(); 650 } 651 652 const AMDGPUImagePseudoSourceValue *getImagePSV(const SIInstrInfo &TII, 653 const Value *ImgRsrc) { 654 assert(ImgRsrc); 655 auto PSV = ImagePSVs.try_emplace( 656 ImgRsrc, 657 llvm::make_unique<AMDGPUImagePseudoSourceValue>(TII)); 658 return PSV.first->second.get(); 659 } 660 }; 661 662 } // end namespace llvm 663 664 #endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H 665