//==- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface --*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
#define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H

#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUMachineFunction.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include <array>
#include <cassert>
#include <utility>
#include <vector>

namespace llvm {

class MachineFrameInfo;
class MachineFunction;
class TargetRegisterClass;

class AMDGPUImagePseudoSourceValue : public PseudoSourceValue {
public:
  // TODO: Is the img rsrc useful?
  explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII)
      : PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) {}

  bool isConstant(const MachineFrameInfo *) const override {
    // This should probably be true for most images, but we will start by being
    // conservative.
    return false;
  }

  bool isAliased(const MachineFrameInfo *) const override {
    return true;
  }

  bool mayAlias(const MachineFrameInfo *) const override {
    return true;
  }
};

class AMDGPUBufferPseudoSourceValue : public PseudoSourceValue {
public:
  explicit AMDGPUBufferPseudoSourceValue(const TargetInstrInfo &TII)
      : PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) {}

  bool isConstant(const MachineFrameInfo *) const override {
    // This should probably be true for most buffers, but we will start by
    // being conservative.
    return false;
  }

  bool isAliased(const MachineFrameInfo *) const override {
    return true;
  }

  bool mayAlias(const MachineFrameInfo *) const override {
    return true;
  }
};

/// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
/// tells the hardware which interpolation parameters to load.
class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
  unsigned TIDReg = AMDGPU::NoRegister;

  // Registers that may be reserved for spilling purposes. These may be the
  // same as the input registers.
  unsigned ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG;
  unsigned ScratchWaveOffsetReg = AMDGPU::SCRATCH_WAVE_OFFSET_REG;

  // Register holding the current function's offset into the scratch buffer,
  // incremented from the kernel's scratch wave offset register. For an entry
  // function, this is exactly the same as the ScratchWaveOffsetReg.
  unsigned FrameOffsetReg = AMDGPU::FP_REG;

  // Top of the stack SGPR offset derived from the ScratchWaveOffsetReg.
  unsigned StackPtrOffsetReg = AMDGPU::SP_REG;

  AMDGPUFunctionArgInfo ArgInfo;

  // Graphics info.
  unsigned PSInputAddr = 0;
  unsigned PSInputEnable = 0;

  /// Number of bytes of arguments this function has on the stack. If the
  /// callee is expected to restore the argument stack this should be a
  /// multiple of 16, all usable during a tail call.
  ///
  /// The alternative would forbid tail call optimisation in some cases: if we
  /// want to transfer control from a function with 8 bytes of stack-argument
  /// space to a function with 16 bytes, then misalignment of this value would
  /// make a stack adjustment necessary, which could not be undone by the
  /// callee.
  unsigned BytesInStackArgArea = 0;
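  // Illustrative sketch (not part of this interface; `Info` and `ArgBytes`
  // are assumed to be the current SIMachineFunctionInfo and the raw byte
  // count of stack-passed arguments): a calling-convention lowering that
  // wants to keep tail calls legal could record a rounded-up size, e.g.
  //
  //   Info->setBytesInStackArgArea(alignTo(ArgBytes, 16));
  //
  // With ArgBytes = 8 this records 16, so tail-calling a function that needs
  // 16 bytes of stack arguments requires no stack adjustment that the callee
  // would be unable to undo.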

  bool ReturnsVoid = true;

  // A pair of default/requested minimum/maximum flat work group sizes.
  // Minimum - first, maximum - second.
  std::pair<unsigned, unsigned> FlatWorkGroupSizes = {0, 0};

  // A pair of default/requested minimum/maximum number of waves per execution
  // unit. Minimum - first, maximum - second.
  std::pair<unsigned, unsigned> WavesPerEU = {0, 0};

  DenseMap<const Value *,
           std::unique_ptr<const AMDGPUBufferPseudoSourceValue>> BufferPSVs;
  DenseMap<const Value *,
           std::unique_ptr<const AMDGPUImagePseudoSourceValue>> ImagePSVs;

private:
  unsigned LDSWaveSpillSize = 0;
  unsigned NumUserSGPRs = 0;
  unsigned NumSystemSGPRs = 0;

  bool HasSpilledSGPRs = false;
  bool HasSpilledVGPRs = false;
  bool HasNonSpillStackObjects = false;
  bool IsStackRealigned = false;

  unsigned NumSpilledSGPRs = 0;
  unsigned NumSpilledVGPRs = 0;

  // Feature bits required for inputs passed in user SGPRs.
  bool PrivateSegmentBuffer : 1;
  bool DispatchPtr : 1;
  bool QueuePtr : 1;
  bool KernargSegmentPtr : 1;
  bool DispatchID : 1;
  bool FlatScratchInit : 1;

  // Feature bits required for inputs passed in system SGPRs.
  bool WorkGroupIDX : 1; // Always initialized.
  bool WorkGroupIDY : 1;
  bool WorkGroupIDZ : 1;
  bool WorkGroupInfo : 1;
  bool PrivateSegmentWaveByteOffset : 1;

  bool WorkItemIDX : 1; // Always initialized.
  bool WorkItemIDY : 1;
  bool WorkItemIDZ : 1;

  // Private memory buffer
  // Compute directly in sgpr[0:1]
  // Other shaders indirect 64-bits at sgpr[0:1]
  bool ImplicitBufferPtr : 1;

  // Pointer to where the ABI inserts special kernel arguments separate from
  // the user arguments. This is an offset from the KernargSegmentPtr.
  bool ImplicitArgPtr : 1;

  // The hard-wired high half of the address of the global information table
  // for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since
  // current hardware only allows a 16-bit value.
  unsigned GITPtrHigh;

  unsigned HighBitsOf32BitAddress;

  // Current recorded maximum possible occupancy.
  unsigned Occupancy;

  MCPhysReg getNextUserSGPR() const;

  MCPhysReg getNextSystemSGPR() const;

public:
  struct SpilledReg {
    unsigned VGPR = 0;
    int Lane = -1;

    SpilledReg() = default;
    SpilledReg(unsigned R, int L) : VGPR(R), Lane(L) {}

    bool hasLane() const { return Lane != -1; }
    bool hasReg() const { return VGPR != 0; }
  };

  struct SGPRSpillVGPRCSR {
    // VGPR used for SGPR spills.
    unsigned VGPR;

    // If the VGPR is a CSR, the stack slot used to save/restore it in the
    // prolog/epilog.
    Optional<int> FI;

    SGPRSpillVGPRCSR(unsigned V, Optional<int> F) : VGPR(V), FI(F) {}
  };

private:
  // SGPR->VGPR spilling support.
  using SpillRegMask = std::pair<unsigned, unsigned>;

  // Track VGPR + lane index for each subregister of the SGPR spilled to the
  // frame index key.
  DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills;
  unsigned NumVGPRSpillLanes = 0;
  SmallVector<SGPRSpillVGPRCSR, 2> SpillVGPRs;

public:
  SIMachineFunctionInfo(const MachineFunction &MF);

  ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const {
    auto I = SGPRToVGPRSpills.find(FrameIndex);
    return (I == SGPRToVGPRSpills.end()) ?
      ArrayRef<SpilledReg>() : makeArrayRef(I->second);
  }

  ArrayRef<SGPRSpillVGPRCSR> getSGPRSpillVGPRs() const {
    return SpillVGPRs;
  }

  bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
  void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI);
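  // Illustrative sketch (assumed caller context, not part of this interface):
  // a pass holding a MachineFunction `MF` and an SGPR spill frame index `FI`
  // could consume the mapping above roughly as follows.
  //
  //   SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  //   if (FuncInfo->allocateSGPRSpillToVGPR(MF, FI)) {
  //     for (const SpilledReg &Spill : FuncInfo->getSGPRToVGPRSpills(FI)) {
  //       // Each entry gives the VGPR and lane holding one 32-bit
  //       // subregister of the spilled SGPR.
  //     }
  //   }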

  bool hasCalculatedTID() const { return TIDReg != 0; }
  unsigned getTIDReg() const { return TIDReg; }
  void setTIDReg(unsigned Reg) { TIDReg = Reg; }

  unsigned getBytesInStackArgArea() const {
    return BytesInStackArgArea;
  }

  void setBytesInStackArgArea(unsigned Bytes) {
    BytesInStackArgArea = Bytes;
  }

  // Add user SGPRs.
  unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI);
  unsigned addDispatchPtr(const SIRegisterInfo &TRI);
  unsigned addQueuePtr(const SIRegisterInfo &TRI);
  unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI);
  unsigned addDispatchID(const SIRegisterInfo &TRI);
  unsigned addFlatScratchInit(const SIRegisterInfo &TRI);
  unsigned addImplicitBufferPtr(const SIRegisterInfo &TRI);

  // Add system SGPRs.
  unsigned addWorkGroupIDX() {
    ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupIDX.getRegister();
  }

  unsigned addWorkGroupIDY() {
    ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupIDY.getRegister();
  }

  unsigned addWorkGroupIDZ() {
    ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupIDZ.getRegister();
  }

  unsigned addWorkGroupInfo() {
    ArgInfo.WorkGroupInfo = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupInfo.getRegister();
  }

  // Add special VGPR inputs.
  void setWorkItemIDX(ArgDescriptor Arg) {
    ArgInfo.WorkItemIDX = Arg;
  }

  void setWorkItemIDY(ArgDescriptor Arg) {
    ArgInfo.WorkItemIDY = Arg;
  }

  void setWorkItemIDZ(ArgDescriptor Arg) {
    ArgInfo.WorkItemIDZ = Arg;
  }

  unsigned addPrivateSegmentWaveByteOffset() {
    ArgInfo.PrivateSegmentWaveByteOffset =
        ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
  }

  void setPrivateSegmentWaveByteOffset(unsigned Reg) {
    ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(Reg);
  }
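  // Illustrative sketch (assumed call sites, not a definitive sequence): when
  // lowering kernel formal arguments, calling-convention code typically
  // reserves the user SGPRs in ABI order before any system SGPRs, e.g.
  //
  //   if (Info->hasPrivateSegmentBuffer())
  //     Info->addPrivateSegmentBuffer(TRI);
  //   if (Info->hasKernargSegmentPtr())
  //     Info->addKernargSegmentPtr(TRI);
  //   if (Info->hasWorkGroupIDX())
  //     Info->addWorkGroupIDX(); // System SGPRs follow the user SGPRs.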

  bool hasPrivateSegmentBuffer() const {
    return PrivateSegmentBuffer;
  }

  bool hasDispatchPtr() const {
    return DispatchPtr;
  }

  bool hasQueuePtr() const {
    return QueuePtr;
  }

  bool hasKernargSegmentPtr() const {
    return KernargSegmentPtr;
  }

  bool hasDispatchID() const {
    return DispatchID;
  }

  bool hasFlatScratchInit() const {
    return FlatScratchInit;
  }

  bool hasWorkGroupIDX() const {
    return WorkGroupIDX;
  }

  bool hasWorkGroupIDY() const {
    return WorkGroupIDY;
  }

  bool hasWorkGroupIDZ() const {
    return WorkGroupIDZ;
  }

  bool hasWorkGroupInfo() const {
    return WorkGroupInfo;
  }

  bool hasPrivateSegmentWaveByteOffset() const {
    return PrivateSegmentWaveByteOffset;
  }

  bool hasWorkItemIDX() const {
    return WorkItemIDX;
  }

  bool hasWorkItemIDY() const {
    return WorkItemIDY;
  }

  bool hasWorkItemIDZ() const {
    return WorkItemIDZ;
  }

  bool hasImplicitArgPtr() const {
    return ImplicitArgPtr;
  }

  bool hasImplicitBufferPtr() const {
    return ImplicitBufferPtr;
  }

  AMDGPUFunctionArgInfo &getArgInfo() {
    return ArgInfo;
  }

  const AMDGPUFunctionArgInfo &getArgInfo() const {
    return ArgInfo;
  }

  std::pair<const ArgDescriptor *, const TargetRegisterClass *>
  getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
    return ArgInfo.getPreloadedValue(Value);
  }

  unsigned getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
    return ArgInfo.getPreloadedValue(Value).first->getRegister();
  }
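  // Illustrative sketch (assumed caller context): lowering code can query
  // where an ABI input was preloaded, e.g. the kernarg segment pointer:
  //
  //   unsigned Reg =
  //       Info->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
  //
  // getPreloadedValue() additionally returns the expected register class and
  // should be preferred when the descriptor needs to be inspected first.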

  unsigned getGITPtrHigh() const {
    return GITPtrHigh;
  }

  unsigned get32BitAddressHighBits() const {
    return HighBitsOf32BitAddress;
  }

  unsigned getNumUserSGPRs() const {
    return NumUserSGPRs;
  }

  unsigned getNumPreloadedSGPRs() const {
    return NumUserSGPRs + NumSystemSGPRs;
  }

  unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const {
    return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
  }

  /// Returns the physical register reserved for use as the resource
  /// descriptor for scratch accesses.
  unsigned getScratchRSrcReg() const {
    return ScratchRSrcReg;
  }

  void setScratchRSrcReg(unsigned Reg) {
    assert(Reg != 0 && "Should never be unset");
    ScratchRSrcReg = Reg;
  }

  unsigned getScratchWaveOffsetReg() const {
    return ScratchWaveOffsetReg;
  }

  unsigned getFrameOffsetReg() const {
    return FrameOffsetReg;
  }

  void setStackPtrOffsetReg(unsigned Reg) {
    assert(Reg != 0 && "Should never be unset");
    StackPtrOffsetReg = Reg;
  }

  // Note the unset value for this is AMDGPU::SP_REG rather than NoRegister.
  // This is mostly a workaround for MIR tests where state that can't be
  // directly computed from the function is not preserved in serialized MIR.
  unsigned getStackPtrOffsetReg() const {
    return StackPtrOffsetReg;
  }

  void setScratchWaveOffsetReg(unsigned Reg) {
    assert(Reg != 0 && "Should never be unset");
    ScratchWaveOffsetReg = Reg;
    if (isEntryFunction())
      FrameOffsetReg = ScratchWaveOffsetReg;
  }

  unsigned getQueuePtrUserSGPR() const {
    return ArgInfo.QueuePtr.getRegister();
  }

  unsigned getImplicitBufferPtrUserSGPR() const {
    return ArgInfo.ImplicitBufferPtr.getRegister();
  }

  bool hasSpilledSGPRs() const {
    return HasSpilledSGPRs;
  }

  void setHasSpilledSGPRs(bool Spill = true) {
    HasSpilledSGPRs = Spill;
  }

  bool hasSpilledVGPRs() const {
    return HasSpilledVGPRs;
  }

  void setHasSpilledVGPRs(bool Spill = true) {
    HasSpilledVGPRs = Spill;
  }

  bool hasNonSpillStackObjects() const {
    return HasNonSpillStackObjects;
  }

  void setHasNonSpillStackObjects(bool StackObject = true) {
    HasNonSpillStackObjects = StackObject;
  }

  bool isStackRealigned() const {
    return IsStackRealigned;
  }

  void setIsStackRealigned(bool Realigned = true) {
    IsStackRealigned = Realigned;
  }

  unsigned getNumSpilledSGPRs() const {
    return NumSpilledSGPRs;
  }

  unsigned getNumSpilledVGPRs() const {
    return NumSpilledVGPRs;
  }

  void addToSpilledSGPRs(unsigned num) {
    NumSpilledSGPRs += num;
  }

  void addToSpilledVGPRs(unsigned num) {
    NumSpilledVGPRs += num;
  }

  unsigned getPSInputAddr() const {
    return PSInputAddr;
  }

  unsigned getPSInputEnable() const {
    return PSInputEnable;
  }

  bool isPSInputAllocated(unsigned Index) const {
    return PSInputAddr & (1 << Index);
  }

  void markPSInputAllocated(unsigned Index) {
    PSInputAddr |= 1 << Index;
  }

  void markPSInputEnabled(unsigned Index) {
    PSInputEnable |= 1 << Index;
  }

  bool returnsVoid() const {
    return ReturnsVoid;
  }

  void setIfReturnsVoid(bool Value) {
    ReturnsVoid = Value;
  }

  /// \returns A pair of default/requested minimum/maximum flat work group
  /// sizes for this function.
  std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const {
    return FlatWorkGroupSizes;
  }

  /// \returns Default/requested minimum flat work group size for this
  /// function.
  unsigned getMinFlatWorkGroupSize() const {
    return FlatWorkGroupSizes.first;
  }

  /// \returns Default/requested maximum flat work group size for this
  /// function.
  unsigned getMaxFlatWorkGroupSize() const {
    return FlatWorkGroupSizes.second;
  }

  /// \returns A pair of default/requested minimum/maximum number of waves per
  /// execution unit.
  std::pair<unsigned, unsigned> getWavesPerEU() const {
    return WavesPerEU;
  }

  /// \returns Default/requested minimum number of waves per execution unit.
  unsigned getMinWavesPerEU() const {
    return WavesPerEU.first;
  }

  /// \returns Default/requested maximum number of waves per execution unit.
  unsigned getMaxWavesPerEU() const {
    return WavesPerEU.second;
  }

  /// \returns SGPR used for \p Dim's work group ID.
  unsigned getWorkGroupIDSGPR(unsigned Dim) const {
    switch (Dim) {
    case 0:
      assert(hasWorkGroupIDX());
      return ArgInfo.WorkGroupIDX.getRegister();
    case 1:
      assert(hasWorkGroupIDY());
      return ArgInfo.WorkGroupIDY.getRegister();
    case 2:
      assert(hasWorkGroupIDZ());
      return ArgInfo.WorkGroupIDZ.getRegister();
    }
    llvm_unreachable("unexpected dimension");
  }

  /// \returns VGPR used for \p Dim's work item ID.
  unsigned getWorkItemIDVGPR(unsigned Dim) const;

  unsigned getLDSWaveSpillSize() const {
    return LDSWaveSpillSize;
  }

  const AMDGPUBufferPseudoSourceValue *getBufferPSV(const SIInstrInfo &TII,
                                                    const Value *BufferRsrc) {
    assert(BufferRsrc);
    auto PSV = BufferPSVs.try_emplace(
        BufferRsrc,
        llvm::make_unique<AMDGPUBufferPseudoSourceValue>(TII));
    return PSV.first->second.get();
  }

  const AMDGPUImagePseudoSourceValue *getImagePSV(const SIInstrInfo &TII,
                                                  const Value *ImgRsrc) {
    assert(ImgRsrc);
    auto PSV = ImagePSVs.try_emplace(
        ImgRsrc,
        llvm::make_unique<AMDGPUImagePseudoSourceValue>(TII));
    return PSV.first->second.get();
  }
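  // Illustrative sketch (assumed surrounding code; `MF`, `TII`, `RsrcValue`,
  // `Size`, and `Align` are not defined here): these pseudo source values are
  // typically used to build MachineMemOperands for buffer/image accesses so
  // that alias analysis can reason about them, e.g.
  //
  //   const AMDGPUBufferPseudoSourceValue *PSV =
  //       Info->getBufferPSV(TII, RsrcValue);
  //   MachineMemOperand *MMO = MF.getMachineMemOperand(
  //       MachinePointerInfo(PSV), MachineMemOperand::MOLoad, Size, Align);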

  unsigned getOccupancy() const {
    return Occupancy;
  }

  unsigned getMinAllowedOccupancy() const {
    if (!isMemoryBound() && !needsWaveLimiter())
      return Occupancy;
    return (Occupancy < 4) ? Occupancy : 4;
  }

  void limitOccupancy(const MachineFunction &MF);

  void limitOccupancy(unsigned Limit) {
    if (Occupancy > Limit)
      Occupancy = Limit;
  }

  void increaseOccupancy(const MachineFunction &MF, unsigned Limit) {
    if (Occupancy < Limit)
      Occupancy = Limit;
    limitOccupancy(MF);
  }
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H