//===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "SIMachineFunctionInfo.h"
#include "AMDGPUTargetMachine.h"
#include "llvm/CodeGen/MIRParser/MIParser.h"

#define MAX_LANES 64

using namespace llvm;

SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    PrivateSegmentBuffer(false),
    DispatchPtr(false),
    QueuePtr(false),
    KernargSegmentPtr(false),
    DispatchID(false),
    FlatScratchInit(false),
    WorkGroupIDX(false),
    WorkGroupIDY(false),
    WorkGroupIDZ(false),
    WorkGroupInfo(false),
    PrivateSegmentWaveByteOffset(false),
    WorkItemIDX(false),
    WorkItemIDY(false),
    WorkItemIDZ(false),
    ImplicitBufferPtr(false),
    ImplicitArgPtr(false),
    GITPtrHigh(0xffffffff),
    HighBitsOf32BitAddress(0),
    GDSSize(0) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const Function &F = MF.getFunction();
  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
  WavesPerEU = ST.getWavesPerEU(F);

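  // Start from the occupancy permitted by this function's LDS usage; it may be
  // lowered later via limitOccupancy() as further constraints become known.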
  Occupancy = ST.computeOccupancy(F, getLDSSize());
  CallingConv::ID CC = F.getCallingConv();

  // FIXME: Should have analysis or something rather than attribute to detect
  // calls.
  const bool HasCalls = F.hasFnAttribute("amdgpu-calls");

  // Enable all kernel inputs if we have the fixed ABI. Don't bother if we don't
  // have any calls.
  const bool UseFixedABI = AMDGPUTargetMachine::EnableFixedFunctionABI &&
                           CC != CallingConv::AMDGPU_Gfx &&
                           (!isEntryFunction() || HasCalls);

  if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
    if (!F.arg_empty())
      KernargSegmentPtr = true;
    WorkGroupIDX = true;
    WorkItemIDX = true;
  } else if (CC == CallingConv::AMDGPU_PS) {
    PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
  }

  if (!isEntryFunction()) {
    if (UseFixedABI)
      ArgInfo = AMDGPUArgumentUsageInfo::FixedABIFunctionInfo;

    // TODO: Pick a high register, and shift down, similar to a kernel.
    FrameOffsetReg = AMDGPU::SGPR33;
    StackPtrOffsetReg = AMDGPU::SGPR32;

    if (!ST.enableFlatScratch()) {
      // Non-entry functions have no special inputs for now; other registers
      // are required for scratch access.
      ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;

      ArgInfo.PrivateSegmentBuffer =
        ArgDescriptor::createRegister(ScratchRSrcReg);
    }

    if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
      ImplicitArgPtr = true;
  } else {
    if (F.hasFnAttribute("amdgpu-implicitarg-ptr")) {
      KernargSegmentPtr = true;
      MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
                                 MaxKernArgAlign);
    }
  }

  if (UseFixedABI) {
    WorkGroupIDX = true;
    WorkGroupIDY = true;
    WorkGroupIDZ = true;
    WorkItemIDX = true;
    WorkItemIDY = true;
    WorkItemIDZ = true;
    ImplicitArgPtr = true;
  } else {
    if (F.hasFnAttribute("amdgpu-work-group-id-x"))
      WorkGroupIDX = true;

    if (F.hasFnAttribute("amdgpu-work-group-id-y"))
      WorkGroupIDY = true;

    if (F.hasFnAttribute("amdgpu-work-group-id-z"))
      WorkGroupIDZ = true;

    if (F.hasFnAttribute("amdgpu-work-item-id-x"))
      WorkItemIDX = true;

    if (F.hasFnAttribute("amdgpu-work-item-id-y"))
      WorkItemIDY = true;

    if (F.hasFnAttribute("amdgpu-work-item-id-z"))
      WorkItemIDZ = true;
  }

  bool HasStackObjects = F.hasFnAttribute("amdgpu-stack-objects");
  if (isEntryFunction()) {
    // X, XY, and XYZ are the only supported combinations, so make sure Y is
    // enabled if Z is.
    if (WorkItemIDZ)
      WorkItemIDY = true;

    if (!ST.flatScratchIsArchitected()) {
      PrivateSegmentWaveByteOffset = true;

      // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
      if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
          (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
        ArgInfo.PrivateSegmentWaveByteOffset =
            ArgDescriptor::createRegister(AMDGPU::SGPR5);
    }
  }

  bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
  if (isAmdHsaOrMesa) {
    if (!ST.enableFlatScratch())
      PrivateSegmentBuffer = true;

    if (UseFixedABI) {
      DispatchPtr = true;
      QueuePtr = true;

      // FIXME: We don't need this?
      DispatchID = true;
    } else {
      if (F.hasFnAttribute("amdgpu-dispatch-ptr"))
        DispatchPtr = true;

      if (F.hasFnAttribute("amdgpu-queue-ptr"))
        QueuePtr = true;

      if (F.hasFnAttribute("amdgpu-dispatch-id"))
        DispatchID = true;
    }
  } else if (ST.isMesaGfxShader(F)) {
    ImplicitBufferPtr = true;
  }

  if (UseFixedABI || F.hasFnAttribute("amdgpu-kernarg-segment-ptr"))
    KernargSegmentPtr = true;

  if (ST.hasFlatAddressSpace() && isEntryFunction() &&
      (isAmdHsaOrMesa || ST.enableFlatScratch()) &&
      !ST.flatScratchIsArchitected()) {
    // TODO: This could be refined a lot. The attribute is a poor way of
    // detecting calls or stack objects that may require it before argument
    // lowering.
    if (HasCalls || HasStackObjects || ST.enableFlatScratch())
      FlatScratchInit = true;
  }

  Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
  StringRef S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GITPtrHigh);

  A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
  S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, HighBitsOf32BitAddress);

  S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GDSSize);
}

void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
  limitOccupancy(getMaxWavesPerEU());
  const GCNSubtarget& ST = MF.getSubtarget<GCNSubtarget>();
  limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(),
                 MF.getFunction()));
}

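// Each of the add* helpers below assigns the next free user SGPR(s) to a
// preloaded kernel argument, records the assignment in ArgInfo, and bumps
// NumUserSGPRs accordingly.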
Register SIMachineFunctionInfo::addPrivateSegmentBuffer(
  const SIRegisterInfo &TRI) {
  ArgInfo.PrivateSegmentBuffer =
    ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SGPR_128RegClass));
  NumUserSGPRs += 4;
  return ArgInfo.PrivateSegmentBuffer.getRegister();
}

Register SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchPtr.getRegister();
}

Register SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.QueuePtr.getRegister();
}

Register SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
  ArgInfo.KernargSegmentPtr
    = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.KernargSegmentPtr.getRegister();
}

Register SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchID.getRegister();
}

Register SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
  ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.FlatScratchInit.getRegister();
}

Register SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
  ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.ImplicitBufferPtr.getRegister();
}

bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs,
                                             MCPhysReg Reg) {
  for (unsigned I = 0; CSRegs[I]; ++I) {
    if (CSRegs[I] == Reg)
      return true;
  }

  return false;
}

/// Returns true if \p NumNeed slots are available in VGPRs already used for
/// SGPR spilling.
//
// FIXME: This only works after processFunctionBeforeFrameFinalized
bool SIMachineFunctionInfo::haveFreeLanesForSGPRSpill(const MachineFunction &MF,
                                                      unsigned NumNeed) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  unsigned WaveSize = ST.getWavefrontSize();
  return NumVGPRSpillLanes + NumNeed <= WaveSize * SpillVGPRs.size();
}

/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
                                                    int FI) {
  std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];

  // This has already been allocated.
  if (!SpillLanes.empty())
    return true;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned WaveSize = ST.getWavefrontSize();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

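  // Each 4-byte slot of the spilled SGPR object maps to one lane of a 32-bit
  // VGPR, so a spill of Size bytes needs Size / 4 lanes.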
  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;

  if (NumLanes > WaveSize)
    return false;

  assert(Size >= 4 && "invalid sgpr spill size");
  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");

  // Make sure to handle the case where a wide SGPR spill may span between two
  // VGPRs.
  for (unsigned I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
    Register LaneVGPR;
    unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);

    // Pick the VGPR providing this spill lane:
    // 1. If a VGPR has been reserved for SGPR spills (VGPRReservedForSGPRSpill)
    //    and its lanes are not yet exhausted, use it.
    // 2. Otherwise, reserve a new VGPR whenever the previous one is full
    //    (i.e. when NumVGPRSpillLanes = 0, WaveSize, 2*WaveSize, ..); other
    //    lanes reuse the most recently reserved VGPR.
    if (FuncInfo->VGPRReservedForSGPRSpill && NumVGPRSpillLanes < WaveSize) {
      assert(FuncInfo->VGPRReservedForSGPRSpill == SpillVGPRs.back().VGPR);
      LaneVGPR = FuncInfo->VGPRReservedForSGPRSpill;
    } else if (VGPRIndex == 0) {
      LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
      if (LaneVGPR == AMDGPU::NoRegister) {
        // We have no VGPRs left for spilling SGPRs. Reset because we will not
        // partially spill the SGPR to VGPRs.
        SGPRToVGPRSpills.erase(FI);
        NumVGPRSpillLanes -= I;
        return false;
      }

      Optional<int> SpillFI;
      // We need to preserve inactive lanes, so always save the VGPR, even if
      // it is a caller-saved register.
      if (!isEntryFunction()) {
        SpillFI = FrameInfo.CreateSpillStackObject(4, Align(4));
      }

      SpillVGPRs.push_back(SGPRSpillVGPR(LaneVGPR, SpillFI));

      // Add this register as live-in to all blocks to avoid machine verifier
      // complaining about use of an undefined physical register.
      for (MachineBasicBlock &BB : MF)
        BB.addLiveIn(LaneVGPR);
    } else {
      LaneVGPR = SpillVGPRs.back().VGPR;
    }

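    // Remember which VGPR lane holds this 4-byte slot of the SGPR spill.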
    SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
  }

  return true;
}

/// Reserve a VGPR for spilling of SGPRs
bool SIMachineFunctionInfo::reserveVGPRforSGPRSpills(MachineFunction &MF) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

  Register LaneVGPR = TRI->findUnusedRegister(
      MF.getRegInfo(), &AMDGPU::VGPR_32RegClass, MF, true);
  if (LaneVGPR == Register())
    return false;
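  // Track the reserved VGPR; it is recorded without a save slot (None) here,
  // and allocateSGPRSpillToVGPR hands out its lanes first.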
  SpillVGPRs.push_back(SGPRSpillVGPR(LaneVGPR, None));
  FuncInfo->VGPRReservedForSGPRSpill = LaneVGPR;
  return true;
}

/// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI.
/// Either AGPR is spilled to VGPR or vice versa.
/// Returns true if \p FI can be eliminated completely.
bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
                                                    int FI,
                                                    bool isAGPRtoVGPR) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

  assert(ST.hasMAIInsts() && FrameInfo.isSpillSlotObjectIndex(FI));

  auto &Spill = VGPRToAGPRSpills[FI];

  // This has already been allocated.
  if (!Spill.Lanes.empty())
    return Spill.FullyAllocated;

  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;
  Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister);

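  // Spills go to the opposite register file: AGPR spills take VGPR lanes and
  // VGPR spills take AGPR lanes.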
  const TargetRegisterClass &RC =
      isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass;
  auto Regs = RC.getRegisters();

  auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR;
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  Spill.FullyAllocated = true;

  // FIXME: Move allocation logic out of MachineFunctionInfo and initialize
  // once.
  BitVector OtherUsedRegs;
  OtherUsedRegs.resize(TRI->getNumRegs());

  const uint32_t *CSRMask =
      TRI->getCallPreservedMask(MF, MF.getFunction().getCallingConv());
  if (CSRMask)
    OtherUsedRegs.setBitsInMask(CSRMask);

  // TODO: Should include register tuples, but doesn't matter with current
  // usage.
  for (MCPhysReg Reg : SpillAGPR)
    OtherUsedRegs.set(Reg);
  for (MCPhysReg Reg : SpillVGPR)
    OtherUsedRegs.set(Reg);

  SmallVectorImpl<MCPhysReg>::const_iterator NextSpillReg = Regs.begin();
  for (unsigned I = 0; I < NumLanes; ++I) {
    NextSpillReg = std::find_if(
        NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) {
          return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
                 !OtherUsedRegs[Reg];
        });

    if (NextSpillReg == Regs.end()) { // Registers exhausted
      Spill.FullyAllocated = false;
      break;
    }

    OtherUsedRegs.set(*NextSpillReg);
    SpillRegs.push_back(*NextSpillReg);
    Spill.Lanes[I] = *NextSpillReg++;
  }

  return Spill.FullyAllocated;
}

void SIMachineFunctionInfo::removeDeadFrameIndices(MachineFrameInfo &MFI) {
  // The FP & BP spills haven't been inserted yet, so keep them around.
  for (auto &R : SGPRToVGPRSpills) {
    if (R.first != FramePointerSaveIndex && R.first != BasePointerSaveIndex)
      MFI.RemoveStackObject(R.first);
  }

  // All other SGPRs must be allocated on the default stack, so reset the stack
  // ID.
  for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e;
       ++i)
    if (i != FramePointerSaveIndex && i != BasePointerSaveIndex)
      MFI.setStackID(i, TargetStackID::Default);

  for (auto &R : VGPRToAGPRSpills) {
    if (R.second.FullyAllocated)
      MFI.RemoveStackObject(R.first);
  }
}

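// Lazily create the frame index used for register scavenging: entry functions
// use a fixed object at offset 0, while callable functions allocate an
// ordinary stack object.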
int SIMachineFunctionInfo::getScavengeFI(MachineFrameInfo &MFI,
                                         const SIRegisterInfo &TRI) {
  if (ScavengeFI)
    return *ScavengeFI;
  if (isEntryFunction()) {
    ScavengeFI = MFI.CreateFixedObject(
        TRI.getSpillSize(AMDGPU::SGPR_32RegClass), 0, false);
  } else {
    ScavengeFI = MFI.CreateStackObject(
        TRI.getSpillSize(AMDGPU::SGPR_32RegClass),
        TRI.getSpillAlign(AMDGPU::SGPR_32RegClass), false);
  }
  return *ScavengeFI;
}

MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
  assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
  return AMDGPU::SGPR0 + NumUserSGPRs;
}

MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
  return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
}

Register
SIMachineFunctionInfo::getGITPtrLoReg(const MachineFunction &MF) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  if (!ST.isAmdPalOS())
    return Register();
  Register GitPtrLo = AMDGPU::SGPR0; // Low GIT address passed in
  if (ST.hasMergedShaders()) {
    switch (MF.getFunction().getCallingConv()) {
    case CallingConv::AMDGPU_HS:
    case CallingConv::AMDGPU_GS:
      // Low GIT address is passed in s8 rather than s0 for an LS+HS or
      // ES+GS merged shader on gfx9+.
      GitPtrLo = AMDGPU::SGPR8;
      return GitPtrLo;
    default:
      return GitPtrLo;
    }
  }
  return GitPtrLo;
}

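// MIR serialization support: the helpers below convert the in-memory function
// info into the yaml::SIMachineFunctionInfo mirror used by the MIR printer and
// parser.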
static yaml::StringValue regToString(Register Reg,
                                     const TargetRegisterInfo &TRI) {
  yaml::StringValue Dest;
  {
    raw_string_ostream OS(Dest.Value);
    OS << printReg(Reg, &TRI);
  }
  return Dest;
}

static Optional<yaml::SIArgumentInfo>
convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,
                    const TargetRegisterInfo &TRI) {
  yaml::SIArgumentInfo AI;

  auto convertArg = [&](Optional<yaml::SIArgument> &A,
                        const ArgDescriptor &Arg) {
    if (!Arg)
      return false;

    // Create a register or stack argument.
    yaml::SIArgument SA = yaml::SIArgument::createArgument(Arg.isRegister());
    if (Arg.isRegister()) {
      raw_string_ostream OS(SA.RegisterName.Value);
      OS << printReg(Arg.getRegister(), &TRI);
    } else
      SA.StackOffset = Arg.getStackOffset();
    // Check and update the optional mask.
    if (Arg.isMasked())
      SA.Mask = Arg.getMask();

    A = SA;
    return true;
  };

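  // Convert every possible argument; Any records whether at least one was set
  // so that an empty argument block is omitted from the output.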
  bool Any = false;
  Any |= convertArg(AI.PrivateSegmentBuffer, ArgInfo.PrivateSegmentBuffer);
  Any |= convertArg(AI.DispatchPtr, ArgInfo.DispatchPtr);
  Any |= convertArg(AI.QueuePtr, ArgInfo.QueuePtr);
  Any |= convertArg(AI.KernargSegmentPtr, ArgInfo.KernargSegmentPtr);
  Any |= convertArg(AI.DispatchID, ArgInfo.DispatchID);
  Any |= convertArg(AI.FlatScratchInit, ArgInfo.FlatScratchInit);
  Any |= convertArg(AI.PrivateSegmentSize, ArgInfo.PrivateSegmentSize);
  Any |= convertArg(AI.WorkGroupIDX, ArgInfo.WorkGroupIDX);
  Any |= convertArg(AI.WorkGroupIDY, ArgInfo.WorkGroupIDY);
  Any |= convertArg(AI.WorkGroupIDZ, ArgInfo.WorkGroupIDZ);
  Any |= convertArg(AI.WorkGroupInfo, ArgInfo.WorkGroupInfo);
  Any |= convertArg(AI.PrivateSegmentWaveByteOffset,
                    ArgInfo.PrivateSegmentWaveByteOffset);
  Any |= convertArg(AI.ImplicitArgPtr, ArgInfo.ImplicitArgPtr);
  Any |= convertArg(AI.ImplicitBufferPtr, ArgInfo.ImplicitBufferPtr);
  Any |= convertArg(AI.WorkItemIDX, ArgInfo.WorkItemIDX);
  Any |= convertArg(AI.WorkItemIDY, ArgInfo.WorkItemIDY);
  Any |= convertArg(AI.WorkItemIDZ, ArgInfo.WorkItemIDZ);

  if (Any)
    return AI;

  return None;
}

yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
    const llvm::SIMachineFunctionInfo &MFI, const TargetRegisterInfo &TRI,
    const llvm::MachineFunction &MF)
    : ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
      MaxKernArgAlign(MFI.getMaxKernArgAlign()), LDSSize(MFI.getLDSSize()),
      DynLDSAlign(MFI.getDynLDSAlign()), IsEntryFunction(MFI.isEntryFunction()),
      NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
      MemoryBound(MFI.isMemoryBound()), WaveLimiter(MFI.needsWaveLimiter()),
      HasSpilledSGPRs(MFI.hasSpilledSGPRs()),
      HasSpilledVGPRs(MFI.hasSpilledVGPRs()),
      HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()),
      Occupancy(MFI.getOccupancy()),
      ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)),
      FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
      StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)),
      ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)), Mode(MFI.getMode()) {
  auto SFI = MFI.getOptionalScavengeFI();
  if (SFI)
    ScavengeFI = yaml::FrameIndex(*SFI, MF.getFrameInfo());
}

void yaml::SIMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
  MappingTraits<SIMachineFunctionInfo>::mapping(YamlIO, *this);
}

bool SIMachineFunctionInfo::initializeBaseYamlFields(
    const yaml::SIMachineFunctionInfo &YamlMFI, const MachineFunction &MF,
    PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange) {
  ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize;
  MaxKernArgAlign = assumeAligned(YamlMFI.MaxKernArgAlign);
  LDSSize = YamlMFI.LDSSize;
  DynLDSAlign = YamlMFI.DynLDSAlign;
  HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress;
  Occupancy = YamlMFI.Occupancy;
  IsEntryFunction = YamlMFI.IsEntryFunction;
  NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath;
  MemoryBound = YamlMFI.MemoryBound;
  WaveLimiter = YamlMFI.WaveLimiter;
  HasSpilledSGPRs = YamlMFI.HasSpilledSGPRs;
  HasSpilledVGPRs = YamlMFI.HasSpilledVGPRs;

  if (YamlMFI.ScavengeFI) {
    auto FIOrErr = YamlMFI.ScavengeFI->getFI(MF.getFrameInfo());
    if (!FIOrErr) {
      // Create a diagnostic for the frame index.
      const MemoryBuffer &Buffer =
          *PFS.SM->getMemoryBuffer(PFS.SM->getMainFileID());

      Error = SMDiagnostic(*PFS.SM, SMLoc(), Buffer.getBufferIdentifier(), 1, 1,
                           SourceMgr::DK_Error, toString(FIOrErr.takeError()),
                           "", None, None);
      SourceRange = YamlMFI.ScavengeFI->SourceRange;
      return true;
    }
    ScavengeFI = *FIOrErr;
  } else {
    ScavengeFI = None;
  }
  return false;
}

// Remove the VGPR which was reserved for SGPR spills if there are no spilled
// SGPRs.
bool SIMachineFunctionInfo::removeVGPRForSGPRSpill(Register ReservedVGPR,
                                                   MachineFunction &MF) {
  for (auto *i = SpillVGPRs.begin(); i < SpillVGPRs.end(); i++) {
    if (i->VGPR == ReservedVGPR) {
      SpillVGPRs.erase(i);

      for (MachineBasicBlock &MBB : MF) {
        MBB.removeLiveIn(ReservedVGPR);
        MBB.sortUniqueLiveIns();
      }
      this->VGPRReservedForSGPRSpill = AMDGPU::NoRegister;
      return true;
    }
  }
  return false;
}