//===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "SIMachineFunctionInfo.h"
#include "AMDGPUTargetMachine.h"
#include "llvm/CodeGen/MIRParser/MIParser.h"

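// Maximum number of lanes in a wavefront (wave64).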
#define MAX_LANES 64

using namespace llvm;

SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    PrivateSegmentBuffer(false),
    DispatchPtr(false),
    QueuePtr(false),
    KernargSegmentPtr(false),
    DispatchID(false),
    FlatScratchInit(false),
    WorkGroupIDX(false),
    WorkGroupIDY(false),
    WorkGroupIDZ(false),
    WorkGroupInfo(false),
    PrivateSegmentWaveByteOffset(false),
    WorkItemIDX(false),
    WorkItemIDY(false),
    WorkItemIDZ(false),
    ImplicitBufferPtr(false),
    ImplicitArgPtr(false),
    GITPtrHigh(0xffffffff),
    HighBitsOf32BitAddress(0),
    GDSSize(0) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const Function &F = MF.getFunction();
  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
  WavesPerEU = ST.getWavesPerEU(F);

  Occupancy = ST.computeOccupancy(F, getLDSSize());
  CallingConv::ID CC = F.getCallingConv();

  // FIXME: Should have analysis or something rather than attribute to detect
  // calls.
  const bool HasCalls = F.hasFnAttribute("amdgpu-calls");

  // Enable all kernel inputs if we have the fixed ABI. Don't bother if we don't
  // have any calls.
  const bool UseFixedABI = AMDGPUTargetMachine::EnableFixedFunctionABI &&
                           CC != CallingConv::AMDGPU_Gfx &&
                           (!isEntryFunction() || HasCalls);

  if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
    if (!F.arg_empty())
      KernargSegmentPtr = true;
    WorkGroupIDX = true;
    WorkItemIDX = true;
  } else if (CC == CallingConv::AMDGPU_PS) {
    PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
  }

  if (!isEntryFunction()) {
    if (UseFixedABI)
      ArgInfo = AMDGPUArgumentUsageInfo::FixedABIFunctionInfo;

    // TODO: Pick a high register, and shift down, similar to a kernel.
    FrameOffsetReg = AMDGPU::SGPR33;
    StackPtrOffsetReg = AMDGPU::SGPR32;

    if (!ST.enableFlatScratch()) {
      // Non-entry functions have no special inputs for now; the other
      // registers are required for scratch access.
      ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;

      ArgInfo.PrivateSegmentBuffer =
          ArgDescriptor::createRegister(ScratchRSrcReg);
    }

    if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
      ImplicitArgPtr = true;
  } else {
    if (F.hasFnAttribute("amdgpu-implicitarg-ptr")) {
      KernargSegmentPtr = true;
      MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
                                 MaxKernArgAlign);
    }
  }

  if (UseFixedABI) {
    WorkGroupIDX = true;
    WorkGroupIDY = true;
    WorkGroupIDZ = true;
    WorkItemIDX = true;
    WorkItemIDY = true;
    WorkItemIDZ = true;
    ImplicitArgPtr = true;
  } else {
    if (F.hasFnAttribute("amdgpu-work-group-id-x"))
      WorkGroupIDX = true;

    if (F.hasFnAttribute("amdgpu-work-group-id-y"))
      WorkGroupIDY = true;

    if (F.hasFnAttribute("amdgpu-work-group-id-z"))
      WorkGroupIDZ = true;

    if (F.hasFnAttribute("amdgpu-work-item-id-x"))
      WorkItemIDX = true;

    if (F.hasFnAttribute("amdgpu-work-item-id-y"))
      WorkItemIDY = true;

    if (F.hasFnAttribute("amdgpu-work-item-id-z"))
      WorkItemIDZ = true;
  }

  bool HasStackObjects = F.hasFnAttribute("amdgpu-stack-objects");
  if (isEntryFunction()) {
    // X, XY, and XYZ are the only supported combinations, so make sure Y is
    // enabled if Z is.
    if (WorkItemIDZ)
      WorkItemIDY = true;

    if (!ST.flatScratchIsArchitected()) {
      PrivateSegmentWaveByteOffset = true;

      // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
      if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
          (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
        ArgInfo.PrivateSegmentWaveByteOffset =
            ArgDescriptor::createRegister(AMDGPU::SGPR5);
    }
  }

  bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
  if (isAmdHsaOrMesa) {
    if (!ST.enableFlatScratch())
      PrivateSegmentBuffer = true;

    if (UseFixedABI) {
      DispatchPtr = true;
      QueuePtr = true;

      // FIXME: We don't need this?
      DispatchID = true;
    } else {
      if (F.hasFnAttribute("amdgpu-dispatch-ptr"))
        DispatchPtr = true;

      if (F.hasFnAttribute("amdgpu-queue-ptr"))
        QueuePtr = true;

      if (F.hasFnAttribute("amdgpu-dispatch-id"))
        DispatchID = true;
    }
  } else if (ST.isMesaGfxShader(F)) {
    ImplicitBufferPtr = true;
  }

  if (UseFixedABI || F.hasFnAttribute("amdgpu-kernarg-segment-ptr"))
    KernargSegmentPtr = true;

  if (ST.hasFlatAddressSpace() && isEntryFunction() &&
      (isAmdHsaOrMesa || ST.enableFlatScratch()) &&
      !ST.flatScratchIsArchitected()) {
    // TODO: This could be refined a lot. The attribute is a poor way of
    // detecting calls or stack objects that may require it before argument
    // lowering.
    if (HasCalls || HasStackObjects || ST.enableFlatScratch())
      FlatScratchInit = true;
  }

  Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
  StringRef S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GITPtrHigh);

  A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
  S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, HighBitsOf32BitAddress);

  S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GDSSize);
}

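// Clamp the tracked occupancy to both the function's waves-per-EU limit and
// the occupancy permitted by its LDS usage.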
void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
  limitOccupancy(getMaxWavesPerEU());
  const GCNSubtarget& ST = MF.getSubtarget<GCNSubtarget>();
  limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(),
                 MF.getFunction()));
}

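// The add* helpers below assign each preloaded kernel argument to the next
// free user SGPR(s) and bump NumUserSGPRs by the argument's width in
// registers.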
Register SIMachineFunctionInfo::addPrivateSegmentBuffer(
  const SIRegisterInfo &TRI) {
  ArgInfo.PrivateSegmentBuffer =
    ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
      getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SGPR_128RegClass));
  NumUserSGPRs += 4;
  return ArgInfo.PrivateSegmentBuffer.getRegister();
}

Register SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchPtr.getRegister();
}

Register SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.QueuePtr.getRegister();
}

Register SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
  ArgInfo.KernargSegmentPtr
    = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
      getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.KernargSegmentPtr.getRegister();
}

Register SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchID.getRegister();
}

Register SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
  ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.FlatScratchInit.getRegister();
}

Register SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
  ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.ImplicitBufferPtr.getRegister();
}

bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs,
                                             MCPhysReg Reg) {
  for (unsigned I = 0; CSRegs[I]; ++I) {
    if (CSRegs[I] == Reg)
      return true;
  }

  return false;
}

/// \returns true if \p NumNeed slots are available in VGPRs already used for
/// SGPR spilling.
//
// FIXME: This only works after processFunctionBeforeFrameFinalized
bool SIMachineFunctionInfo::haveFreeLanesForSGPRSpill(const MachineFunction &MF,
                                                      unsigned NumNeed) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  unsigned WaveSize = ST.getWavefrontSize();
  return NumVGPRSpillLanes + NumNeed <= WaveSize * SpillVGPRs.size();
}

/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
                                                    int FI) {
  std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];

  // This has already been allocated.
  if (!SpillLanes.empty())
    return true;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned WaveSize = ST.getWavefrontSize();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

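  // Each 32-bit SGPR of the spilled value occupies one VGPR lane.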
  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;

  if (NumLanes > WaveSize)
    return false;

  assert(Size >= 4 && "invalid sgpr spill size");
  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");

  // Make sure to handle the case where a wide SGPR spill may span between two
  // VGPRs.
  for (unsigned I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
    Register LaneVGPR;
    unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);

    // Pick a VGPR for this spill lane: either the VGPR pre-reserved for SGPR
    // spills (tracked by VGPRReservedForSGPRSpill) while it still has free
    // lanes, or a newly reserved VGPR once every lane of the already reserved
    // VGPR(s) is in use (i.e. when NumVGPRSpillLanes = 0, WaveSize,
    // 2*WaveSize, ..).
    if (FuncInfo->VGPRReservedForSGPRSpill && NumVGPRSpillLanes < WaveSize) {
      assert(FuncInfo->VGPRReservedForSGPRSpill == SpillVGPRs.back().VGPR);
      LaneVGPR = FuncInfo->VGPRReservedForSGPRSpill;
    } else if (VGPRIndex == 0) {
      LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
      if (LaneVGPR == AMDGPU::NoRegister) {
        // We have no VGPRs left for spilling SGPRs. Reset because we will not
        // partially spill the SGPR to VGPRs.
        SGPRToVGPRSpills.erase(FI);
        NumVGPRSpillLanes -= I;
        return false;
      }

      Optional<int> SpillFI;
      // We need to preserve inactive lanes, so the VGPR is always saved, even
      // if it is a caller-saved register.
      if (!isEntryFunction()) {
        SpillFI = FrameInfo.CreateSpillStackObject(4, Align(4));
      }

      SpillVGPRs.push_back(SGPRSpillVGPR(LaneVGPR, SpillFI));

      // Add this register as live-in to all blocks to avoid machine verifier
      // complaining about use of an undefined physical register.
      for (MachineBasicBlock &BB : MF)
        BB.addLiveIn(LaneVGPR);
    } else {
      LaneVGPR = SpillVGPRs.back().VGPR;
    }

    SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
  }

  return true;
}

/// Reserve a VGPR for spilling SGPRs.
bool SIMachineFunctionInfo::reserveVGPRforSGPRSpills(MachineFunction &MF) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

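  // Pick an unused VGPR and remember it so later SGPR spills can be assigned
  // to its lanes.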
  Register LaneVGPR = TRI->findUnusedRegister(
      MF.getRegInfo(), &AMDGPU::VGPR_32RegClass, MF, true);
  if (LaneVGPR == Register())
    return false;
  SpillVGPRs.push_back(SGPRSpillVGPR(LaneVGPR, None));
  FuncInfo->VGPRReservedForSGPRSpill = LaneVGPR;
  return true;
}

/// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI.
/// Either AGPR is spilled to VGPR or vice versa.
/// Returns true if \p FI can be eliminated completely.
bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
                                                    int FI,
                                                    bool isAGPRtoVGPR) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

  assert(ST.hasMAIInsts() && FrameInfo.isSpillSlotObjectIndex(FI));

  auto &Spill = VGPRToAGPRSpills[FI];

  // This has already been allocated.
  if (!Spill.Lanes.empty())
    return Spill.FullyAllocated;

  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;
  Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister);

  const TargetRegisterClass &RC =
      isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass;
  auto Regs = RC.getRegisters();

  auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR;
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  Spill.FullyAllocated = true;

  // FIXME: Move allocation logic out of MachineFunctionInfo and initialize
  // once.
  BitVector OtherUsedRegs;
  OtherUsedRegs.resize(TRI->getNumRegs());

  const uint32_t *CSRMask =
      TRI->getCallPreservedMask(MF, MF.getFunction().getCallingConv());
  if (CSRMask)
    OtherUsedRegs.setBitsInMask(CSRMask);

  // TODO: Should include register tuples, but doesn't matter with current
  // usage.
  for (MCPhysReg Reg : SpillAGPR)
    OtherUsedRegs.set(Reg);
  for (MCPhysReg Reg : SpillVGPR)
    OtherUsedRegs.set(Reg);

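  // Walk the candidate registers in order, assigning the first register that
  // is allocatable and not otherwise used to each lane of the spill.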
  SmallVectorImpl<MCPhysReg>::const_iterator NextSpillReg = Regs.begin();
  for (unsigned I = 0; I < NumLanes; ++I) {
    NextSpillReg = std::find_if(
        NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) {
          return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
                 !OtherUsedRegs[Reg];
        });

    if (NextSpillReg == Regs.end()) { // Registers exhausted
      Spill.FullyAllocated = false;
      break;
    }

    OtherUsedRegs.set(*NextSpillReg);
    SpillRegs.push_back(*NextSpillReg);
    Spill.Lanes[I] = *NextSpillReg++;
  }

  return Spill.FullyAllocated;
}

void SIMachineFunctionInfo::removeDeadFrameIndices(MachineFrameInfo &MFI) {
  // The FP & BP spills haven't been inserted yet, so keep them around.
  for (auto &R : SGPRToVGPRSpills) {
    if (R.first != FramePointerSaveIndex && R.first != BasePointerSaveIndex)
      MFI.RemoveStackObject(R.first);
  }

  // All other SGPRs must be allocated on the default stack, so reset the stack
  // ID.
  for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e;
       ++i)
    if (i != FramePointerSaveIndex && i != BasePointerSaveIndex)
      MFI.setStackID(i, TargetStackID::Default);

  for (auto &R : VGPRToAGPRSpills) {
    if (R.second.FullyAllocated)
      MFI.RemoveStackObject(R.first);
  }
}

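// Lazily create and cache a stack slot large enough for an SGPR spill; entry
// functions use a fixed object at offset 0, other functions a regular stack
// object.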
int SIMachineFunctionInfo::getScavengeFI(MachineFrameInfo &MFI,
                                         const SIRegisterInfo &TRI) {
  if (ScavengeFI)
    return *ScavengeFI;
  if (isEntryFunction()) {
    ScavengeFI = MFI.CreateFixedObject(
        TRI.getSpillSize(AMDGPU::SGPR_32RegClass), 0, false);
  } else {
    ScavengeFI = MFI.CreateStackObject(
        TRI.getSpillSize(AMDGPU::SGPR_32RegClass),
        TRI.getSpillAlign(AMDGPU::SGPR_32RegClass), false);
  }
  return *ScavengeFI;
}

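// User SGPRs are allocated upward from SGPR0; system SGPRs follow immediately
// after the last user SGPR.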
MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
  assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
  return AMDGPU::SGPR0 + NumUserSGPRs;
}

MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
  return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
}

Register
SIMachineFunctionInfo::getGITPtrLoReg(const MachineFunction &MF) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  if (!ST.isAmdPalOS())
    return Register();
  Register GitPtrLo = AMDGPU::SGPR0; // Low GIT address passed in
  if (ST.hasMergedShaders()) {
    switch (MF.getFunction().getCallingConv()) {
    case CallingConv::AMDGPU_HS:
    case CallingConv::AMDGPU_GS:
      // Low GIT address is passed in s8 rather than s0 for an LS+HS or
      // ES+GS merged shader on gfx9+.
      GitPtrLo = AMDGPU::SGPR8;
      return GitPtrLo;
    default:
      return GitPtrLo;
    }
  }
  return GitPtrLo;
}

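// MIR YAML serialization helpers: convert the in-memory function info to its
// yaml:: mirror and back.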
static yaml::StringValue regToString(Register Reg,
                                     const TargetRegisterInfo &TRI) {
  yaml::StringValue Dest;
  {
    raw_string_ostream OS(Dest.Value);
    OS << printReg(Reg, &TRI);
  }
  return Dest;
}

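// Convert each argument descriptor that is set into its YAML form; return None
// when no arguments are set so the field is omitted from the output.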
static Optional<yaml::SIArgumentInfo>
convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,
                    const TargetRegisterInfo &TRI) {
  yaml::SIArgumentInfo AI;

  auto convertArg = [&](Optional<yaml::SIArgument> &A,
                        const ArgDescriptor &Arg) {
    if (!Arg)
      return false;

    // Create a register or stack argument.
    yaml::SIArgument SA = yaml::SIArgument::createArgument(Arg.isRegister());
    if (Arg.isRegister()) {
      raw_string_ostream OS(SA.RegisterName.Value);
      OS << printReg(Arg.getRegister(), &TRI);
    } else
      SA.StackOffset = Arg.getStackOffset();
    // Check and update the optional mask.
    if (Arg.isMasked())
      SA.Mask = Arg.getMask();

    A = SA;
    return true;
  };

  bool Any = false;
  Any |= convertArg(AI.PrivateSegmentBuffer, ArgInfo.PrivateSegmentBuffer);
  Any |= convertArg(AI.DispatchPtr, ArgInfo.DispatchPtr);
  Any |= convertArg(AI.QueuePtr, ArgInfo.QueuePtr);
  Any |= convertArg(AI.KernargSegmentPtr, ArgInfo.KernargSegmentPtr);
  Any |= convertArg(AI.DispatchID, ArgInfo.DispatchID);
  Any |= convertArg(AI.FlatScratchInit, ArgInfo.FlatScratchInit);
  Any |= convertArg(AI.PrivateSegmentSize, ArgInfo.PrivateSegmentSize);
  Any |= convertArg(AI.WorkGroupIDX, ArgInfo.WorkGroupIDX);
  Any |= convertArg(AI.WorkGroupIDY, ArgInfo.WorkGroupIDY);
  Any |= convertArg(AI.WorkGroupIDZ, ArgInfo.WorkGroupIDZ);
  Any |= convertArg(AI.WorkGroupInfo, ArgInfo.WorkGroupInfo);
  Any |= convertArg(AI.PrivateSegmentWaveByteOffset,
                    ArgInfo.PrivateSegmentWaveByteOffset);
  Any |= convertArg(AI.ImplicitArgPtr, ArgInfo.ImplicitArgPtr);
  Any |= convertArg(AI.ImplicitBufferPtr, ArgInfo.ImplicitBufferPtr);
  Any |= convertArg(AI.WorkItemIDX, ArgInfo.WorkItemIDX);
  Any |= convertArg(AI.WorkItemIDY, ArgInfo.WorkItemIDY);
  Any |= convertArg(AI.WorkItemIDZ, ArgInfo.WorkItemIDZ);

  if (Any)
    return AI;

  return None;
}

yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
    const llvm::SIMachineFunctionInfo &MFI, const TargetRegisterInfo &TRI,
    const llvm::MachineFunction &MF)
    : ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
      MaxKernArgAlign(MFI.getMaxKernArgAlign()), LDSSize(MFI.getLDSSize()),
      DynLDSAlign(MFI.getDynLDSAlign()), IsEntryFunction(MFI.isEntryFunction()),
      NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
      MemoryBound(MFI.isMemoryBound()), WaveLimiter(MFI.needsWaveLimiter()),
      HasSpilledSGPRs(MFI.hasSpilledSGPRs()),
      HasSpilledVGPRs(MFI.hasSpilledVGPRs()),
      HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()),
      Occupancy(MFI.getOccupancy()),
      ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)),
      FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
      StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)),
      ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)), Mode(MFI.getMode()) {
  auto SFI = MFI.getOptionalScavengeFI();
  if (SFI)
    ScavengeFI = yaml::FrameIndex(*SFI, MF.getFrameInfo());
}

void yaml::SIMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
  MappingTraits<SIMachineFunctionInfo>::mapping(YamlIO, *this);
}

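// Populate this function info from its parsed YAML counterpart. Returns true
// on error, filling in Error and SourceRange for the diagnostic.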
bool SIMachineFunctionInfo::initializeBaseYamlFields(
    const yaml::SIMachineFunctionInfo &YamlMFI, const MachineFunction &MF,
    PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange) {
  ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize;
  MaxKernArgAlign = assumeAligned(YamlMFI.MaxKernArgAlign);
  LDSSize = YamlMFI.LDSSize;
  DynLDSAlign = YamlMFI.DynLDSAlign;
  HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress;
  Occupancy = YamlMFI.Occupancy;
  IsEntryFunction = YamlMFI.IsEntryFunction;
  NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath;
  MemoryBound = YamlMFI.MemoryBound;
  WaveLimiter = YamlMFI.WaveLimiter;
  HasSpilledSGPRs = YamlMFI.HasSpilledSGPRs;
  HasSpilledVGPRs = YamlMFI.HasSpilledVGPRs;

  if (YamlMFI.ScavengeFI) {
    auto FIOrErr = YamlMFI.ScavengeFI->getFI(MF.getFrameInfo());
    if (!FIOrErr) {
      // Create a diagnostic for the frame index.
      const MemoryBuffer &Buffer =
          *PFS.SM->getMemoryBuffer(PFS.SM->getMainFileID());

      Error = SMDiagnostic(*PFS.SM, SMLoc(), Buffer.getBufferIdentifier(), 1, 1,
                           SourceMgr::DK_Error, toString(FIOrErr.takeError()),
                           "", None, None);
      SourceRange = YamlMFI.ScavengeFI->SourceRange;
      return true;
    }
    ScavengeFI = *FIOrErr;
  } else {
    ScavengeFI = None;
  }
  return false;
}

// Remove the VGPR reserved for SGPR spills if there are no spilled SGPRs.
bool SIMachineFunctionInfo::removeVGPRForSGPRSpill(Register ReservedVGPR,
                                                   MachineFunction &MF) {
  for (auto *i = SpillVGPRs.begin(); i < SpillVGPRs.end(); i++) {
    if (i->VGPR == ReservedVGPR) {
      SpillVGPRs.erase(i);

      for (MachineBasicBlock &MBB : MF) {
        MBB.removeLiveIn(ReservedVGPR);
        MBB.sortUniqueLiveIns();
      }
      this->VGPRReservedForSGPRSpill = AMDGPU::NoRegister;
      return true;
    }
  }
  return false;
}