//=== ARMCallingConv.cpp - ARM Custom CC Routines ---------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the custom routines for the ARM Calling Convention that
// aren't done by tablegen, and includes the table generated implementations.
//
//===----------------------------------------------------------------------===//

#include "ARMCallingConv.h"
#include "ARM.h"
#include "ARMSubtarget.h"
using namespace llvm;

// APCS f64 is in register pairs, possibly split to stack
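//
// For example, if a preceding i32 argument has already taken r0, an f64 is
// assigned r1+r2; if only r3 is still free, it gets r3 plus a 4-byte stack
// slot; and if no core registers remain, the whole value takes an 8-byte
// stack slot (see f64AssignAPCS below).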
static bool f64AssignAPCS(unsigned ValNo, MVT ValVT, MVT LocVT,
                          CCValAssign::LocInfo LocInfo,
                          CCState &State, bool CanFail) {
  static const MCPhysReg RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };

  // Try to get the first register.
  if (MCRegister Reg = State.AllocateReg(RegList))
    State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
  else {
    // CanFail is false only for the 2nd half of a v2f64; in that case do not
    // fail, spill to the stack instead.
    if (CanFail)
      return false;

    // Put the whole thing on the stack.
    State.addLoc(CCValAssign::getCustomMem(
        ValNo, ValVT, State.AllocateStack(8, Align(4)), LocVT, LocInfo));
    return true;
  }

  // Try to get the second register.
  if (MCRegister Reg = State.AllocateReg(RegList))
    State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
  else
    State.addLoc(CCValAssign::getCustomMem(
        ValNo, ValVT, State.AllocateStack(4, Align(4)), LocVT, LocInfo));
  return true;
}

static bool CC_ARM_APCS_Custom_f64(unsigned ValNo, MVT ValVT, MVT LocVT,
                                   CCValAssign::LocInfo LocInfo,
                                   ISD::ArgFlagsTy ArgFlags,
                                   CCState &State) {
  if (!f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
    return false;
  if (LocVT == MVT::v2f64 &&
      !f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
    return false;
  return true;  // we handled it
}

// AAPCS f64 is in aligned register pairs
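//
// For example, an f64 passed first is assigned r0+r1 and a second one r2+r3;
// if r0 is already taken by an i32 argument, the f64 goes in r2+r3 and r1 is
// left unused; once no aligned pair remains, any leftover core register (r3)
// is wasted and the value takes an 8-byte aligned stack slot (see
// f64AssignAAPCS below).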
static bool f64AssignAAPCS(unsigned ValNo, MVT ValVT, MVT LocVT,
                           CCValAssign::LocInfo LocInfo,
                           CCState &State, bool CanFail) {
  static const MCPhysReg HiRegList[] = { ARM::R0, ARM::R2 };
  static const MCPhysReg LoRegList[] = { ARM::R1, ARM::R3 };
  static const MCPhysReg ShadowRegList[] = { ARM::R0, ARM::R1 };
  static const MCPhysReg GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };

  MCRegister Reg = State.AllocateReg(HiRegList, ShadowRegList);
  if (!Reg) {
    // If only R3 was left unallocated, we still have to waste it now.
    Reg = State.AllocateReg(GPRArgRegs);
    assert((!Reg || Reg == ARM::R3) && "Wrong GPR usage for f64");

    // CanFail is false only for the 2nd half of a v2f64; in that case do not
    // just fail, spill to the stack instead.
    if (CanFail)
      return false;

    // Put the whole thing on the stack.
    State.addLoc(CCValAssign::getCustomMem(
        ValNo, ValVT, State.AllocateStack(8, Align(8)), LocVT, LocInfo));
    return true;
  }

  unsigned i;
  for (i = 0; i < 2; ++i)
    if (HiRegList[i] == Reg)
      break;

  MCRegister T = State.AllocateReg(LoRegList[i]);
  (void)T;
  assert(T == LoRegList[i] && "Could not allocate register");

  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
                                         LocVT, LocInfo));
  return true;
}

static bool CC_ARM_AAPCS_Custom_f64(unsigned ValNo, MVT ValVT, MVT LocVT,
                                    CCValAssign::LocInfo LocInfo,
                                    ISD::ArgFlagsTy ArgFlags,
                                    CCState &State) {
  if (!f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
    return false;
  if (LocVT == MVT::v2f64 &&
      !f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
    return false;
  return true;  // we handled it
}

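// When f64 values are returned in core registers, the first f64 (or the 1st
// half of a v2f64) is returned in r0+r1 and the 2nd half, if any, in r2+r3;
// see f64RetAssign below.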
static bool f64RetAssign(unsigned ValNo, MVT ValVT, MVT LocVT,
                         CCValAssign::LocInfo LocInfo, CCState &State) {
  static const MCPhysReg HiRegList[] = { ARM::R0, ARM::R2 };
  static const MCPhysReg LoRegList[] = { ARM::R1, ARM::R3 };

  MCRegister Reg = State.AllocateReg(HiRegList, LoRegList);
  if (!Reg)
    return false; // we didn't handle it

  unsigned i;
  for (i = 0; i < 2; ++i)
    if (HiRegList[i] == Reg)
      break;

  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
                                         LocVT, LocInfo));
  return true;
}

static bool RetCC_ARM_APCS_Custom_f64(unsigned ValNo, MVT ValVT, MVT LocVT,
                                      CCValAssign::LocInfo LocInfo,
                                      ISD::ArgFlagsTy ArgFlags,
                                      CCState &State) {
  if (!f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
    return false;
  if (LocVT == MVT::v2f64 && !f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
    return false;
  return true;  // we handled it
}

static bool RetCC_ARM_AAPCS_Custom_f64(unsigned ValNo, MVT ValVT, MVT LocVT,
                                       CCValAssign::LocInfo LocInfo,
                                       ISD::ArgFlagsTy ArgFlags,
                                       CCState &State) {
  return RetCC_ARM_APCS_Custom_f64(ValNo, ValVT, LocVT, LocInfo, ArgFlags,
                                   State);
}

static const MCPhysReg RRegList[] = { ARM::R0,  ARM::R1,  ARM::R2,  ARM::R3 };

static const MCPhysReg SRegList[] = { ARM::S0,  ARM::S1,  ARM::S2,  ARM::S3,
                                      ARM::S4,  ARM::S5,  ARM::S6,  ARM::S7,
                                      ARM::S8,  ARM::S9,  ARM::S10, ARM::S11,
                                      ARM::S12, ARM::S13, ARM::S14, ARM::S15 };
static const MCPhysReg DRegList[] = { ARM::D0, ARM::D1, ARM::D2, ARM::D3,
                                      ARM::D4, ARM::D5, ARM::D6, ARM::D7 };
static const MCPhysReg QRegList[] = { ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3 };

// Allocate part of an AAPCS HFA or HVA. We assume that each member of the HA
// has InConsecutiveRegs set, and that the last member also has
// InConsecutiveRegsLast set. We must process all members of the HA before
// we can allocate it, as we need to know the total number of registers that
// will be needed in order to (attempt to) allocate a contiguous block.
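//
// For example, an HFA of three floats is assigned s0-s2 as a single block (an
// HFA of doubles would use d0-d2, one of 128-bit vectors q0-q2). If no large
// enough contiguous block is free, the remaining VFP argument registers are
// marked unavailable and every member of the HA is placed on the stack.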
static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned ValNo, MVT ValVT,
                                          MVT LocVT,
                                          CCValAssign::LocInfo LocInfo,
                                          ISD::ArgFlagsTy ArgFlags,
                                          CCState &State) {
  SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();

  // AAPCS HFAs must have 1-4 elements, all of the same type
  if (PendingMembers.size() > 0)
    assert(PendingMembers[0].getLocVT() == LocVT);

  // Add the argument to the list to be allocated once we know the size of the
  // aggregate. Store the type's required alignment as extra info for later: in
  // the [N x i64] case all trace has been removed by the time we actually get
  // to do allocation.
  PendingMembers.push_back(CCValAssign::getPending(
      ValNo, ValVT, LocVT, LocInfo, ArgFlags.getNonZeroOrigAlign().value()));

  if (!ArgFlags.isInConsecutiveRegsLast())
    return true;

  // Try to allocate a contiguous block of registers, each of the correct
  // size to hold one member.
  auto &DL = State.getMachineFunction().getDataLayout();
  const MaybeAlign StackAlign = DL.getStackAlignment();
  assert(StackAlign && "data layout string is missing stack alignment");
  const Align FirstMemberAlign(PendingMembers[0].getExtraInfo());
  Align Alignment = std::min(FirstMemberAlign, *StackAlign);

  ArrayRef<MCPhysReg> RegList;
  switch (LocVT.SimpleTy) {
  case MVT::i32: {
    RegList = RRegList;
    unsigned RegIdx = State.getFirstUnallocated(RegList);

    // First consume all registers that would give an unaligned object. Whether
    // we go on the stack or in regs, no one will be using them in the future.
    unsigned RegAlign = alignTo(Alignment.value(), 4) / 4;
    while (RegIdx % RegAlign != 0 && RegIdx < RegList.size())
      State.AllocateReg(RegList[RegIdx++]);

    break;
  }
  case MVT::f16:
  case MVT::bf16:
  case MVT::f32:
    RegList = SRegList;
    break;
  case MVT::v4f16:
  case MVT::v4bf16:
  case MVT::f64:
    RegList = DRegList;
    break;
  case MVT::v8f16:
  case MVT::v8bf16:
  case MVT::v2f64:
    RegList = QRegList;
    break;
  default:
    llvm_unreachable("Unexpected member type for block aggregate");
    break;
  }

  ArrayRef<MCPhysReg> RegResult =
      State.AllocateRegBlock(RegList, PendingMembers.size());
  if (!RegResult.empty()) {
    for (const auto &[PendingMember, Reg] : zip(PendingMembers, RegResult)) {
      PendingMember.convertToReg(Reg);
      State.addLoc(PendingMember);
    }
    PendingMembers.clear();
    return true;
  }

  // Register allocation failed; we'll need the stack.
  unsigned Size = LocVT.getSizeInBits() / 8;
  if (LocVT == MVT::i32 && State.getStackSize() == 0) {
    // If nothing else has used the stack until this point, a non-HFA aggregate
    // can be split between regs and stack.
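    // For example, with r0-r3 all free, five pending i32 members get r0-r3
    // plus a single 4-byte stack slot for the fifth.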
    unsigned RegIdx = State.getFirstUnallocated(RegList);
    for (auto &It : PendingMembers) {
      if (RegIdx >= RegList.size())
        It.convertToMem(State.AllocateStack(Size, Align(Size)));
      else
        It.convertToReg(State.AllocateReg(RegList[RegIdx++]));

      State.addLoc(It);
    }
    PendingMembers.clear();
    return true;
  }

  if (LocVT != MVT::i32)
    RegList = SRegList;

  // Mark all regs as unavailable (AAPCS rule C.2.vfp for VFP, C.6 for core)
  for (auto Reg : RegList)
    State.AllocateReg(Reg);

  // Clamp the alignment between 4 and 8.
  if (State.getMachineFunction().getSubtarget<ARMSubtarget>().isTargetAEABI())
    Alignment = ArgFlags.getNonZeroMemAlign() <= 4 ? Align(4) : Align(8);

  // After the first item has been allocated, the rest are packed as tightly as
  // possible. (E.g. an incoming i64 would have a starting Align of 8, but
  // we'll be allocating a bunch of i32 slots.)
  for (auto &It : PendingMembers) {
    It.convertToMem(State.AllocateStack(Size, Alignment));
    State.addLoc(It);
    Alignment = Align(1);
  }

  // All pending members have now been allocated
  PendingMembers.clear();

  // This will be allocated by the last member of the aggregate
  return true;
}

static bool CustomAssignInRegList(unsigned ValNo, MVT ValVT, MVT LocVT,
                                  CCValAssign::LocInfo LocInfo, CCState &State,
                                  ArrayRef<MCPhysReg> RegList) {
  MCRegister Reg = State.AllocateReg(RegList);
  if (Reg) {
    State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    return true;
  }
  return false;
}

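// Half-precision (f16) arguments: under the base AAPCS each value is extended
// to i32 and occupies a full core register, while under the VFP (hard-float)
// variant it is extended to f32 and occupies a full s-register. The two
// handlers below differ only in the LocVT and register list they pass to
// CustomAssignInRegList.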
static bool CC_ARM_AAPCS_Custom_f16(unsigned ValNo, MVT ValVT, MVT LocVT,
                                    CCValAssign::LocInfo LocInfo,
                                    ISD::ArgFlagsTy ArgFlags, CCState &State) {
  // f16 arguments are extended to i32 and assigned to a register in [r0, r3]
  return CustomAssignInRegList(ValNo, ValVT, MVT::i32, LocInfo, State,
                               RRegList);
}

static bool CC_ARM_AAPCS_VFP_Custom_f16(unsigned ValNo, MVT ValVT, MVT LocVT,
                                        CCValAssign::LocInfo LocInfo,
                                        ISD::ArgFlagsTy ArgFlags,
                                        CCState &State) {
  // f16 arguments are extended to f32 and assigned to a register in [s0, s15]
  return CustomAssignInRegList(ValNo, ValVT, MVT::f32, LocInfo, State,
                               SRegList);
}

// Include the table generated calling convention implementations.
#include "ARMGenCallingConv.inc"