xref: /llvm-project/llvm/lib/Target/AArch64/AArch64CallingConvention.cpp (revision 6fe0fc60341b05bf30ccc16012dab9eeb55a338d)
1 //=== AArch64CallingConvention.cpp - AArch64 CC impl ------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the table-generated and custom routines for the AArch64
10 // Calling Convention.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AArch64CallingConvention.h"
15 #include "AArch64.h"
16 #include "AArch64InstrInfo.h"
17 #include "AArch64Subtarget.h"
18 #include "llvm/CodeGen/CallingConvLower.h"
19 #include "llvm/CodeGen/TargetInstrInfo.h"
20 using namespace llvm;
21 
22 static const MCPhysReg XRegList[] = {AArch64::X0, AArch64::X1, AArch64::X2,
23                                      AArch64::X3, AArch64::X4, AArch64::X5,
24                                      AArch64::X6, AArch64::X7};
25 static const MCPhysReg HRegList[] = {AArch64::H0, AArch64::H1, AArch64::H2,
26                                      AArch64::H3, AArch64::H4, AArch64::H5,
27                                      AArch64::H6, AArch64::H7};
28 static const MCPhysReg SRegList[] = {AArch64::S0, AArch64::S1, AArch64::S2,
29                                      AArch64::S3, AArch64::S4, AArch64::S5,
30                                      AArch64::S6, AArch64::S7};
31 static const MCPhysReg DRegList[] = {AArch64::D0, AArch64::D1, AArch64::D2,
32                                      AArch64::D3, AArch64::D4, AArch64::D5,
33                                      AArch64::D6, AArch64::D7};
34 static const MCPhysReg QRegList[] = {AArch64::Q0, AArch64::Q1, AArch64::Q2,
35                                      AArch64::Q3, AArch64::Q4, AArch64::Q5,
36                                      AArch64::Q6, AArch64::Q7};
37 static const MCPhysReg ZRegList[] = {AArch64::Z0, AArch64::Z1, AArch64::Z2,
38                                      AArch64::Z3, AArch64::Z4, AArch64::Z5,
39                                      AArch64::Z6, AArch64::Z7};
40 static const MCPhysReg PRegList[] = {AArch64::P0, AArch64::P1, AArch64::P2,
41                                      AArch64::P3};
42 
43 static bool finishStackBlock(SmallVectorImpl<CCValAssign> &PendingMembers,
44                              MVT LocVT, ISD::ArgFlagsTy &ArgFlags,
45                              CCState &State, Align SlotAlign) {
46   if (LocVT.isScalableVector()) {
47     const AArch64Subtarget &Subtarget = static_cast<const AArch64Subtarget &>(
48         State.getMachineFunction().getSubtarget());
49     const AArch64TargetLowering *TLI = Subtarget.getTargetLowering();
50 
51     // We are about to reinvoke the CCAssignFn auto-generated handler. If we
52     // don't unset these flags we will get stuck in an infinite loop forever
53     // invoking the custom handler.
54     ArgFlags.setInConsecutiveRegs(false);
55     ArgFlags.setInConsecutiveRegsLast(false);
56 
57     // The calling convention for passing SVE tuples states that in the event
58     // we cannot allocate enough registers for the tuple we should still leave
59     // any remaining registers unallocated. However, when we call the
60     // CCAssignFn again we want it to behave as if all remaining registers are
61     // allocated. This will force the code to pass the tuple indirectly in
62     // accordance with the PCS.
63     bool ZRegsAllocated[8];
64     for (int I = 0; I < 8; I++) {
65       ZRegsAllocated[I] = State.isAllocated(ZRegList[I]);
66       State.AllocateReg(ZRegList[I]);
67     }
68     // The same applies to P registers.
69     bool PRegsAllocated[4];
70     for (int I = 0; I < 4; I++) {
71       PRegsAllocated[I] = State.isAllocated(PRegList[I]);
72       State.AllocateReg(PRegList[I]);
73     }
74 
75     auto &It = PendingMembers[0];
76     CCAssignFn *AssignFn =
77         TLI->CCAssignFnForCall(State.getCallingConv(), /*IsVarArg=*/false);
78     if (AssignFn(It.getValNo(), It.getValVT(), It.getValVT(), CCValAssign::Full,
79                  ArgFlags, State))
80       llvm_unreachable("Call operand has unhandled type");
81 
82     // Return the flags to how they were before.
83     ArgFlags.setInConsecutiveRegs(true);
84     ArgFlags.setInConsecutiveRegsLast(true);
85 
86     // Return the register state back to how it was before, leaving any
87     // unallocated registers available for other smaller types.
88     for (int I = 0; I < 8; I++)
89       if (!ZRegsAllocated[I])
90         State.DeallocateReg(ZRegList[I]);
91     for (int I = 0; I < 4; I++)
92       if (!PRegsAllocated[I])
93         State.DeallocateReg(PRegList[I]);
94 
95     // All pending members have now been allocated
96     PendingMembers.clear();
97     return true;
98   }
99 
100   unsigned Size = LocVT.getSizeInBits() / 8;
101   for (auto &It : PendingMembers) {
102     It.convertToMem(State.AllocateStack(Size, SlotAlign));
103     State.addLoc(It);
104     SlotAlign = Align(1);
105   }
106 
107   // All pending members have now been allocated
108   PendingMembers.clear();
109   return true;
110 }
111 
112 /// The Darwin variadic PCS places anonymous arguments in 8-byte stack slots. An
113 /// [N x Ty] type must still be contiguous in memory though.
114 static bool CC_AArch64_Custom_Stack_Block(
115       unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo,
116       ISD::ArgFlagsTy &ArgFlags, CCState &State) {
117   SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
118 
119   // Add the argument to the list to be allocated once we know the size of the
120   // block.
121   PendingMembers.push_back(
122       CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
123 
124   if (!ArgFlags.isInConsecutiveRegsLast())
125     return true;
126 
127   return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, Align(8));
128 }
129 
130 /// Given an [N x Ty] block, it should be passed in a consecutive sequence of
131 /// registers. If no such sequence is available, mark the rest of the registers
132 /// of that type as used and place the argument on the stack.
133 static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
134                                     CCValAssign::LocInfo &LocInfo,
135                                     ISD::ArgFlagsTy &ArgFlags, CCState &State) {
136   const AArch64Subtarget &Subtarget = static_cast<const AArch64Subtarget &>(
137       State.getMachineFunction().getSubtarget());
138   bool IsDarwinILP32 = Subtarget.isTargetILP32() && Subtarget.isTargetMachO();
139 
140   // Try to allocate a contiguous block of registers, each of the correct
141   // size to hold one member.
142   ArrayRef<MCPhysReg> RegList;
143   if (LocVT.SimpleTy == MVT::i64 || (IsDarwinILP32 && LocVT.SimpleTy == MVT::i32))
144     RegList = XRegList;
145   else if (LocVT.SimpleTy == MVT::f16)
146     RegList = HRegList;
147   else if (LocVT.SimpleTy == MVT::f32 || LocVT.is32BitVector())
148     RegList = SRegList;
149   else if (LocVT.SimpleTy == MVT::f64 || LocVT.is64BitVector())
150     RegList = DRegList;
151   else if (LocVT.SimpleTy == MVT::f128 || LocVT.is128BitVector())
152     RegList = QRegList;
153   else if (LocVT.isScalableVector()) {
154     // Scalable masks should be pass by Predicate registers.
155     if (LocVT == MVT::nxv1i1 || LocVT == MVT::nxv2i1 || LocVT == MVT::nxv4i1 ||
156         LocVT == MVT::nxv8i1 || LocVT == MVT::nxv16i1 ||
157         LocVT == MVT::aarch64svcount)
158       RegList = PRegList;
159     else
160       RegList = ZRegList;
161   } else {
162     // Not an array we want to split up after all.
163     return false;
164   }
165 
166   SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
167 
168   // Add the argument to the list to be allocated once we know the size of the
169   // block.
170   PendingMembers.push_back(
171       CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
172 
173   if (!ArgFlags.isInConsecutiveRegsLast())
174     return true;
175 
176   // [N x i32] arguments get packed into x-registers on Darwin's arm64_32
177   // because that's how the armv7k Clang front-end emits small structs.
178   unsigned EltsPerReg = (IsDarwinILP32 && LocVT.SimpleTy == MVT::i32) ? 2 : 1;
179   ArrayRef<MCPhysReg> RegResult = State.AllocateRegBlock(
180       RegList, alignTo(PendingMembers.size(), EltsPerReg) / EltsPerReg);
181   if (!RegResult.empty() && EltsPerReg == 1) {
182     for (const auto &[It, Reg] : zip(PendingMembers, RegResult)) {
183       It.convertToReg(Reg);
184       State.addLoc(It);
185     }
186     PendingMembers.clear();
187     return true;
188   } else if (!RegResult.empty()) {
189     assert(EltsPerReg == 2 && "unexpected ABI");
190     bool UseHigh = false;
191     CCValAssign::LocInfo Info;
192     unsigned RegIdx = 0;
193     for (auto &It : PendingMembers) {
194       Info = UseHigh ? CCValAssign::AExtUpper : CCValAssign::ZExt;
195       State.addLoc(CCValAssign::getReg(It.getValNo(), MVT::i32,
196                                        RegResult[RegIdx], MVT::i64, Info));
197       UseHigh = !UseHigh;
198       if (!UseHigh)
199         ++RegIdx;
200     }
201     PendingMembers.clear();
202     return true;
203   }
204 
205   if (!LocVT.isScalableVector()) {
206     // Mark all regs in the class as unavailable
207     for (auto Reg : RegList)
208       State.AllocateReg(Reg);
209   }
210 
211   const MaybeAlign StackAlign =
212       State.getMachineFunction().getDataLayout().getStackAlignment();
213   assert(StackAlign && "data layout string is missing stack alignment");
214   const Align MemAlign = ArgFlags.getNonZeroMemAlign();
215   Align SlotAlign = std::min(MemAlign, *StackAlign);
216   if (!Subtarget.isTargetDarwin())
217     SlotAlign = std::max(SlotAlign, Align(8));
218 
219   return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, SlotAlign);
220 }
221 
222 // TableGen provides definitions of the calling convention analysis entry
223 // points.
224 #include "AArch64GenCallingConv.inc"
225