//=== AArch64CallingConvention.cpp - AArch64 CC impl ------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the table-generated and custom routines for the AArch64
// Calling Convention.
//
//===----------------------------------------------------------------------===//

#include "AArch64CallingConvention.h"
#include "AArch64.h"
#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
using namespace llvm;

static const MCPhysReg XRegList[] = {AArch64::X0, AArch64::X1, AArch64::X2,
                                     AArch64::X3, AArch64::X4, AArch64::X5,
                                     AArch64::X6, AArch64::X7};
static const MCPhysReg HRegList[] = {AArch64::H0, AArch64::H1, AArch64::H2,
                                     AArch64::H3, AArch64::H4, AArch64::H5,
                                     AArch64::H6, AArch64::H7};
static const MCPhysReg SRegList[] = {AArch64::S0, AArch64::S1, AArch64::S2,
                                     AArch64::S3, AArch64::S4, AArch64::S5,
                                     AArch64::S6, AArch64::S7};
static const MCPhysReg DRegList[] = {AArch64::D0, AArch64::D1, AArch64::D2,
                                     AArch64::D3, AArch64::D4, AArch64::D5,
                                     AArch64::D6, AArch64::D7};
static const MCPhysReg QRegList[] = {AArch64::Q0, AArch64::Q1, AArch64::Q2,
                                     AArch64::Q3, AArch64::Q4, AArch64::Q5,
                                     AArch64::Q6, AArch64::Q7};
static const MCPhysReg ZRegList[] = {AArch64::Z0, AArch64::Z1, AArch64::Z2,
                                     AArch64::Z3, AArch64::Z4, AArch64::Z5,
                                     AArch64::Z6, AArch64::Z7};
static const MCPhysReg PRegList[] = {AArch64::P0, AArch64::P1, AArch64::P2,
                                     AArch64::P3};

static bool finishStackBlock(SmallVectorImpl<CCValAssign> &PendingMembers,
                             MVT LocVT, ISD::ArgFlagsTy &ArgFlags,
                             CCState &State, Align SlotAlign) {
  if (LocVT.isScalableVector()) {
    const AArch64Subtarget &Subtarget = static_cast<const AArch64Subtarget &>(
        State.getMachineFunction().getSubtarget());
    const AArch64TargetLowering *TLI = Subtarget.getTargetLowering();

    // We are about to reinvoke the CCAssignFn auto-generated handler. If we
    // don't unset these flags we will get stuck in an infinite loop invoking
    // the custom handler.
    ArgFlags.setInConsecutiveRegs(false);
    ArgFlags.setInConsecutiveRegsLast(false);

    // The calling convention for passing SVE tuples states that in the event
    // we cannot allocate enough registers for the tuple we should still leave
    // any remaining registers unallocated. However, when we call the
    // CCAssignFn again we want it to behave as if all remaining registers are
    // allocated. This will force the code to pass the tuple indirectly in
    // accordance with the PCS.
    bool ZRegsAllocated[8];
    for (int I = 0; I < 8; I++) {
      ZRegsAllocated[I] = State.isAllocated(ZRegList[I]);
      State.AllocateReg(ZRegList[I]);
    }
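    // (Illustrative, not from the PCS text itself: for a three-vector tuple
    // with only Z6 and Z7 still free, the first CCAssignFn pass leaves them
    // unallocated; claiming every Z register here forces the re-run below to
    // assign the tuple indirectly, and any register that was free beforehand
    // is released again afterwards.)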
    // The same applies to P registers.
    bool PRegsAllocated[4];
    for (int I = 0; I < 4; I++) {
      PRegsAllocated[I] = State.isAllocated(PRegList[I]);
      State.AllocateReg(PRegList[I]);
    }

    auto &It = PendingMembers[0];
    CCAssignFn *AssignFn =
        TLI->CCAssignFnForCall(State.getCallingConv(), /*IsVarArg=*/false);
    if (AssignFn(It.getValNo(), It.getValVT(), It.getValVT(), CCValAssign::Full,
                 ArgFlags, State))
      llvm_unreachable("Call operand has unhandled type");

    // Return the flags to how they were before.
    ArgFlags.setInConsecutiveRegs(true);
    ArgFlags.setInConsecutiveRegsLast(true);

    // Return the register state back to how it was before, leaving any
    // unallocated registers available for other smaller types.
    for (int I = 0; I < 8; I++)
      if (!ZRegsAllocated[I])
        State.DeallocateReg(ZRegList[I]);
    for (int I = 0; I < 4; I++)
      if (!PRegsAllocated[I])
        State.DeallocateReg(PRegList[I]);

    // All pending members have now been allocated.
    PendingMembers.clear();
    return true;
  }

  unsigned Size = LocVT.getSizeInBits() / 8;
  for (auto &It : PendingMembers) {
    It.convertToMem(State.AllocateStack(Size, SlotAlign));
    State.addLoc(It);
    SlotAlign = Align(1);
  }

  // All pending members have now been allocated.
  PendingMembers.clear();
  return true;
}

/// The Darwin variadic PCS places anonymous arguments in 8-byte stack slots.
/// An [N x Ty] type must still be contiguous in memory though.
static bool CC_AArch64_Custom_Stack_Block(
    unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo,
    ISD::ArgFlagsTy &ArgFlags, CCState &State) {
  SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();

  // Add the argument to the list to be allocated once we know the size of the
  // block.
  PendingMembers.push_back(
      CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));

  if (!ArgFlags.isInConsecutiveRegsLast())
    return true;

  return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, Align(8));
}

/// An [N x Ty] block should be passed in a consecutive sequence of registers.
/// If no such sequence is available, mark the rest of the registers of that
/// type as used and place the argument on the stack.
static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                                    CCValAssign::LocInfo &LocInfo,
                                    ISD::ArgFlagsTy &ArgFlags, CCState &State) {
  const AArch64Subtarget &Subtarget = static_cast<const AArch64Subtarget &>(
      State.getMachineFunction().getSubtarget());
  bool IsDarwinILP32 = Subtarget.isTargetILP32() && Subtarget.isTargetMachO();

  // Try to allocate a contiguous block of registers, each of the correct
  // size to hold one member.
  ArrayRef<MCPhysReg> RegList;
  if (LocVT.SimpleTy == MVT::i64 ||
      (IsDarwinILP32 && LocVT.SimpleTy == MVT::i32))
    RegList = XRegList;
  else if (LocVT.SimpleTy == MVT::f16)
    RegList = HRegList;
  else if (LocVT.SimpleTy == MVT::f32 || LocVT.is32BitVector())
    RegList = SRegList;
  else if (LocVT.SimpleTy == MVT::f64 || LocVT.is64BitVector())
    RegList = DRegList;
  else if (LocVT.SimpleTy == MVT::f128 || LocVT.is128BitVector())
    RegList = QRegList;
  else if (LocVT.isScalableVector()) {
    // Scalable masks should be passed in predicate registers.
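    // (Illustrative: a block of <vscale x 4 x i1> masks draws from P0-P3
    // below, while data vectors such as <vscale x 4 x i32> draw from Z0-Z7.)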
    if (LocVT == MVT::nxv1i1 || LocVT == MVT::nxv2i1 || LocVT == MVT::nxv4i1 ||
        LocVT == MVT::nxv8i1 || LocVT == MVT::nxv16i1 ||
        LocVT == MVT::aarch64svcount)
      RegList = PRegList;
    else
      RegList = ZRegList;
  } else {
    // Not an array we want to split up after all.
    return false;
  }

  SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();

  // Add the argument to the list to be allocated once we know the size of the
  // block.
  PendingMembers.push_back(
      CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));

  if (!ArgFlags.isInConsecutiveRegsLast())
    return true;

  // [N x i32] arguments get packed into x-registers on Darwin's arm64_32
  // because that's how the armv7k Clang front-end emits small structs.
  unsigned EltsPerReg = (IsDarwinILP32 && LocVT.SimpleTy == MVT::i32) ? 2 : 1;
  ArrayRef<MCPhysReg> RegResult = State.AllocateRegBlock(
      RegList, alignTo(PendingMembers.size(), EltsPerReg) / EltsPerReg);
  if (!RegResult.empty() && EltsPerReg == 1) {
    for (const auto &[It, Reg] : zip(PendingMembers, RegResult)) {
      It.convertToReg(Reg);
      State.addLoc(It);
    }
    PendingMembers.clear();
    return true;
  } else if (!RegResult.empty()) {
    assert(EltsPerReg == 2 && "unexpected ABI");
    bool UseHigh = false;
    CCValAssign::LocInfo Info;
    unsigned RegIdx = 0;
    for (auto &It : PendingMembers) {
      Info = UseHigh ? CCValAssign::AExtUpper : CCValAssign::ZExt;
      State.addLoc(CCValAssign::getReg(It.getValNo(), MVT::i32,
                                       RegResult[RegIdx], MVT::i64, Info));
      UseHigh = !UseHigh;
      if (!UseHigh)
        ++RegIdx;
    }
    PendingMembers.clear();
    return true;
  }

  if (!LocVT.isScalableVector()) {
    // Mark all regs in the class as unavailable.
    for (auto Reg : RegList)
      State.AllocateReg(Reg);
  }

  const MaybeAlign StackAlign =
      State.getMachineFunction().getDataLayout().getStackAlignment();
  assert(StackAlign && "data layout string is missing stack alignment");
  const Align MemAlign = ArgFlags.getNonZeroMemAlign();
  Align SlotAlign = std::min(MemAlign, *StackAlign);
  if (!Subtarget.isTargetDarwin())
    SlotAlign = std::max(SlotAlign, Align(8));

  return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, SlotAlign);
}

// TableGen provides definitions of the calling convention analysis entry
// points.
#include "AArch64GenCallingConv.inc"
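
// A minimal usage sketch (illustrative; CallConv, IsVarArg, MF, Outs, and Ctx
// stand for values available at a call-lowering site): the custom handlers
// above are reached through the table-generated assignment functions, which
// callers drive via CCState, e.g.:
//
//   SmallVector<CCValAssign, 16> ArgLocs;
//   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx);
//   CCInfo.AnalyzeCallOperands(Outs,
//                              TLI->CCAssignFnForCall(CallConv, IsVarArg));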