//===--- AArch64CallLowering.cpp - Call lowering --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the lowering of LLVM calls to machine code calls for
/// GlobalISel.
///
//===----------------------------------------------------------------------===//

#include "AArch64CallLowering.h"
#include "AArch64ISelLowering.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/MachineValueType.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>

#define DEBUG_TYPE "aarch64-call-lowering"

using namespace llvm;

AArch64CallLowering::AArch64CallLowering(const AArch64TargetLowering &TLI)
    : CallLowering(&TLI) {}

static void applyStackPassedSmallTypeDAGHack(EVT OrigVT, MVT &ValVT,
                                             MVT &LocVT) {
  // If ValVT is i1/i8/i16, we should set LocVT to i8/i8/i16. This is a legacy
  // hack because the DAG calls the assignment function with pre-legalized
  // register typed values, not the raw type.
  //
  // This hack is not applied to return values, which are not passed on the
  // stack.
  if (OrigVT == MVT::i1 || OrigVT == MVT::i8)
    ValVT = LocVT = MVT::i8;
  else if (OrigVT == MVT::i16)
    ValVT = LocVT = MVT::i16;
}

// Account for i1/i8/i16 stack passed value hack
static uint64_t getStackValueStoreSizeHack(const CCValAssign &VA) {
  const MVT ValVT = VA.getValVT();
  return (ValVT == MVT::i8 || ValVT == MVT::i16) ? ValVT.getStoreSize()
                                                 : VA.getLocVT().getStoreSize();
}

namespace {

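/// Value assigner for incoming arguments. Mirrors the DAG lowering by
/// applying the i1/i8/i16 stack-passed type hack before delegating to the
/// generic IncomingValueAssigner.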
struct AArch64IncomingValueAssigner
    : public CallLowering::IncomingValueAssigner {
  AArch64IncomingValueAssigner(CCAssignFn *AssignFn_,
                               CCAssignFn *AssignFnVarArg_)
      : IncomingValueAssigner(AssignFn_, AssignFnVarArg_) {}

  bool assignArg(unsigned ValNo, EVT OrigVT, MVT ValVT, MVT LocVT,
                 CCValAssign::LocInfo LocInfo,
                 const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags,
                 CCState &State) override {
    applyStackPassedSmallTypeDAGHack(OrigVT, ValVT, LocVT);
    return IncomingValueAssigner::assignArg(ValNo, OrigVT, ValVT, LocVT,
                                            LocInfo, Info, Flags, State);
  }
};

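/// Value assigner for outgoing arguments and return values. Chooses between
/// the fixed and vararg CCAssignFn (Win64 varargs use the vararg convention
/// even for fixed arguments) and records the final stack offset.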
struct AArch64OutgoingValueAssigner
    : public CallLowering::OutgoingValueAssigner {
  const AArch64Subtarget &Subtarget;

  /// Track if this is used for a return instead of function argument
  /// passing. We apply a hack to i1/i8/i16 stack passed values, but do not use
  /// stack passed returns for them and cannot apply the type adjustment.
  bool IsReturn;

  AArch64OutgoingValueAssigner(CCAssignFn *AssignFn_,
                               CCAssignFn *AssignFnVarArg_,
                               const AArch64Subtarget &Subtarget_,
                               bool IsReturn)
      : OutgoingValueAssigner(AssignFn_, AssignFnVarArg_),
        Subtarget(Subtarget_), IsReturn(IsReturn) {}

  bool assignArg(unsigned ValNo, EVT OrigVT, MVT ValVT, MVT LocVT,
                 CCValAssign::LocInfo LocInfo,
                 const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags,
                 CCState &State) override {
    bool IsCalleeWin = Subtarget.isCallingConvWin64(State.getCallingConv());
    bool UseVarArgsCCForFixed = IsCalleeWin && State.isVarArg();

    if (!State.isVarArg() && !UseVarArgsCCForFixed && !IsReturn)
      applyStackPassedSmallTypeDAGHack(OrigVT, ValVT, LocVT);

    bool Res;
    if (Info.IsFixed && !UseVarArgsCCForFixed)
      Res = AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State);
    else
      Res = AssignFnVarArg(ValNo, ValVT, LocVT, LocInfo, Flags, State);

    StackOffset = State.getNextStackOffset();
    return Res;
  }
};

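/// Common handler for incoming values, loaded either from physical registers
/// or from fixed stack slots. Subclasses only differ in how the physical
/// register is marked as used.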
struct IncomingArgHandler : public CallLowering::IncomingValueHandler {
  IncomingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
      : IncomingValueHandler(MIRBuilder, MRI) {}

  Register getStackAddress(uint64_t Size, int64_t Offset,
                           MachinePointerInfo &MPO,
                           ISD::ArgFlagsTy Flags) override {
    auto &MFI = MIRBuilder.getMF().getFrameInfo();

    // Byval is assumed to be writable memory, but other stack passed arguments
    // are not.
    const bool IsImmutable = !Flags.isByVal();

    int FI = MFI.CreateFixedObject(Size, Offset, IsImmutable);
    MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
    auto AddrReg = MIRBuilder.buildFrameIndex(LLT::pointer(0, 64), FI);
    return AddrReg.getReg(0);
  }

  uint64_t getStackValueStoreSize(const DataLayout &,
                                  const CCValAssign &VA) const override {
    return getStackValueStoreSizeHack(VA);
  }

  void assignValueToReg(Register ValVReg, Register PhysReg,
                        CCValAssign &VA) override {
    markPhysRegUsed(PhysReg);
    IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
  }

  void assignValueToAddress(Register ValVReg, Register Addr, uint64_t MemSize,
                            MachinePointerInfo &MPO, CCValAssign &VA) override {
    MachineFunction &MF = MIRBuilder.getMF();

    // The reported memory location may be wider than the value.
    const LLT RealRegTy = MRI.getType(ValVReg);
    LLT ValTy(VA.getValVT());
    LLT LocTy(VA.getLocVT());

    // Fixup the types for the DAG compatibility hack.
    if (VA.getValVT() == MVT::i8 || VA.getValVT() == MVT::i16)
      std::swap(ValTy, LocTy);

    MemSize = LocTy.getSizeInBytes();

    auto MMO = MF.getMachineMemOperand(
        MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant,
        MemSize, inferAlignFromPtrInfo(MF, MPO));

    if (RealRegTy.getSizeInBits() == ValTy.getSizeInBits()) {
      // No extension information, or no extension necessary. Load into the
      // incoming parameter type directly.
      MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
    } else {
      auto Tmp = MIRBuilder.buildLoad(LocTy, Addr, *MMO);
      MIRBuilder.buildTrunc(ValVReg, Tmp);
    }
  }

  /// How the physical register gets marked varies between formal
  /// parameters (it's a basic-block live-in), and a call instruction
  /// (it's an implicit-def of the BL).
  virtual void markPhysRegUsed(MCRegister PhysReg) = 0;
};

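/// Handler for a function's own formal arguments: assigned physical registers
/// become live-ins of the entry block.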
struct FormalArgHandler : public IncomingArgHandler {
  FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
      : IncomingArgHandler(MIRBuilder, MRI) {}

  void markPhysRegUsed(MCRegister PhysReg) override {
    MIRBuilder.getMRI()->addLiveIn(PhysReg);
    MIRBuilder.getMBB().addLiveIn(PhysReg);
  }
};

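/// Handler for values returned by a call: the defining physical registers are
/// added as implicit defs of the call instruction.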
struct CallReturnHandler : public IncomingArgHandler {
  CallReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                    MachineInstrBuilder MIB)
      : IncomingArgHandler(MIRBuilder, MRI), MIB(MIB) {}

  void markPhysRegUsed(MCRegister PhysReg) override {
    MIB.addDef(PhysReg, RegState::Implicit);
  }

  MachineInstrBuilder MIB;
};

/// A special return arg handler for "returned" attribute arg calls.
struct ReturnedArgCallReturnHandler : public CallReturnHandler {
  ReturnedArgCallReturnHandler(MachineIRBuilder &MIRBuilder,
                               MachineRegisterInfo &MRI,
                               MachineInstrBuilder MIB)
      : CallReturnHandler(MIRBuilder, MRI, MIB) {}

  void markPhysRegUsed(MCRegister PhysReg) override {}
};

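/// Handler for outgoing call arguments and return values. Copies values into
/// their assigned physical registers, or stores them relative to SP for
/// normal calls and into fixed stack slots (offset by FPDiff) for tail calls.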
struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler {
  OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                     MachineInstrBuilder MIB, bool IsTailCall = false,
                     int FPDiff = 0)
      : OutgoingValueHandler(MIRBuilder, MRI), MIB(MIB), IsTailCall(IsTailCall),
        FPDiff(FPDiff),
        Subtarget(MIRBuilder.getMF().getSubtarget<AArch64Subtarget>()) {}

  Register getStackAddress(uint64_t Size, int64_t Offset,
                           MachinePointerInfo &MPO,
                           ISD::ArgFlagsTy Flags) override {
    MachineFunction &MF = MIRBuilder.getMF();
    LLT p0 = LLT::pointer(0, 64);
    LLT s64 = LLT::scalar(64);

    if (IsTailCall) {
      assert(!Flags.isByVal() && "byval unhandled with tail calls");

      Offset += FPDiff;
      int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true);
      auto FIReg = MIRBuilder.buildFrameIndex(p0, FI);
      MPO = MachinePointerInfo::getFixedStack(MF, FI);
      return FIReg.getReg(0);
    }

    if (!SPReg)
      SPReg = MIRBuilder.buildCopy(p0, Register(AArch64::SP)).getReg(0);

    auto OffsetReg = MIRBuilder.buildConstant(s64, Offset);

    auto AddrReg = MIRBuilder.buildPtrAdd(p0, SPReg, OffsetReg);

    MPO = MachinePointerInfo::getStack(MF, Offset);
    return AddrReg.getReg(0);
  }

  /// We need to fixup the reported store size for certain value types because
  /// we invert the interpretation of ValVT and LocVT in certain cases. This is
  /// for compatibility with the DAG call lowering implementation, which we're
  /// currently building on top of.
  uint64_t getStackValueStoreSize(const DataLayout &,
                                  const CCValAssign &VA) const override {
    return getStackValueStoreSizeHack(VA);
  }

  void assignValueToReg(Register ValVReg, Register PhysReg,
                        CCValAssign &VA) override {
    MIB.addUse(PhysReg, RegState::Implicit);
    Register ExtReg = extendRegister(ValVReg, VA);
    MIRBuilder.buildCopy(PhysReg, ExtReg);
  }

  void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
                            MachinePointerInfo &MPO, CCValAssign &VA) override {
    MachineFunction &MF = MIRBuilder.getMF();
    auto MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore, Size,
                                       inferAlignFromPtrInfo(MF, MPO));
    MIRBuilder.buildStore(ValVReg, Addr, *MMO);
  }

  void assignValueToAddress(const CallLowering::ArgInfo &Arg, unsigned RegIndex,
                            Register Addr, uint64_t MemSize,
                            MachinePointerInfo &MPO, CCValAssign &VA) override {
    unsigned MaxSize = MemSize * 8;
    // For varargs, we always want to extend them to 8 bytes, in which case
    // we disable setting a max.
    if (!Arg.IsFixed)
      MaxSize = 0;

    Register ValVReg = Arg.Regs[RegIndex];
    if (VA.getLocInfo() != CCValAssign::LocInfo::FPExt) {
      MVT LocVT = VA.getLocVT();
      MVT ValVT = VA.getValVT();

      if (VA.getValVT() == MVT::i8 || VA.getValVT() == MVT::i16) {
        std::swap(ValVT, LocVT);
        MemSize = VA.getValVT().getStoreSize();
      }

      ValVReg = extendRegister(ValVReg, VA, MaxSize);
      const LLT RegTy = MRI.getType(ValVReg);

      if (RegTy.getSizeInBits() < LocVT.getSizeInBits())
        ValVReg = MIRBuilder.buildTrunc(RegTy, ValVReg).getReg(0);
    } else {
      // The store does not cover the full allocated stack slot.
      MemSize = VA.getValVT().getStoreSize();
    }

    assignValueToAddress(ValVReg, Addr, MemSize, MPO, VA);
  }

  MachineInstrBuilder MIB;

  bool IsTailCall;

  /// For tail calls, the byte offset of the call's argument area from the
  /// callee's. Unused elsewhere.
  int FPDiff;

  // Cache the SP register vreg if we need it more than once in this call site.
  Register SPReg;

  const AArch64Subtarget &Subtarget;
};
} // namespace

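/// Return true if the callee pops its own stack arguments: tailcc,
/// swifttailcc, or fastcc under -tailcallopt.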
static bool doesCalleeRestoreStack(CallingConv::ID CallConv, bool TailCallOpt) {
  return (CallConv == CallingConv::Fast && TailCallOpt) ||
         CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail;
}

bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
                                      const Value *Val,
                                      ArrayRef<Register> VRegs,
                                      FunctionLoweringInfo &FLI,
                                      Register SwiftErrorVReg) const {
  auto MIB = MIRBuilder.buildInstrNoInsert(AArch64::RET_ReallyLR);
  assert(((Val && !VRegs.empty()) || (!Val && VRegs.empty())) &&
         "Return value without a vreg");

  bool Success = true;
  if (!VRegs.empty()) {
    MachineFunction &MF = MIRBuilder.getMF();
    const Function &F = MF.getFunction();
    const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();

    MachineRegisterInfo &MRI = MF.getRegInfo();
    const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
    CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(F.getCallingConv());
    auto &DL = F.getParent()->getDataLayout();
    LLVMContext &Ctx = Val->getType()->getContext();

    SmallVector<EVT, 4> SplitEVTs;
    ComputeValueVTs(TLI, DL, Val->getType(), SplitEVTs);
    assert(VRegs.size() == SplitEVTs.size() &&
           "For each split Type there should be exactly one VReg.");

    SmallVector<ArgInfo, 8> SplitArgs;
    CallingConv::ID CC = F.getCallingConv();

    for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
      if (TLI.getNumRegistersForCallingConv(Ctx, CC, SplitEVTs[i]) > 1) {
        LLVM_DEBUG(dbgs()
                   << "Can't handle extended arg types which need split");
        return false;
      }

      Register CurVReg = VRegs[i];
      ArgInfo CurArgInfo = ArgInfo{CurVReg, SplitEVTs[i].getTypeForEVT(Ctx)};
      setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);

      // i1 is a special case because SDAG i1 true is naturally zero extended
      // when widened using ANYEXT. We need to do it explicitly here.
      if (MRI.getType(CurVReg).getSizeInBits() == 1) {
        CurVReg = MIRBuilder.buildZExt(LLT::scalar(8), CurVReg).getReg(0);
      } else {
        // Some types will need extending as specified by the CC.
        MVT NewVT = TLI.getRegisterTypeForCallingConv(Ctx, CC, SplitEVTs[i]);
        if (EVT(NewVT) != SplitEVTs[i]) {
          unsigned ExtendOp = TargetOpcode::G_ANYEXT;
          if (F.getAttributes().hasAttribute(AttributeList::ReturnIndex,
                                             Attribute::SExt))
            ExtendOp = TargetOpcode::G_SEXT;
          else if (F.getAttributes().hasAttribute(AttributeList::ReturnIndex,
                                                  Attribute::ZExt))
            ExtendOp = TargetOpcode::G_ZEXT;

          LLT NewLLT(NewVT);
          LLT OldLLT(MVT::getVT(CurArgInfo.Ty));
          CurArgInfo.Ty = EVT(NewVT).getTypeForEVT(Ctx);
          // Instead of an extend, we might have a vector type which needs
          // padding with more elements, e.g. <2 x half> -> <4 x half>.
          if (NewVT.isVector()) {
            if (OldLLT.isVector()) {
              if (NewLLT.getNumElements() > OldLLT.getNumElements()) {
                // We don't handle VA types which are not exactly twice the
                // size, but can easily be done in future.
                if (NewLLT.getNumElements() != OldLLT.getNumElements() * 2) {
                  LLVM_DEBUG(dbgs() << "Outgoing vector ret has too many elts");
                  return false;
                }
                auto Undef = MIRBuilder.buildUndef({OldLLT});
                CurVReg =
                    MIRBuilder.buildMerge({NewLLT}, {CurVReg, Undef}).getReg(0);
              } else {
                // Just do a vector extend.
                CurVReg = MIRBuilder.buildInstr(ExtendOp, {NewLLT}, {CurVReg})
                              .getReg(0);
              }
            } else if (NewLLT.getNumElements() == 2) {
              // We need to pad a <1 x S> type to <2 x S>. Since we don't have
              // <1 x S> vector types in GISel we use a build_vector instead
              // of a vector merge/concat.
              auto Undef = MIRBuilder.buildUndef({OldLLT});
              CurVReg =
                  MIRBuilder
                      .buildBuildVector({NewLLT}, {CurVReg, Undef.getReg(0)})
                      .getReg(0);
            } else {
              LLVM_DEBUG(dbgs() << "Could not handle ret ty\n");
              return false;
            }
          } else {
            // If the split EVT was a <1 x T> vector, and NewVT is T, then we
            // don't have to do anything since we don't distinguish between the
            // two.
            if (NewLLT != MRI.getType(CurVReg)) {
              // A scalar extend.
              CurVReg = MIRBuilder.buildInstr(ExtendOp, {NewLLT}, {CurVReg})
                            .getReg(0);
            }
          }
        }
      }
      if (CurVReg != CurArgInfo.Regs[0]) {
        CurArgInfo.Regs[0] = CurVReg;
        // Reset the arg flags after modifying CurVReg.
        setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);
      }
      splitToValueTypes(CurArgInfo, SplitArgs, DL, CC);
    }

    AArch64OutgoingValueAssigner Assigner(AssignFn, AssignFn, Subtarget,
                                          /*IsReturn*/ true);
    OutgoingArgHandler Handler(MIRBuilder, MRI, MIB);
    Success = determineAndHandleAssignments(Handler, Assigner, SplitArgs,
                                            MIRBuilder, CC, F.isVarArg());
  }

  if (SwiftErrorVReg) {
    MIB.addUse(AArch64::X21, RegState::Implicit);
    MIRBuilder.buildCopy(AArch64::X21, SwiftErrorVReg);
  }

  MIRBuilder.insertInstr(MIB);
  return Success;
}

/// Helper function to compute forwarded registers for musttail calls. Computes
/// the forwarded registers, sets MBB liveness, and emits COPY instructions that
/// can be used to save + restore registers later.
static void handleMustTailForwardedRegisters(MachineIRBuilder &MIRBuilder,
                                             CCAssignFn *AssignFn) {
  MachineBasicBlock &MBB = MIRBuilder.getMBB();
  MachineFunction &MF = MIRBuilder.getMF();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  if (!MFI.hasMustTailInVarArgFunc())
    return;

  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
  const Function &F = MF.getFunction();
  assert(F.isVarArg() && "Expected F to be vararg?");

  // Compute the set of forwarded registers. The rest are scratch.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(F.getCallingConv(), /*IsVarArg=*/true, MF, ArgLocs,
                 F.getContext());
  SmallVector<MVT, 2> RegParmTypes;
  RegParmTypes.push_back(MVT::i64);
  RegParmTypes.push_back(MVT::f128);

  // Later on, we can use this vector to restore the registers if necessary.
  SmallVectorImpl<ForwardedRegister> &Forwards =
      FuncInfo->getForwardedMustTailRegParms();
  CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, AssignFn);

  // Conservatively forward X8, since it might be used for an aggregate
  // return.
  if (!CCInfo.isAllocated(AArch64::X8)) {
    Register X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass);
    Forwards.push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64));
  }

  // Add the forwards to the MachineBasicBlock and MachineFunction.
  for (const auto &F : Forwards) {
    MBB.addLiveIn(F.PReg);
    MIRBuilder.buildCopy(Register(F.VReg), Register(F.PReg));
  }
}

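/// Return true if GlobalISel should give up on this function and let
/// SelectionDAG handle it instead. Scalable vector types and subtargets
/// without NEON/FP registers are not supported here yet.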
bool AArch64CallLowering::fallBackToDAGISel(const MachineFunction &MF) const {
  auto &F = MF.getFunction();
  if (isa<ScalableVectorType>(F.getReturnType()))
    return true;
  if (llvm::any_of(F.args(), [](const Argument &A) {
        return isa<ScalableVectorType>(A.getType());
      }))
    return true;
  const auto &ST = MF.getSubtarget<AArch64Subtarget>();
  if (!ST.hasNEON() || !ST.hasFPARMv8()) {
    LLVM_DEBUG(dbgs() << "Falling back to SDAG because no-NEON/no-FP "
                         "subtargets are not supported\n");
    return true;
  }
  return false;
}

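// Lower the incoming formal arguments into vregs and record the vararg and
// tail-call bookkeeping (stack argument area size, argument stack to restore)
// needed later by calls and returns.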
bool AArch64CallLowering::lowerFormalArguments(
    MachineIRBuilder &MIRBuilder, const Function &F,
    ArrayRef<ArrayRef<Register>> VRegs, FunctionLoweringInfo &FLI) const {
  MachineFunction &MF = MIRBuilder.getMF();
  MachineBasicBlock &MBB = MIRBuilder.getMBB();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  auto &DL = F.getParent()->getDataLayout();

  SmallVector<ArgInfo, 8> SplitArgs;
  unsigned i = 0;
  for (auto &Arg : F.args()) {
    if (DL.getTypeStoreSize(Arg.getType()).isZero())
      continue;

    ArgInfo OrigArg{VRegs[i], Arg};
    setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, F);

    if (Arg.hasAttribute(Attribute::SwiftAsync))
      MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);

    splitToValueTypes(OrigArg, SplitArgs, DL, F.getCallingConv());
    ++i;
  }

  if (!MBB.empty())
    MIRBuilder.setInstr(*MBB.begin());

  const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
  CCAssignFn *AssignFn =
      TLI.CCAssignFnForCall(F.getCallingConv(), /*IsVarArg=*/false);

  AArch64IncomingValueAssigner Assigner(AssignFn, AssignFn);
  FormalArgHandler Handler(MIRBuilder, MRI);
  if (!determineAndHandleAssignments(Handler, Assigner, SplitArgs, MIRBuilder,
                                     F.getCallingConv(), F.isVarArg()))
    return false;

  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
  uint64_t StackOffset = Assigner.StackOffset;
  if (F.isVarArg()) {
    auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
    if (!Subtarget.isTargetDarwin()) {
      // FIXME: we need to reimplement saveVarArgsRegisters from
      // AArch64ISelLowering.
      return false;
    }

    // We currently pass all varargs at 8-byte alignment, or 4 in ILP32.
    StackOffset =
        alignTo(Assigner.StackOffset, Subtarget.isTargetILP32() ? 4 : 8);

    auto &MFI = MIRBuilder.getMF().getFrameInfo();
    FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
  }

  if (doesCalleeRestoreStack(F.getCallingConv(),
                             MF.getTarget().Options.GuaranteedTailCallOpt)) {
    // We have a non-standard ABI, so why not make full use of the stack that
    // we're going to pop? It must be aligned to 16 B in any case.
    StackOffset = alignTo(StackOffset, 16);

    // If we're expected to restore the stack (e.g. fastcc), then we'll be
    // adding a multiple of 16.
    FuncInfo->setArgumentStackToRestore(StackOffset);

    // Our own callers will guarantee that the space is free by giving an
    // aligned value to CALLSEQ_START.
  }

  // When we tail call, we need to check if the callee's arguments
  // will fit on the caller's stack. So, whenever we lower formal arguments,
  // we should keep track of this information, since we might lower a tail call
  // in this function later.
  FuncInfo->setBytesInStackArgArea(StackOffset);

  auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  if (Subtarget.hasCustomCallingConv())
    Subtarget.getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);

  handleMustTailForwardedRegisters(MIRBuilder, AssignFn);

  // Move back to the end of the basic block.
  MIRBuilder.setMBB(MBB);

  return true;
}

/// Return true if the calling convention is one that we can guarantee TCO for.
static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) {
  return (CC == CallingConv::Fast && GuaranteeTailCalls) ||
         CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
}

/// Return true if we might ever do TCO for calls with this calling convention.
static bool mayTailCallThisCC(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::C:
  case CallingConv::PreserveMost:
  case CallingConv::Swift:
  case CallingConv::SwiftTail:
  case CallingConv::Tail:
  case CallingConv::Fast:
    return true;
  default:
    return false;
  }
}

/// Returns a pair containing the fixed CCAssignFn and the vararg CCAssignFn
/// for CC.
static std::pair<CCAssignFn *, CCAssignFn *>
getAssignFnsForCC(CallingConv::ID CC, const AArch64TargetLowering &TLI) {
  return {TLI.CCAssignFnForCall(CC, false), TLI.CCAssignFnForCall(CC, true)};
}

bool AArch64CallLowering::doCallerAndCalleePassArgsTheSameWay(
    CallLoweringInfo &Info, MachineFunction &MF,
    SmallVectorImpl<ArgInfo> &InArgs) const {
  const Function &CallerF = MF.getFunction();
  CallingConv::ID CalleeCC = Info.CallConv;
  CallingConv::ID CallerCC = CallerF.getCallingConv();

  // If the calling conventions match, then everything must be the same.
  if (CalleeCC == CallerCC)
    return true;

  // Check if the caller and callee will handle arguments in the same way.
  const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
  CCAssignFn *CalleeAssignFnFixed;
  CCAssignFn *CalleeAssignFnVarArg;
  std::tie(CalleeAssignFnFixed, CalleeAssignFnVarArg) =
      getAssignFnsForCC(CalleeCC, TLI);

  CCAssignFn *CallerAssignFnFixed;
  CCAssignFn *CallerAssignFnVarArg;
  std::tie(CallerAssignFnFixed, CallerAssignFnVarArg) =
      getAssignFnsForCC(CallerCC, TLI);

  AArch64IncomingValueAssigner CalleeAssigner(CalleeAssignFnFixed,
                                              CalleeAssignFnVarArg);
  AArch64IncomingValueAssigner CallerAssigner(CallerAssignFnFixed,
                                              CallerAssignFnVarArg);

  if (!resultsCompatible(Info, MF, InArgs, CalleeAssigner, CallerAssigner))
    return false;

  // Make sure that the caller and callee preserve all of the same registers.
  auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
  const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
  if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv()) {
    TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
    TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
  }

  return TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved);
}

bool AArch64CallLowering::areCalleeOutgoingArgsTailCallable(
    CallLoweringInfo &Info, MachineFunction &MF,
    SmallVectorImpl<ArgInfo> &OutArgs) const {
  // If there are no outgoing arguments, then we are done.
  if (OutArgs.empty())
    return true;

  const Function &CallerF = MF.getFunction();
  LLVMContext &Ctx = CallerF.getContext();
  CallingConv::ID CalleeCC = Info.CallConv;
  CallingConv::ID CallerCC = CallerF.getCallingConv();
  const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();

  CCAssignFn *AssignFnFixed;
  CCAssignFn *AssignFnVarArg;
  std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI);

  // We have outgoing arguments. Make sure that we can tail call with them.
  SmallVector<CCValAssign, 16> OutLocs;
  CCState OutInfo(CalleeCC, false, MF, OutLocs, Ctx);

  AArch64OutgoingValueAssigner CalleeAssigner(AssignFnFixed, AssignFnVarArg,
                                              Subtarget, /*IsReturn*/ false);
  if (!determineAssignments(CalleeAssigner, OutArgs, OutInfo)) {
    LLVM_DEBUG(dbgs() << "... Could not analyze call operands.\n");
    return false;
  }

  // Make sure that they can fit on the caller's stack.
  const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
  if (OutInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea()) {
    LLVM_DEBUG(dbgs() << "... Cannot fit call operands on caller's stack.\n");
    return false;
  }

  // Verify that the parameters in callee-saved registers match.
  // TODO: Port this over to CallLowering as general code once swiftself is
  // supported.
  auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
  const uint32_t *CallerPreservedMask = TRI->getCallPreservedMask(MF, CallerCC);
  MachineRegisterInfo &MRI = MF.getRegInfo();

  if (Info.IsVarArg) {
    // Be conservative and disallow variadic memory operands to match SDAG's
    // behaviour.
    // FIXME: If the caller's calling convention is C, then we can
    // potentially use its argument area. However, for cases like fastcc,
    // we can't do anything.
    for (unsigned i = 0; i < OutLocs.size(); ++i) {
      auto &ArgLoc = OutLocs[i];
      if (ArgLoc.isRegLoc())
        continue;

      LLVM_DEBUG(
          dbgs()
          << "... Cannot tail call vararg function with stack arguments\n");
      return false;
    }
  }

  return parametersInCSRMatch(MRI, CallerPreservedMask, OutLocs, OutArgs);
}

bool AArch64CallLowering::isEligibleForTailCallOptimization(
    MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info,
    SmallVectorImpl<ArgInfo> &InArgs,
    SmallVectorImpl<ArgInfo> &OutArgs) const {

  // Must pass all target-independent checks in order to tail call optimize.
  if (!Info.IsTailCall)
    return false;

  CallingConv::ID CalleeCC = Info.CallConv;
  MachineFunction &MF = MIRBuilder.getMF();
  const Function &CallerF = MF.getFunction();

  LLVM_DEBUG(dbgs() << "Attempting to lower call as tail call\n");

  if (Info.SwiftErrorVReg) {
    // TODO: We should handle this.
    // Note that this is also handled by the check for no outgoing arguments.
    // Proactively disabling this though, because the swifterror handling in
    // lowerCall inserts a COPY *after* the location of the call.
    LLVM_DEBUG(dbgs() << "... Cannot handle tail calls with swifterror yet.\n");
    return false;
  }

  if (!mayTailCallThisCC(CalleeCC)) {
    LLVM_DEBUG(dbgs() << "... Calling convention cannot be tail called.\n");
    return false;
  }

  // Byval parameters hand the function a pointer directly into the stack area
  // we want to reuse during a tail call. Working around this *is* possible (see
  // X86).
  //
  // FIXME: In AArch64ISelLowering, this isn't worked around. Can/should we try
  // it?
  //
  // On Windows, "inreg" attributes signify non-aggregate indirect returns.
  // In this case, it is necessary to save/restore X0 in the callee. Tail
  // call opt interferes with this. So we disable tail call opt when the
  // caller has an argument with "inreg" attribute.
  //
  // FIXME: Check whether the callee also has an "inreg" argument.
  //
  // When the caller has a swifterror argument, we don't want to tail call
  // because we would have to move into the swifterror register before the
  // tail call.
  if (any_of(CallerF.args(), [](const Argument &A) {
        return A.hasByValAttr() || A.hasInRegAttr() || A.hasSwiftErrorAttr();
      })) {
    LLVM_DEBUG(dbgs() << "... Cannot tail call from callers with byval, "
                         "inreg, or swifterror arguments\n");
    return false;
  }

  // Externally-defined functions with weak linkage should not be
  // tail-called on AArch64 when the OS does not support dynamic
  // pre-emption of symbols, as the AAELF spec requires normal calls
  // to undefined weak functions to be replaced with a NOP or jump to the
  // next instruction. The behaviour of branch instructions in this
  // situation (as used for tail calls) is implementation-defined, so we
  // cannot rely on the linker replacing the tail call with a return.
  if (Info.Callee.isGlobal()) {
    const GlobalValue *GV = Info.Callee.getGlobal();
    const Triple &TT = MF.getTarget().getTargetTriple();
    if (GV->hasExternalWeakLinkage() &&
        (!TT.isOSWindows() || TT.isOSBinFormatELF() ||
         TT.isOSBinFormatMachO())) {
      LLVM_DEBUG(dbgs() << "... Cannot tail call externally-defined function "
                           "with weak linkage for this OS.\n");
      return false;
    }
  }

  // If we have -tailcallopt, then we're done.
  if (canGuaranteeTCO(CalleeCC, MF.getTarget().Options.GuaranteedTailCallOpt))
    return CalleeCC == CallerF.getCallingConv();

  // We don't have -tailcallopt, so we're allowed to change the ABI (sibcall).
  // Try to find cases where we can do that.

  // I want anyone implementing a new calling convention to think long and hard
  // about this assert.
  assert((!Info.IsVarArg || CalleeCC == CallingConv::C) &&
         "Unexpected variadic calling convention");

  // Verify that the incoming and outgoing arguments from the callee are
  // safe to tail call.
  if (!doCallerAndCalleePassArgsTheSameWay(Info, MF, InArgs)) {
    LLVM_DEBUG(
        dbgs()
        << "... Caller and callee have incompatible calling conventions.\n");
    return false;
  }

  if (!areCalleeOutgoingArgsTailCallable(Info, MF, OutArgs))
    return false;

  LLVM_DEBUG(
      dbgs() << "... Call is eligible for tail call optimization.\n");
  return true;
}

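/// Select the opcode for the call instruction: BL/BLR for normal calls, and
/// the TCRETURN pseudos for tail calls (the BTI variant restricts the target
/// register to x16/x17).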
static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect,
                              bool IsTailCall) {
  if (!IsTailCall)
    return IsIndirect ? getBLRCallOpcode(CallerF) : (unsigned)AArch64::BL;

  if (!IsIndirect)
    return AArch64::TCRETURNdi;

  // When BTI is enabled, we need to use TCRETURNriBTI to make sure that we use
  // x16 or x17.
  if (CallerF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
    return AArch64::TCRETURNriBTI;

  return AArch64::TCRETURNri;
}

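/// Pick the register mask describing what the call clobbers. For a "returned"
/// first argument, prefer the X0-preserving mask when the target provides
/// one; otherwise drop the flag and use the normal call-preserved mask.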
static const uint32_t *
getMaskForArgs(SmallVectorImpl<AArch64CallLowering::ArgInfo> &OutArgs,
               AArch64CallLowering::CallLoweringInfo &Info,
               const AArch64RegisterInfo &TRI, MachineFunction &MF) {
  const uint32_t *Mask;
  if (!OutArgs.empty() && OutArgs[0].Flags[0].isReturned()) {
    // For 'this' returns, use the X0-preserving mask if applicable
    Mask = TRI.getThisReturnPreservedMask(MF, Info.CallConv);
    if (!Mask) {
      OutArgs[0].Flags[0].setReturned(false);
      Mask = TRI.getCallPreservedMask(MF, Info.CallConv);
    }
  } else {
    Mask = TRI.getCallPreservedMask(MF, Info.CallConv);
  }
  return Mask;
}

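// Emit a tail call: compute FPDiff, marshal outgoing arguments into the
// (possibly reused) incoming argument area, and build the TCRETURN pseudo.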
bool AArch64CallLowering::lowerTailCall(
    MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info,
    SmallVectorImpl<ArgInfo> &OutArgs) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const Function &F = MF.getFunction();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();

  // True when we're tail calling, but without -tailcallopt.
  bool IsSibCall = !MF.getTarget().Options.GuaranteedTailCallOpt &&
                   Info.CallConv != CallingConv::Tail &&
                   Info.CallConv != CallingConv::SwiftTail;

  // TODO: Right now, regbankselect doesn't know how to handle the rtcGPR64
  // register class. Until we can do that, we should fall back here.
  if (MF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement()) {
    LLVM_DEBUG(
        dbgs() << "Cannot lower indirect tail calls with BTI enabled yet.\n");
    return false;
  }

  // Find out which ABI gets to decide where things go.
  CallingConv::ID CalleeCC = Info.CallConv;
  CCAssignFn *AssignFnFixed;
  CCAssignFn *AssignFnVarArg;
  std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI);

  MachineInstrBuilder CallSeqStart;
  if (!IsSibCall)
    CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);

  unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), true);
  auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
  MIB.add(Info.Callee);

  // Byte offset for the tail call. When we are sibcalling, this will always
  // be 0.
  MIB.addImm(0);

  // Tell the call which registers are clobbered.
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  auto TRI = Subtarget.getRegisterInfo();
  const uint32_t *Mask = TRI->getCallPreservedMask(MF, CalleeCC);
  if (Subtarget.hasCustomCallingConv())
    TRI->UpdateCustomCallPreservedMask(MF, &Mask);
  MIB.addRegMask(Mask);

  if (TRI->isAnyArgRegReserved(MF))
    TRI->emitReservedArgRegCallError(MF);

  // FPDiff is the byte offset of the call's argument area from the callee's.
  // Stores to callee stack arguments will be placed in FixedStackSlots offset
  // by this amount for a tail call. In a sibling call it must be 0 because the
  // caller will deallocate the entire stack and the callee still expects its
  // arguments to begin at SP+0.
  int FPDiff = 0;

  // This will be 0 for sibcalls, potentially nonzero for tail calls produced
  // by -tailcallopt. For sibcalls, the memory operands for the call are
  // already available in the caller's incoming argument space.
  unsigned NumBytes = 0;
  if (!IsSibCall) {
    // We aren't sibcalling, so we need to compute FPDiff. We need to do this
    // before handling assignments, because FPDiff must be known for memory
    // arguments.
    unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
    SmallVector<CCValAssign, 16> OutLocs;
    CCState OutInfo(CalleeCC, false, MF, OutLocs, F.getContext());

    AArch64OutgoingValueAssigner CalleeAssigner(AssignFnFixed, AssignFnVarArg,
                                                Subtarget, /*IsReturn*/ false);
    if (!determineAssignments(CalleeAssigner, OutArgs, OutInfo))
      return false;

    // The callee will pop the argument stack as a tail call. Thus, we must
    // keep it 16-byte aligned.
    NumBytes = alignTo(OutInfo.getNextStackOffset(), 16);

    // FPDiff will be negative if this tail call requires more space than we
    // would automatically have in our incoming argument space. Positive if we
    // actually shrink the stack.
    FPDiff = NumReusableBytes - NumBytes;

    // Update the required reserved area if this is the tail call requiring the
    // most argument stack space.
    if (FPDiff < 0 && FuncInfo->getTailCallReservedStack() < (unsigned)-FPDiff)
      FuncInfo->setTailCallReservedStack(-FPDiff);

    // The stack pointer must be 16-byte aligned at all times it's used for a
    // memory operation, which in practice means at *all* times and in
    // particular across call boundaries. Therefore our own arguments started at
    // a 16-byte aligned SP and the delta applied for the tail call should
    // satisfy the same constraint.
    assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
  }

  const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();

  AArch64OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg,
                                        Subtarget, /*IsReturn*/ false);

  // Do the actual argument marshalling.
  OutgoingArgHandler Handler(MIRBuilder, MRI, MIB,
                             /*IsTailCall*/ true, FPDiff);
  if (!determineAndHandleAssignments(Handler, Assigner, OutArgs, MIRBuilder,
                                     CalleeCC, Info.IsVarArg))
    return false;

  Mask = getMaskForArgs(OutArgs, Info, *TRI, MF);

  if (Info.IsVarArg && Info.IsMustTailCall) {
    // Now we know what's being passed to the function. Add uses to the call for
    // the forwarded registers that we *aren't* passing as parameters. This will
    // preserve the copies we built earlier.
    for (const auto &F : Forwards) {
      Register ForwardedReg = F.PReg;
      // If the register is already passed, or aliases a register which is
      // already being passed, then skip it.
      if (any_of(MIB->uses(), [&ForwardedReg, &TRI](const MachineOperand &Use) {
            if (!Use.isReg())
              return false;
            return TRI->regsOverlap(Use.getReg(), ForwardedReg);
          }))
        continue;

      // We aren't passing it already, so we should add it to the call.
      MIRBuilder.buildCopy(ForwardedReg, Register(F.VReg));
      MIB.addReg(ForwardedReg, RegState::Implicit);
    }
  }

  // If we have -tailcallopt, we need to adjust the stack. We'll do the call
  // sequence start and end here.
  if (!IsSibCall) {
    MIB->getOperand(1).setImm(FPDiff);
    CallSeqStart.addImm(0).addImm(0);
    // End the call sequence *before* emitting the call. Normally, we would
    // tidy the frame up after the call. However, here, we've laid out the
    // parameters so that when SP is reset, they will be in the correct
    // location.
    MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP).addImm(0).addImm(0);
  }

  // Now we can add the actual call instruction to the correct basic block.
  MIRBuilder.insertInstr(MIB);

  // If Callee is a reg, since it is used by a target specific instruction,
  // it must have a register class matching the constraint of that instruction.
  if (Info.Callee.isReg())
    constrainOperandRegClass(MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(),
                             *MF.getSubtarget().getRegBankInfo(), *MIB,
                             MIB->getDesc(), Info.Callee, 0);

  MF.getFrameInfo().setHasTailCall();
  Info.LoweredTailCall = true;
  return true;
}

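// Lower a normal (non-tail) call: split the arguments per the calling
// convention, emit the call sequence, marshal outgoing values, and copy the
// returned values (and swifterror) back into virtual registers.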
bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
                                    CallLoweringInfo &Info) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const Function &F = MF.getFunction();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  auto &DL = F.getParent()->getDataLayout();
  const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();

  SmallVector<ArgInfo, 8> OutArgs;
  for (auto &OrigArg : Info.OrigArgs) {
    splitToValueTypes(OrigArg, OutArgs, DL, Info.CallConv);
    // AAPCS requires that we zero-extend i1 to 8 bits by the caller.
    if (OrigArg.Ty->isIntegerTy(1))
      OutArgs.back().Flags[0].setZExt();
  }

  SmallVector<ArgInfo, 8> InArgs;
  if (!Info.OrigRet.Ty->isVoidTy())
    splitToValueTypes(Info.OrigRet, InArgs, DL, Info.CallConv);

  // If we can lower as a tail call, do that instead.
  bool CanTailCallOpt =
      isEligibleForTailCallOptimization(MIRBuilder, Info, InArgs, OutArgs);

  // We must emit a tail call if we have musttail.
  if (Info.IsMustTailCall && !CanTailCallOpt) {
    // There are types of incoming/outgoing arguments we can't handle yet, so
    // it doesn't make sense to actually die here like in ISelLowering. Instead,
    // fall back to SelectionDAG and let it try to handle this.
    LLVM_DEBUG(dbgs() << "Failed to lower musttail call as tail call\n");
    return false;
  }

  if (CanTailCallOpt)
    return lowerTailCall(MIRBuilder, Info, OutArgs);

  // Find out which ABI gets to decide where things go.
  CCAssignFn *AssignFnFixed;
  CCAssignFn *AssignFnVarArg;
  std::tie(AssignFnFixed, AssignFnVarArg) =
      getAssignFnsForCC(Info.CallConv, TLI);

  MachineInstrBuilder CallSeqStart;
  CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);

  // Create a temporarily-floating call instruction so we can add the implicit
  // uses of arg registers.
  unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), false);

  auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
  MIB.add(Info.Callee);

  // Tell the call which registers are clobbered.
  const uint32_t *Mask;
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  const auto *TRI = Subtarget.getRegisterInfo();

  AArch64OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg,
                                        Subtarget, /*IsReturn*/ false);
  // Do the actual argument marshalling.
  OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, /*IsTailCall*/ false);
  if (!determineAndHandleAssignments(Handler, Assigner, OutArgs, MIRBuilder,
                                     Info.CallConv, Info.IsVarArg))
    return false;

  Mask = getMaskForArgs(OutArgs, Info, *TRI, MF);

  if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv())
    TRI->UpdateCustomCallPreservedMask(MF, &Mask);
  MIB.addRegMask(Mask);

  if (TRI->isAnyArgRegReserved(MF))
    TRI->emitReservedArgRegCallError(MF);

  // Now we can add the actual call instruction to the correct basic block.
  MIRBuilder.insertInstr(MIB);

  // If Callee is a reg, since it is used by a target specific
  // instruction, it must have a register class matching the
  // constraint of that instruction.
  if (Info.Callee.isReg())
    constrainOperandRegClass(MF, *TRI, MRI, *Subtarget.getInstrInfo(),
                             *Subtarget.getRegBankInfo(), *MIB, MIB->getDesc(),
                             Info.Callee, 0);

  // Finally we can copy the returned value back into its virtual-register. In
  // symmetry with the arguments, the physical register must be an
  // implicit-define of the call instruction.
  if (!Info.OrigRet.Ty->isVoidTy()) {
    CCAssignFn *RetAssignFn = TLI.CCAssignFnForReturn(Info.CallConv);
    CallReturnHandler Handler(MIRBuilder, MRI, MIB);
    bool UsingReturnedArg =
        !OutArgs.empty() && OutArgs[0].Flags[0].isReturned();

    AArch64OutgoingValueAssigner Assigner(RetAssignFn, RetAssignFn, Subtarget,
                                          /*IsReturn*/ false);
    ReturnedArgCallReturnHandler ReturnedArgHandler(MIRBuilder, MRI, MIB);
    if (!determineAndHandleAssignments(
            UsingReturnedArg ? ReturnedArgHandler : Handler, Assigner, InArgs,
            MIRBuilder, Info.CallConv, Info.IsVarArg,
            UsingReturnedArg ? OutArgs[0].Regs[0] : Register()))
      return false;
  }

  if (Info.SwiftErrorVReg) {
    MIB.addDef(AArch64::X21, RegState::Implicit);
    MIRBuilder.buildCopy(Info.SwiftErrorVReg, Register(AArch64::X21));
  }

  uint64_t CalleePopBytes =
      doesCalleeRestoreStack(Info.CallConv,
                             MF.getTarget().Options.GuaranteedTailCallOpt)
          ? alignTo(Assigner.StackOffset, 16)
          : 0;

  CallSeqStart.addImm(Assigner.StackOffset).addImm(0);
  MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP)
      .addImm(Assigner.StackOffset)
      .addImm(CalleePopBytes);

  return true;
}

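// Only 64-bit (pointer-sized) values may use the "returned" argument
// optimization on AArch64.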
bool AArch64CallLowering::isTypeIsValidForThisReturn(EVT Ty) const {
  return Ty.getSizeInBits() == 64;
}