1 //===- AArch64.cpp --------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "ABIInfoImpl.h"
10 #include "TargetInfo.h"
11 #include "clang/AST/Decl.h"
12 #include "clang/Basic/DiagnosticFrontend.h"
13 #include "llvm/TargetParser/AArch64TargetParser.h"
14 
15 using namespace clang;
16 using namespace clang::CodeGen;
17 
18 //===----------------------------------------------------------------------===//
19 // AArch64 ABI Implementation
20 //===----------------------------------------------------------------------===//
21 
22 namespace {
23 
24 class AArch64ABIInfo : public ABIInfo {
25   AArch64ABIKind Kind;
26 
27 public:
28   AArch64ABIInfo(CodeGenTypes &CGT, AArch64ABIKind Kind)
29       : ABIInfo(CGT), Kind(Kind) {}
30 
31   bool isSoftFloat() const { return Kind == AArch64ABIKind::AAPCSSoft; }
32 
33 private:
34   AArch64ABIKind getABIKind() const { return Kind; }
35   bool isDarwinPCS() const { return Kind == AArch64ABIKind::DarwinPCS; }
36 
37   ABIArgInfo classifyReturnType(QualType RetTy, bool IsVariadicFn) const;
38   ABIArgInfo classifyArgumentType(QualType RetTy, bool IsVariadicFn,
39                                   bool IsNamedArg, unsigned CallingConvention,
40                                   unsigned &NSRN, unsigned &NPRN) const;
41   llvm::Type *convertFixedToScalableVectorType(const VectorType *VT) const;
42   ABIArgInfo coerceIllegalVector(QualType Ty, unsigned &NSRN,
43                                  unsigned &NPRN) const;
44   ABIArgInfo coerceAndExpandPureScalableAggregate(
45       QualType Ty, bool IsNamedArg, unsigned NVec, unsigned NPred,
46       const SmallVectorImpl<llvm::Type *> &UnpaddedCoerceToSeq, unsigned &NSRN,
47       unsigned &NPRN) const;
48   bool isHomogeneousAggregateBaseType(QualType Ty) const override;
49   bool isHomogeneousAggregateSmallEnough(const Type *Ty,
50                                          uint64_t Members) const override;
51   bool isZeroLengthBitfieldPermittedInHomogeneousAggregate() const override;
52 
53   bool isIllegalVectorType(QualType Ty) const;
54 
55   bool passAsAggregateType(QualType Ty) const;
56   bool passAsPureScalableType(QualType Ty, unsigned &NV, unsigned &NP,
57                               SmallVectorImpl<llvm::Type *> &CoerceToSeq) const;
58 
59   void flattenType(llvm::Type *Ty,
60                    SmallVectorImpl<llvm::Type *> &Flattened) const;
61 
62   void computeInfo(CGFunctionInfo &FI) const override {
63     if (!::classifyReturnType(getCXXABI(), FI, *this))
64       FI.getReturnInfo() =
65           classifyReturnType(FI.getReturnType(), FI.isVariadic());
66 
67     unsigned ArgNo = 0;
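    // NSRN and NPRN track the Next SIMD & Floating-point Register Number and
    // the Next Scalable Predicate Register Number used by the AAPCS64
    // parameter-passing rules while the arguments are classified in order.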
68     unsigned NSRN = 0, NPRN = 0;
69     for (auto &it : FI.arguments()) {
70       const bool IsNamedArg =
71           !FI.isVariadic() || ArgNo < FI.getRequiredArgs().getNumRequiredArgs();
72       ++ArgNo;
73       it.info = classifyArgumentType(it.type, FI.isVariadic(), IsNamedArg,
74                                      FI.getCallingConvention(), NSRN, NPRN);
75     }
76   }
77 
78   RValue EmitDarwinVAArg(Address VAListAddr, QualType Ty, CodeGenFunction &CGF,
79                          AggValueSlot Slot) const;
80 
81   RValue EmitAAPCSVAArg(Address VAListAddr, QualType Ty, CodeGenFunction &CGF,
82                         AArch64ABIKind Kind, AggValueSlot Slot) const;
83 
84   RValue EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
85                    AggValueSlot Slot) const override {
86     llvm::Type *BaseTy = CGF.ConvertType(Ty);
87     if (isa<llvm::ScalableVectorType>(BaseTy))
88       llvm::report_fatal_error("Passing SVE types to variadic functions is "
89                                "currently not supported");
90 
91     return Kind == AArch64ABIKind::Win64
92                ? EmitMSVAArg(CGF, VAListAddr, Ty, Slot)
93            : isDarwinPCS() ? EmitDarwinVAArg(VAListAddr, Ty, CGF, Slot)
94                            : EmitAAPCSVAArg(VAListAddr, Ty, CGF, Kind, Slot);
95   }
96 
97   RValue EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
98                      AggValueSlot Slot) const override;
99 
100   bool allowBFloatArgsAndRet() const override {
101     return getTarget().hasBFloat16Type();
102   }
103 
104   using ABIInfo::appendAttributeMangling;
105   void appendAttributeMangling(TargetClonesAttr *Attr, unsigned Index,
106                                raw_ostream &Out) const override;
107   void appendAttributeMangling(StringRef AttrStr,
108                                raw_ostream &Out) const override;
109 };
110 
111 class AArch64SwiftABIInfo : public SwiftABIInfo {
112 public:
113   explicit AArch64SwiftABIInfo(CodeGenTypes &CGT)
114       : SwiftABIInfo(CGT, /*SwiftErrorInRegister=*/true) {}
115 
116   bool isLegalVectorType(CharUnits VectorSize, llvm::Type *EltTy,
117                          unsigned NumElts) const override;
118 };
119 
120 class AArch64TargetCodeGenInfo : public TargetCodeGenInfo {
121 public:
122   AArch64TargetCodeGenInfo(CodeGenTypes &CGT, AArch64ABIKind Kind)
123       : TargetCodeGenInfo(std::make_unique<AArch64ABIInfo>(CGT, Kind)) {
124     SwiftInfo = std::make_unique<AArch64SwiftABIInfo>(CGT);
125   }
126 
127   StringRef getARCRetainAutoreleasedReturnValueMarker() const override {
128     return "mov\tfp, fp\t\t// marker for objc_retainAutoreleaseReturnValue";
129   }
130 
131   int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
132     return 31;
133   }
134 
135   bool doesReturnSlotInterfereWithArgs() const override { return false; }
136 
137   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
138                            CodeGen::CodeGenModule &CGM) const override {
139     const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
140     if (!FD)
141       return;
142 
143     TargetInfo::BranchProtectionInfo BPI(CGM.getLangOpts());
144 
145     if (const auto *TA = FD->getAttr<TargetAttr>()) {
146       ParsedTargetAttr Attr =
147           CGM.getTarget().parseTargetAttr(TA->getFeaturesStr());
148       if (!Attr.BranchProtection.empty()) {
149         StringRef Error;
150         (void)CGM.getTarget().validateBranchProtection(Attr.BranchProtection,
151                                                        Attr.CPU, BPI, Error);
152         assert(Error.empty());
153       }
154     }
155     auto *Fn = cast<llvm::Function>(GV);
156     setBranchProtectionFnAttributes(BPI, *Fn);
157   }
158 
159   bool isScalarizableAsmOperand(CodeGen::CodeGenFunction &CGF,
160                                 llvm::Type *Ty) const override {
161     if (CGF.getTarget().hasFeature("ls64")) {
162       auto *ST = dyn_cast<llvm::StructType>(Ty);
163       if (ST && ST->getNumElements() == 1) {
164         auto *AT = dyn_cast<llvm::ArrayType>(ST->getElementType(0));
165         if (AT && AT->getNumElements() == 8 &&
166             AT->getElementType()->isIntegerTy(64))
167           return true;
168       }
169     }
170     return TargetCodeGenInfo::isScalarizableAsmOperand(CGF, Ty);
171   }
172 
173   void checkFunctionABI(CodeGenModule &CGM,
174                         const FunctionDecl *Decl) const override;
175 
176   void checkFunctionCallABI(CodeGenModule &CGM, SourceLocation CallLoc,
177                             const FunctionDecl *Caller,
178                             const FunctionDecl *Callee, const CallArgList &Args,
179                             QualType ReturnType) const override;
180 
181   bool wouldInliningViolateFunctionCallABI(
182       const FunctionDecl *Caller, const FunctionDecl *Callee) const override;
183 
184 private:
185   // Diagnose calls between functions with incompatible Streaming SVE
186   // attributes.
187   void checkFunctionCallABIStreaming(CodeGenModule &CGM, SourceLocation CallLoc,
188                                      const FunctionDecl *Caller,
189                                      const FunctionDecl *Callee) const;
190   // Diagnose calls which must pass arguments in floating-point registers when
191   // the selected target does not have floating-point registers.
192   void checkFunctionCallABISoftFloat(CodeGenModule &CGM, SourceLocation CallLoc,
193                                      const FunctionDecl *Caller,
194                                      const FunctionDecl *Callee,
195                                      const CallArgList &Args,
196                                      QualType ReturnType) const;
197 };
198 
199 class WindowsAArch64TargetCodeGenInfo : public AArch64TargetCodeGenInfo {
200 public:
201   WindowsAArch64TargetCodeGenInfo(CodeGenTypes &CGT, AArch64ABIKind K)
202       : AArch64TargetCodeGenInfo(CGT, K) {}
203 
204   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
205                            CodeGen::CodeGenModule &CGM) const override;
206 
207   void getDependentLibraryOption(llvm::StringRef Lib,
208                                  llvm::SmallString<24> &Opt) const override {
209     Opt = "/DEFAULTLIB:" + qualifyWindowsLibrary(Lib);
210   }
211 
212   void getDetectMismatchOption(llvm::StringRef Name, llvm::StringRef Value,
213                                llvm::SmallString<32> &Opt) const override {
214     Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\"";
215   }
216 };
217 
218 void WindowsAArch64TargetCodeGenInfo::setTargetAttributes(
219     const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const {
220   AArch64TargetCodeGenInfo::setTargetAttributes(D, GV, CGM);
221   if (GV->isDeclaration())
222     return;
223   addStackProbeTargetAttributes(D, GV, CGM);
224 }
225 } // namespace
226 
227 llvm::Type *
228 AArch64ABIInfo::convertFixedToScalableVectorType(const VectorType *VT) const {
229   assert(VT->getElementType()->isBuiltinType() && "expected builtin type!");
230 
231   if (VT->getVectorKind() == VectorKind::SveFixedLengthPredicate) {
232     assert(VT->getElementType()->castAs<BuiltinType>()->getKind() ==
233                BuiltinType::UChar &&
234            "unexpected builtin type for SVE predicate!");
235     return llvm::ScalableVectorType::get(llvm::Type::getInt1Ty(getVMContext()),
236                                          16);
237   }
238 
239   if (VT->getVectorKind() == VectorKind::SveFixedLengthData) {
240     const auto *BT = VT->getElementType()->castAs<BuiltinType>();
241     switch (BT->getKind()) {
242     default:
243       llvm_unreachable("unexpected builtin type for SVE vector!");
244 
245     case BuiltinType::SChar:
246     case BuiltinType::UChar:
247     case BuiltinType::MFloat8:
248       return llvm::ScalableVectorType::get(
249           llvm::Type::getInt8Ty(getVMContext()), 16);
250 
251     case BuiltinType::Short:
252     case BuiltinType::UShort:
253       return llvm::ScalableVectorType::get(
254           llvm::Type::getInt16Ty(getVMContext()), 8);
255 
256     case BuiltinType::Int:
257     case BuiltinType::UInt:
258       return llvm::ScalableVectorType::get(
259           llvm::Type::getInt32Ty(getVMContext()), 4);
260 
261     case BuiltinType::Long:
262     case BuiltinType::ULong:
263       return llvm::ScalableVectorType::get(
264           llvm::Type::getInt64Ty(getVMContext()), 2);
265 
266     case BuiltinType::Half:
267       return llvm::ScalableVectorType::get(
268           llvm::Type::getHalfTy(getVMContext()), 8);
269 
270     case BuiltinType::Float:
271       return llvm::ScalableVectorType::get(
272           llvm::Type::getFloatTy(getVMContext()), 4);
273 
274     case BuiltinType::Double:
275       return llvm::ScalableVectorType::get(
276           llvm::Type::getDoubleTy(getVMContext()), 2);
277 
278     case BuiltinType::BFloat16:
279       return llvm::ScalableVectorType::get(
280           llvm::Type::getBFloatTy(getVMContext()), 8);
281     }
282   }
283 
284   llvm_unreachable("expected fixed-length SVE vector");
285 }
286 
287 ABIArgInfo AArch64ABIInfo::coerceIllegalVector(QualType Ty, unsigned &NSRN,
288                                                unsigned &NPRN) const {
289   assert(Ty->isVectorType() && "expected vector type!");
290 
291   const auto *VT = Ty->castAs<VectorType>();
292   if (VT->getVectorKind() == VectorKind::SveFixedLengthPredicate) {
293     assert(VT->getElementType()->isBuiltinType() && "expected builtin type!");
294     assert(VT->getElementType()->castAs<BuiltinType>()->getKind() ==
295                BuiltinType::UChar &&
296            "unexpected builtin type for SVE predicate!");
297     NPRN = std::min(NPRN + 1, 4u);
298     return ABIArgInfo::getDirect(llvm::ScalableVectorType::get(
299         llvm::Type::getInt1Ty(getVMContext()), 16));
300   }
301 
302   if (VT->getVectorKind() == VectorKind::SveFixedLengthData) {
303     NSRN = std::min(NSRN + 1, 8u);
304     return ABIArgInfo::getDirect(convertFixedToScalableVectorType(VT));
305   }
306 
307   uint64_t Size = getContext().getTypeSize(Ty);
308   // Android and OHOS promote <2 x i8> to i16, not i32.
309   if ((isAndroid() || isOHOSFamily()) && (Size <= 16)) {
310     llvm::Type *ResType = llvm::Type::getInt16Ty(getVMContext());
311     return ABIArgInfo::getDirect(ResType);
312   }
313   if (Size <= 32) {
314     llvm::Type *ResType = llvm::Type::getInt32Ty(getVMContext());
315     return ABIArgInfo::getDirect(ResType);
316   }
317   if (Size == 64) {
318     NSRN = std::min(NSRN + 1, 8u);
319     auto *ResType =
320         llvm::FixedVectorType::get(llvm::Type::getInt32Ty(getVMContext()), 2);
321     return ABIArgInfo::getDirect(ResType);
322   }
323   if (Size == 128) {
324     NSRN = std::min(NSRN + 1, 8u);
325     auto *ResType =
326         llvm::FixedVectorType::get(llvm::Type::getInt32Ty(getVMContext()), 4);
327     return ABIArgInfo::getDirect(ResType);
328   }
329 
330   return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
331 }
332 
333 ABIArgInfo AArch64ABIInfo::coerceAndExpandPureScalableAggregate(
334     QualType Ty, bool IsNamedArg, unsigned NVec, unsigned NPred,
335     const SmallVectorImpl<llvm::Type *> &UnpaddedCoerceToSeq, unsigned &NSRN,
336     unsigned &NPRN) const {
337   if (!IsNamedArg || NSRN + NVec > 8 || NPRN + NPred > 4)
338     return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
339   NSRN += NVec;
340   NPRN += NPred;
341 
342   // Handle SVE vector tuples.
343   if (Ty->isSVESizelessBuiltinType())
344     return ABIArgInfo::getDirect();
345 
346   llvm::Type *UnpaddedCoerceToType =
347       UnpaddedCoerceToSeq.size() == 1
348           ? UnpaddedCoerceToSeq[0]
349           : llvm::StructType::get(CGT.getLLVMContext(), UnpaddedCoerceToSeq,
350                                   true);
351 
352   SmallVector<llvm::Type *> CoerceToSeq;
353   flattenType(CGT.ConvertType(Ty), CoerceToSeq);
354   auto *CoerceToType =
355       llvm::StructType::get(CGT.getLLVMContext(), CoerceToSeq, false);
356 
357   return ABIArgInfo::getCoerceAndExpand(CoerceToType, UnpaddedCoerceToType);
358 }
359 
360 ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty, bool IsVariadicFn,
361                                                 bool IsNamedArg,
362                                                 unsigned CallingConvention,
363                                                 unsigned &NSRN,
364                                                 unsigned &NPRN) const {
365   Ty = useFirstFieldIfTransparentUnion(Ty);
366 
367   // Handle illegal vector types here.
368   if (isIllegalVectorType(Ty))
369     return coerceIllegalVector(Ty, NSRN, NPRN);
370 
371   if (!passAsAggregateType(Ty)) {
372     // Treat an enum type as its underlying type.
373     if (const EnumType *EnumTy = Ty->getAs<EnumType>())
374       Ty = EnumTy->getDecl()->getIntegerType();
375 
376     if (const auto *EIT = Ty->getAs<BitIntType>())
377       if (EIT->getNumBits() > 128)
378         return getNaturalAlignIndirect(Ty, false);
379 
380     if (Ty->isVectorType())
381       NSRN = std::min(NSRN + 1, 8u);
382     else if (const auto *BT = Ty->getAs<BuiltinType>()) {
383       if (BT->isFloatingPoint())
384         NSRN = std::min(NSRN + 1, 8u);
385       else {
386         switch (BT->getKind()) {
387         case BuiltinType::SveBool:
388         case BuiltinType::SveCount:
389           NPRN = std::min(NPRN + 1, 4u);
390           break;
391         case BuiltinType::SveBoolx2:
392           NPRN = std::min(NPRN + 2, 4u);
393           break;
394         case BuiltinType::SveBoolx4:
395           NPRN = std::min(NPRN + 4, 4u);
396           break;
397         default:
398           if (BT->isSVESizelessBuiltinType())
399             NSRN = std::min(
400                 NSRN + getContext().getBuiltinVectorTypeInfo(BT).NumVectors,
401                 8u);
402         }
403       }
404     }
405 
406     return (isPromotableIntegerTypeForABI(Ty) && isDarwinPCS()
407                 ? ABIArgInfo::getExtend(Ty, CGT.ConvertType(Ty))
408                 : ABIArgInfo::getDirect());
409   }
410 
411   // Structures with either a non-trivial destructor or a non-trivial
412   // copy constructor are always indirect.
413   if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) {
414     return getNaturalAlignIndirect(Ty, /*ByVal=*/RAA ==
415                                      CGCXXABI::RAA_DirectInMemory);
416   }
417 
418   // Empty records:
419   uint64_t Size = getContext().getTypeSize(Ty);
420   bool IsEmpty = isEmptyRecord(getContext(), Ty, true);
421   if (!Ty->isSVESizelessBuiltinType() && (IsEmpty || Size == 0)) {
422     // Empty records are ignored in C mode, and in C++ on Darwin.
423     if (!getContext().getLangOpts().CPlusPlus || isDarwinPCS())
424       return ABIArgInfo::getIgnore();
425 
426     // In C++ mode, arguments which have sizeof() == 0 (which are non-standard
427     // C++) are ignored. This isn't defined by any standard, so we copy GCC's
428     // behaviour here.
429     if (Size == 0)
430       return ABIArgInfo::getIgnore();
431 
432     // Otherwise, they are passed as if they have a size of 1 byte.
433     return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext()));
434   }
435 
436   // Homogeneous Floating-point Aggregates (HFAs) need to be expanded.
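  // For illustration (hypothetical user code): a type such as
  //   struct Quad { float x, y, z, w; };
  // is an HFA with Base = float and Members = 4 and, when enough SIMD&FP
  // registers remain, is passed in four consecutive registers.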
437   const Type *Base = nullptr;
438   uint64_t Members = 0;
439   bool IsWin64 = Kind == AArch64ABIKind::Win64 ||
440                  CallingConvention == llvm::CallingConv::Win64;
441   bool IsWinVariadic = IsWin64 && IsVariadicFn;
442   // In variadic functions on Windows, all composite types are treated alike,
443   // with no special handling of HFAs/HVAs.
444   if (!IsWinVariadic && isHomogeneousAggregate(Ty, Base, Members)) {
445     NSRN = std::min(NSRN + Members, uint64_t(8));
446     if (Kind != AArch64ABIKind::AAPCS)
447       return ABIArgInfo::getDirect(
448           llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members));
449 
450     // For HFAs/HVAs, cap the argument alignment to 16, otherwise
451     // set it to 8 according to the AAPCS64 document.
452     unsigned Align =
453         getContext().getTypeUnadjustedAlignInChars(Ty).getQuantity();
454     Align = (Align >= 16) ? 16 : 8;
455     return ABIArgInfo::getDirect(
456         llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members), 0,
457         nullptr, true, Align);
458   }
459 
460   // In AAPCS named arguments of a Pure Scalable Type are passed expanded in
461   // registers, or indirectly if there are not enough registers.
462   if (Kind == AArch64ABIKind::AAPCS) {
463     unsigned NVec = 0, NPred = 0;
464     SmallVector<llvm::Type *> UnpaddedCoerceToSeq;
465     if (passAsPureScalableType(Ty, NVec, NPred, UnpaddedCoerceToSeq) &&
466         (NVec + NPred) > 0)
467       return coerceAndExpandPureScalableAggregate(
468           Ty, IsNamedArg, NVec, NPred, UnpaddedCoerceToSeq, NSRN, NPRN);
469   }
470 
471   // Aggregates <= 16 bytes are passed directly in registers or on the stack.
472   if (Size <= 128) {
473     unsigned Alignment;
474     if (Kind == AArch64ABIKind::AAPCS) {
475       Alignment = getContext().getTypeUnadjustedAlign(Ty);
476       Alignment = Alignment < 128 ? 64 : 128;
477     } else {
478       Alignment =
479           std::max(getContext().getTypeAlign(Ty),
480                    (unsigned)getTarget().getPointerWidth(LangAS::Default));
481     }
482     Size = llvm::alignTo(Size, Alignment);
483 
484     // We use a pair of i64 for a 16-byte aggregate with 8-byte alignment.
485     // For aggregates with 16-byte alignment, we use i128.
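    // Worked example (illustrative): under AAPCS a 12-byte struct with 4-byte
    // alignment gets Alignment = 64 and Size rounded up to 128, so it is
    // coerced to [2 x i64]; a 16-byte struct with 16-byte alignment is coerced
    // to a single i128.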
486     llvm::Type *BaseTy = llvm::Type::getIntNTy(getVMContext(), Alignment);
487     return ABIArgInfo::getDirect(
488         Size == Alignment ? BaseTy
489                           : llvm::ArrayType::get(BaseTy, Size / Alignment));
490   }
491 
492   return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
493 }
494 
495 ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy,
496                                               bool IsVariadicFn) const {
497   if (RetTy->isVoidType())
498     return ABIArgInfo::getIgnore();
499 
500   if (const auto *VT = RetTy->getAs<VectorType>()) {
501     if (VT->getVectorKind() == VectorKind::SveFixedLengthData ||
502         VT->getVectorKind() == VectorKind::SveFixedLengthPredicate) {
503       unsigned NSRN = 0, NPRN = 0;
504       return coerceIllegalVector(RetTy, NSRN, NPRN);
505     }
506   }
507 
508   // Large vector types should be returned via memory.
509   if (RetTy->isVectorType() && getContext().getTypeSize(RetTy) > 128)
510     return getNaturalAlignIndirect(RetTy);
511 
512   if (!passAsAggregateType(RetTy)) {
513     // Treat an enum type as its underlying type.
514     if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
515       RetTy = EnumTy->getDecl()->getIntegerType();
516 
517     if (const auto *EIT = RetTy->getAs<BitIntType>())
518       if (EIT->getNumBits() > 128)
519         return getNaturalAlignIndirect(RetTy);
520 
521     return (isPromotableIntegerTypeForABI(RetTy) && isDarwinPCS()
522                 ? ABIArgInfo::getExtend(RetTy)
523                 : ABIArgInfo::getDirect());
524   }
525 
526   uint64_t Size = getContext().getTypeSize(RetTy);
527   if (!RetTy->isSVESizelessBuiltinType() &&
528       (isEmptyRecord(getContext(), RetTy, true) || Size == 0))
529     return ABIArgInfo::getIgnore();
530 
531   const Type *Base = nullptr;
532   uint64_t Members = 0;
533   if (isHomogeneousAggregate(RetTy, Base, Members) &&
534       !(getTarget().getTriple().getArch() == llvm::Triple::aarch64_32 &&
535         IsVariadicFn))
536     // Homogeneous Floating-point Aggregates (HFAs) are returned directly.
537     return ABIArgInfo::getDirect();
538 
539   // In AAPCS return values of a Pure Scalable type are treated as a single
540   // named argument and passed expanded in registers, or indirectly if there are
541   // not enough registers.
542   if (Kind == AArch64ABIKind::AAPCS) {
543     unsigned NSRN = 0, NPRN = 0;
544     unsigned NVec = 0, NPred = 0;
545     SmallVector<llvm::Type *> UnpaddedCoerceToSeq;
546     if (passAsPureScalableType(RetTy, NVec, NPred, UnpaddedCoerceToSeq) &&
547         (NVec + NPred) > 0)
548       return coerceAndExpandPureScalableAggregate(
549           RetTy, /* IsNamedArg */ true, NVec, NPred, UnpaddedCoerceToSeq, NSRN,
550           NPRN);
551   }
552 
553   // Aggregates <= 16 bytes are returned directly in registers or on the stack.
554   if (Size <= 128) {
555     if (Size <= 64 && getDataLayout().isLittleEndian()) {
556       // Composite types are returned in lower bits of a 64-bit register for LE,
557       // and in higher bits for BE. However, integer types are always returned
558       // in lower bits for both LE and BE, and they are not rounded up to
559       // 64-bits. We can skip rounding up of composite types for LE, but not for
560       // BE, otherwise composite types will be indistinguishable from integer
561       // types.
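      // For example, on little-endian targets a 3-byte struct is returned
      // directly as an i24 value rather than being widened to 64 bits.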
562       return ABIArgInfo::getDirect(
563           llvm::IntegerType::get(getVMContext(), Size));
564     }
565 
566     unsigned Alignment = getContext().getTypeAlign(RetTy);
567     Size = llvm::alignTo(Size, 64); // round up to multiple of 8 bytes
568 
569     // We use a pair of i64 for a 16-byte aggregate with 8-byte alignment.
570     // For aggregates with 16-byte alignment, we use i128.
571     if (Alignment < 128 && Size == 128) {
572       llvm::Type *BaseTy = llvm::Type::getInt64Ty(getVMContext());
573       return ABIArgInfo::getDirect(llvm::ArrayType::get(BaseTy, Size / 64));
574     }
575     return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Size));
576   }
577 
578   return getNaturalAlignIndirect(RetTy);
579 }
580 
581 /// isIllegalVectorType - check whether the vector type is legal for AArch64.
582 bool AArch64ABIInfo::isIllegalVectorType(QualType Ty) const {
583   if (const VectorType *VT = Ty->getAs<VectorType>()) {
584     // Check whether VT is a fixed-length SVE vector. These types are
585     // represented as scalable vectors in function args/return and must be
586     // coerced from fixed vectors.
587     if (VT->getVectorKind() == VectorKind::SveFixedLengthData ||
588         VT->getVectorKind() == VectorKind::SveFixedLengthPredicate)
589       return true;
590 
591     // Check whether VT is legal.
592     unsigned NumElements = VT->getNumElements();
593     uint64_t Size = getContext().getTypeSize(VT);
594     // NumElements should be power of 2.
595     if (!llvm::isPowerOf2_32(NumElements))
596       return true;
597 
598     // arm64_32 has to be compatible with the ARM logic here, which allows huge
599     // vectors for some reason.
600     llvm::Triple Triple = getTarget().getTriple();
601     if (Triple.getArch() == llvm::Triple::aarch64_32 &&
602         Triple.isOSBinFormatMachO())
603       return Size <= 32;
604 
605     return Size != 64 && (Size != 128 || NumElements == 1);
606   }
607   return false;
608 }
609 
610 bool AArch64SwiftABIInfo::isLegalVectorType(CharUnits VectorSize,
611                                             llvm::Type *EltTy,
612                                             unsigned NumElts) const {
613   if (!llvm::isPowerOf2_32(NumElts))
614     return false;
615   if (VectorSize.getQuantity() != 8 &&
616       (VectorSize.getQuantity() != 16 || NumElts == 1))
617     return false;
618   return true;
619 }
620 
621 bool AArch64ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
622   // For the soft-float ABI variant, no types are considered to be homogeneous
623   // aggregates.
624   if (isSoftFloat())
625     return false;
626 
627   // Homogeneous aggregates for AAPCS64 must have base types of a floating
628   // point type or a short-vector type. This is the same as the 32-bit ABI,
629   // but with the difference that any floating-point type is allowed,
630   // including __fp16.
631   if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
632     if (BT->isFloatingPoint())
633       return true;
634   } else if (const VectorType *VT = Ty->getAs<VectorType>()) {
635     if (auto Kind = VT->getVectorKind();
636         Kind == VectorKind::SveFixedLengthData ||
637         Kind == VectorKind::SveFixedLengthPredicate)
638       return false;
639 
640     unsigned VecSize = getContext().getTypeSize(VT);
641     if (VecSize == 64 || VecSize == 128)
642       return true;
643   }
644   return false;
645 }
646 
647 bool AArch64ABIInfo::isHomogeneousAggregateSmallEnough(const Type *Base,
648                                                        uint64_t Members) const {
649   return Members <= 4;
650 }
651 
652 bool AArch64ABIInfo::isZeroLengthBitfieldPermittedInHomogeneousAggregate()
653     const {
654   // AAPCS64 says that the rule for whether something is a homogeneous
655   // aggregate is applied to the output of the data layout decision. So
656   // anything that doesn't affect the data layout also does not affect
657   // homogeneity. In particular, zero-length bitfields don't stop a struct
658   // being homogeneous.
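  // For example (hypothetical user code), a type such as
  //   struct S { float a; int : 0; float b; };
  // is still treated as an HFA of two floats.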
659   return true;
660 }
661 
662 bool AArch64ABIInfo::passAsAggregateType(QualType Ty) const {
663   if (Kind == AArch64ABIKind::AAPCS && Ty->isSVESizelessBuiltinType()) {
664     const auto *BT = Ty->castAs<BuiltinType>();
665     return !BT->isSVECount() &&
666            getContext().getBuiltinVectorTypeInfo(BT).NumVectors > 1;
667   }
668   return isAggregateTypeForABI(Ty);
669 }
670 
671 // Check if a type needs to be passed in registers as a Pure Scalable Type (as
672 // defined by AAPCS64). Return the number of data vectors and the number of
673 // predicate vectors in the type in `NVec` and `NPred`, respectively. Upon
674 // return, `CoerceToSeq` contains an expanded sequence of LLVM IR types, one
675 // element for each non-composite member. For practical purposes, the length of
676 // `CoerceToSeq` is limited to about 12 (the maximum that could possibly fit in
677 // registers); if that limit would be exceeded, return false, so that the
678 // argument is passed under the rules for a large (> 128 bytes) composite.
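// As an illustration (hypothetical user code using the ACLE fixed-length SVE
// types, e.g. with -msve-vector-bits=512):
//   typedef svbool_t    pred_t __attribute__((arm_sve_vector_bits(512)));
//   typedef svfloat32_t vec_t  __attribute__((arm_sve_vector_bits(512)));
//   struct PST { pred_t p; vec_t v[2]; };
// is a Pure Scalable Type with NPred = 1 and NVec = 2, and a `CoerceToSeq` of
// { <vscale x 16 x i1>, <vscale x 4 x float>, <vscale x 4 x float> }.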
679 bool AArch64ABIInfo::passAsPureScalableType(
680     QualType Ty, unsigned &NVec, unsigned &NPred,
681     SmallVectorImpl<llvm::Type *> &CoerceToSeq) const {
682   if (const ConstantArrayType *AT = getContext().getAsConstantArrayType(Ty)) {
683     uint64_t NElt = AT->getZExtSize();
684     if (NElt == 0)
685       return false;
686 
687     unsigned NV = 0, NP = 0;
688     SmallVector<llvm::Type *> EltCoerceToSeq;
689     if (!passAsPureScalableType(AT->getElementType(), NV, NP, EltCoerceToSeq))
690       return false;
691 
692     if (CoerceToSeq.size() + NElt * EltCoerceToSeq.size() > 12)
693       return false;
694 
695     for (uint64_t I = 0; I < NElt; ++I)
696       llvm::copy(EltCoerceToSeq, std::back_inserter(CoerceToSeq));
697 
698     NVec += NElt * NV;
699     NPred += NElt * NP;
700     return true;
701   }
702 
703   if (const RecordType *RT = Ty->getAs<RecordType>()) {
704     // If the record cannot be passed in registers, then it's not a PST.
705     if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI());
706         RAA != CGCXXABI::RAA_Default)
707       return false;
708 
709     // Pure scalable types are never unions and never contain unions.
710     const RecordDecl *RD = RT->getDecl();
711     if (RD->isUnion())
712       return false;
713 
714     // If this is a C++ record, check the bases.
715     if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
716       for (const auto &I : CXXRD->bases()) {
717         if (isEmptyRecord(getContext(), I.getType(), true))
718           continue;
719         if (!passAsPureScalableType(I.getType(), NVec, NPred, CoerceToSeq))
720           return false;
721       }
722     }
723 
724     // Check members.
725     for (const auto *FD : RD->fields()) {
726       QualType FT = FD->getType();
727       if (isEmptyField(getContext(), FD, /* AllowArrays */ true))
728         continue;
729       if (!passAsPureScalableType(FT, NVec, NPred, CoerceToSeq))
730         return false;
731     }
732 
733     return true;
734   }
735 
736   if (const auto *VT = Ty->getAs<VectorType>()) {
737     if (VT->getVectorKind() == VectorKind::SveFixedLengthPredicate) {
738       ++NPred;
739       if (CoerceToSeq.size() + 1 > 12)
740         return false;
741       CoerceToSeq.push_back(convertFixedToScalableVectorType(VT));
742       return true;
743     }
744 
745     if (VT->getVectorKind() == VectorKind::SveFixedLengthData) {
746       ++NVec;
747       if (CoerceToSeq.size() + 1 > 12)
748         return false;
749       CoerceToSeq.push_back(convertFixedToScalableVectorType(VT));
750       return true;
751     }
752 
753     return false;
754   }
755 
756   if (!Ty->isBuiltinType())
757     return false;
758 
759   bool isPredicate;
760   switch (Ty->getAs<BuiltinType>()->getKind()) {
761 #define SVE_VECTOR_TYPE(Name, MangledName, Id, SingletonId)                    \
762   case BuiltinType::Id:                                                        \
763     isPredicate = false;                                                       \
764     break;
765 #define SVE_PREDICATE_TYPE(Name, MangledName, Id, SingletonId)                 \
766   case BuiltinType::Id:                                                        \
767     isPredicate = true;                                                        \
768     break;
769 #define SVE_TYPE(Name, Id, SingletonId)
770 #include "clang/Basic/AArch64SVEACLETypes.def"
771   default:
772     return false;
773   }
774 
775   ASTContext::BuiltinVectorTypeInfo Info =
776       getContext().getBuiltinVectorTypeInfo(cast<BuiltinType>(Ty));
777   assert(Info.NumVectors > 0 && Info.NumVectors <= 4 &&
778          "Expected 1, 2, 3 or 4 vectors!");
779   if (isPredicate)
780     NPred += Info.NumVectors;
781   else
782     NVec += Info.NumVectors;
783   llvm::Type *EltTy = Info.ElementType->isMFloat8Type()
784                           ? llvm::Type::getInt8Ty(getVMContext())
785                           : CGT.ConvertType(Info.ElementType);
786   auto *VTy = llvm::ScalableVectorType::get(EltTy, Info.EC.getKnownMinValue());
787 
788   if (CoerceToSeq.size() + Info.NumVectors > 12)
789     return false;
790   std::fill_n(std::back_inserter(CoerceToSeq), Info.NumVectors, VTy);
791 
792   return true;
793 }
794 
795 // Expand an LLVM IR type into a sequence with an element for each non-struct,
796 // non-array member of the type, with the exception of the padding types, which
797 // are retained.
798 void AArch64ABIInfo::flattenType(
799     llvm::Type *Ty, SmallVectorImpl<llvm::Type *> &Flattened) const {
800 
801   if (ABIArgInfo::isPaddingForCoerceAndExpand(Ty)) {
802     Flattened.push_back(Ty);
803     return;
804   }
805 
806   if (const auto *AT = dyn_cast<llvm::ArrayType>(Ty)) {
807     uint64_t NElt = AT->getNumElements();
808     if (NElt == 0)
809       return;
810 
811     SmallVector<llvm::Type *> EltFlattened;
812     flattenType(AT->getElementType(), EltFlattened);
813 
814     for (uint64_t I = 0; I < NElt; ++I)
815       llvm::copy(EltFlattened, std::back_inserter(Flattened));
816     return;
817   }
818 
819   if (const auto *ST = dyn_cast<llvm::StructType>(Ty)) {
820     for (auto *ET : ST->elements())
821       flattenType(ET, Flattened);
822     return;
823   }
824 
825   Flattened.push_back(Ty);
826 }
827 
828 RValue AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr, QualType Ty,
829                                       CodeGenFunction &CGF, AArch64ABIKind Kind,
830                                       AggValueSlot Slot) const {
831   // These counters are not used for variadic arguments, hence it doesn't
832   // matter that they don't retain their values across multiple calls to
833   // `classifyArgumentType` here.
834   unsigned NSRN = 0, NPRN = 0;
835   ABIArgInfo AI =
836       classifyArgumentType(Ty, /*IsVariadicFn=*/true, /* IsNamedArg */ false,
837                            CGF.CurFnInfo->getCallingConvention(), NSRN, NPRN);
838   // Empty records are ignored for parameter passing purposes.
839   if (AI.isIgnore())
840     return Slot.asRValue();
841 
842   bool IsIndirect = AI.isIndirect();
843 
844   llvm::Type *BaseTy = CGF.ConvertType(Ty);
845   if (IsIndirect)
846     BaseTy = llvm::PointerType::getUnqual(BaseTy);
847   else if (AI.getCoerceToType())
848     BaseTy = AI.getCoerceToType();
849 
850   unsigned NumRegs = 1;
851   if (llvm::ArrayType *ArrTy = dyn_cast<llvm::ArrayType>(BaseTy)) {
852     BaseTy = ArrTy->getElementType();
853     NumRegs = ArrTy->getNumElements();
854   }
855   bool IsFPR =
856       !isSoftFloat() && (BaseTy->isFloatingPointTy() || BaseTy->isVectorTy());
857 
858   // The AArch64 va_list type and handling is specified in the Procedure Call
859   // Standard, section B.4:
860   //
861   // struct {
862   //   void *__stack;
863   //   void *__gr_top;
864   //   void *__vr_top;
865   //   int __gr_offs;
866   //   int __vr_offs;
867   // };
868 
869   llvm::BasicBlock *MaybeRegBlock = CGF.createBasicBlock("vaarg.maybe_reg");
870   llvm::BasicBlock *InRegBlock = CGF.createBasicBlock("vaarg.in_reg");
871   llvm::BasicBlock *OnStackBlock = CGF.createBasicBlock("vaarg.on_stack");
872   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("vaarg.end");
873 
874   CharUnits TySize = getContext().getTypeSizeInChars(Ty);
875   CharUnits TyAlign = getContext().getTypeUnadjustedAlignInChars(Ty);
876 
877   Address reg_offs_p = Address::invalid();
878   llvm::Value *reg_offs = nullptr;
879   int reg_top_index;
880   int RegSize = IsIndirect ? 8 : TySize.getQuantity();
881   if (!IsFPR) {
882     // 3 is the field number of __gr_offs
883     reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 3, "gr_offs_p");
884     reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "gr_offs");
885     reg_top_index = 1; // field number for __gr_top
886     RegSize = llvm::alignTo(RegSize, 8);
887   } else {
888     // 4 is the field number of __vr_offs.
889     reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 4, "vr_offs_p");
890     reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "vr_offs");
891     reg_top_index = 2; // field number for __vr_top
892     RegSize = 16 * NumRegs;
893   }
894 
895   //=======================================
896   // Find out where argument was passed
897   //=======================================
898 
899   // If reg_offs >= 0 we're already using the stack for this type of
900   // argument. We don't want to keep updating reg_offs (in case it overflows,
901   // though anyone passing 2GB of arguments, each at most 16 bytes, deserves
902   // whatever they get).
903   llvm::Value *UsingStack = nullptr;
904   UsingStack = CGF.Builder.CreateICmpSGE(
905       reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, 0));
906 
907   CGF.Builder.CreateCondBr(UsingStack, OnStackBlock, MaybeRegBlock);
908 
909   // Otherwise, at least some kind of argument could go in these registers; the
910   // question is whether this particular type is too big.
911   CGF.EmitBlock(MaybeRegBlock);
912 
913   // Integer arguments may need extra register alignment (for example a
914   // "struct { __int128 a; };" gets passed in x_2N, x_{2N+1}). In this case we
915   // align __gr_offs to calculate the potential address.
916   if (!IsFPR && !IsIndirect && TyAlign.getQuantity() > 8) {
917     int Align = TyAlign.getQuantity();
918 
919     reg_offs = CGF.Builder.CreateAdd(
920         reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, Align - 1),
921         "align_regoffs");
922     reg_offs = CGF.Builder.CreateAnd(
923         reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, -Align),
924         "aligned_regoffs");
925   }
926 
927   // Update the gr_offs/vr_offs pointer for the next call to va_arg on this va_list.
928   // This is done unconditionally, reflecting the fact that allocating an
929   // argument to the stack also uses up all the remaining registers of the
930   // appropriate kind.
931   llvm::Value *NewOffset = nullptr;
932   NewOffset = CGF.Builder.CreateAdd(
933       reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, RegSize), "new_reg_offs");
934   CGF.Builder.CreateStore(NewOffset, reg_offs_p);
935 
936   // Now we're in a position to decide whether this argument really was in
937   // registers or not.
938   llvm::Value *InRegs = nullptr;
939   InRegs = CGF.Builder.CreateICmpSLE(
940       NewOffset, llvm::ConstantInt::get(CGF.Int32Ty, 0), "inreg");
941 
942   CGF.Builder.CreateCondBr(InRegs, InRegBlock, OnStackBlock);
943 
944   //=======================================
945   // Argument was in registers
946   //=======================================
947 
948   // Now we emit the code for if the argument was originally passed in
949   // registers. First start the appropriate block:
950   CGF.EmitBlock(InRegBlock);
951 
952   llvm::Value *reg_top = nullptr;
953   Address reg_top_p =
954       CGF.Builder.CreateStructGEP(VAListAddr, reg_top_index, "reg_top_p");
955   reg_top = CGF.Builder.CreateLoad(reg_top_p, "reg_top");
956   Address BaseAddr(CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, reg_top, reg_offs),
957                    CGF.Int8Ty, CharUnits::fromQuantity(IsFPR ? 16 : 8));
958   Address RegAddr = Address::invalid();
959   llvm::Type *MemTy = CGF.ConvertTypeForMem(Ty), *ElementTy = MemTy;
960 
961   if (IsIndirect) {
962     // If it's been passed indirectly (actually a struct), whatever we find from
963     // stored registers or on the stack will actually be a struct **.
964     MemTy = llvm::PointerType::getUnqual(MemTy);
965   }
966 
967   const Type *Base = nullptr;
968   uint64_t NumMembers = 0;
969   bool IsHFA = isHomogeneousAggregate(Ty, Base, NumMembers);
970   if (IsHFA && NumMembers > 1) {
971     // Homogeneous aggregates passed in registers will have their elements split
972     // and stored 16 bytes apart regardless of size (they're notionally in qN,
973     // qN+1, ...). We reload and store into a temporary local variable
974     // contiguously.
975     assert(!IsIndirect && "Homogeneous aggregates should be passed directly");
976     auto BaseTyInfo = getContext().getTypeInfoInChars(QualType(Base, 0));
977     llvm::Type *BaseTy = CGF.ConvertType(QualType(Base, 0));
978     llvm::Type *HFATy = llvm::ArrayType::get(BaseTy, NumMembers);
979     Address Tmp = CGF.CreateTempAlloca(HFATy,
980                                        std::max(TyAlign, BaseTyInfo.Align));
981 
982     // On big-endian platforms, the value will be right-aligned in its slot.
983     int Offset = 0;
984     if (CGF.CGM.getDataLayout().isBigEndian() &&
985         BaseTyInfo.Width.getQuantity() < 16)
986       Offset = 16 - BaseTyInfo.Width.getQuantity();
987 
988     for (unsigned i = 0; i < NumMembers; ++i) {
989       CharUnits BaseOffset = CharUnits::fromQuantity(16 * i + Offset);
990       Address LoadAddr =
991         CGF.Builder.CreateConstInBoundsByteGEP(BaseAddr, BaseOffset);
992       LoadAddr = LoadAddr.withElementType(BaseTy);
993 
994       Address StoreAddr = CGF.Builder.CreateConstArrayGEP(Tmp, i);
995 
996       llvm::Value *Elem = CGF.Builder.CreateLoad(LoadAddr);
997       CGF.Builder.CreateStore(Elem, StoreAddr);
998     }
999 
1000     RegAddr = Tmp.withElementType(MemTy);
1001   } else {
1002     // Otherwise the object is contiguous in memory.
1003 
1004     // It might be right-aligned in its slot.
1005     CharUnits SlotSize = BaseAddr.getAlignment();
1006     if (CGF.CGM.getDataLayout().isBigEndian() && !IsIndirect &&
1007         (IsHFA || !isAggregateTypeForABI(Ty)) &&
1008         TySize < SlotSize) {
1009       CharUnits Offset = SlotSize - TySize;
1010       BaseAddr = CGF.Builder.CreateConstInBoundsByteGEP(BaseAddr, Offset);
1011     }
1012 
1013     RegAddr = BaseAddr.withElementType(MemTy);
1014   }
1015 
1016   CGF.EmitBranch(ContBlock);
1017 
1018   //=======================================
1019   // Argument was on the stack
1020   //=======================================
1021   CGF.EmitBlock(OnStackBlock);
1022 
1023   Address stack_p = CGF.Builder.CreateStructGEP(VAListAddr, 0, "stack_p");
1024   llvm::Value *OnStackPtr = CGF.Builder.CreateLoad(stack_p, "stack");
1025 
1026   // Again, stack arguments may need realignment. In this case both integer and
1027   // floating-point ones might be affected.
1028   if (!IsIndirect && TyAlign.getQuantity() > 8) {
1029     OnStackPtr = emitRoundPointerUpToAlignment(CGF, OnStackPtr, TyAlign);
1030   }
1031   Address OnStackAddr = Address(OnStackPtr, CGF.Int8Ty,
1032                                 std::max(CharUnits::fromQuantity(8), TyAlign));
1033 
1034   // All stack slots are multiples of 8 bytes.
1035   CharUnits StackSlotSize = CharUnits::fromQuantity(8);
1036   CharUnits StackSize;
1037   if (IsIndirect)
1038     StackSize = StackSlotSize;
1039   else
1040     StackSize = TySize.alignTo(StackSlotSize);
1041 
1042   llvm::Value *StackSizeC = CGF.Builder.getSize(StackSize);
1043   llvm::Value *NewStack = CGF.Builder.CreateInBoundsGEP(
1044       CGF.Int8Ty, OnStackPtr, StackSizeC, "new_stack");
1045 
1046   // Write the new value of __stack for the next call to va_arg
1047   CGF.Builder.CreateStore(NewStack, stack_p);
1048 
1049   if (CGF.CGM.getDataLayout().isBigEndian() && !isAggregateTypeForABI(Ty) &&
1050       TySize < StackSlotSize) {
1051     CharUnits Offset = StackSlotSize - TySize;
1052     OnStackAddr = CGF.Builder.CreateConstInBoundsByteGEP(OnStackAddr, Offset);
1053   }
1054 
1055   OnStackAddr = OnStackAddr.withElementType(MemTy);
1056 
1057   CGF.EmitBranch(ContBlock);
1058 
1059   //=======================================
1060   // Tidy up
1061   //=======================================
1062   CGF.EmitBlock(ContBlock);
1063 
1064   Address ResAddr = emitMergePHI(CGF, RegAddr, InRegBlock, OnStackAddr,
1065                                  OnStackBlock, "vaargs.addr");
1066 
1067   if (IsIndirect)
1068     return CGF.EmitLoadOfAnyValue(
1069         CGF.MakeAddrLValue(
1070             Address(CGF.Builder.CreateLoad(ResAddr, "vaarg.addr"), ElementTy,
1071                     TyAlign),
1072             Ty),
1073         Slot);
1074 
1075   return CGF.EmitLoadOfAnyValue(CGF.MakeAddrLValue(ResAddr, Ty), Slot);
1076 }
1077 
1078 RValue AArch64ABIInfo::EmitDarwinVAArg(Address VAListAddr, QualType Ty,
1079                                        CodeGenFunction &CGF,
1080                                        AggValueSlot Slot) const {
1081   // The backend's lowering doesn't support va_arg for aggregates or
1082   // illegal vector types.  Lower VAArg here for these cases and use
1083   // the LLVM va_arg instruction for everything else.
1084   if (!isAggregateTypeForABI(Ty) && !isIllegalVectorType(Ty))
1085     return CGF.EmitLoadOfAnyValue(
1086         CGF.MakeAddrLValue(
1087             EmitVAArgInstr(CGF, VAListAddr, Ty, ABIArgInfo::getDirect()), Ty),
1088         Slot);
1089 
1090   uint64_t PointerSize = getTarget().getPointerWidth(LangAS::Default) / 8;
1091   CharUnits SlotSize = CharUnits::fromQuantity(PointerSize);
1092 
1093   // Empty records are ignored for parameter passing purposes.
1094   if (isEmptyRecord(getContext(), Ty, true))
1095     return Slot.asRValue();
1096 
1097   // The size of the actual thing passed, which might end up just
1098   // being a pointer for indirect types.
1099   auto TyInfo = getContext().getTypeInfoInChars(Ty);
1100 
1101   // Arguments bigger than 16 bytes which aren't homogeneous
1102   // aggregates should be passed indirectly.
1103   bool IsIndirect = false;
1104   if (TyInfo.Width.getQuantity() > 16) {
1105     const Type *Base = nullptr;
1106     uint64_t Members = 0;
1107     IsIndirect = !isHomogeneousAggregate(Ty, Base, Members);
1108   }
1109 
1110   return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect, TyInfo, SlotSize,
1111                           /*AllowHigherAlign*/ true, Slot);
1112 }
1113 
1114 RValue AArch64ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
1115                                    QualType Ty, AggValueSlot Slot) const {
1116   bool IsIndirect = false;
1117 
1118   // Composites larger than 16 bytes are passed by reference.
1119   if (isAggregateTypeForABI(Ty) && getContext().getTypeSize(Ty) > 128)
1120     IsIndirect = true;
1121 
1122   return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect,
1123                           CGF.getContext().getTypeInfoInChars(Ty),
1124                           CharUnits::fromQuantity(8),
1125                           /*allowHigherAlign*/ false, Slot);
1126 }
1127 
1128 static bool isStreamingCompatible(const FunctionDecl *F) {
1129   if (const auto *T = F->getType()->getAs<FunctionProtoType>())
1130     return T->getAArch64SMEAttributes() &
1131            FunctionType::SME_PStateSMCompatibleMask;
1132   return false;
1133 }
1134 
1135 // Report an error if an argument or return value of type Ty would need to be
1136 // passed in a floating-point register.
1137 static void diagnoseIfNeedsFPReg(DiagnosticsEngine &Diags,
1138                                  const StringRef ABIName,
1139                                  const AArch64ABIInfo &ABIInfo,
1140                                  const QualType &Ty, const NamedDecl *D,
1141                                  SourceLocation loc) {
1142   const Type *HABase = nullptr;
1143   uint64_t HAMembers = 0;
1144   if (Ty->isFloatingType() || Ty->isVectorType() ||
1145       ABIInfo.isHomogeneousAggregate(Ty, HABase, HAMembers)) {
1146     Diags.Report(loc, diag::err_target_unsupported_type_for_abi)
1147         << D->getDeclName() << Ty << ABIName;
1148   }
1149 }
1150 
1151 // If we are using a hard-float ABI but do not have floating-point registers,
1152 // then report an error for any function arguments or returns which would be
1153 // passed in floating-point registers.
1154 void AArch64TargetCodeGenInfo::checkFunctionABI(
1155     CodeGenModule &CGM, const FunctionDecl *FuncDecl) const {
1156   const AArch64ABIInfo &ABIInfo = getABIInfo<AArch64ABIInfo>();
1157   const TargetInfo &TI = ABIInfo.getContext().getTargetInfo();
1158 
1159   if (!TI.hasFeature("fp") && !ABIInfo.isSoftFloat()) {
1160     diagnoseIfNeedsFPReg(CGM.getDiags(), TI.getABI(), ABIInfo,
1161                          FuncDecl->getReturnType(), FuncDecl,
1162                          FuncDecl->getLocation());
1163     for (ParmVarDecl *PVD : FuncDecl->parameters()) {
1164       diagnoseIfNeedsFPReg(CGM.getDiags(), TI.getABI(), ABIInfo, PVD->getType(),
1165                            PVD, FuncDecl->getLocation());
1166     }
1167   }
1168 }
1169 
1170 enum class ArmSMEInlinability : uint8_t {
1171   Ok = 0,
1172   ErrorCalleeRequiresNewZA = 1 << 0,
1173   ErrorCalleeRequiresNewZT0 = 1 << 1,
1174   WarnIncompatibleStreamingModes = 1 << 2,
1175   ErrorIncompatibleStreamingModes = 1 << 3,
1176 
1177   IncompatibleStreamingModes =
1178       WarnIncompatibleStreamingModes | ErrorIncompatibleStreamingModes,
1179 
1180   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/ErrorIncompatibleStreamingModes),
1181 };
1182 
1183 /// Determines if there are any Arm SME ABI issues with inlining \p Callee into
1184 /// \p Caller. Returns the issue (if any) in the ArmSMEInlinability bit enum.
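/// For example (illustrative): inlining a streaming callee into a
/// non-streaming caller yields ErrorIncompatibleStreamingModes, and a callee
/// marked __arm_new("za") additionally yields ErrorCalleeRequiresNewZA.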
1185 static ArmSMEInlinability GetArmSMEInlinability(const FunctionDecl *Caller,
1186                                                 const FunctionDecl *Callee) {
1187   bool CallerIsStreaming =
1188       IsArmStreamingFunction(Caller, /*IncludeLocallyStreaming=*/true);
1189   bool CalleeIsStreaming =
1190       IsArmStreamingFunction(Callee, /*IncludeLocallyStreaming=*/true);
1191   bool CallerIsStreamingCompatible = isStreamingCompatible(Caller);
1192   bool CalleeIsStreamingCompatible = isStreamingCompatible(Callee);
1193 
1194   ArmSMEInlinability Inlinability = ArmSMEInlinability::Ok;
1195 
1196   if (!CalleeIsStreamingCompatible &&
1197       (CallerIsStreaming != CalleeIsStreaming || CallerIsStreamingCompatible)) {
1198     if (CalleeIsStreaming)
1199       Inlinability |= ArmSMEInlinability::ErrorIncompatibleStreamingModes;
1200     else
1201       Inlinability |= ArmSMEInlinability::WarnIncompatibleStreamingModes;
1202   }
1203   if (auto *NewAttr = Callee->getAttr<ArmNewAttr>()) {
1204     if (NewAttr->isNewZA())
1205       Inlinability |= ArmSMEInlinability::ErrorCalleeRequiresNewZA;
1206     if (NewAttr->isNewZT0())
1207       Inlinability |= ArmSMEInlinability::ErrorCalleeRequiresNewZT0;
1208   }
1209 
1210   return Inlinability;
1211 }
1212 
1213 void AArch64TargetCodeGenInfo::checkFunctionCallABIStreaming(
1214     CodeGenModule &CGM, SourceLocation CallLoc, const FunctionDecl *Caller,
1215     const FunctionDecl *Callee) const {
1216   if (!Caller || !Callee || !Callee->hasAttr<AlwaysInlineAttr>())
1217     return;
1218 
1219   ArmSMEInlinability Inlinability = GetArmSMEInlinability(Caller, Callee);
1220 
1221   if ((Inlinability & ArmSMEInlinability::IncompatibleStreamingModes) !=
1222       ArmSMEInlinability::Ok)
1223     CGM.getDiags().Report(
1224         CallLoc,
1225         (Inlinability & ArmSMEInlinability::ErrorIncompatibleStreamingModes) ==
1226                 ArmSMEInlinability::ErrorIncompatibleStreamingModes
1227             ? diag::err_function_always_inline_attribute_mismatch
1228             : diag::warn_function_always_inline_attribute_mismatch)
1229         << Caller->getDeclName() << Callee->getDeclName() << "streaming";
1230 
1231   if ((Inlinability & ArmSMEInlinability::ErrorCalleeRequiresNewZA) ==
1232       ArmSMEInlinability::ErrorCalleeRequiresNewZA)
1233     CGM.getDiags().Report(CallLoc, diag::err_function_always_inline_new_za)
1234         << Callee->getDeclName();
1235 
1236   if ((Inlinability & ArmSMEInlinability::ErrorCalleeRequiresNewZT0) ==
1237       ArmSMEInlinability::ErrorCalleeRequiresNewZT0)
1238     CGM.getDiags().Report(CallLoc, diag::err_function_always_inline_new_zt0)
1239         << Callee->getDeclName();
1240 }
1241 
1242 // If the target does not have floating-point registers, but we are using a
1243 // hard-float ABI, there is no way to pass floating-point, vector or HFA values
1244 // to functions, so we report an error.
1245 void AArch64TargetCodeGenInfo::checkFunctionCallABISoftFloat(
1246     CodeGenModule &CGM, SourceLocation CallLoc, const FunctionDecl *Caller,
1247     const FunctionDecl *Callee, const CallArgList &Args,
1248     QualType ReturnType) const {
1249   const AArch64ABIInfo &ABIInfo = getABIInfo<AArch64ABIInfo>();
1250   const TargetInfo &TI = ABIInfo.getContext().getTargetInfo();
1251 
1252   if (!Caller || TI.hasFeature("fp") || ABIInfo.isSoftFloat())
1253     return;
1254 
1255   diagnoseIfNeedsFPReg(CGM.getDiags(), TI.getABI(), ABIInfo, ReturnType,
1256                        Callee ? Callee : Caller, CallLoc);
1257 
1258   for (const CallArg &Arg : Args)
1259     diagnoseIfNeedsFPReg(CGM.getDiags(), TI.getABI(), ABIInfo, Arg.getType(),
1260                          Callee ? Callee : Caller, CallLoc);
1261 }
1262 
1263 void AArch64TargetCodeGenInfo::checkFunctionCallABI(CodeGenModule &CGM,
1264                                                     SourceLocation CallLoc,
1265                                                     const FunctionDecl *Caller,
1266                                                     const FunctionDecl *Callee,
1267                                                     const CallArgList &Args,
1268                                                     QualType ReturnType) const {
1269   checkFunctionCallABIStreaming(CGM, CallLoc, Caller, Callee);
1270   checkFunctionCallABISoftFloat(CGM, CallLoc, Caller, Callee, Args, ReturnType);
1271 }
1272 
1273 bool AArch64TargetCodeGenInfo::wouldInliningViolateFunctionCallABI(
1274     const FunctionDecl *Caller, const FunctionDecl *Callee) const {
1275   return Caller && Callee &&
1276          GetArmSMEInlinability(Caller, Callee) != ArmSMEInlinability::Ok;
1277 }
1278 
1279 void AArch64ABIInfo::appendAttributeMangling(TargetClonesAttr *Attr,
1280                                              unsigned Index,
1281                                              raw_ostream &Out) const {
1282   appendAttributeMangling(Attr->getFeatureStr(Index), Out);
1283 }
1284 
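// Append the mangling suffix for a target_version/target_clones feature
// string. For example (illustrative), "sve2+dotprod" is split on '+', trimmed,
// sorted and de-duplicated, producing the suffix "._MdotprodMsve2", while
// "default" produces just ".default".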
1285 void AArch64ABIInfo::appendAttributeMangling(StringRef AttrStr,
1286                                              raw_ostream &Out) const {
1287   if (AttrStr == "default") {
1288     Out << ".default";
1289     return;
1290   }
1291 
1292   Out << "._";
1293   SmallVector<StringRef, 8> Features;
1294   AttrStr.split(Features, "+");
1295   for (auto &Feat : Features)
1296     Feat = Feat.trim();
1297 
1298   llvm::sort(Features, [](const StringRef LHS, const StringRef RHS) {
1299     return LHS.compare(RHS) < 0;
1300   });
1301 
1302   llvm::SmallDenseSet<StringRef, 8> UniqueFeats;
1303   for (auto &Feat : Features)
1304     if (auto Ext = llvm::AArch64::parseFMVExtension(Feat))
1305       if (UniqueFeats.insert(Ext->Name).second)
1306         Out << 'M' << Ext->Name;
1307 }
1308 
1309 std::unique_ptr<TargetCodeGenInfo>
1310 CodeGen::createAArch64TargetCodeGenInfo(CodeGenModule &CGM,
1311                                         AArch64ABIKind Kind) {
1312   return std::make_unique<AArch64TargetCodeGenInfo>(CGM.getTypes(), Kind);
1313 }
1314 
1315 std::unique_ptr<TargetCodeGenInfo>
1316 CodeGen::createWindowsAArch64TargetCodeGenInfo(CodeGenModule &CGM,
1317                                                AArch64ABIKind K) {
1318   return std::make_unique<WindowsAArch64TargetCodeGenInfo>(CGM.getTypes(), K);
1319 }
1320