//===- X86.cpp ------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "ABIInfoImpl.h"
#include "TargetInfo.h"
#include "clang/Basic/DiagnosticFrontend.h"
#include "llvm/ADT/SmallBitVector.h"

using namespace clang;
using namespace clang::CodeGen;

namespace {

/// IsX86_MMXType - Return true if this is an MMX type.
bool IsX86_MMXType(llvm::Type *IRType) {
  // Return true if the type is an MMX type <2 x i32>, <4 x i16>, or <8 x i8>.
  return IRType->isVectorTy() && IRType->getPrimitiveSizeInBits() == 64 &&
         cast<llvm::VectorType>(IRType)->getElementType()->isIntegerTy() &&
         IRType->getScalarSizeInBits() != 64;
}

static llvm::Type *X86AdjustInlineAsmType(CodeGen::CodeGenFunction &CGF,
                                          StringRef Constraint,
                                          llvm::Type *Ty) {
  if (Constraint == "k") {
    llvm::Type *Int1Ty = llvm::Type::getInt1Ty(CGF.getLLVMContext());
    return llvm::FixedVectorType::get(Int1Ty, Ty->getScalarSizeInBits());
  }

  // No operation needed
  return Ty;
}

/// Returns true if this type can be passed in SSE registers with the
/// X86_VectorCall calling convention. Shared between x86_32 and x86_64.
static bool isX86VectorTypeForVectorCall(ASTContext &Context, QualType Ty) {
  if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
    if (BT->isFloatingPoint() && BT->getKind() != BuiltinType::Half) {
      if (BT->getKind() == BuiltinType::LongDouble) {
        if (&Context.getTargetInfo().getLongDoubleFormat() ==
            &llvm::APFloat::x87DoubleExtended())
          return false;
      }
      return true;
    }
  } else if (const VectorType *VT = Ty->getAs<VectorType>()) {
    // vectorcall can pass XMM, YMM, and ZMM vectors. We don't pass SSE1 MMX
    // registers specially.
    unsigned VecSize = Context.getTypeSize(VT);
    if (VecSize == 128 || VecSize == 256 || VecSize == 512)
      return true;
  }
  return false;
}

/// Returns true if this aggregate is small enough to be passed in SSE
/// registers in the X86_VectorCall calling convention. Shared between x86_32
/// and x86_64.
static bool isX86VectorCallAggregateSmallEnough(uint64_t NumMembers) {
  return NumMembers <= 4;
}

/// Returns a Homogeneous Vector Aggregate ABIArgInfo, used in X86.
static ABIArgInfo getDirectX86Hva(llvm::Type *T = nullptr) {
  auto AI = ABIArgInfo::getDirect(T);
  AI.setInReg(true);
  AI.setCanBeFlattened(false);
  return AI;
}

//===----------------------------------------------------------------------===//
// X86-32 ABI Implementation
//===----------------------------------------------------------------------===//

/// Similar to llvm::CCState, but for Clang.
struct CCState {
  CCState(CGFunctionInfo &FI)
      : IsPreassigned(FI.arg_size()), CC(FI.getCallingConvention()),
        Required(FI.getRequiredArgs()), IsDelegateCall(FI.isDelegateCall()) {}

  llvm::SmallBitVector IsPreassigned;
  unsigned CC = CallingConv::CC_C;
  unsigned FreeRegs = 0;
  unsigned FreeSSERegs = 0;
  RequiredArgs Required;
  bool IsDelegateCall = false;
};

/// X86_32ABIInfo - The X86-32 ABI information.
class X86_32ABIInfo : public ABIInfo {
  enum Class {
    Integer,
    Float
  };

  static const unsigned MinABIStackAlignInBytes = 4;

  bool IsDarwinVectorABI;
  bool IsRetSmallStructInRegABI;
  bool IsWin32StructABI;
  bool IsSoftFloatABI;
  bool IsMCUABI;
  bool IsLinuxABI;
  unsigned DefaultNumRegisterParameters;

  static bool isRegisterSize(unsigned Size) {
    return (Size == 8 || Size == 16 || Size == 32 || Size == 64);
  }

  bool isHomogeneousAggregateBaseType(QualType Ty) const override {
    // FIXME: Assumes vectorcall is in use.
    return isX86VectorTypeForVectorCall(getContext(), Ty);
  }

  bool isHomogeneousAggregateSmallEnough(const Type *Ty,
                                         uint64_t NumMembers) const override {
    // FIXME: Assumes vectorcall is in use.
    return isX86VectorCallAggregateSmallEnough(NumMembers);
  }

  bool shouldReturnTypeInRegister(QualType Ty, ASTContext &Context) const;

  /// getIndirectResult - Given a source type \arg Ty, return a suitable result
  /// such that the argument will be passed in memory.
  ABIArgInfo getIndirectResult(QualType Ty, bool ByVal, CCState &State) const;

  ABIArgInfo getIndirectReturnResult(QualType Ty, CCState &State) const;

  /// Return the alignment to use for the given type on the stack.
  unsigned getTypeStackAlignInBytes(QualType Ty, unsigned Align) const;

  Class classify(QualType Ty) const;
  ABIArgInfo classifyReturnType(QualType RetTy, CCState &State) const;
  ABIArgInfo classifyArgumentType(QualType RetTy, CCState &State,
                                  unsigned ArgIndex) const;

  /// Updates the number of available free registers, returns
  /// true if any registers were allocated.
  bool updateFreeRegs(QualType Ty, CCState &State) const;

  bool shouldAggregateUseDirect(QualType Ty, CCState &State, bool &InReg,
                                bool &NeedsPadding) const;
  bool shouldPrimitiveUseInReg(QualType Ty, CCState &State) const;

  bool canExpandIndirectArgument(QualType Ty) const;

  /// Rewrite the function info so that all memory arguments use
  /// inalloca.
  void rewriteWithInAlloca(CGFunctionInfo &FI) const;

  void addFieldToArgStruct(SmallVector<llvm::Type *, 6> &FrameFields,
                           CharUnits &StackOffset, ABIArgInfo &Info,
                           QualType Type) const;
  void runVectorCallFirstPass(CGFunctionInfo &FI, CCState &State) const;

public:
  void computeInfo(CGFunctionInfo &FI) const override;
  RValue EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
                   AggValueSlot Slot) const override;

  X86_32ABIInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI,
                bool RetSmallStructInRegABI, bool Win32StructABI,
                unsigned NumRegisterParameters, bool SoftFloatABI)
      : ABIInfo(CGT), IsDarwinVectorABI(DarwinVectorABI),
        IsRetSmallStructInRegABI(RetSmallStructInRegABI),
        IsWin32StructABI(Win32StructABI), IsSoftFloatABI(SoftFloatABI),
        IsMCUABI(CGT.getTarget().getTriple().isOSIAMCU()),
        IsLinuxABI(CGT.getTarget().getTriple().isOSLinux() ||
                   CGT.getTarget().getTriple().isOSCygMing()),
        DefaultNumRegisterParameters(NumRegisterParameters) {}
};

class X86_32SwiftABIInfo : public SwiftABIInfo {
public:
  explicit X86_32SwiftABIInfo(CodeGenTypes &CGT)
      : SwiftABIInfo(CGT, /*SwiftErrorInRegister=*/false) {}

  bool shouldPassIndirectly(ArrayRef<llvm::Type *> ComponentTys,
                            bool AsReturnValue) const override {
    // LLVM's x86-32 lowering currently only assigns up to three
    // integer registers and three fp registers. Oddly, it'll use up to
    // four vector registers for vectors, but those can overlap with the
    // scalar registers.
    return occupiesMoreThan(ComponentTys, /*total=*/3);
  }
};

class X86_32TargetCodeGenInfo : public TargetCodeGenInfo {
public:
  X86_32TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI,
                          bool RetSmallStructInRegABI, bool Win32StructABI,
                          unsigned NumRegisterParameters, bool SoftFloatABI)
      : TargetCodeGenInfo(std::make_unique<X86_32ABIInfo>(
            CGT, DarwinVectorABI, RetSmallStructInRegABI, Win32StructABI,
            NumRegisterParameters, SoftFloatABI)) {
    SwiftInfo = std::make_unique<X86_32SwiftABIInfo>(CGT);
  }

  static bool isStructReturnInRegABI(
      const llvm::Triple &Triple, const CodeGenOptions &Opts);

  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                           CodeGen::CodeGenModule &CGM) const override;

  int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override {
    // Darwin uses different dwarf register numbers for EH.
    if (CGM.getTarget().getTriple().isOSDarwin()) return 5;
    return 4;
  }

  bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
                               llvm::Value *Address) const override;

  llvm::Type *adjustInlineAsmType(CodeGen::CodeGenFunction &CGF,
                                  StringRef Constraint,
                                  llvm::Type *Ty) const override {
    return X86AdjustInlineAsmType(CGF, Constraint, Ty);
  }

  void addReturnRegisterOutputs(CodeGenFunction &CGF, LValue ReturnValue,
                                std::string &Constraints,
                                std::vector<llvm::Type *> &ResultRegTypes,
                                std::vector<llvm::Type *> &ResultTruncRegTypes,
                                std::vector<LValue> &ResultRegDests,
                                std::string &AsmString,
                                unsigned NumOutputs) const override;

  StringRef getARCRetainAutoreleasedReturnValueMarker() const override {
    return "movl\t%ebp, %ebp"
           "\t\t// marker for objc_retainAutoreleaseReturnValue";
  }
};

}

/// Rewrite input constraint references after adding some output constraints.
/// In the case where there is one output and one input and we add one output,
/// we need to replace all operand references greater than or equal to 1:
///     mov $0, $1
///     mov eax, $1
/// The result will be:
///     mov $0, $2
///     mov eax, $2
static void rewriteInputConstraintReferences(unsigned FirstIn,
                                             unsigned NumNewOuts,
                                             std::string &AsmString) {
  std::string Buf;
  llvm::raw_string_ostream OS(Buf);
  size_t Pos = 0;
  while (Pos < AsmString.size()) {
    size_t DollarStart = AsmString.find('$', Pos);
    if (DollarStart == std::string::npos)
      DollarStart = AsmString.size();
    size_t DollarEnd = AsmString.find_first_not_of('$', DollarStart);
    if (DollarEnd == std::string::npos)
      DollarEnd = AsmString.size();
    OS << StringRef(&AsmString[Pos], DollarEnd - Pos);
    Pos = DollarEnd;
    size_t NumDollars = DollarEnd - DollarStart;
    if (NumDollars % 2 != 0 && Pos < AsmString.size()) {
      // We have an operand reference.
      size_t DigitStart = Pos;
      if (AsmString[DigitStart] == '{') {
        OS << '{';
        ++DigitStart;
      }
      size_t DigitEnd = AsmString.find_first_not_of("0123456789", DigitStart);
      if (DigitEnd == std::string::npos)
        DigitEnd = AsmString.size();
      StringRef OperandStr(&AsmString[DigitStart], DigitEnd - DigitStart);
      unsigned OperandIndex;
      if (!OperandStr.getAsInteger(10, OperandIndex)) {
        if (OperandIndex >= FirstIn)
          OperandIndex += NumNewOuts;
        OS << OperandIndex;
      } else {
        OS << OperandStr;
      }
      Pos = DigitEnd;
    }
  }
  AsmString = std::move(Buf);
}

/// Add output constraints for EAX:EDX because they are return registers.
void X86_32TargetCodeGenInfo::addReturnRegisterOutputs(
    CodeGenFunction &CGF, LValue ReturnSlot, std::string &Constraints,
    std::vector<llvm::Type *> &ResultRegTypes,
    std::vector<llvm::Type *> &ResultTruncRegTypes,
    std::vector<LValue> &ResultRegDests, std::string &AsmString,
    unsigned NumOutputs) const {
  uint64_t RetWidth = CGF.getContext().getTypeSize(ReturnSlot.getType());

  // Use the EAX constraint if the width is 32 or smaller and EAX:EDX if it is
  // larger.
  if (!Constraints.empty())
    Constraints += ',';
  if (RetWidth <= 32) {
    Constraints += "={eax}";
    ResultRegTypes.push_back(CGF.Int32Ty);
  } else {
    // Use the 'A' constraint for EAX:EDX.
    Constraints += "=A";
    ResultRegTypes.push_back(CGF.Int64Ty);
  }

  // Truncate EAX or EAX:EDX to an integer of the appropriate size.
  llvm::Type *CoerceTy = llvm::IntegerType::get(CGF.getLLVMContext(), RetWidth);
  ResultTruncRegTypes.push_back(CoerceTy);

  // Coerce the integer by bitcasting the return slot pointer.
  ReturnSlot.setAddress(ReturnSlot.getAddress().withElementType(CoerceTy));
  ResultRegDests.push_back(ReturnSlot);

  rewriteInputConstraintReferences(NumOutputs, 1, AsmString);
}

/// shouldReturnTypeInRegister - Determine if the given type should be
/// returned in a register (for the Darwin and MCU ABI).
bool X86_32ABIInfo::shouldReturnTypeInRegister(QualType Ty,
                                               ASTContext &Context) const {
  uint64_t Size = Context.getTypeSize(Ty);

  // For i386, type must be register sized.
  // For the MCU ABI, it only needs to be <= 8-byte
  if ((IsMCUABI && Size > 64) || (!IsMCUABI && !isRegisterSize(Size)))
    return false;

  if (Ty->isVectorType()) {
    // 64- and 128- bit vectors inside structures are not returned in
    // registers.
    if (Size == 64 || Size == 128)
      return false;

    return true;
  }

  // If this is a builtin, pointer, enum, complex type, member pointer, or
  // member function pointer it is ok.
  if (Ty->getAs<BuiltinType>() || Ty->hasPointerRepresentation() ||
      Ty->isAnyComplexType() || Ty->isEnumeralType() ||
      Ty->isBlockPointerType() || Ty->isMemberPointerType())
    return true;

  // Arrays are treated like records.
  if (const ConstantArrayType *AT = Context.getAsConstantArrayType(Ty))
    return shouldReturnTypeInRegister(AT->getElementType(), Context);

  // Otherwise, it must be a record type.
  const RecordType *RT = Ty->getAs<RecordType>();
  if (!RT) return false;

  // FIXME: Traverse bases here too.

  // Structure types are passed in register if all fields would be
  // passed in a register.
  for (const auto *FD : RT->getDecl()->fields()) {
    // Empty fields are ignored.
    if (isEmptyField(Context, FD, true))
      continue;

    // Check fields recursively.
    if (!shouldReturnTypeInRegister(FD->getType(), Context))
      return false;
  }
  return true;
}

static bool is32Or64BitBasicType(QualType Ty, ASTContext &Context) {
  // Treat complex types as the element type.
  if (const ComplexType *CTy = Ty->getAs<ComplexType>())
    Ty = CTy->getElementType();

  // Check for a type which we know has a simple scalar argument-passing
  // convention without any padding. (We're specifically looking for 32
  // and 64-bit integer and integer-equivalents, float, and double.)
  if (!Ty->getAs<BuiltinType>() && !Ty->hasPointerRepresentation() &&
      !Ty->isEnumeralType() && !Ty->isBlockPointerType())
    return false;

  uint64_t Size = Context.getTypeSize(Ty);
  return Size == 32 || Size == 64;
}

static bool addFieldSizes(ASTContext &Context, const RecordDecl *RD,
                          uint64_t &Size) {
  for (const auto *FD : RD->fields()) {
    // Scalar arguments on the stack get 4 byte alignment on x86. If the
    // argument is smaller than 32-bits, expanding the struct will create
    // alignment padding.
    if (!is32Or64BitBasicType(FD->getType(), Context))
      return false;

    // FIXME: Reject bit-fields wholesale; there are two problems, we don't know
    // how to expand them yet, and the predicate for telling if a bitfield still
    // counts as "basic" is more complicated than what we were doing previously.
    if (FD->isBitField())
      return false;

    Size += Context.getTypeSize(FD->getType());
  }
  return true;
}

static bool addBaseAndFieldSizes(ASTContext &Context, const CXXRecordDecl *RD,
                                 uint64_t &Size) {
  // Don't do this if there are any non-empty bases.
  for (const CXXBaseSpecifier &Base : RD->bases()) {
    if (!addBaseAndFieldSizes(Context, Base.getType()->getAsCXXRecordDecl(),
                              Size))
      return false;
  }
  if (!addFieldSizes(Context, RD, Size))
    return false;
  return true;
}

/// Test whether an argument type which is to be passed indirectly (on the
/// stack) would have the equivalent layout if it was expanded into separate
/// arguments. If so, we prefer to do the latter to avoid inhibiting
/// optimizations.
bool X86_32ABIInfo::canExpandIndirectArgument(QualType Ty) const {
  // We can only expand structure types.
  const RecordType *RT = Ty->getAs<RecordType>();
  if (!RT)
    return false;
  const RecordDecl *RD = RT->getDecl();
  uint64_t Size = 0;
  if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
    if (!IsWin32StructABI) {
      // On non-Windows, we have to conservatively match our old bitcode
      // prototypes in order to be ABI-compatible at the bitcode level.
      if (!CXXRD->isCLike())
        return false;
    } else {
      // Don't do this for dynamic classes.
      if (CXXRD->isDynamicClass())
        return false;
    }
    if (!addBaseAndFieldSizes(getContext(), CXXRD, Size))
      return false;
  } else {
    if (!addFieldSizes(getContext(), RD, Size))
      return false;
  }

  // We can do this if there was no alignment padding.
  return Size == getContext().getTypeSize(Ty);
}

ABIArgInfo X86_32ABIInfo::getIndirectReturnResult(QualType RetTy,
                                                  CCState &State) const {
  // If the return value is indirect, then the hidden argument is consuming one
  // integer register.
  if (State.CC != llvm::CallingConv::X86_FastCall &&
      State.CC != llvm::CallingConv::X86_VectorCall && State.FreeRegs) {
    --State.FreeRegs;
    if (!IsMCUABI)
      return getNaturalAlignIndirectInReg(RetTy);
  }
  return getNaturalAlignIndirect(RetTy, /*ByVal=*/false);
}

ABIArgInfo X86_32ABIInfo::classifyReturnType(QualType RetTy,
                                             CCState &State) const {
  if (RetTy->isVoidType())
    return ABIArgInfo::getIgnore();

  const Type *Base = nullptr;
  uint64_t NumElts = 0;
  if ((State.CC == llvm::CallingConv::X86_VectorCall ||
       State.CC == llvm::CallingConv::X86_RegCall) &&
      isHomogeneousAggregate(RetTy, Base, NumElts)) {
    // The LLVM struct type for such an aggregate should lower properly.
    return ABIArgInfo::getDirect();
  }

  if (const VectorType *VT = RetTy->getAs<VectorType>()) {
    // On Darwin, some vectors are returned in registers.
    if (IsDarwinVectorABI) {
      uint64_t Size = getContext().getTypeSize(RetTy);

      // 128-bit vectors are a special case; they are returned in
      // registers and we need to make sure to pick a type the LLVM
      // backend will like.
      if (Size == 128)
        return ABIArgInfo::getDirect(llvm::FixedVectorType::get(
            llvm::Type::getInt64Ty(getVMContext()), 2));

      // Always return in register if it fits in a general purpose
      // register, or if it is 64 bits and has a single element.
      if ((Size == 8 || Size == 16 || Size == 32) ||
          (Size == 64 && VT->getNumElements() == 1))
        return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(),
                                                            Size));

      return getIndirectReturnResult(RetTy, State);
    }

    return ABIArgInfo::getDirect();
  }

  if (isAggregateTypeForABI(RetTy)) {
    if (const RecordType *RT = RetTy->getAs<RecordType>()) {
      // Structures with flexible arrays are always indirect.
      if (RT->getDecl()->hasFlexibleArrayMember())
        return getIndirectReturnResult(RetTy, State);
    }

    // If specified, structs and unions are always indirect.
    if (!IsRetSmallStructInRegABI && !RetTy->isAnyComplexType())
      return getIndirectReturnResult(RetTy, State);

    // Ignore empty structs/unions.
    if (isEmptyRecord(getContext(), RetTy, true))
      return ABIArgInfo::getIgnore();

    // Return complex of _Float16 as <2 x half> so the backend will use xmm0.
    if (const ComplexType *CT = RetTy->getAs<ComplexType>()) {
      QualType ET = getContext().getCanonicalType(CT->getElementType());
      if (ET->isFloat16Type())
        return ABIArgInfo::getDirect(llvm::FixedVectorType::get(
            llvm::Type::getHalfTy(getVMContext()), 2));
    }

    // Small structures which are register sized are generally returned
    // in a register.
    if (shouldReturnTypeInRegister(RetTy, getContext())) {
      uint64_t Size = getContext().getTypeSize(RetTy);

      // As a special-case, if the struct is a "single-element" struct, and
      // the field is of type "float" or "double", return it in a
      // floating-point register. (MSVC does not apply this special case.)
      // We apply a similar transformation for pointer types to improve the
      // quality of the generated IR.
      if (const Type *SeltTy = isSingleElementStruct(RetTy, getContext()))
        if ((!IsWin32StructABI && SeltTy->isRealFloatingType())
            || SeltTy->hasPointerRepresentation())
          return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));

      // FIXME: We should be able to narrow this integer in cases with dead
      // padding.
      return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Size));
    }

    return getIndirectReturnResult(RetTy, State);
  }

  // Treat an enum type as its underlying type.
  if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
    RetTy = EnumTy->getDecl()->getIntegerType();

  if (const auto *EIT = RetTy->getAs<BitIntType>())
    if (EIT->getNumBits() > 64)
      return getIndirectReturnResult(RetTy, State);

  return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
                                               : ABIArgInfo::getDirect());
}

unsigned X86_32ABIInfo::getTypeStackAlignInBytes(QualType Ty,
                                                 unsigned Align) const {
  // Otherwise, if the alignment is less than or equal to the minimum ABI
  // alignment, just use the default; the backend will handle this.
  if (Align <= MinABIStackAlignInBytes)
    return 0; // Use default alignment.

  if (IsLinuxABI) {
    // Exclude other System V OSes (e.g. Darwin, PS4 and FreeBSD) since we
    // don't want to spend any effort dealing with the ramifications of ABI
    // breaks.
    //
    // If the vector type is __m128/__m256/__m512, return the default alignment.
    if (Ty->isVectorType() && (Align == 16 || Align == 32 || Align == 64))
      return Align;
  }
  // On non-Darwin, the stack type alignment is always 4.
  if (!IsDarwinVectorABI) {
    // Set explicit alignment, since we may need to realign the top.
    return MinABIStackAlignInBytes;
  }

  // Otherwise, if the type contains an SSE vector type, the alignment is 16.
  if (Align >= 16 && (isSIMDVectorType(getContext(), Ty) ||
                      isRecordWithSIMDVectorType(getContext(), Ty)))
    return 16;

  return MinABIStackAlignInBytes;
}

ABIArgInfo X86_32ABIInfo::getIndirectResult(QualType Ty, bool ByVal,
                                            CCState &State) const {
  if (!ByVal) {
    if (State.FreeRegs) {
      --State.FreeRegs; // Non-byval indirects just use one pointer.
      if (!IsMCUABI)
        return getNaturalAlignIndirectInReg(Ty);
    }
    return getNaturalAlignIndirect(Ty, false);
  }

  // Compute the byval alignment.
  unsigned TypeAlign = getContext().getTypeAlign(Ty) / 8;
  unsigned StackAlign = getTypeStackAlignInBytes(Ty, TypeAlign);
  if (StackAlign == 0)
    return ABIArgInfo::getIndirect(CharUnits::fromQuantity(4), /*ByVal=*/true);

  // If the stack alignment is less than the type alignment, realign the
  // argument.
  bool Realign = TypeAlign > StackAlign;
  return ABIArgInfo::getIndirect(CharUnits::fromQuantity(StackAlign),
                                 /*ByVal=*/true, Realign);
}

X86_32ABIInfo::Class X86_32ABIInfo::classify(QualType Ty) const {
  const Type *T = isSingleElementStruct(Ty, getContext());
  if (!T)
    T = Ty.getTypePtr();

  if (const BuiltinType *BT = T->getAs<BuiltinType>()) {
    BuiltinType::Kind K = BT->getKind();
    if (K == BuiltinType::Float || K == BuiltinType::Double)
      return Float;
  }
  return Integer;
}

bool X86_32ABIInfo::updateFreeRegs(QualType Ty, CCState &State) const {
  if (!IsSoftFloatABI) {
    Class C = classify(Ty);
    if (C == Float)
      return false;
  }

  unsigned Size = getContext().getTypeSize(Ty);
  unsigned SizeInRegs = (Size + 31) / 32;

  if (SizeInRegs == 0)
    return false;

  if (!IsMCUABI) {
    if (SizeInRegs > State.FreeRegs) {
      State.FreeRegs = 0;
      return false;
    }
  } else {
    // The MCU psABI allows passing parameters in-reg even if there are
    // earlier parameters that are passed on the stack. Also,
    // it does not allow passing >8-byte structs in-register,
    // even if there are 3 free registers available.
    if (SizeInRegs > State.FreeRegs || SizeInRegs > 2)
      return false;
  }

  State.FreeRegs -= SizeInRegs;
  return true;
}

bool X86_32ABIInfo::shouldAggregateUseDirect(QualType Ty, CCState &State,
                                             bool &InReg,
                                             bool &NeedsPadding) const {
  // On Windows, aggregates other than HFAs are never passed in registers, and
  // they do not consume register slots. Homogenous floating-point aggregates
  // (HFAs) have already been dealt with at this point.
  if (IsWin32StructABI && isAggregateTypeForABI(Ty))
    return false;

  NeedsPadding = false;
  InReg = !IsMCUABI;

  if (!updateFreeRegs(Ty, State))
    return false;

  if (IsMCUABI)
    return true;

  if (State.CC == llvm::CallingConv::X86_FastCall ||
      State.CC == llvm::CallingConv::X86_VectorCall ||
      State.CC == llvm::CallingConv::X86_RegCall) {
    if (getContext().getTypeSize(Ty) <= 32 && State.FreeRegs)
      NeedsPadding = true;

    return false;
  }

  return true;
}

bool X86_32ABIInfo::shouldPrimitiveUseInReg(QualType Ty, CCState &State) const {
  bool IsPtrOrInt = (getContext().getTypeSize(Ty) <= 32) &&
                    (Ty->isIntegralOrEnumerationType() || Ty->isPointerType() ||
                     Ty->isReferenceType());

  if (!IsPtrOrInt && (State.CC == llvm::CallingConv::X86_FastCall ||
                      State.CC == llvm::CallingConv::X86_VectorCall))
    return false;

  if (!updateFreeRegs(Ty, State))
    return false;

  if (!IsPtrOrInt && State.CC == llvm::CallingConv::X86_RegCall)
    return false;

  // Return true to apply inreg to all legal parameters except for MCU targets.
  return !IsMCUABI;
}

void X86_32ABIInfo::runVectorCallFirstPass(CGFunctionInfo &FI,
                                           CCState &State) const {
  // Vectorcall x86 works subtly differently than in x64, so the format is
  // a bit different than the x64 version.
  // First, all vector types (not HVAs) are assigned, with the first 6 ending
  // up in the [XYZ]MM0-5 registers. This differs from the x64 implementation,
  // where the first 6 by INDEX get registers.
  // In the second pass over the arguments, HVAs are passed in the remaining
  // vector registers if possible, or indirectly by address. The address will
  // be passed in ECX/EDX if available. Any other arguments are passed
  // according to the usual fastcall rules.
  MutableArrayRef<CGFunctionInfoArgInfo> Args = FI.arguments();
  for (int I = 0, E = Args.size(); I < E; ++I) {
    const Type *Base = nullptr;
    uint64_t NumElts = 0;
    const QualType &Ty = Args[I].type;
    if ((Ty->isVectorType() || Ty->isBuiltinType()) &&
        isHomogeneousAggregate(Ty, Base, NumElts)) {
      if (State.FreeSSERegs >= NumElts) {
        State.FreeSSERegs -= NumElts;
        Args[I].info = ABIArgInfo::getDirectInReg();
        State.IsPreassigned.set(I);
      }
    }
  }
}

ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, CCState &State,
                                               unsigned ArgIndex) const {
  // FIXME: Set alignment on indirect arguments.
  bool IsFastCall = State.CC == llvm::CallingConv::X86_FastCall;
  bool IsRegCall = State.CC == llvm::CallingConv::X86_RegCall;
  bool IsVectorCall = State.CC == llvm::CallingConv::X86_VectorCall;

  Ty = useFirstFieldIfTransparentUnion(Ty);
  TypeInfo TI = getContext().getTypeInfo(Ty);

  // Check with the C++ ABI first.
  const RecordType *RT = Ty->getAs<RecordType>();
  if (RT) {
    CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI());
    if (RAA == CGCXXABI::RAA_Indirect) {
      return getIndirectResult(Ty, false, State);
    } else if (State.IsDelegateCall) {
      // Avoid having different alignments on delegate call args by always
      // setting the alignment to 4, which is what we do for inallocas.
      ABIArgInfo Res = getIndirectResult(Ty, false, State);
      Res.setIndirectAlign(CharUnits::fromQuantity(4));
      return Res;
    } else if (RAA == CGCXXABI::RAA_DirectInMemory) {
      // The field index doesn't matter, we'll fix it up later.
      return ABIArgInfo::getInAlloca(/*FieldIndex=*/0);
    }
  }

  // Regcall uses the concept of a homogenous vector aggregate, similar
  // to other targets.
  const Type *Base = nullptr;
  uint64_t NumElts = 0;
  if ((IsRegCall || IsVectorCall) &&
      isHomogeneousAggregate(Ty, Base, NumElts)) {
    if (State.FreeSSERegs >= NumElts) {
      State.FreeSSERegs -= NumElts;

      // Vectorcall passes HVAs directly and does not flatten them, but regcall
      // does.
      if (IsVectorCall)
        return getDirectX86Hva();

      if (Ty->isBuiltinType() || Ty->isVectorType())
        return ABIArgInfo::getDirect();
      return ABIArgInfo::getExpand();
    }
    if (IsVectorCall && Ty->isBuiltinType())
      return ABIArgInfo::getDirect();
    return getIndirectResult(Ty, /*ByVal=*/false, State);
  }

  if (isAggregateTypeForABI(Ty)) {
    // Structures with flexible arrays are always indirect.
    // FIXME: This should not be byval!
    if (RT && RT->getDecl()->hasFlexibleArrayMember())
      return getIndirectResult(Ty, true, State);

    // Ignore empty structs/unions on non-Windows.
    if (!IsWin32StructABI && isEmptyRecord(getContext(), Ty, true))
      return ABIArgInfo::getIgnore();

    // Ignore 0 sized structs.
    if (TI.Width == 0)
      return ABIArgInfo::getIgnore();

    llvm::LLVMContext &LLVMContext = getVMContext();
    llvm::IntegerType *Int32 = llvm::Type::getInt32Ty(LLVMContext);
    bool NeedsPadding = false;
    bool InReg;
    if (shouldAggregateUseDirect(Ty, State, InReg, NeedsPadding)) {
      unsigned SizeInRegs = (TI.Width + 31) / 32;
      SmallVector<llvm::Type *, 3> Elements(SizeInRegs, Int32);
      llvm::Type *Result = llvm::StructType::get(LLVMContext, Elements);
      if (InReg)
        return ABIArgInfo::getDirectInReg(Result);
      else
        return ABIArgInfo::getDirect(Result);
    }
    llvm::IntegerType *PaddingType = NeedsPadding ? Int32 : nullptr;

    // Pass over-aligned aggregates to non-variadic functions on Windows
    // indirectly. This behavior was added in MSVC 2015. Use the required
    // alignment from the record layout, since that may be less than the
    // regular type alignment, and types with required alignment of less than 4
    // bytes are not passed indirectly.
    if (IsWin32StructABI && State.Required.isRequiredArg(ArgIndex)) {
      unsigned AlignInBits = 0;
      if (RT) {
        const ASTRecordLayout &Layout =
            getContext().getASTRecordLayout(RT->getDecl());
        AlignInBits = getContext().toBits(Layout.getRequiredAlignment());
      } else if (TI.isAlignRequired()) {
        AlignInBits = TI.Align;
      }
      if (AlignInBits > 32)
        return getIndirectResult(Ty, /*ByVal=*/false, State);
    }

    // Expand small (<= 128-bit) record types when we know that the stack layout
    // of those arguments will match the struct. This is important because the
    // LLVM backend isn't smart enough to remove byval, which inhibits many
    // optimizations.
    // Don't do this for the MCU if there are still free integer registers
    // (see X86_64 ABI for full explanation).
    if (TI.Width <= 4 * 32 && (!IsMCUABI || State.FreeRegs == 0) &&
        canExpandIndirectArgument(Ty))
      return ABIArgInfo::getExpandWithPadding(
          IsFastCall || IsVectorCall || IsRegCall, PaddingType);

    return getIndirectResult(Ty, true, State);
  }

  if (const VectorType *VT = Ty->getAs<VectorType>()) {
    // On Windows, vectors are passed directly if registers are available, or
    // indirectly if not. This avoids the need to align argument memory. Pass
    // user-defined vector types larger than 512 bits indirectly for simplicity.
    if (IsWin32StructABI) {
      if (TI.Width <= 512 && State.FreeSSERegs > 0) {
        --State.FreeSSERegs;
        return ABIArgInfo::getDirectInReg();
      }
      return getIndirectResult(Ty, /*ByVal=*/false, State);
    }

    // On Darwin, some vectors are passed in memory, we handle this by passing
    // it as an i8/i16/i32/i64.
    if (IsDarwinVectorABI) {
      if ((TI.Width == 8 || TI.Width == 16 || TI.Width == 32) ||
          (TI.Width == 64 && VT->getNumElements() == 1))
        return ABIArgInfo::getDirect(
            llvm::IntegerType::get(getVMContext(), TI.Width));
    }

    if (IsX86_MMXType(CGT.ConvertType(Ty)))
      return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), 64));

    return ABIArgInfo::getDirect();
  }

  if (const EnumType *EnumTy = Ty->getAs<EnumType>())
    Ty = EnumTy->getDecl()->getIntegerType();

  bool InReg = shouldPrimitiveUseInReg(Ty, State);

  if (isPromotableIntegerTypeForABI(Ty)) {
    if (InReg)
      return ABIArgInfo::getExtendInReg(Ty, CGT.ConvertType(Ty));
    return ABIArgInfo::getExtend(Ty, CGT.ConvertType(Ty));
  }

  if (const auto *EIT = Ty->getAs<BitIntType>()) {
    if (EIT->getNumBits() <= 64) {
      if (InReg)
        return ABIArgInfo::getDirectInReg();
      return ABIArgInfo::getDirect();
    }
    return getIndirectResult(Ty, /*ByVal=*/false, State);
  }

  if (InReg)
    return ABIArgInfo::getDirectInReg();
  return ABIArgInfo::getDirect();
}

void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const {
  CCState State(FI);
  if (IsMCUABI)
    State.FreeRegs = 3;
  else if (State.CC == llvm::CallingConv::X86_FastCall) {
    State.FreeRegs = 2;
    State.FreeSSERegs = 3;
  } else if (State.CC == llvm::CallingConv::X86_VectorCall) {
    State.FreeRegs = 2;
    State.FreeSSERegs = 6;
  } else if (FI.getHasRegParm())
    State.FreeRegs = FI.getRegParm();
  else if (State.CC == llvm::CallingConv::X86_RegCall) {
    State.FreeRegs = 5;
    State.FreeSSERegs = 8;
  } else if (IsWin32StructABI) {
    // Since MSVC 2015, the first three SSE vectors have been passed in
    // registers. The rest are passed indirectly.
    State.FreeRegs = DefaultNumRegisterParameters;
    State.FreeSSERegs = 3;
  } else
    State.FreeRegs = DefaultNumRegisterParameters;

  if (!::classifyReturnType(getCXXABI(), FI, *this)) {
    FI.getReturnInfo() = classifyReturnType(FI.getReturnType(), State);
  } else if (FI.getReturnInfo().isIndirect()) {
    // The C++ ABI is not aware of register usage, so we have to check if the
    // return value was sret and put it in a register ourselves if appropriate.
    if (State.FreeRegs) {
      --State.FreeRegs; // The sret parameter consumes a register.
      if (!IsMCUABI)
        FI.getReturnInfo().setInReg(true);
    }
  }

  // The chain argument effectively gives us another free register.
  if (FI.isChainCall())
    ++State.FreeRegs;

  // For vectorcall, do a first pass over the arguments, assigning FP and vector
  // arguments to XMM registers as available.
  if (State.CC == llvm::CallingConv::X86_VectorCall)
    runVectorCallFirstPass(FI, State);

  bool UsedInAlloca = false;
  MutableArrayRef<CGFunctionInfoArgInfo> Args = FI.arguments();
  for (unsigned I = 0, E = Args.size(); I < E; ++I) {
    // Skip arguments that have already been assigned.
    if (State.IsPreassigned.test(I))
      continue;

    Args[I].info = classifyArgumentType(Args[I].type, State, I);
    UsedInAlloca |= (Args[I].info.getKind() == ABIArgInfo::InAlloca);
  }

  // If we needed to use inalloca for any argument, do a second pass and rewrite
  // all the memory arguments to use inalloca.
  if (UsedInAlloca)
    rewriteWithInAlloca(FI);
}

void
X86_32ABIInfo::addFieldToArgStruct(SmallVector<llvm::Type *, 6> &FrameFields,
                                   CharUnits &StackOffset, ABIArgInfo &Info,
                                   QualType Type) const {
  // Arguments are always 4-byte-aligned.
  CharUnits WordSize = CharUnits::fromQuantity(4);
  assert(StackOffset.isMultipleOf(WordSize) && "unaligned inalloca struct");

  // sret pointers and indirect things will require an extra pointer
  // indirection, unless they are byval. Most things are byval, and will not
  // require this indirection.
  bool IsIndirect = false;
  if (Info.isIndirect() && !Info.getIndirectByVal())
    IsIndirect = true;
  Info = ABIArgInfo::getInAlloca(FrameFields.size(), IsIndirect);
  llvm::Type *LLTy = CGT.ConvertTypeForMem(Type);
  if (IsIndirect)
    LLTy = llvm::PointerType::getUnqual(getVMContext());
  FrameFields.push_back(LLTy);
  StackOffset += IsIndirect ? WordSize : getContext().getTypeSizeInChars(Type);

  // Insert padding bytes to respect alignment.
  CharUnits FieldEnd = StackOffset;
  StackOffset = FieldEnd.alignTo(WordSize);
  if (StackOffset != FieldEnd) {
    CharUnits NumBytes = StackOffset - FieldEnd;
    llvm::Type *Ty = llvm::Type::getInt8Ty(getVMContext());
    Ty = llvm::ArrayType::get(Ty, NumBytes.getQuantity());
    FrameFields.push_back(Ty);
  }
}

static bool isArgInAlloca(const ABIArgInfo &Info) {
  // Leave ignored and inreg arguments alone.
  switch (Info.getKind()) {
  case ABIArgInfo::InAlloca:
    return true;
  case ABIArgInfo::Ignore:
  case ABIArgInfo::IndirectAliased:
    return false;
  case ABIArgInfo::Indirect:
  case ABIArgInfo::Direct:
  case ABIArgInfo::Extend:
    return !Info.getInReg();
  case ABIArgInfo::Expand:
  case ABIArgInfo::CoerceAndExpand:
    // These are aggregate types which are never passed in registers when
    // inalloca is involved.
    return true;
  }
  llvm_unreachable("invalid enum");
}

void X86_32ABIInfo::rewriteWithInAlloca(CGFunctionInfo &FI) const {
  assert(IsWin32StructABI && "inalloca only supported on win32");

  // Build a packed struct type for all of the arguments in memory.
  SmallVector<llvm::Type *, 6> FrameFields;

  // The stack alignment is always 4.
  CharUnits StackAlign = CharUnits::fromQuantity(4);

  CharUnits StackOffset;
  CGFunctionInfo::arg_iterator I = FI.arg_begin(), E = FI.arg_end();

  // Put 'this' into the struct before 'sret', if necessary.
  bool IsThisCall =
      FI.getCallingConvention() == llvm::CallingConv::X86_ThisCall;
  ABIArgInfo &Ret = FI.getReturnInfo();
  if (Ret.isIndirect() && Ret.isSRetAfterThis() && !IsThisCall &&
      isArgInAlloca(I->info)) {
    addFieldToArgStruct(FrameFields, StackOffset, I->info, I->type);
    ++I;
  }

  // Put the sret parameter into the inalloca struct if it's in memory.
  if (Ret.isIndirect() && !Ret.getInReg()) {
    addFieldToArgStruct(FrameFields, StackOffset, Ret, FI.getReturnType());
    // On Windows, the hidden sret parameter is always returned in eax.
    Ret.setInAllocaSRet(IsWin32StructABI);
  }

  // Skip the 'this' parameter in ecx.
  if (IsThisCall)
    ++I;

  // Put arguments passed in memory into the struct.
  for (; I != E; ++I) {
    if (isArgInAlloca(I->info))
      addFieldToArgStruct(FrameFields, StackOffset, I->info, I->type);
  }

  FI.setArgStruct(llvm::StructType::get(getVMContext(), FrameFields,
                                        /*isPacked=*/true),
                  StackAlign);
}

RValue X86_32ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                                QualType Ty, AggValueSlot Slot) const {
  auto TypeInfo = getContext().getTypeInfoInChars(Ty);

  CCState State(*const_cast<CGFunctionInfo *>(CGF.CurFnInfo));
  ABIArgInfo AI = classifyArgumentType(Ty, State, /*ArgIndex*/ 0);
  // Empty records are ignored for parameter passing purposes.
  if (AI.isIgnore())
    return Slot.asRValue();

  // x86-32 changes the alignment of certain arguments on the stack.
  //
  // Just messing with TypeInfo like this works because we never pass
  // anything indirectly.
  TypeInfo.Align = CharUnits::fromQuantity(
      getTypeStackAlignInBytes(Ty, TypeInfo.Align.getQuantity()));

  return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*Indirect*/ false, TypeInfo,
                          CharUnits::fromQuantity(4),
                          /*AllowHigherAlign*/ true, Slot);
}

bool X86_32TargetCodeGenInfo::isStructReturnInRegABI(
    const llvm::Triple &Triple, const CodeGenOptions &Opts) {
  assert(Triple.getArch() == llvm::Triple::x86);

  switch (Opts.getStructReturnConvention()) {
  case CodeGenOptions::SRCK_Default:
    break;
  case CodeGenOptions::SRCK_OnStack: // -fpcc-struct-return
    return false;
  case CodeGenOptions::SRCK_InRegs:  // -freg-struct-return
    return true;
  }

  if (Triple.isOSDarwin() || Triple.isOSIAMCU())
    return true;

  switch (Triple.getOS()) {
  case llvm::Triple::DragonFly:
  case llvm::Triple::FreeBSD:
  case llvm::Triple::OpenBSD:
  case llvm::Triple::Win32:
    return true;
  default:
    return false;
  }
}

static void addX86InterruptAttrs(const FunctionDecl *FD, llvm::GlobalValue *GV,
                                 CodeGen::CodeGenModule &CGM) {
  if (!FD->hasAttr<AnyX86InterruptAttr>())
    return;

  llvm::Function *Fn = cast<llvm::Function>(GV);
  Fn->setCallingConv(llvm::CallingConv::X86_INTR);
  if (FD->getNumParams() == 0)
    return;

  auto PtrTy = cast<PointerType>(FD->getParamDecl(0)->getType());
  llvm::Type *ByValTy = CGM.getTypes().ConvertType(PtrTy->getPointeeType());
  llvm::Attribute NewAttr = llvm::Attribute::getWithByValType(
      Fn->getContext(), ByValTy);
  Fn->addParamAttr(0, NewAttr);
}

void X86_32TargetCodeGenInfo::setTargetAttributes(
    const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const {
  if (GV->isDeclaration())
    return;
  if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
    if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) {
      llvm::Function *Fn = cast<llvm::Function>(GV);
      Fn->addFnAttr("stackrealign");
    }

    addX86InterruptAttrs(FD, GV, CGM);
  }
}

bool X86_32TargetCodeGenInfo::initDwarfEHRegSizeTable(
    CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const {
  CodeGen::CGBuilderTy &Builder = CGF.Builder;

  llvm::Value *Four8 = llvm::ConstantInt::get(CGF.Int8Ty, 4);

  // 0-7 are the eight integer registers; the order is different
  // on Darwin (for EH), but the range is the same.
  // 8 is %eip.
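  // All of these are recorded as 4 bytes wide, which is why the call below
  // covers dwarf registers 0 through 8 (the eight GPRs plus %eip) with the
  // value 4.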
  AssignToArrayRange(Builder, Address, Four8, 0, 8);

  if (CGF.CGM.getTarget().getTriple().isOSDarwin()) {
    // 12-16 are st(0..4). Not sure why we stop at 4.
    // These have size 16, which is sizeof(long double) on
    // platforms with 8-byte alignment for that type.
    llvm::Value *Sixteen8 = llvm::ConstantInt::get(CGF.Int8Ty, 16);
    AssignToArrayRange(Builder, Address, Sixteen8, 12, 16);

  } else {
    // 9 is %eflags, which doesn't get a size on Darwin for some
    // reason.
    Builder.CreateAlignedStore(
        Four8, Builder.CreateConstInBoundsGEP1_32(CGF.Int8Ty, Address, 9),
        CharUnits::One());

    // 11-16 are st(0..5). Not sure why we stop at 5.
    // These have size 12, which is sizeof(long double) on
    // platforms with 4-byte alignment for that type.
    llvm::Value *Twelve8 = llvm::ConstantInt::get(CGF.Int8Ty, 12);
    AssignToArrayRange(Builder, Address, Twelve8, 11, 16);
  }

  return false;
}

//===----------------------------------------------------------------------===//
// X86-64 ABI Implementation
//===----------------------------------------------------------------------===//

namespace {

/// \p returns the size in bits of the largest (native) vector for \p AVXLevel.
static unsigned getNativeVectorSizeForAVXABI(X86AVXABILevel AVXLevel) {
  switch (AVXLevel) {
  case X86AVXABILevel::AVX512:
    return 512;
  case X86AVXABILevel::AVX:
    return 256;
  case X86AVXABILevel::None:
    return 128;
  }
  llvm_unreachable("Unknown AVXLevel");
}

/// X86_64ABIInfo - The X86_64 ABI information.
class X86_64ABIInfo : public ABIInfo {
  enum Class {
    Integer = 0,
    SSE,
    SSEUp,
    X87,
    X87Up,
    ComplexX87,
    NoClass,
    Memory
  };

  /// merge - Implement the X86_64 ABI merging algorithm.
  ///
  /// Merge an accumulating classification \arg Accum with a field
  /// classification \arg Field.
  ///
  /// \param Accum - The accumulating classification. This should
  /// always be either NoClass or the result of a previous merge
  /// call. In addition, this should never be Memory (the caller
  /// should just return Memory for the aggregate).
  static Class merge(Class Accum, Class Field);

  /// postMerge - Implement the X86_64 ABI post merging algorithm.
  ///
  /// Post merger cleanup, reduces a malformed Hi and Lo pair to
  /// final MEMORY or SSE classes when necessary.
  ///
  /// \param AggregateSize - The size of the current aggregate in
  /// the classification process.
  ///
  /// \param Lo - The classification for the parts of the type
  /// residing in the low word of the containing object.
  ///
  /// \param Hi - The classification for the parts of the type
  /// residing in the higher words of the containing object.
  ///
  void postMerge(unsigned AggregateSize, Class &Lo, Class &Hi) const;

  /// classify - Determine the x86_64 register classes in which the
  /// given type T should be passed.
  ///
  /// \param Lo - The classification for the parts of the type
  /// residing in the low word of the containing object.
  ///
  /// \param Hi - The classification for the parts of the type
  /// residing in the high word of the containing object.
  ///
  /// \param OffsetBase - The bit offset of this type in the
  /// containing object.
  /// Some parameters are classified differently depending on whether
  /// they straddle an eightbyte boundary.
  ///
  /// \param isNamedArg - Whether the argument in question is a "named"
  /// argument, as used in AMD64-ABI 3.5.7.
  ///
  /// \param IsRegCall - Whether the calling convention is regcall.
  ///
  /// If a word is unused its result will be NoClass; if a type should
  /// be passed in Memory then at least the classification of \arg Lo
  /// will be Memory.
  ///
  /// The \arg Lo class will be NoClass iff the argument is ignored.
  ///
  /// If the \arg Lo class is ComplexX87, then the \arg Hi class will
  /// also be ComplexX87.
  void classify(QualType T, uint64_t OffsetBase, Class &Lo, Class &Hi,
                bool isNamedArg, bool IsRegCall = false) const;

  llvm::Type *GetByteVectorType(QualType Ty) const;
  llvm::Type *GetSSETypeAtOffset(llvm::Type *IRType,
                                 unsigned IROffset, QualType SourceTy,
                                 unsigned SourceOffset) const;
  llvm::Type *GetINTEGERTypeAtOffset(llvm::Type *IRType,
                                     unsigned IROffset, QualType SourceTy,
                                     unsigned SourceOffset) const;

  /// getIndirectReturnResult - Given a source type \arg Ty, return a suitable
  /// result such that the argument will be returned in memory.
  ABIArgInfo getIndirectReturnResult(QualType Ty) const;

  /// getIndirectResult - Given a source type \arg Ty, return a suitable result
  /// such that the argument will be passed in memory.
  ///
  /// \param freeIntRegs - The number of free integer registers remaining
  /// available.
  ABIArgInfo getIndirectResult(QualType Ty, unsigned freeIntRegs) const;

  ABIArgInfo classifyReturnType(QualType RetTy) const;

  ABIArgInfo classifyArgumentType(QualType Ty, unsigned freeIntRegs,
                                  unsigned &neededInt, unsigned &neededSSE,
                                  bool isNamedArg,
                                  bool IsRegCall = false) const;

  ABIArgInfo classifyRegCallStructType(QualType Ty, unsigned &NeededInt,
                                       unsigned &NeededSSE,
                                       unsigned &MaxVectorWidth) const;

  ABIArgInfo classifyRegCallStructTypeImpl(QualType Ty, unsigned &NeededInt,
                                           unsigned &NeededSSE,
                                           unsigned &MaxVectorWidth) const;

  bool IsIllegalVectorType(QualType Ty) const;

  /// The 0.98 ABI revision clarified a lot of ambiguities,
  /// unfortunately in ways that were not always consistent with
  /// certain previous compilers. In particular, platforms which
  /// required strict binary compatibility with older versions of GCC
  /// may need to exempt themselves.
  bool honorsRevision0_98() const {
    return !getTarget().getTriple().isOSDarwin();
  }

  /// GCC classifies <1 x long long> as SSE but some platform ABIs choose to
  /// classify it as INTEGER (for compatibility with older clang compilers).
  bool classifyIntegerMMXAsSSE() const {
    // Clang <= 3.8 did not do this.
    if (getContext().getLangOpts().getClangABICompat() <=
        LangOptions::ClangABI::Ver3_8)
      return false;

    const llvm::Triple &Triple = getTarget().getTriple();
    if (Triple.isOSDarwin() || Triple.isPS() || Triple.isOSFreeBSD())
      return false;
    return true;
  }

  // GCC classifies vectors of __int128 as memory.
  bool passInt128VectorsInMem() const {
    // Clang <= 9.0 did not do this.
    if (getContext().getLangOpts().getClangABICompat() <=
        LangOptions::ClangABI::Ver9)
      return false;

    const llvm::Triple &T = getTarget().getTriple();
    return T.isOSLinux() || T.isOSNetBSD();
  }

  X86AVXABILevel AVXLevel;
  // Some ABIs (e.g. X32 ABI and Native Client OS) use 32 bit pointers on
  // 64-bit hardware.
  bool Has64BitPointers;

public:
  X86_64ABIInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel)
      : ABIInfo(CGT), AVXLevel(AVXLevel),
        Has64BitPointers(CGT.getDataLayout().getPointerSize(0) == 8) {}

  bool isPassedUsingAVXType(QualType type) const {
    unsigned neededInt, neededSSE;
    // The freeIntRegs argument doesn't matter here.
    ABIArgInfo info = classifyArgumentType(type, 0, neededInt, neededSSE,
                                           /*isNamedArg*/true);
    if (info.isDirect()) {
      llvm::Type *ty = info.getCoerceToType();
      if (llvm::VectorType *vectorTy = dyn_cast_or_null<llvm::VectorType>(ty))
        return vectorTy->getPrimitiveSizeInBits().getFixedValue() > 128;
    }
    return false;
  }

  void computeInfo(CGFunctionInfo &FI) const override;

  RValue EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
                   AggValueSlot Slot) const override;
  RValue EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
                     AggValueSlot Slot) const override;

  bool has64BitPointers() const {
    return Has64BitPointers;
  }
};

/// WinX86_64ABIInfo - The Windows X86_64 ABI information.
class WinX86_64ABIInfo : public ABIInfo {
public:
  WinX86_64ABIInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel)
      : ABIInfo(CGT), AVXLevel(AVXLevel),
        IsMingw64(getTarget().getTriple().isWindowsGNUEnvironment()) {}

  void computeInfo(CGFunctionInfo &FI) const override;

  RValue EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
                   AggValueSlot Slot) const override;

  bool isHomogeneousAggregateBaseType(QualType Ty) const override {
    // FIXME: Assumes vectorcall is in use.
    return isX86VectorTypeForVectorCall(getContext(), Ty);
  }

  bool isHomogeneousAggregateSmallEnough(const Type *Ty,
                                         uint64_t NumMembers) const override {
    // FIXME: Assumes vectorcall is in use.
    return isX86VectorCallAggregateSmallEnough(NumMembers);
  }

private:
  ABIArgInfo classify(QualType Ty, unsigned &FreeSSERegs, bool IsReturnType,
                      bool IsVectorCall, bool IsRegCall) const;
  ABIArgInfo reclassifyHvaArgForVectorCall(QualType Ty, unsigned &FreeSSERegs,
                                           const ABIArgInfo &current) const;

  X86AVXABILevel AVXLevel;

  bool IsMingw64;
};

class X86_64TargetCodeGenInfo : public TargetCodeGenInfo {
public:
  X86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel)
      : TargetCodeGenInfo(std::make_unique<X86_64ABIInfo>(CGT, AVXLevel)) {
    SwiftInfo =
        std::make_unique<SwiftABIInfo>(CGT, /*SwiftErrorInRegister=*/true);
  }

  /// Disable tail call on x86-64. The epilogue code before the tail jump blocks
  /// autoreleaseRV/retainRV and autoreleaseRV/unsafeClaimRV optimizations.
  bool markARCOptimizedReturnCallsAsNoTail() const override { return true; }

  int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override {
    return 7;
  }

  bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
                               llvm::Value *Address) const override {
    llvm::Value *Eight8 = llvm::ConstantInt::get(CGF.Int8Ty, 8);

    // 0-15 are the 16 integer registers.
    // 16 is %rip.
    AssignToArrayRange(CGF.Builder, Address, Eight8, 0, 16);
    return false;
  }

  llvm::Type *adjustInlineAsmType(CodeGen::CodeGenFunction &CGF,
                                  StringRef Constraint,
                                  llvm::Type *Ty) const override {
    return X86AdjustInlineAsmType(CGF, Constraint, Ty);
  }

  bool isNoProtoCallVariadic(const CallArgList &args,
                             const FunctionNoProtoType *fnType) const override {
    // The default CC on x86-64 sets %al to the number of SSE
    // registers used, and GCC sets this when calling an unprototyped
    // function, so we override the default behavior. However, don't do
    // that when AVX types are involved: the ABI explicitly states it is
    // undefined, and it doesn't work in practice because of how the ABI
    // defines varargs anyway.
    if (fnType->getCallConv() == CC_C) {
      bool HasAVXType = false;
      for (CallArgList::const_iterator
             it = args.begin(), ie = args.end(); it != ie; ++it) {
        if (getABIInfo<X86_64ABIInfo>().isPassedUsingAVXType(it->Ty)) {
          HasAVXType = true;
          break;
        }
      }

      if (!HasAVXType)
        return true;
    }

    return TargetCodeGenInfo::isNoProtoCallVariadic(args, fnType);
  }

  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                           CodeGen::CodeGenModule &CGM) const override {
    if (GV->isDeclaration())
      return;
    if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
      if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) {
        llvm::Function *Fn = cast<llvm::Function>(GV);
        Fn->addFnAttr("stackrealign");
      }

      addX86InterruptAttrs(FD, GV, CGM);
    }
  }

  void checkFunctionCallABI(CodeGenModule &CGM, SourceLocation CallLoc,
                            const FunctionDecl *Caller,
                            const FunctionDecl *Callee, const CallArgList &Args,
                            QualType ReturnType) const override;
};
} // namespace

static void initFeatureMaps(const ASTContext &Ctx,
                            llvm::StringMap<bool> &CallerMap,
                            const FunctionDecl *Caller,
                            llvm::StringMap<bool> &CalleeMap,
                            const FunctionDecl *Callee) {
  if (CalleeMap.empty() && CallerMap.empty()) {
    // The caller is potentially nullptr in the case where the call isn't in a
    // function. In this case, the getFunctionFeatureMap ensures we just get
    // the TU level setting (since it cannot be modified by 'target').
    Ctx.getFunctionFeatureMap(CallerMap, Caller);
    Ctx.getFunctionFeatureMap(CalleeMap, Callee);
  }
}

static bool checkAVXParamFeature(DiagnosticsEngine &Diag,
                                 SourceLocation CallLoc,
                                 const llvm::StringMap<bool> &CallerMap,
                                 const llvm::StringMap<bool> &CalleeMap,
                                 QualType Ty, StringRef Feature,
                                 bool IsArgument) {
  bool CallerHasFeat = CallerMap.lookup(Feature);
  bool CalleeHasFeat = CalleeMap.lookup(Feature);
  if (!CallerHasFeat && !CalleeHasFeat)
    return Diag.Report(CallLoc, diag::warn_avx_calling_convention)
           << IsArgument << Ty << Feature;

  // Mixing calling conventions here is very clearly an error.
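  // (With the feature, a >128-bit vector is passed in YMM/ZMM registers;
  // without it, the same argument goes in memory, so caller and callee would
  // disagree about where the value lives.)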
  if (!CallerHasFeat || !CalleeHasFeat)
    return Diag.Report(CallLoc, diag::err_avx_calling_convention)
           << IsArgument << Ty << Feature;

  // Else, both caller and callee have the required feature, so there is no
  // need to diagnose.
  return false;
}

static bool checkAVX512ParamFeature(DiagnosticsEngine &Diag,
                                    SourceLocation CallLoc,
                                    const llvm::StringMap<bool> &CallerMap,
                                    const llvm::StringMap<bool> &CalleeMap,
                                    QualType Ty, bool IsArgument) {
  bool Caller256 = CallerMap.lookup("avx512f") && !CallerMap.lookup("evex512");
  bool Callee256 = CalleeMap.lookup("avx512f") && !CalleeMap.lookup("evex512");

  // Forbid 512-bit or larger vector pass or return when we disabled ZMM
  // instructions.
  if (Caller256 || Callee256)
    return Diag.Report(CallLoc, diag::err_avx_calling_convention)
           << IsArgument << Ty << "evex512";

  return checkAVXParamFeature(Diag, CallLoc, CallerMap, CalleeMap, Ty,
                              "avx512f", IsArgument);
}

static bool checkAVXParam(DiagnosticsEngine &Diag, ASTContext &Ctx,
                          SourceLocation CallLoc,
                          const llvm::StringMap<bool> &CallerMap,
                          const llvm::StringMap<bool> &CalleeMap, QualType Ty,
                          bool IsArgument) {
  uint64_t Size = Ctx.getTypeSize(Ty);
  if (Size > 256)
    return checkAVX512ParamFeature(Diag, CallLoc, CallerMap, CalleeMap, Ty,
                                   IsArgument);

  if (Size > 128)
    return checkAVXParamFeature(Diag, CallLoc, CallerMap, CalleeMap, Ty, "avx",
                                IsArgument);

  return false;
}

void X86_64TargetCodeGenInfo::checkFunctionCallABI(CodeGenModule &CGM,
                                                   SourceLocation CallLoc,
                                                   const FunctionDecl *Caller,
                                                   const FunctionDecl *Callee,
                                                   const CallArgList &Args,
                                                   QualType ReturnType) const {
  if (!Callee)
    return;

  llvm::StringMap<bool> CallerMap;
  llvm::StringMap<bool> CalleeMap;
  unsigned ArgIndex = 0;

  // We need to loop through the actual call arguments rather than the
  // function's parameters, in case this is variadic.
  for (const CallArg &Arg : Args) {
    // The "avx" feature changes how vectors >128 in size are passed. "avx512f"
    // additionally changes how vectors >256 in size are passed. Like GCC, we
    // warn when a function is called with an argument where this will change.
    // Unlike GCC, we also error when it is an obvious ABI mismatch, that is,
    // when the caller and callee features are mismatched.
    // Unfortunately, we cannot do this diagnostic in SEMA, since the callee can
    // change its ABI with attribute-target after this call.
    if (Arg.getType()->isVectorType() &&
        CGM.getContext().getTypeSize(Arg.getType()) > 128) {
      initFeatureMaps(CGM.getContext(), CallerMap, Caller, CalleeMap, Callee);
      QualType Ty = Arg.getType();
      // The CallArg seems to have desugared the type already, so for clearer
      // diagnostics, replace it with the type in the FunctionDecl if possible.
      if (ArgIndex < Callee->getNumParams())
        Ty = Callee->getParamDecl(ArgIndex)->getType();

      if (checkAVXParam(CGM.getDiags(), CGM.getContext(), CallLoc, CallerMap,
                        CalleeMap, Ty, /*IsArgument*/ true))
        return;
    }
    ++ArgIndex;
  }

  // Always check the return value, as we don't have a good way of knowing in
  // codegen whether this value is used, tail-called, etc.
1596 if (Callee->getReturnType()->isVectorType() && 1597 CGM.getContext().getTypeSize(Callee->getReturnType()) > 128) { 1598 initFeatureMaps(CGM.getContext(), CallerMap, Caller, CalleeMap, Callee); 1599 checkAVXParam(CGM.getDiags(), CGM.getContext(), CallLoc, CallerMap, 1600 CalleeMap, Callee->getReturnType(), 1601 /*IsArgument*/ false); 1602 } 1603 } 1604 1605 std::string TargetCodeGenInfo::qualifyWindowsLibrary(StringRef Lib) { 1606 // If the argument does not end in .lib, automatically add the suffix. 1607 // If the argument contains a space, enclose it in quotes. 1608 // This matches the behavior of MSVC. 1609 bool Quote = Lib.contains(' '); 1610 std::string ArgStr = Quote ? "\"" : ""; 1611 ArgStr += Lib; 1612 if (!Lib.ends_with_insensitive(".lib") && !Lib.ends_with_insensitive(".a")) 1613 ArgStr += ".lib"; 1614 ArgStr += Quote ? "\"" : ""; 1615 return ArgStr; 1616 } 1617 1618 namespace { 1619 class WinX86_32TargetCodeGenInfo : public X86_32TargetCodeGenInfo { 1620 public: 1621 WinX86_32TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, 1622 bool DarwinVectorABI, bool RetSmallStructInRegABI, bool Win32StructABI, 1623 unsigned NumRegisterParameters) 1624 : X86_32TargetCodeGenInfo(CGT, DarwinVectorABI, RetSmallStructInRegABI, 1625 Win32StructABI, NumRegisterParameters, false) {} 1626 1627 void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, 1628 CodeGen::CodeGenModule &CGM) const override; 1629 1630 void getDependentLibraryOption(llvm::StringRef Lib, 1631 llvm::SmallString<24> &Opt) const override { 1632 Opt = "/DEFAULTLIB:"; 1633 Opt += qualifyWindowsLibrary(Lib); 1634 } 1635 1636 void getDetectMismatchOption(llvm::StringRef Name, 1637 llvm::StringRef Value, 1638 llvm::SmallString<32> &Opt) const override { 1639 Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\""; 1640 } 1641 }; 1642 } // namespace 1643 1644 void WinX86_32TargetCodeGenInfo::setTargetAttributes( 1645 const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const { 1646 X86_32TargetCodeGenInfo::setTargetAttributes(D, GV, CGM); 1647 if (GV->isDeclaration()) 1648 return; 1649 addStackProbeTargetAttributes(D, GV, CGM); 1650 } 1651 1652 namespace { 1653 class WinX86_64TargetCodeGenInfo : public TargetCodeGenInfo { 1654 public: 1655 WinX86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, 1656 X86AVXABILevel AVXLevel) 1657 : TargetCodeGenInfo(std::make_unique<WinX86_64ABIInfo>(CGT, AVXLevel)) { 1658 SwiftInfo = 1659 std::make_unique<SwiftABIInfo>(CGT, /*SwiftErrorInRegister=*/true); 1660 } 1661 1662 void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, 1663 CodeGen::CodeGenModule &CGM) const override; 1664 1665 int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override { 1666 return 7; 1667 } 1668 1669 bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, 1670 llvm::Value *Address) const override { 1671 llvm::Value *Eight8 = llvm::ConstantInt::get(CGF.Int8Ty, 8); 1672 1673 // 0-15 are the 16 integer registers. 1674 // 16 is %rip. 
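// [Illustrative example, not part of the original source.] A sketch of what
// qualifyWindowsLibrary above produces for a few hypothetical inputs:
// ```
// "msvcrt"    ->  msvcrt.lib       (suffix appended)
// "my lib"    ->  "my lib.lib"     (quoted because of the space)
// "libfoo.a"  ->  libfoo.a         (already names a library, left alone)
// ```
// The result is then prefixed with /DEFAULTLIB: to form the embedded linker
// directive in getDependentLibraryOption.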
1675 AssignToArrayRange(CGF.Builder, Address, Eight8, 0, 16); 1676 return false; 1677 } 1678 1679 void getDependentLibraryOption(llvm::StringRef Lib, 1680 llvm::SmallString<24> &Opt) const override { 1681 Opt = "/DEFAULTLIB:"; 1682 Opt += qualifyWindowsLibrary(Lib); 1683 } 1684 1685 void getDetectMismatchOption(llvm::StringRef Name, 1686 llvm::StringRef Value, 1687 llvm::SmallString<32> &Opt) const override { 1688 Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\""; 1689 } 1690 }; 1691 } // namespace 1692 1693 void WinX86_64TargetCodeGenInfo::setTargetAttributes( 1694 const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const { 1695 TargetCodeGenInfo::setTargetAttributes(D, GV, CGM); 1696 if (GV->isDeclaration()) 1697 return; 1698 if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) { 1699 if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) { 1700 llvm::Function *Fn = cast<llvm::Function>(GV); 1701 Fn->addFnAttr("stackrealign"); 1702 } 1703 1704 addX86InterruptAttrs(FD, GV, CGM); 1705 } 1706 1707 addStackProbeTargetAttributes(D, GV, CGM); 1708 } 1709 1710 void X86_64ABIInfo::postMerge(unsigned AggregateSize, Class &Lo, 1711 Class &Hi) const { 1712 // AMD64-ABI 3.2.3p2: Rule 5. Then a post merger cleanup is done: 1713 // 1714 // (a) If one of the classes is Memory, the whole argument is passed in 1715 // memory. 1716 // 1717 // (b) If X87UP is not preceded by X87, the whole argument is passed in 1718 // memory. 1719 // 1720 // (c) If the size of the aggregate exceeds two eightbytes and the first 1721 // eightbyte isn't SSE or any other eightbyte isn't SSEUP, the whole 1722 // argument is passed in memory. NOTE: This is necessary to keep the 1723 // ABI working for processors that don't support the __m256 type. 1724 // 1725 // (d) If SSEUP is not preceded by SSE or SSEUP, it is converted to SSE. 1726 // 1727 // Some of these are enforced by the merging logic. Others can arise 1728 // only with unions; for example: 1729 // union { _Complex double; unsigned; } 1730 // 1731 // Note that clauses (b) and (c) were added in 0.98. 1732 // 1733 if (Hi == Memory) 1734 Lo = Memory; 1735 if (Hi == X87Up && Lo != X87 && honorsRevision0_98()) 1736 Lo = Memory; 1737 if (AggregateSize > 128 && (Lo != SSE || Hi != SSEUp)) 1738 Lo = Memory; 1739 if (Hi == SSEUp && Lo != SSE) 1740 Hi = SSE; 1741 } 1742 1743 X86_64ABIInfo::Class X86_64ABIInfo::merge(Class Accum, Class Field) { 1744 // AMD64-ABI 3.2.3p2: Rule 4. Each field of an object is 1745 // classified recursively so that always two fields are 1746 // considered. The resulting class is calculated according to 1747 // the classes of the fields in the eightbyte: 1748 // 1749 // (a) If both classes are equal, this is the resulting class. 1750 // 1751 // (b) If one of the classes is NO_CLASS, the resulting class is 1752 // the other class. 1753 // 1754 // (c) If one of the classes is MEMORY, the result is the MEMORY 1755 // class. 1756 // 1757 // (d) If one of the classes is INTEGER, the result is the 1758 // INTEGER. 1759 // 1760 // (e) If one of the classes is X87, X87UP, COMPLEX_X87 class, 1761 // MEMORY is used as class. 1762 // 1763 // (f) Otherwise class SSE is used. 1764 1765 // Accum should never be memory (we should have returned) or 1766 // ComplexX87 (because this cannot be passed in a structure). 
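// [Illustrative example, not part of the original source.] How the merge
// rules above combine for two simple, hypothetical aggregates:
// ```
// struct A { double d; int i; };   // 16 bytes
//   eightbyte 0: merge(NoClass, SSE)     == SSE      -> one XMM register
//   eightbyte 1: merge(NoClass, Integer) == Integer  -> one GPR
//
// union U { double d; int i; };    // 8 bytes, both fields in eightbyte 0
//   merge(merge(NoClass, SSE), Integer)  == Integer  (rule (d))
//                                                    -> the union goes in a GPR
// ```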
1767 assert((Accum != Memory && Accum != ComplexX87) && 1768 "Invalid accumulated classification during merge."); 1769 if (Accum == Field || Field == NoClass) 1770 return Accum; 1771 if (Field == Memory) 1772 return Memory; 1773 if (Accum == NoClass) 1774 return Field; 1775 if (Accum == Integer || Field == Integer) 1776 return Integer; 1777 if (Field == X87 || Field == X87Up || Field == ComplexX87 || 1778 Accum == X87 || Accum == X87Up) 1779 return Memory; 1780 return SSE; 1781 } 1782 1783 void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, Class &Lo, 1784 Class &Hi, bool isNamedArg, bool IsRegCall) const { 1785 // FIXME: This code can be simplified by introducing a simple value class for 1786 // Class pairs with appropriate constructor methods for the various 1787 // situations. 1788 1789 // FIXME: Some of the split computations are wrong; unaligned vectors 1790 // shouldn't be passed in registers for example, so there is no chance they 1791 // can straddle an eightbyte. Verify & simplify. 1792 1793 Lo = Hi = NoClass; 1794 1795 Class &Current = OffsetBase < 64 ? Lo : Hi; 1796 Current = Memory; 1797 1798 if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) { 1799 BuiltinType::Kind k = BT->getKind(); 1800 1801 if (k == BuiltinType::Void) { 1802 Current = NoClass; 1803 } else if (k == BuiltinType::Int128 || k == BuiltinType::UInt128) { 1804 Lo = Integer; 1805 Hi = Integer; 1806 } else if (k >= BuiltinType::Bool && k <= BuiltinType::LongLong) { 1807 Current = Integer; 1808 } else if (k == BuiltinType::Float || k == BuiltinType::Double || 1809 k == BuiltinType::Float16 || k == BuiltinType::BFloat16) { 1810 Current = SSE; 1811 } else if (k == BuiltinType::Float128) { 1812 Lo = SSE; 1813 Hi = SSEUp; 1814 } else if (k == BuiltinType::LongDouble) { 1815 const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat(); 1816 if (LDF == &llvm::APFloat::IEEEquad()) { 1817 Lo = SSE; 1818 Hi = SSEUp; 1819 } else if (LDF == &llvm::APFloat::x87DoubleExtended()) { 1820 Lo = X87; 1821 Hi = X87Up; 1822 } else if (LDF == &llvm::APFloat::IEEEdouble()) { 1823 Current = SSE; 1824 } else 1825 llvm_unreachable("unexpected long double representation!"); 1826 } 1827 // FIXME: _Decimal32 and _Decimal64 are SSE. 1828 // FIXME: _float128 and _Decimal128 are (SSE, SSEUp). 1829 return; 1830 } 1831 1832 if (const EnumType *ET = Ty->getAs<EnumType>()) { 1833 // Classify the underlying integer type. 1834 classify(ET->getDecl()->getIntegerType(), OffsetBase, Lo, Hi, isNamedArg); 1835 return; 1836 } 1837 1838 if (Ty->hasPointerRepresentation()) { 1839 Current = Integer; 1840 return; 1841 } 1842 1843 if (Ty->isMemberPointerType()) { 1844 if (Ty->isMemberFunctionPointerType()) { 1845 if (Has64BitPointers) { 1846 // If Has64BitPointers, this is an {i64, i64}, so classify both 1847 // Lo and Hi now. 1848 Lo = Hi = Integer; 1849 } else { 1850 // Otherwise, with 32-bit pointers, this is an {i32, i32}. If that 1851 // straddles an eightbyte boundary, Hi should be classified as well. 
1852 uint64_t EB_FuncPtr = (OffsetBase) / 64; 1853 uint64_t EB_ThisAdj = (OffsetBase + 64 - 1) / 64; 1854 if (EB_FuncPtr != EB_ThisAdj) { 1855 Lo = Hi = Integer; 1856 } else { 1857 Current = Integer; 1858 } 1859 } 1860 } else { 1861 Current = Integer; 1862 } 1863 return; 1864 } 1865 1866 if (const VectorType *VT = Ty->getAs<VectorType>()) { 1867 uint64_t Size = getContext().getTypeSize(VT); 1868 if (Size == 1 || Size == 8 || Size == 16 || Size == 32) { 1869 // gcc passes the following as integer: 1870 // 4 bytes - <4 x char>, <2 x short>, <1 x int>, <1 x float> 1871 // 2 bytes - <2 x char>, <1 x short> 1872 // 1 byte - <1 x char> 1873 Current = Integer; 1874 1875 // If this type crosses an eightbyte boundary, it should be 1876 // split. 1877 uint64_t EB_Lo = (OffsetBase) / 64; 1878 uint64_t EB_Hi = (OffsetBase + Size - 1) / 64; 1879 if (EB_Lo != EB_Hi) 1880 Hi = Lo; 1881 } else if (Size == 64) { 1882 QualType ElementType = VT->getElementType(); 1883 1884 // gcc passes <1 x double> in memory. :( 1885 if (ElementType->isSpecificBuiltinType(BuiltinType::Double)) 1886 return; 1887 1888 // gcc passes <1 x long long> as SSE but clang used to unconditionally 1889 // pass them as integer. For platforms where clang is the de facto 1890 // platform compiler, we must continue to use integer. 1891 if (!classifyIntegerMMXAsSSE() && 1892 (ElementType->isSpecificBuiltinType(BuiltinType::LongLong) || 1893 ElementType->isSpecificBuiltinType(BuiltinType::ULongLong) || 1894 ElementType->isSpecificBuiltinType(BuiltinType::Long) || 1895 ElementType->isSpecificBuiltinType(BuiltinType::ULong))) 1896 Current = Integer; 1897 else 1898 Current = SSE; 1899 1900 // If this type crosses an eightbyte boundary, it should be 1901 // split. 1902 if (OffsetBase && OffsetBase != 64) 1903 Hi = Lo; 1904 } else if (Size == 128 || 1905 (isNamedArg && Size <= getNativeVectorSizeForAVXABI(AVXLevel))) { 1906 QualType ElementType = VT->getElementType(); 1907 1908 // gcc passes 256 and 512 bit <X x __int128> vectors in memory. :( 1909 if (passInt128VectorsInMem() && Size != 128 && 1910 (ElementType->isSpecificBuiltinType(BuiltinType::Int128) || 1911 ElementType->isSpecificBuiltinType(BuiltinType::UInt128))) 1912 return; 1913 1914 // Arguments of 256-bits are split into four eightbyte chunks. The 1915 // least significant one belongs to class SSE and all the others to class 1916 // SSEUP. The original Lo and Hi design considers that types can't be 1917 // greater than 128-bits, so a 64-bit split in Hi and Lo makes sense. 1918 // This design isn't correct for 256-bits, but since there're no cases 1919 // where the upper parts would need to be inspected, avoid adding 1920 // complexity and just consider Hi to match the 64-256 part. 1921 // 1922 // Note that per 3.5.7 of AMD64-ABI, 256-bit args are only passed in 1923 // registers if they are "named", i.e. not part of the "..." of a 1924 // variadic function. 1925 // 1926 // Similarly, per 3.2.3. of the AVX512 draft, 512-bits ("named") args are 1927 // split into eight eightbyte chunks, one SSE and seven SSEUP. 
1928 Lo = SSE; 1929 Hi = SSEUp; 1930 } 1931 return; 1932 } 1933 1934 if (const ComplexType *CT = Ty->getAs<ComplexType>()) { 1935 QualType ET = getContext().getCanonicalType(CT->getElementType()); 1936 1937 uint64_t Size = getContext().getTypeSize(Ty); 1938 if (ET->isIntegralOrEnumerationType()) { 1939 if (Size <= 64) 1940 Current = Integer; 1941 else if (Size <= 128) 1942 Lo = Hi = Integer; 1943 } else if (ET->isFloat16Type() || ET == getContext().FloatTy || 1944 ET->isBFloat16Type()) { 1945 Current = SSE; 1946 } else if (ET == getContext().DoubleTy) { 1947 Lo = Hi = SSE; 1948 } else if (ET == getContext().LongDoubleTy) { 1949 const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat(); 1950 if (LDF == &llvm::APFloat::IEEEquad()) 1951 Current = Memory; 1952 else if (LDF == &llvm::APFloat::x87DoubleExtended()) 1953 Current = ComplexX87; 1954 else if (LDF == &llvm::APFloat::IEEEdouble()) 1955 Lo = Hi = SSE; 1956 else 1957 llvm_unreachable("unexpected long double representation!"); 1958 } 1959 1960 // If this complex type crosses an eightbyte boundary then it 1961 // should be split. 1962 uint64_t EB_Real = (OffsetBase) / 64; 1963 uint64_t EB_Imag = (OffsetBase + getContext().getTypeSize(ET)) / 64; 1964 if (Hi == NoClass && EB_Real != EB_Imag) 1965 Hi = Lo; 1966 1967 return; 1968 } 1969 1970 if (const auto *EITy = Ty->getAs<BitIntType>()) { 1971 if (EITy->getNumBits() <= 64) 1972 Current = Integer; 1973 else if (EITy->getNumBits() <= 128) 1974 Lo = Hi = Integer; 1975 // Larger values need to get passed in memory. 1976 return; 1977 } 1978 1979 if (const ConstantArrayType *AT = getContext().getAsConstantArrayType(Ty)) { 1980 // Arrays are treated like structures. 1981 1982 uint64_t Size = getContext().getTypeSize(Ty); 1983 1984 // AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger 1985 // than eight eightbytes, ..., it has class MEMORY. 1986 // regcall ABI doesn't have limitation to an object. The only limitation 1987 // is the free registers, which will be checked in computeInfo. 1988 if (!IsRegCall && Size > 512) 1989 return; 1990 1991 // AMD64-ABI 3.2.3p2: Rule 1. If ..., or it contains unaligned 1992 // fields, it has class MEMORY. 1993 // 1994 // Only need to check alignment of array base. 1995 if (OffsetBase % getContext().getTypeAlign(AT->getElementType())) 1996 return; 1997 1998 // Otherwise implement simplified merge. We could be smarter about 1999 // this, but it isn't worth it and would be harder to verify. 2000 Current = NoClass; 2001 uint64_t EltSize = getContext().getTypeSize(AT->getElementType()); 2002 uint64_t ArraySize = AT->getZExtSize(); 2003 2004 // The only case a 256-bit wide vector could be used is when the array 2005 // contains a single 256-bit element. Since Lo and Hi logic isn't extended 2006 // to work for sizes wider than 128, early check and fallback to memory. 
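// [Illustrative example, not part of the original source.] A sketch of the
// vector classification handled above, assuming an AVX-512 ABI level so the
// native vector width is 512 bits (the typedefs are hypothetical):
// ```
// typedef float  v4sf __attribute__((vector_size(16)));  // 128-bit: Lo=SSE, Hi=SSEUp -> one XMM
// typedef double v4df __attribute__((vector_size(32)));  // 256-bit: Lo=SSE, Hi=SSEUp -> one YMM
// typedef double v8df __attribute__((vector_size(64)));  // 512-bit: Lo=SSE, Hi=SSEUp -> one ZMM
// ```
// The 256- and 512-bit cases only apply to named arguments; passed through
// "..." such a vector stays in the Memory class.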
2007 // 2008 if (Size > 128 && 2009 (Size != EltSize || Size > getNativeVectorSizeForAVXABI(AVXLevel))) 2010 return; 2011 2012 for (uint64_t i=0, Offset=OffsetBase; i<ArraySize; ++i, Offset += EltSize) { 2013 Class FieldLo, FieldHi; 2014 classify(AT->getElementType(), Offset, FieldLo, FieldHi, isNamedArg); 2015 Lo = merge(Lo, FieldLo); 2016 Hi = merge(Hi, FieldHi); 2017 if (Lo == Memory || Hi == Memory) 2018 break; 2019 } 2020 2021 postMerge(Size, Lo, Hi); 2022 assert((Hi != SSEUp || Lo == SSE) && "Invalid SSEUp array classification."); 2023 return; 2024 } 2025 2026 if (const RecordType *RT = Ty->getAs<RecordType>()) { 2027 uint64_t Size = getContext().getTypeSize(Ty); 2028 2029 // AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger 2030 // than eight eightbytes, ..., it has class MEMORY. 2031 if (Size > 512) 2032 return; 2033 2034 // AMD64-ABI 3.2.3p2: Rule 2. If a C++ object has either a non-trivial 2035 // copy constructor or a non-trivial destructor, it is passed by invisible 2036 // reference. 2037 if (getRecordArgABI(RT, getCXXABI())) 2038 return; 2039 2040 const RecordDecl *RD = RT->getDecl(); 2041 2042 // Assume variable sized types are passed in memory. 2043 if (RD->hasFlexibleArrayMember()) 2044 return; 2045 2046 const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD); 2047 2048 // Reset Lo class, this will be recomputed. 2049 Current = NoClass; 2050 2051 // If this is a C++ record, classify the bases first. 2052 if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) { 2053 for (const auto &I : CXXRD->bases()) { 2054 assert(!I.isVirtual() && !I.getType()->isDependentType() && 2055 "Unexpected base class!"); 2056 const auto *Base = 2057 cast<CXXRecordDecl>(I.getType()->castAs<RecordType>()->getDecl()); 2058 2059 // Classify this field. 2060 // 2061 // AMD64-ABI 3.2.3p2: Rule 3. If the size of the aggregate exceeds a 2062 // single eightbyte, each is classified separately. Each eightbyte gets 2063 // initialized to class NO_CLASS. 2064 Class FieldLo, FieldHi; 2065 uint64_t Offset = 2066 OffsetBase + getContext().toBits(Layout.getBaseClassOffset(Base)); 2067 classify(I.getType(), Offset, FieldLo, FieldHi, isNamedArg); 2068 Lo = merge(Lo, FieldLo); 2069 Hi = merge(Hi, FieldHi); 2070 if (Lo == Memory || Hi == Memory) { 2071 postMerge(Size, Lo, Hi); 2072 return; 2073 } 2074 } 2075 } 2076 2077 // Classify the fields one at a time, merging the results. 2078 unsigned idx = 0; 2079 bool UseClang11Compat = getContext().getLangOpts().getClangABICompat() <= 2080 LangOptions::ClangABI::Ver11 || 2081 getContext().getTargetInfo().getTriple().isPS(); 2082 bool IsUnion = RT->isUnionType() && !UseClang11Compat; 2083 2084 for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end(); 2085 i != e; ++i, ++idx) { 2086 uint64_t Offset = OffsetBase + Layout.getFieldOffset(idx); 2087 bool BitField = i->isBitField(); 2088 2089 // Ignore padding bit-fields. 2090 if (BitField && i->isUnnamedBitField()) 2091 continue; 2092 2093 // AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger than 2094 // eight eightbytes, or it contains unaligned fields, it has class MEMORY. 2095 // 2096 // The only case a 256-bit or a 512-bit wide vector could be used is when 2097 // the struct contains a single 256-bit or 512-bit element. Early check 2098 // and fallback to memory. 2099 // 2100 // FIXME: Extended the Lo and Hi logic properly to work for size wider 2101 // than 128. 
2102 if (Size > 128 && 2103 ((!IsUnion && Size != getContext().getTypeSize(i->getType())) || 2104 Size > getNativeVectorSizeForAVXABI(AVXLevel))) { 2105 Lo = Memory; 2106 postMerge(Size, Lo, Hi); 2107 return; 2108 } 2109 2110 bool IsInMemory = 2111 Offset % getContext().getTypeAlign(i->getType().getCanonicalType()); 2112 // Note, skip this test for bit-fields, see below. 2113 if (!BitField && IsInMemory) { 2114 Lo = Memory; 2115 postMerge(Size, Lo, Hi); 2116 return; 2117 } 2118 2119 // Classify this field. 2120 // 2121 // AMD64-ABI 3.2.3p2: Rule 3. If the size of the aggregate 2122 // exceeds a single eightbyte, each is classified 2123 // separately. Each eightbyte gets initialized to class 2124 // NO_CLASS. 2125 Class FieldLo, FieldHi; 2126 2127 // Bit-fields require special handling, they do not force the 2128 // structure to be passed in memory even if unaligned, and 2129 // therefore they can straddle an eightbyte. 2130 if (BitField) { 2131 assert(!i->isUnnamedBitField()); 2132 uint64_t Offset = OffsetBase + Layout.getFieldOffset(idx); 2133 uint64_t Size = i->getBitWidthValue(); 2134 2135 uint64_t EB_Lo = Offset / 64; 2136 uint64_t EB_Hi = (Offset + Size - 1) / 64; 2137 2138 if (EB_Lo) { 2139 assert(EB_Hi == EB_Lo && "Invalid classification, type > 16 bytes."); 2140 FieldLo = NoClass; 2141 FieldHi = Integer; 2142 } else { 2143 FieldLo = Integer; 2144 FieldHi = EB_Hi ? Integer : NoClass; 2145 } 2146 } else 2147 classify(i->getType(), Offset, FieldLo, FieldHi, isNamedArg); 2148 Lo = merge(Lo, FieldLo); 2149 Hi = merge(Hi, FieldHi); 2150 if (Lo == Memory || Hi == Memory) 2151 break; 2152 } 2153 2154 postMerge(Size, Lo, Hi); 2155 } 2156 } 2157 2158 ABIArgInfo X86_64ABIInfo::getIndirectReturnResult(QualType Ty) const { 2159 // If this is a scalar LLVM value then assume LLVM will pass it in the right 2160 // place naturally. 2161 if (!isAggregateTypeForABI(Ty)) { 2162 // Treat an enum type as its underlying type. 2163 if (const EnumType *EnumTy = Ty->getAs<EnumType>()) 2164 Ty = EnumTy->getDecl()->getIntegerType(); 2165 2166 if (Ty->isBitIntType()) 2167 return getNaturalAlignIndirect(Ty); 2168 2169 return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty) 2170 : ABIArgInfo::getDirect()); 2171 } 2172 2173 return getNaturalAlignIndirect(Ty); 2174 } 2175 2176 bool X86_64ABIInfo::IsIllegalVectorType(QualType Ty) const { 2177 if (const VectorType *VecTy = Ty->getAs<VectorType>()) { 2178 uint64_t Size = getContext().getTypeSize(VecTy); 2179 unsigned LargestVector = getNativeVectorSizeForAVXABI(AVXLevel); 2180 if (Size <= 64 || Size > LargestVector) 2181 return true; 2182 QualType EltTy = VecTy->getElementType(); 2183 if (passInt128VectorsInMem() && 2184 (EltTy->isSpecificBuiltinType(BuiltinType::Int128) || 2185 EltTy->isSpecificBuiltinType(BuiltinType::UInt128))) 2186 return true; 2187 } 2188 2189 return false; 2190 } 2191 2192 ABIArgInfo X86_64ABIInfo::getIndirectResult(QualType Ty, 2193 unsigned freeIntRegs) const { 2194 // If this is a scalar LLVM value then assume LLVM will pass it in the right 2195 // place naturally. 2196 // 2197 // This assumption is optimistic, as there could be free registers available 2198 // when we need to pass this argument in memory, and LLVM could try to pass 2199 // the argument in the free register. This does not seem to happen currently, 2200 // but this code would be much safer if we could mark the argument with 2201 // 'onstack'. See PR12193. 
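// [Illustrative example, not part of the original source.] Two hypothetical
// aggregates that land in the Memory class under the record rules above:
// ```
// struct Big { double a, b, c; };                // 24 bytes, not one wide vector
// struct Packed { char c; double d; } __attribute__((packed));
//                                                // 'd' is misaligned in its eightbyte
// ```
// Both are therefore returned via a hidden sret pointer and passed in memory,
// through getIndirectReturnResult / getIndirectResult.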
2202 if (!isAggregateTypeForABI(Ty) && !IsIllegalVectorType(Ty) && 2203 !Ty->isBitIntType()) { 2204 // Treat an enum type as its underlying type. 2205 if (const EnumType *EnumTy = Ty->getAs<EnumType>()) 2206 Ty = EnumTy->getDecl()->getIntegerType(); 2207 2208 return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty) 2209 : ABIArgInfo::getDirect()); 2210 } 2211 2212 if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) 2213 return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); 2214 2215 // Compute the byval alignment. We specify the alignment of the byval in all 2216 // cases so that the mid-level optimizer knows the alignment of the byval. 2217 unsigned Align = std::max(getContext().getTypeAlign(Ty) / 8, 8U); 2218 2219 // Attempt to avoid passing indirect results using byval when possible. This 2220 // is important for good codegen. 2221 // 2222 // We do this by coercing the value into a scalar type which the backend can 2223 // handle naturally (i.e., without using byval). 2224 // 2225 // For simplicity, we currently only do this when we have exhausted all of the 2226 // free integer registers. Doing this when there are free integer registers 2227 // would require more care, as we would have to ensure that the coerced value 2228 // did not claim the unused register. That would require either reording the 2229 // arguments to the function (so that any subsequent inreg values came first), 2230 // or only doing this optimization when there were no following arguments that 2231 // might be inreg. 2232 // 2233 // We currently expect it to be rare (particularly in well written code) for 2234 // arguments to be passed on the stack when there are still free integer 2235 // registers available (this would typically imply large structs being passed 2236 // by value), so this seems like a fair tradeoff for now. 2237 // 2238 // We can revisit this if the backend grows support for 'onstack' parameter 2239 // attributes. See PR12193. 2240 if (freeIntRegs == 0) { 2241 uint64_t Size = getContext().getTypeSize(Ty); 2242 2243 // If this type fits in an eightbyte, coerce it into the matching integral 2244 // type, which will end up on the stack (with alignment 8). 2245 if (Align == 8 && Size <= 64) 2246 return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), 2247 Size)); 2248 } 2249 2250 return ABIArgInfo::getIndirect(CharUnits::fromQuantity(Align)); 2251 } 2252 2253 /// The ABI specifies that a value should be passed in a full vector XMM/YMM 2254 /// register. Pick an LLVM IR type that will be passed as a vector register. 2255 llvm::Type *X86_64ABIInfo::GetByteVectorType(QualType Ty) const { 2256 // Wrapper structs/arrays that only contain vectors are passed just like 2257 // vectors; strip them off if present. 2258 if (const Type *InnerTy = isSingleElementStruct(Ty, getContext())) 2259 Ty = QualType(InnerTy, 0); 2260 2261 llvm::Type *IRType = CGT.ConvertType(Ty); 2262 if (isa<llvm::VectorType>(IRType)) { 2263 // Don't pass vXi128 vectors in their native type, the backend can't 2264 // legalize them. 2265 if (passInt128VectorsInMem() && 2266 cast<llvm::VectorType>(IRType)->getElementType()->isIntegerTy(128)) { 2267 // Use a vXi64 vector. 
2268 uint64_t Size = getContext().getTypeSize(Ty); 2269 return llvm::FixedVectorType::get(llvm::Type::getInt64Ty(getVMContext()), 2270 Size / 64); 2271 } 2272 2273 return IRType; 2274 } 2275 2276 if (IRType->getTypeID() == llvm::Type::FP128TyID) 2277 return IRType; 2278 2279 // We couldn't find the preferred IR vector type for 'Ty'. 2280 uint64_t Size = getContext().getTypeSize(Ty); 2281 assert((Size == 128 || Size == 256 || Size == 512) && "Invalid type found!"); 2282 2283 2284 // Return a LLVM IR vector type based on the size of 'Ty'. 2285 return llvm::FixedVectorType::get(llvm::Type::getDoubleTy(getVMContext()), 2286 Size / 64); 2287 } 2288 2289 /// BitsContainNoUserData - Return true if the specified [start,end) bit range 2290 /// is known to either be off the end of the specified type or being in 2291 /// alignment padding. The user type specified is known to be at most 128 bits 2292 /// in size, and have passed through X86_64ABIInfo::classify with a successful 2293 /// classification that put one of the two halves in the INTEGER class. 2294 /// 2295 /// It is conservatively correct to return false. 2296 static bool BitsContainNoUserData(QualType Ty, unsigned StartBit, 2297 unsigned EndBit, ASTContext &Context) { 2298 // If the bytes being queried are off the end of the type, there is no user 2299 // data hiding here. This handles analysis of builtins, vectors and other 2300 // types that don't contain interesting padding. 2301 unsigned TySize = (unsigned)Context.getTypeSize(Ty); 2302 if (TySize <= StartBit) 2303 return true; 2304 2305 if (const ConstantArrayType *AT = Context.getAsConstantArrayType(Ty)) { 2306 unsigned EltSize = (unsigned)Context.getTypeSize(AT->getElementType()); 2307 unsigned NumElts = (unsigned)AT->getZExtSize(); 2308 2309 // Check each element to see if the element overlaps with the queried range. 2310 for (unsigned i = 0; i != NumElts; ++i) { 2311 // If the element is after the span we care about, then we're done.. 2312 unsigned EltOffset = i*EltSize; 2313 if (EltOffset >= EndBit) break; 2314 2315 unsigned EltStart = EltOffset < StartBit ? StartBit-EltOffset :0; 2316 if (!BitsContainNoUserData(AT->getElementType(), EltStart, 2317 EndBit-EltOffset, Context)) 2318 return false; 2319 } 2320 // If it overlaps no elements, then it is safe to process as padding. 2321 return true; 2322 } 2323 2324 if (const RecordType *RT = Ty->getAs<RecordType>()) { 2325 const RecordDecl *RD = RT->getDecl(); 2326 const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD); 2327 2328 // If this is a C++ record, check the bases first. 2329 if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) { 2330 for (const auto &I : CXXRD->bases()) { 2331 assert(!I.isVirtual() && !I.getType()->isDependentType() && 2332 "Unexpected base class!"); 2333 const auto *Base = 2334 cast<CXXRecordDecl>(I.getType()->castAs<RecordType>()->getDecl()); 2335 2336 // If the base is after the span we care about, ignore it. 2337 unsigned BaseOffset = Context.toBits(Layout.getBaseClassOffset(Base)); 2338 if (BaseOffset >= EndBit) continue; 2339 2340 unsigned BaseStart = BaseOffset < StartBit ? StartBit-BaseOffset :0; 2341 if (!BitsContainNoUserData(I.getType(), BaseStart, 2342 EndBit-BaseOffset, Context)) 2343 return false; 2344 } 2345 } 2346 2347 // Verify that no field has data that overlaps the region of interest. Yes 2348 // this could be sped up a lot by being smarter about queried fields, 2349 // however we're only looking at structs up to 16 bytes, so we don't care 2350 // much. 
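// [Illustrative example, not part of the original source.] What
// BitsContainNoUserData computes for two hypothetical structs:
// ```
// struct A { double d; int i; };     // BitsContainNoUserData(A, 96, 128) == true
//                                    //   bits 96..128 are only tail padding
// struct B { double d; int i, j; };  // BitsContainNoUserData(B, 96, 128) == false
//                                    //   field 'j' occupies those bits
// ```
// This is what later lets GetINTEGERTypeAtOffset shrink the high eightbyte of
// A down to an i32, while B keeps a full i64 for its second eightbyte.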
2351 unsigned idx = 0; 2352 for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end(); 2353 i != e; ++i, ++idx) { 2354 unsigned FieldOffset = (unsigned)Layout.getFieldOffset(idx); 2355 2356 // If we found a field after the region we care about, then we're done. 2357 if (FieldOffset >= EndBit) break; 2358 2359 unsigned FieldStart = FieldOffset < StartBit ? StartBit-FieldOffset :0; 2360 if (!BitsContainNoUserData(i->getType(), FieldStart, EndBit-FieldOffset, 2361 Context)) 2362 return false; 2363 } 2364 2365 // If nothing in this record overlapped the area of interest, then we're 2366 // clean. 2367 return true; 2368 } 2369 2370 return false; 2371 } 2372 2373 /// getFPTypeAtOffset - Return a floating point type at the specified offset. 2374 static llvm::Type *getFPTypeAtOffset(llvm::Type *IRType, unsigned IROffset, 2375 const llvm::DataLayout &TD) { 2376 if (IROffset == 0 && IRType->isFloatingPointTy()) 2377 return IRType; 2378 2379 // If this is a struct, recurse into the field at the specified offset. 2380 if (llvm::StructType *STy = dyn_cast<llvm::StructType>(IRType)) { 2381 if (!STy->getNumContainedTypes()) 2382 return nullptr; 2383 2384 const llvm::StructLayout *SL = TD.getStructLayout(STy); 2385 unsigned Elt = SL->getElementContainingOffset(IROffset); 2386 IROffset -= SL->getElementOffset(Elt); 2387 return getFPTypeAtOffset(STy->getElementType(Elt), IROffset, TD); 2388 } 2389 2390 // If this is an array, recurse into the field at the specified offset. 2391 if (llvm::ArrayType *ATy = dyn_cast<llvm::ArrayType>(IRType)) { 2392 llvm::Type *EltTy = ATy->getElementType(); 2393 unsigned EltSize = TD.getTypeAllocSize(EltTy); 2394 IROffset -= IROffset / EltSize * EltSize; 2395 return getFPTypeAtOffset(EltTy, IROffset, TD); 2396 } 2397 2398 return nullptr; 2399 } 2400 2401 /// GetSSETypeAtOffset - Return a type that will be passed by the backend in the 2402 /// low 8 bytes of an XMM register, corresponding to the SSE class. 2403 llvm::Type *X86_64ABIInfo:: 2404 GetSSETypeAtOffset(llvm::Type *IRType, unsigned IROffset, 2405 QualType SourceTy, unsigned SourceOffset) const { 2406 const llvm::DataLayout &TD = getDataLayout(); 2407 unsigned SourceSize = 2408 (unsigned)getContext().getTypeSize(SourceTy) / 8 - SourceOffset; 2409 llvm::Type *T0 = getFPTypeAtOffset(IRType, IROffset, TD); 2410 if (!T0 || T0->isDoubleTy()) 2411 return llvm::Type::getDoubleTy(getVMContext()); 2412 2413 // Get the adjacent FP type. 2414 llvm::Type *T1 = nullptr; 2415 unsigned T0Size = TD.getTypeAllocSize(T0); 2416 if (SourceSize > T0Size) 2417 T1 = getFPTypeAtOffset(IRType, IROffset + T0Size, TD); 2418 if (T1 == nullptr) { 2419 // Check if IRType is a half/bfloat + float. float type will be in IROffset+4 due 2420 // to its alignment. 2421 if (T0->is16bitFPTy() && SourceSize > 4) 2422 T1 = getFPTypeAtOffset(IRType, IROffset + 4, TD); 2423 // If we can't get a second FP type, return a simple half or float. 2424 // avx512fp16-abi.c:pr51813_2 shows it works to return float for 2425 // {float, i8} too. 
2426 if (T1 == nullptr) 2427 return T0; 2428 } 2429 2430 if (T0->isFloatTy() && T1->isFloatTy()) 2431 return llvm::FixedVectorType::get(T0, 2); 2432 2433 if (T0->is16bitFPTy() && T1->is16bitFPTy()) { 2434 llvm::Type *T2 = nullptr; 2435 if (SourceSize > 4) 2436 T2 = getFPTypeAtOffset(IRType, IROffset + 4, TD); 2437 if (T2 == nullptr) 2438 return llvm::FixedVectorType::get(T0, 2); 2439 return llvm::FixedVectorType::get(T0, 4); 2440 } 2441 2442 if (T0->is16bitFPTy() || T1->is16bitFPTy()) 2443 return llvm::FixedVectorType::get(llvm::Type::getHalfTy(getVMContext()), 4); 2444 2445 return llvm::Type::getDoubleTy(getVMContext()); 2446 } 2447 2448 2449 /// GetINTEGERTypeAtOffset - The ABI specifies that a value should be passed in 2450 /// an 8-byte GPR. This means that we either have a scalar or we are talking 2451 /// about the high or low part of an up-to-16-byte struct. This routine picks 2452 /// the best LLVM IR type to represent this, which may be i64 or may be anything 2453 /// else that the backend will pass in a GPR that works better (e.g. i8, %foo*, 2454 /// etc). 2455 /// 2456 /// PrefType is an LLVM IR type that corresponds to (part of) the IR type for 2457 /// the source type. IROffset is an offset in bytes into the LLVM IR type that 2458 /// the 8-byte value references. PrefType may be null. 2459 /// 2460 /// SourceTy is the source-level type for the entire argument. SourceOffset is 2461 /// an offset into this that we're processing (which is always either 0 or 8). 2462 /// 2463 llvm::Type *X86_64ABIInfo:: 2464 GetINTEGERTypeAtOffset(llvm::Type *IRType, unsigned IROffset, 2465 QualType SourceTy, unsigned SourceOffset) const { 2466 // If we're dealing with an un-offset LLVM IR type, then it means that we're 2467 // returning an 8-byte unit starting with it. See if we can safely use it. 2468 if (IROffset == 0) { 2469 // Pointers and int64's always fill the 8-byte unit. 2470 if ((isa<llvm::PointerType>(IRType) && Has64BitPointers) || 2471 IRType->isIntegerTy(64)) 2472 return IRType; 2473 2474 // If we have a 1/2/4-byte integer, we can use it only if the rest of the 2475 // goodness in the source type is just tail padding. This is allowed to 2476 // kick in for struct {double,int} on the int, but not on 2477 // struct{double,int,int} because we wouldn't return the second int. We 2478 // have to do this analysis on the source type because we can't depend on 2479 // unions being lowered a specific way etc. 2480 if (IRType->isIntegerTy(8) || IRType->isIntegerTy(16) || 2481 IRType->isIntegerTy(32) || 2482 (isa<llvm::PointerType>(IRType) && !Has64BitPointers)) { 2483 unsigned BitWidth = isa<llvm::PointerType>(IRType) ? 32 : 2484 cast<llvm::IntegerType>(IRType)->getBitWidth(); 2485 2486 if (BitsContainNoUserData(SourceTy, SourceOffset*8+BitWidth, 2487 SourceOffset*8+64, getContext())) 2488 return IRType; 2489 } 2490 } 2491 2492 if (llvm::StructType *STy = dyn_cast<llvm::StructType>(IRType)) { 2493 // If this is a struct, recurse into the field at the specified offset. 
2494 const llvm::StructLayout *SL = getDataLayout().getStructLayout(STy);
2495 if (IROffset < SL->getSizeInBytes()) {
2496 unsigned FieldIdx = SL->getElementContainingOffset(IROffset);
2497 IROffset -= SL->getElementOffset(FieldIdx);
2498
2499 return GetINTEGERTypeAtOffset(STy->getElementType(FieldIdx), IROffset,
2500 SourceTy, SourceOffset);
2501 }
2502 }
2503
2504 if (llvm::ArrayType *ATy = dyn_cast<llvm::ArrayType>(IRType)) {
2505 llvm::Type *EltTy = ATy->getElementType();
2506 unsigned EltSize = getDataLayout().getTypeAllocSize(EltTy);
2507 unsigned EltOffset = IROffset/EltSize*EltSize;
2508 return GetINTEGERTypeAtOffset(EltTy, IROffset-EltOffset, SourceTy,
2509 SourceOffset);
2510 }
2511
2512 // Okay, we don't have any better idea of what to pass, so we pass this in an
2513 // integer register that isn't too big to fit the rest of the struct.
2514 unsigned TySizeInBytes =
2515 (unsigned)getContext().getTypeSizeInChars(SourceTy).getQuantity();
2516
2517 assert(TySizeInBytes != SourceOffset && "Empty field?");
2518
2519 // It is always safe to classify this as an integer type up to i64 that
2520 // isn't larger than the structure.
2521 return llvm::IntegerType::get(getVMContext(),
2522 std::min(TySizeInBytes-SourceOffset, 8U)*8);
2523 }
2524
2525
2526 /// GetX86_64ByValArgumentPair - Given a high and low type that can ideally
2527 /// be used as elements of a two register pair to pass or return, return a
2528 /// first class aggregate to represent them. For example, if the low part of
2529 /// a by-value argument should be passed as i32* and the high part as float,
2530 /// return {i32*, float}.
2531 static llvm::Type *
2532 GetX86_64ByValArgumentPair(llvm::Type *Lo, llvm::Type *Hi,
2533 const llvm::DataLayout &TD) {
2534 // In order to correctly satisfy the ABI, we need the high part to start
2535 // at offset 8. If the high and low parts we inferred are both 4-byte types
2536 // (e.g. i32 and i32) then the resultant struct type ({i32,i32}) won't have
2537 // the second element at offset 8. Check for this:
2538 unsigned LoSize = (unsigned)TD.getTypeAllocSize(Lo);
2539 llvm::Align HiAlign = TD.getABITypeAlign(Hi);
2540 unsigned HiStart = llvm::alignTo(LoSize, HiAlign);
2541 assert(HiStart != 0 && HiStart <= 8 && "Invalid x86-64 argument pair!");
2542
2543 // To handle this, we have to increase the size of the low part so that the
2544 // second element will start at an 8 byte offset. We can't increase the size
2545 // of the second element because it might make us access off the end of the
2546 // struct.
2547 if (HiStart != 8) {
2548 // There are usually two sorts of types the ABI generation code can produce
2549 // for the low part of a pair that aren't 8 bytes in size: half, float or
2550 // i8/i16/i32. This can also include pointers when they are 32-bit (X32 and
2551 // NaCl).
2552 // Promote these to a larger type.
2553 if (Lo->isHalfTy() || Lo->isFloatTy())
2554 Lo = llvm::Type::getDoubleTy(Lo->getContext());
2555 else {
2556 assert((Lo->isIntegerTy() || Lo->isPointerTy())
2557 && "Invalid/unknown lo type");
2558 Lo = llvm::Type::getInt64Ty(Lo->getContext());
2559 }
2560 }
2561
2562 llvm::StructType *Result = llvm::StructType::get(Lo, Hi);
2563
2564 // Verify that the second element is at an 8-byte offset.
2565 assert(TD.getStructLayout(Result)->getElementOffset(1) == 8 &&
2566 "Invalid x86-64 argument pair!");
2567 return Result;
2568 }
2569
2570 ABIArgInfo X86_64ABIInfo::
2571 classifyReturnType(QualType RetTy) const {
2572 // AMD64-ABI 3.2.3p4: Rule 1.
Classify the return type with the 2573 // classification algorithm. 2574 X86_64ABIInfo::Class Lo, Hi; 2575 classify(RetTy, 0, Lo, Hi, /*isNamedArg*/ true); 2576 2577 // Check some invariants. 2578 assert((Hi != Memory || Lo == Memory) && "Invalid memory classification."); 2579 assert((Hi != SSEUp || Lo == SSE) && "Invalid SSEUp classification."); 2580 2581 llvm::Type *ResType = nullptr; 2582 switch (Lo) { 2583 case NoClass: 2584 if (Hi == NoClass) 2585 return ABIArgInfo::getIgnore(); 2586 // If the low part is just padding, it takes no register, leave ResType 2587 // null. 2588 assert((Hi == SSE || Hi == Integer || Hi == X87Up) && 2589 "Unknown missing lo part"); 2590 break; 2591 2592 case SSEUp: 2593 case X87Up: 2594 llvm_unreachable("Invalid classification for lo word."); 2595 2596 // AMD64-ABI 3.2.3p4: Rule 2. Types of class memory are returned via 2597 // hidden argument. 2598 case Memory: 2599 return getIndirectReturnResult(RetTy); 2600 2601 // AMD64-ABI 3.2.3p4: Rule 3. If the class is INTEGER, the next 2602 // available register of the sequence %rax, %rdx is used. 2603 case Integer: 2604 ResType = GetINTEGERTypeAtOffset(CGT.ConvertType(RetTy), 0, RetTy, 0); 2605 2606 // If we have a sign or zero extended integer, make sure to return Extend 2607 // so that the parameter gets the right LLVM IR attributes. 2608 if (Hi == NoClass && isa<llvm::IntegerType>(ResType)) { 2609 // Treat an enum type as its underlying type. 2610 if (const EnumType *EnumTy = RetTy->getAs<EnumType>()) 2611 RetTy = EnumTy->getDecl()->getIntegerType(); 2612 2613 if (RetTy->isIntegralOrEnumerationType() && 2614 isPromotableIntegerTypeForABI(RetTy)) 2615 return ABIArgInfo::getExtend(RetTy); 2616 } 2617 break; 2618 2619 // AMD64-ABI 3.2.3p4: Rule 4. If the class is SSE, the next 2620 // available SSE register of the sequence %xmm0, %xmm1 is used. 2621 case SSE: 2622 ResType = GetSSETypeAtOffset(CGT.ConvertType(RetTy), 0, RetTy, 0); 2623 break; 2624 2625 // AMD64-ABI 3.2.3p4: Rule 6. If the class is X87, the value is 2626 // returned on the X87 stack in %st0 as 80-bit x87 number. 2627 case X87: 2628 ResType = llvm::Type::getX86_FP80Ty(getVMContext()); 2629 break; 2630 2631 // AMD64-ABI 3.2.3p4: Rule 8. If the class is COMPLEX_X87, the real 2632 // part of the value is returned in %st0 and the imaginary part in 2633 // %st1. 2634 case ComplexX87: 2635 assert(Hi == ComplexX87 && "Unexpected ComplexX87 classification."); 2636 ResType = llvm::StructType::get(llvm::Type::getX86_FP80Ty(getVMContext()), 2637 llvm::Type::getX86_FP80Ty(getVMContext())); 2638 break; 2639 } 2640 2641 llvm::Type *HighPart = nullptr; 2642 switch (Hi) { 2643 // Memory was handled previously and X87 should 2644 // never occur as a hi class. 2645 case Memory: 2646 case X87: 2647 llvm_unreachable("Invalid classification for hi word."); 2648 2649 case ComplexX87: // Previously handled. 2650 case NoClass: 2651 break; 2652 2653 case Integer: 2654 HighPart = GetINTEGERTypeAtOffset(CGT.ConvertType(RetTy), 8, RetTy, 8); 2655 if (Lo == NoClass) // Return HighPart at offset 8 in memory. 2656 return ABIArgInfo::getDirect(HighPart, 8); 2657 break; 2658 case SSE: 2659 HighPart = GetSSETypeAtOffset(CGT.ConvertType(RetTy), 8, RetTy, 8); 2660 if (Lo == NoClass) // Return HighPart at offset 8 in memory. 2661 return ABIArgInfo::getDirect(HighPart, 8); 2662 break; 2663 2664 // AMD64-ABI 3.2.3p4: Rule 5. If the class is SSEUP, the eightbyte 2665 // is passed in the next available eightbyte chunk if the last used 2666 // vector register. 
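// [Illustrative example, not part of the original source.] How the return-value
// classification in classifyReturnType plays out for a few C types:
// ```
// long                          -> Integer             -> %rax
// struct { double d; int i; }   -> Lo=SSE,  Hi=Integer -> %xmm0 + %rax
// _Complex double               -> Lo=SSE,  Hi=SSE     -> %xmm0 + %xmm1
// long double (x87 format)      -> Lo=X87,  Hi=X87Up   -> %st(0)
// struct { double a, b, c; }    -> Memory              -> hidden sret pointer
// ```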
2667 // 2668 // SSEUP should always be preceded by SSE, just widen. 2669 case SSEUp: 2670 assert(Lo == SSE && "Unexpected SSEUp classification."); 2671 ResType = GetByteVectorType(RetTy); 2672 break; 2673 2674 // AMD64-ABI 3.2.3p4: Rule 7. If the class is X87UP, the value is 2675 // returned together with the previous X87 value in %st0. 2676 case X87Up: 2677 // If X87Up is preceded by X87, we don't need to do 2678 // anything. However, in some cases with unions it may not be 2679 // preceded by X87. In such situations we follow gcc and pass the 2680 // extra bits in an SSE reg. 2681 if (Lo != X87) { 2682 HighPart = GetSSETypeAtOffset(CGT.ConvertType(RetTy), 8, RetTy, 8); 2683 if (Lo == NoClass) // Return HighPart at offset 8 in memory. 2684 return ABIArgInfo::getDirect(HighPart, 8); 2685 } 2686 break; 2687 } 2688 2689 // If a high part was specified, merge it together with the low part. It is 2690 // known to pass in the high eightbyte of the result. We do this by forming a 2691 // first class struct aggregate with the high and low part: {low, high} 2692 if (HighPart) 2693 ResType = GetX86_64ByValArgumentPair(ResType, HighPart, getDataLayout()); 2694 2695 return ABIArgInfo::getDirect(ResType); 2696 } 2697 2698 ABIArgInfo 2699 X86_64ABIInfo::classifyArgumentType(QualType Ty, unsigned freeIntRegs, 2700 unsigned &neededInt, unsigned &neededSSE, 2701 bool isNamedArg, bool IsRegCall) const { 2702 Ty = useFirstFieldIfTransparentUnion(Ty); 2703 2704 X86_64ABIInfo::Class Lo, Hi; 2705 classify(Ty, 0, Lo, Hi, isNamedArg, IsRegCall); 2706 2707 // Check some invariants. 2708 // FIXME: Enforce these by construction. 2709 assert((Hi != Memory || Lo == Memory) && "Invalid memory classification."); 2710 assert((Hi != SSEUp || Lo == SSE) && "Invalid SSEUp classification."); 2711 2712 neededInt = 0; 2713 neededSSE = 0; 2714 llvm::Type *ResType = nullptr; 2715 switch (Lo) { 2716 case NoClass: 2717 if (Hi == NoClass) 2718 return ABIArgInfo::getIgnore(); 2719 // If the low part is just padding, it takes no register, leave ResType 2720 // null. 2721 assert((Hi == SSE || Hi == Integer || Hi == X87Up) && 2722 "Unknown missing lo part"); 2723 break; 2724 2725 // AMD64-ABI 3.2.3p3: Rule 1. If the class is MEMORY, pass the argument 2726 // on the stack. 2727 case Memory: 2728 2729 // AMD64-ABI 3.2.3p3: Rule 5. If the class is X87, X87UP or 2730 // COMPLEX_X87, it is passed in memory. 2731 case X87: 2732 case ComplexX87: 2733 if (getRecordArgABI(Ty, getCXXABI()) == CGCXXABI::RAA_Indirect) 2734 ++neededInt; 2735 return getIndirectResult(Ty, freeIntRegs); 2736 2737 case SSEUp: 2738 case X87Up: 2739 llvm_unreachable("Invalid classification for lo word."); 2740 2741 // AMD64-ABI 3.2.3p3: Rule 2. If the class is INTEGER, the next 2742 // available register of the sequence %rdi, %rsi, %rdx, %rcx, %r8 2743 // and %r9 is used. 2744 case Integer: 2745 ++neededInt; 2746 2747 // Pick an 8-byte type based on the preferred type. 2748 ResType = GetINTEGERTypeAtOffset(CGT.ConvertType(Ty), 0, Ty, 0); 2749 2750 // If we have a sign or zero extended integer, make sure to return Extend 2751 // so that the parameter gets the right LLVM IR attributes. 2752 if (Hi == NoClass && isa<llvm::IntegerType>(ResType)) { 2753 // Treat an enum type as its underlying type. 
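// [Illustrative example, not part of the original source.] Register assignment
// for a hypothetical prototyped call under this argument classification:
// ```
// struct S { double d; int i; };
// void f(int a, __m128 v, struct S s);
//   a -> Integer             -> %edi          (neededInt = 1)
//   v -> Lo=SSE, Hi=SSEUp    -> %xmm0         (neededSSE = 1)
//   s -> Lo=SSE, Hi=Integer  -> %xmm1 + %rsi  (neededInt = 1, neededSSE = 1)
// ```
// Once FreeIntRegs or FreeSSERegs is exhausted, computeInfo below reverts the
// whole argument to the stack instead of splitting it.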
2754 if (const EnumType *EnumTy = Ty->getAs<EnumType>()) 2755 Ty = EnumTy->getDecl()->getIntegerType(); 2756 2757 if (Ty->isIntegralOrEnumerationType() && 2758 isPromotableIntegerTypeForABI(Ty)) 2759 return ABIArgInfo::getExtend(Ty, CGT.ConvertType(Ty)); 2760 } 2761 2762 break; 2763 2764 // AMD64-ABI 3.2.3p3: Rule 3. If the class is SSE, the next 2765 // available SSE register is used, the registers are taken in the 2766 // order from %xmm0 to %xmm7. 2767 case SSE: { 2768 llvm::Type *IRType = CGT.ConvertType(Ty); 2769 ResType = GetSSETypeAtOffset(IRType, 0, Ty, 0); 2770 ++neededSSE; 2771 break; 2772 } 2773 } 2774 2775 llvm::Type *HighPart = nullptr; 2776 switch (Hi) { 2777 // Memory was handled previously, ComplexX87 and X87 should 2778 // never occur as hi classes, and X87Up must be preceded by X87, 2779 // which is passed in memory. 2780 case Memory: 2781 case X87: 2782 case ComplexX87: 2783 llvm_unreachable("Invalid classification for hi word."); 2784 2785 case NoClass: break; 2786 2787 case Integer: 2788 ++neededInt; 2789 // Pick an 8-byte type based on the preferred type. 2790 HighPart = GetINTEGERTypeAtOffset(CGT.ConvertType(Ty), 8, Ty, 8); 2791 2792 if (Lo == NoClass) // Pass HighPart at offset 8 in memory. 2793 return ABIArgInfo::getDirect(HighPart, 8); 2794 break; 2795 2796 // X87Up generally doesn't occur here (long double is passed in 2797 // memory), except in situations involving unions. 2798 case X87Up: 2799 case SSE: 2800 ++neededSSE; 2801 HighPart = GetSSETypeAtOffset(CGT.ConvertType(Ty), 8, Ty, 8); 2802 2803 if (Lo == NoClass) // Pass HighPart at offset 8 in memory. 2804 return ABIArgInfo::getDirect(HighPart, 8); 2805 break; 2806 2807 // AMD64-ABI 3.2.3p3: Rule 4. If the class is SSEUP, the 2808 // eightbyte is passed in the upper half of the last used SSE 2809 // register. This only happens when 128-bit vectors are passed. 2810 case SSEUp: 2811 assert(Lo == SSE && "Unexpected SSEUp classification"); 2812 ResType = GetByteVectorType(Ty); 2813 break; 2814 } 2815 2816 // If a high part was specified, merge it together with the low part. It is 2817 // known to pass in the high eightbyte of the result. 
We do this by forming a 2818 // first class struct aggregate with the high and low part: {low, high} 2819 if (HighPart) 2820 ResType = GetX86_64ByValArgumentPair(ResType, HighPart, getDataLayout()); 2821 2822 return ABIArgInfo::getDirect(ResType); 2823 } 2824 2825 ABIArgInfo 2826 X86_64ABIInfo::classifyRegCallStructTypeImpl(QualType Ty, unsigned &NeededInt, 2827 unsigned &NeededSSE, 2828 unsigned &MaxVectorWidth) const { 2829 auto RT = Ty->getAs<RecordType>(); 2830 assert(RT && "classifyRegCallStructType only valid with struct types"); 2831 2832 if (RT->getDecl()->hasFlexibleArrayMember()) 2833 return getIndirectReturnResult(Ty); 2834 2835 // Sum up bases 2836 if (auto CXXRD = dyn_cast<CXXRecordDecl>(RT->getDecl())) { 2837 if (CXXRD->isDynamicClass()) { 2838 NeededInt = NeededSSE = 0; 2839 return getIndirectReturnResult(Ty); 2840 } 2841 2842 for (const auto &I : CXXRD->bases()) 2843 if (classifyRegCallStructTypeImpl(I.getType(), NeededInt, NeededSSE, 2844 MaxVectorWidth) 2845 .isIndirect()) { 2846 NeededInt = NeededSSE = 0; 2847 return getIndirectReturnResult(Ty); 2848 } 2849 } 2850 2851 // Sum up members 2852 for (const auto *FD : RT->getDecl()->fields()) { 2853 QualType MTy = FD->getType(); 2854 if (MTy->isRecordType() && !MTy->isUnionType()) { 2855 if (classifyRegCallStructTypeImpl(MTy, NeededInt, NeededSSE, 2856 MaxVectorWidth) 2857 .isIndirect()) { 2858 NeededInt = NeededSSE = 0; 2859 return getIndirectReturnResult(Ty); 2860 } 2861 } else { 2862 unsigned LocalNeededInt, LocalNeededSSE; 2863 if (classifyArgumentType(MTy, UINT_MAX, LocalNeededInt, LocalNeededSSE, 2864 true, true) 2865 .isIndirect()) { 2866 NeededInt = NeededSSE = 0; 2867 return getIndirectReturnResult(Ty); 2868 } 2869 if (const auto *AT = getContext().getAsConstantArrayType(MTy)) 2870 MTy = AT->getElementType(); 2871 if (const auto *VT = MTy->getAs<VectorType>()) 2872 if (getContext().getTypeSize(VT) > MaxVectorWidth) 2873 MaxVectorWidth = getContext().getTypeSize(VT); 2874 NeededInt += LocalNeededInt; 2875 NeededSSE += LocalNeededSSE; 2876 } 2877 } 2878 2879 return ABIArgInfo::getDirect(); 2880 } 2881 2882 ABIArgInfo 2883 X86_64ABIInfo::classifyRegCallStructType(QualType Ty, unsigned &NeededInt, 2884 unsigned &NeededSSE, 2885 unsigned &MaxVectorWidth) const { 2886 2887 NeededInt = 0; 2888 NeededSSE = 0; 2889 MaxVectorWidth = 0; 2890 2891 return classifyRegCallStructTypeImpl(Ty, NeededInt, NeededSSE, 2892 MaxVectorWidth); 2893 } 2894 2895 void X86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const { 2896 2897 const unsigned CallingConv = FI.getCallingConvention(); 2898 // It is possible to force Win64 calling convention on any x86_64 target by 2899 // using __attribute__((ms_abi)). In such case to correctly emit Win64 2900 // compatible code delegate this call to WinX86_64ABIInfo::computeInfo. 2901 if (CallingConv == llvm::CallingConv::Win64) { 2902 WinX86_64ABIInfo Win64ABIInfo(CGT, AVXLevel); 2903 Win64ABIInfo.computeInfo(FI); 2904 return; 2905 } 2906 2907 bool IsRegCall = CallingConv == llvm::CallingConv::X86_RegCall; 2908 2909 // Keep track of the number of assigned registers. 2910 unsigned FreeIntRegs = IsRegCall ? 11 : 6; 2911 unsigned FreeSSERegs = IsRegCall ? 
16 : 8; 2912 unsigned NeededInt = 0, NeededSSE = 0, MaxVectorWidth = 0; 2913 2914 if (!::classifyReturnType(getCXXABI(), FI, *this)) { 2915 if (IsRegCall && FI.getReturnType()->getTypePtr()->isRecordType() && 2916 !FI.getReturnType()->getTypePtr()->isUnionType()) { 2917 FI.getReturnInfo() = classifyRegCallStructType( 2918 FI.getReturnType(), NeededInt, NeededSSE, MaxVectorWidth); 2919 if (FreeIntRegs >= NeededInt && FreeSSERegs >= NeededSSE) { 2920 FreeIntRegs -= NeededInt; 2921 FreeSSERegs -= NeededSSE; 2922 } else { 2923 FI.getReturnInfo() = getIndirectReturnResult(FI.getReturnType()); 2924 } 2925 } else if (IsRegCall && FI.getReturnType()->getAs<ComplexType>() && 2926 getContext().getCanonicalType(FI.getReturnType() 2927 ->getAs<ComplexType>() 2928 ->getElementType()) == 2929 getContext().LongDoubleTy) 2930 // Complex Long Double Type is passed in Memory when Regcall 2931 // calling convention is used. 2932 FI.getReturnInfo() = getIndirectReturnResult(FI.getReturnType()); 2933 else 2934 FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); 2935 } 2936 2937 // If the return value is indirect, then the hidden argument is consuming one 2938 // integer register. 2939 if (FI.getReturnInfo().isIndirect()) 2940 --FreeIntRegs; 2941 else if (NeededSSE && MaxVectorWidth > 0) 2942 FI.setMaxVectorWidth(MaxVectorWidth); 2943 2944 // The chain argument effectively gives us another free register. 2945 if (FI.isChainCall()) 2946 ++FreeIntRegs; 2947 2948 unsigned NumRequiredArgs = FI.getNumRequiredArgs(); 2949 // AMD64-ABI 3.2.3p3: Once arguments are classified, the registers 2950 // get assigned (in left-to-right order) for passing as follows... 2951 unsigned ArgNo = 0; 2952 for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end(); 2953 it != ie; ++it, ++ArgNo) { 2954 bool IsNamedArg = ArgNo < NumRequiredArgs; 2955 2956 if (IsRegCall && it->type->isStructureOrClassType()) 2957 it->info = classifyRegCallStructType(it->type, NeededInt, NeededSSE, 2958 MaxVectorWidth); 2959 else 2960 it->info = classifyArgumentType(it->type, FreeIntRegs, NeededInt, 2961 NeededSSE, IsNamedArg); 2962 2963 // AMD64-ABI 3.2.3p3: If there are no registers available for any 2964 // eightbyte of an argument, the whole argument is passed on the 2965 // stack. If registers have already been assigned for some 2966 // eightbytes of such an argument, the assignments get reverted. 2967 if (FreeIntRegs >= NeededInt && FreeSSERegs >= NeededSSE) { 2968 FreeIntRegs -= NeededInt; 2969 FreeSSERegs -= NeededSSE; 2970 if (MaxVectorWidth > FI.getMaxVectorWidth()) 2971 FI.setMaxVectorWidth(MaxVectorWidth); 2972 } else { 2973 it->info = getIndirectResult(it->type, FreeIntRegs); 2974 } 2975 } 2976 } 2977 2978 static Address EmitX86_64VAArgFromMemory(CodeGenFunction &CGF, 2979 Address VAListAddr, QualType Ty) { 2980 Address overflow_arg_area_p = 2981 CGF.Builder.CreateStructGEP(VAListAddr, 2, "overflow_arg_area_p"); 2982 llvm::Value *overflow_arg_area = 2983 CGF.Builder.CreateLoad(overflow_arg_area_p, "overflow_arg_area"); 2984 2985 // AMD64-ABI 3.5.7p5: Step 7. Align l->overflow_arg_area upwards to a 16 2986 // byte boundary if alignment needed by type exceeds 8 byte boundary. 2987 // It isn't stated explicitly in the standard, but in practice we use 2988 // alignment greater than 16 where necessary. 
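// [Illustrative example, not part of the original source.] For an overflow
// (memory) argument such as the one in this hypothetical snippet, assuming AVX
// is enabled:
// ```
// double sum(int n, ...) {
//   va_list ap;
//   va_start(ap, n);
//   __m256 v = va_arg(ap, __m256);   // unnamed 256-bit vector: Memory class
//   ...
// }
// ```
// the steps below round overflow_arg_area up to the 32-byte alignment of the
// type, fetch the value there, and then advance overflow_arg_area by 32 bytes
// (the size rounded up to a multiple of 8).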
2989 CharUnits Align = CGF.getContext().getTypeAlignInChars(Ty); 2990 if (Align > CharUnits::fromQuantity(8)) { 2991 overflow_arg_area = emitRoundPointerUpToAlignment(CGF, overflow_arg_area, 2992 Align); 2993 } 2994 2995 // AMD64-ABI 3.5.7p5: Step 8. Fetch type from l->overflow_arg_area. 2996 llvm::Type *LTy = CGF.ConvertTypeForMem(Ty); 2997 llvm::Value *Res = overflow_arg_area; 2998 2999 // AMD64-ABI 3.5.7p5: Step 9. Set l->overflow_arg_area to: 3000 // l->overflow_arg_area + sizeof(type). 3001 // AMD64-ABI 3.5.7p5: Step 10. Align l->overflow_arg_area upwards to 3002 // an 8 byte boundary. 3003 3004 uint64_t SizeInBytes = (CGF.getContext().getTypeSize(Ty) + 7) / 8; 3005 llvm::Value *Offset = 3006 llvm::ConstantInt::get(CGF.Int32Ty, (SizeInBytes + 7) & ~7); 3007 overflow_arg_area = CGF.Builder.CreateGEP(CGF.Int8Ty, overflow_arg_area, 3008 Offset, "overflow_arg_area.next"); 3009 CGF.Builder.CreateStore(overflow_arg_area, overflow_arg_area_p); 3010 3011 // AMD64-ABI 3.5.7p5: Step 11. Return the fetched type. 3012 return Address(Res, LTy, Align); 3013 } 3014 3015 RValue X86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, 3016 QualType Ty, AggValueSlot Slot) const { 3017 // Assume that va_list type is correct; should be pointer to LLVM type: 3018 // struct { 3019 // i32 gp_offset; 3020 // i32 fp_offset; 3021 // i8* overflow_arg_area; 3022 // i8* reg_save_area; 3023 // }; 3024 unsigned neededInt, neededSSE; 3025 3026 Ty = getContext().getCanonicalType(Ty); 3027 ABIArgInfo AI = classifyArgumentType(Ty, 0, neededInt, neededSSE, 3028 /*isNamedArg*/false); 3029 3030 // Empty records are ignored for parameter passing purposes. 3031 if (AI.isIgnore()) 3032 return Slot.asRValue(); 3033 3034 // AMD64-ABI 3.5.7p5: Step 1. Determine whether type may be passed 3035 // in the registers. If not go to step 7. 3036 if (!neededInt && !neededSSE) 3037 return CGF.EmitLoadOfAnyValue( 3038 CGF.MakeAddrLValue(EmitX86_64VAArgFromMemory(CGF, VAListAddr, Ty), Ty), 3039 Slot); 3040 3041 // AMD64-ABI 3.5.7p5: Step 2. Compute num_gp to hold the number of 3042 // general purpose registers needed to pass type and num_fp to hold 3043 // the number of floating point registers needed. 3044 3045 // AMD64-ABI 3.5.7p5: Step 3. Verify whether arguments fit into 3046 // registers. In the case: l->gp_offset > 48 - num_gp * 8 or 3047 // l->fp_offset > 304 - num_fp * 16 go to step 7. 3048 // 3049 // NOTE: 304 is a typo, there are (6 * 8 + 8 * 16) = 176 bytes of 3050 // register save space). 3051 3052 llvm::Value *InRegs = nullptr; 3053 Address gp_offset_p = Address::invalid(), fp_offset_p = Address::invalid(); 3054 llvm::Value *gp_offset = nullptr, *fp_offset = nullptr; 3055 if (neededInt) { 3056 gp_offset_p = CGF.Builder.CreateStructGEP(VAListAddr, 0, "gp_offset_p"); 3057 gp_offset = CGF.Builder.CreateLoad(gp_offset_p, "gp_offset"); 3058 InRegs = llvm::ConstantInt::get(CGF.Int32Ty, 48 - neededInt * 8); 3059 InRegs = CGF.Builder.CreateICmpULE(gp_offset, InRegs, "fits_in_gp"); 3060 } 3061 3062 if (neededSSE) { 3063 fp_offset_p = CGF.Builder.CreateStructGEP(VAListAddr, 1, "fp_offset_p"); 3064 fp_offset = CGF.Builder.CreateLoad(fp_offset_p, "fp_offset"); 3065 llvm::Value *FitsInFP = 3066 llvm::ConstantInt::get(CGF.Int32Ty, 176 - neededSSE * 16); 3067 FitsInFP = CGF.Builder.CreateICmpULE(fp_offset, FitsInFP, "fits_in_fp"); 3068 InRegs = InRegs ? 
  }

  llvm::BasicBlock *InRegBlock = CGF.createBasicBlock("vaarg.in_reg");
  llvm::BasicBlock *InMemBlock = CGF.createBasicBlock("vaarg.in_mem");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("vaarg.end");
  CGF.Builder.CreateCondBr(InRegs, InRegBlock, InMemBlock);

  // Emit code to load the value if it was passed in registers.

  CGF.EmitBlock(InRegBlock);

  // AMD64-ABI 3.5.7p5: Step 4. Fetch type from l->reg_save_area with
  // an offset of l->gp_offset and/or l->fp_offset. This may require
  // copying to a temporary location in case the parameter is passed
  // in different register classes or requires an alignment greater
  // than 8 for general purpose registers and 16 for XMM registers.
  //
  // FIXME: This really results in shameful code when we end up needing to
  // collect arguments from different places; often what should result in a
  // simple assembling of a structure from scattered addresses has many more
  // loads than necessary. Can we clean this up?
  llvm::Type *LTy = CGF.ConvertTypeForMem(Ty);
  llvm::Value *RegSaveArea = CGF.Builder.CreateLoad(
      CGF.Builder.CreateStructGEP(VAListAddr, 3), "reg_save_area");

  Address RegAddr = Address::invalid();
  if (neededInt && neededSSE) {
    // FIXME: Cleanup.
    assert(AI.isDirect() && "Unexpected ABI info for mixed regs");
    llvm::StructType *ST = cast<llvm::StructType>(AI.getCoerceToType());
    Address Tmp = CGF.CreateMemTemp(Ty);
    Tmp = Tmp.withElementType(ST);
    assert(ST->getNumElements() == 2 && "Unexpected ABI info for mixed regs");
    llvm::Type *TyLo = ST->getElementType(0);
    llvm::Type *TyHi = ST->getElementType(1);
    assert((TyLo->isFPOrFPVectorTy() ^ TyHi->isFPOrFPVectorTy()) &&
           "Unexpected ABI info for mixed regs");
    llvm::Value *GPAddr =
        CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, gp_offset);
    llvm::Value *FPAddr =
        CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, fp_offset);
    llvm::Value *RegLoAddr = TyLo->isFPOrFPVectorTy() ? FPAddr : GPAddr;
    llvm::Value *RegHiAddr = TyLo->isFPOrFPVectorTy() ? GPAddr : FPAddr;

    // Copy the first element.
    // FIXME: Our choice of alignment here and below is probably pessimistic.
    llvm::Value *V = CGF.Builder.CreateAlignedLoad(
        TyLo, RegLoAddr,
        CharUnits::fromQuantity(getDataLayout().getABITypeAlign(TyLo)));
    CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 0));

    // Copy the second element.
    V = CGF.Builder.CreateAlignedLoad(
        TyHi, RegHiAddr,
        CharUnits::fromQuantity(getDataLayout().getABITypeAlign(TyHi)));
    CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 1));

    RegAddr = Tmp.withElementType(LTy);
  } else if (neededInt || neededSSE == 1) {
    // Copy to a temporary if necessary to ensure the appropriate alignment.
    auto TInfo = getContext().getTypeInfoInChars(Ty);
    uint64_t TySize = TInfo.Width.getQuantity();
    CharUnits TyAlign = TInfo.Align;
    llvm::Type *CoTy = nullptr;
    if (AI.isDirect())
      CoTy = AI.getCoerceToType();

    llvm::Value *GpOrFpOffset = neededInt ? gp_offset : fp_offset;
    uint64_t Alignment = neededInt ? 8 : 16;
    uint64_t RegSize = neededInt ? neededInt * 8 : 16;
    // There are two cases that require special handling:
    // 1)
    // ```
    // struct {
    //   struct {} a[8];
    //   int b;
    // };
    // ```
    // The lower 8 bytes of the structure are not stored,
    // so an 8-byte offset is needed when accessing the structure.
    // 2)
    // ```
    // struct {
    //   long long a;
    //   struct {} b;
    // };
    // ```
    // The stored size of this structure is smaller than its actual size,
    // which may lead to reading past the end of the register save area.
    if (CoTy && (AI.getDirectOffset() == 8 || RegSize < TySize)) {
      Address Tmp = CGF.CreateMemTemp(Ty);
      llvm::Value *Addr =
          CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, GpOrFpOffset);
      llvm::Value *Src = CGF.Builder.CreateAlignedLoad(CoTy, Addr, TyAlign);
      llvm::Value *PtrOffset =
          llvm::ConstantInt::get(CGF.Int32Ty, AI.getDirectOffset());
      Address Dst = Address(
          CGF.Builder.CreateGEP(CGF.Int8Ty, Tmp.getBasePointer(), PtrOffset),
          LTy, TyAlign);
      CGF.Builder.CreateStore(Src, Dst);
      RegAddr = Tmp.withElementType(LTy);
    } else {
      RegAddr =
          Address(CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, GpOrFpOffset),
                  LTy, CharUnits::fromQuantity(Alignment));

      // Copy into a temporary if the type is more aligned than the
      // register save area.
      if (neededInt && TyAlign.getQuantity() > 8) {
        Address Tmp = CGF.CreateMemTemp(Ty);
        CGF.Builder.CreateMemCpy(Tmp, RegAddr, TySize, false);
        RegAddr = Tmp;
      }
    }

  } else {
    assert(neededSSE == 2 && "Invalid number of needed registers!");
    // SSE registers are spaced 16 bytes apart in the register save
    // area; we need to collect the two eightbytes together.
    // The ABI isn't explicit about this, but it seems reasonable
    // to assume that the slots are 16-byte aligned, since the stack is
    // naturally 16-byte aligned and the prologue is expected to store
    // all the SSE registers to the RSA.
    Address RegAddrLo = Address(CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea,
                                                      fp_offset),
                                CGF.Int8Ty, CharUnits::fromQuantity(16));
    Address RegAddrHi =
        CGF.Builder.CreateConstInBoundsByteGEP(RegAddrLo,
                                               CharUnits::fromQuantity(16));
    llvm::Type *ST = AI.canHaveCoerceToType()
                         ? AI.getCoerceToType()
                         : llvm::StructType::get(CGF.DoubleTy, CGF.DoubleTy);
    llvm::Value *V;
    Address Tmp = CGF.CreateMemTemp(Ty);
    Tmp = Tmp.withElementType(ST);
    V = CGF.Builder.CreateLoad(
        RegAddrLo.withElementType(ST->getStructElementType(0)));
    CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 0));
    V = CGF.Builder.CreateLoad(
        RegAddrHi.withElementType(ST->getStructElementType(1)));
    CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 1));

    RegAddr = Tmp.withElementType(LTy);
  }

  // AMD64-ABI 3.5.7p5: Step 5. Set:
  // l->gp_offset = l->gp_offset + num_gp * 8
  // l->fp_offset = l->fp_offset + num_fp * 16.
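  // For example, a struct classified as one INTEGER and one SSE eightbyte
  // consumes one GP slot and one XMM slot, so gp_offset advances by 8 and
  // fp_offset by 16 below, keeping the va_list bookkeeping consistent with
  // the 48-byte GP area and 176-byte register save area checked above.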
  if (neededInt) {
    llvm::Value *Offset = llvm::ConstantInt::get(CGF.Int32Ty, neededInt * 8);
    CGF.Builder.CreateStore(CGF.Builder.CreateAdd(gp_offset, Offset),
                            gp_offset_p);
  }
  if (neededSSE) {
    llvm::Value *Offset = llvm::ConstantInt::get(CGF.Int32Ty, neededSSE * 16);
    CGF.Builder.CreateStore(CGF.Builder.CreateAdd(fp_offset, Offset),
                            fp_offset_p);
  }
  CGF.EmitBranch(ContBlock);

  // Emit code to load the value if it was passed in memory.

  CGF.EmitBlock(InMemBlock);
  Address MemAddr = EmitX86_64VAArgFromMemory(CGF, VAListAddr, Ty);

  // Return the appropriate result.

  CGF.EmitBlock(ContBlock);
  Address ResAddr = emitMergePHI(CGF, RegAddr, InRegBlock, MemAddr, InMemBlock,
                                 "vaarg.addr");
  return CGF.EmitLoadOfAnyValue(CGF.MakeAddrLValue(ResAddr, Ty), Slot);
}

RValue X86_64ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
                                  QualType Ty, AggValueSlot Slot) const {
  // MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is
  // not 1, 2, 4, or 8 bytes, must be passed by reference."
  uint64_t Width = getContext().getTypeSize(Ty);
  bool IsIndirect = Width > 64 || !llvm::isPowerOf2_64(Width);

  return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect,
                          CGF.getContext().getTypeInfoInChars(Ty),
                          CharUnits::fromQuantity(8),
                          /*allowHigherAlign*/ false, Slot);
}

ABIArgInfo WinX86_64ABIInfo::reclassifyHvaArgForVectorCall(
    QualType Ty, unsigned &FreeSSERegs, const ABIArgInfo &current) const {
  const Type *Base = nullptr;
  uint64_t NumElts = 0;

  if (!Ty->isBuiltinType() && !Ty->isVectorType() &&
      isHomogeneousAggregate(Ty, Base, NumElts) && FreeSSERegs >= NumElts) {
    FreeSSERegs -= NumElts;
    return getDirectX86Hva();
  }
  return current;
}

ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, unsigned &FreeSSERegs,
                                      bool IsReturnType, bool IsVectorCall,
                                      bool IsRegCall) const {

  if (Ty->isVoidType())
    return ABIArgInfo::getIgnore();

  if (const EnumType *EnumTy = Ty->getAs<EnumType>())
    Ty = EnumTy->getDecl()->getIntegerType();

  TypeInfo Info = getContext().getTypeInfo(Ty);
  uint64_t Width = Info.Width;
  CharUnits Align = getContext().toCharUnitsFromBits(Info.Align);

  const RecordType *RT = Ty->getAs<RecordType>();
  if (RT) {
    if (!IsReturnType) {
      if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI()))
        return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
    }

    if (RT->getDecl()->hasFlexibleArrayMember())
      return getNaturalAlignIndirect(Ty, /*ByVal=*/false);

  }

  const Type *Base = nullptr;
  uint64_t NumElts = 0;
  // vectorcall adds the concept of a homogeneous vector aggregate, similar to
  // other targets.
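  // Illustrative example of what this matches: a record whose members all
  // have the same vector type, such as
  //   struct HVA4 { __m128 x, y, z, w; };
  // is a homogeneous vector aggregate with Base == __m128 and NumElts == 4,
  // and is eligible for direct SSE register passing below as long as enough
  // FreeSSERegs remain.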
  if ((IsVectorCall || IsRegCall) &&
      isHomogeneousAggregate(Ty, Base, NumElts)) {
    if (IsRegCall) {
      if (FreeSSERegs >= NumElts) {
        FreeSSERegs -= NumElts;
        if (IsReturnType || Ty->isBuiltinType() || Ty->isVectorType())
          return ABIArgInfo::getDirect();
        return ABIArgInfo::getExpand();
      }
      return ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
    } else if (IsVectorCall) {
      if (FreeSSERegs >= NumElts &&
          (IsReturnType || Ty->isBuiltinType() || Ty->isVectorType())) {
        FreeSSERegs -= NumElts;
        return ABIArgInfo::getDirect();
      } else if (IsReturnType) {
        return ABIArgInfo::getExpand();
      } else if (!Ty->isBuiltinType() && !Ty->isVectorType()) {
        // HVAs are delayed and reclassified in the 2nd step.
        return ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
      }
    }
  }

  if (Ty->isMemberPointerType()) {
    // If the member pointer is represented by an LLVM int or ptr, pass it
    // directly.
    llvm::Type *LLTy = CGT.ConvertType(Ty);
    if (LLTy->isPointerTy() || LLTy->isIntegerTy())
      return ABIArgInfo::getDirect();
  }

  if (RT || Ty->isAnyComplexType() || Ty->isMemberPointerType()) {
    // MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is
    // not 1, 2, 4, or 8 bytes, must be passed by reference."
    if (Width > 64 || !llvm::isPowerOf2_64(Width))
      return getNaturalAlignIndirect(Ty, /*ByVal=*/false);

    // Otherwise, coerce it to a small integer.
    return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Width));
  }

  if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
    switch (BT->getKind()) {
    case BuiltinType::Bool:
      // Bool is always extended for the ABI; other builtin types are not
      // extended.
      return ABIArgInfo::getExtend(Ty);

    case BuiltinType::LongDouble:
      // Mingw64 GCC uses the old 80-bit extended precision floating point
      // unit and passes long double indirectly, through memory.
      if (IsMingw64) {
        const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat();
        if (LDF == &llvm::APFloat::x87DoubleExtended())
          return ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
      }
      break;

    case BuiltinType::Int128:
    case BuiltinType::UInt128:
      // If it's a parameter type, the normal ABI rule is that arguments larger
      // than 8 bytes are passed indirectly. GCC follows it. We follow it too,
      // even though it isn't particularly efficient.
      if (!IsReturnType)
        return ABIArgInfo::getIndirect(Align, /*ByVal=*/false);

      // Mingw64 GCC returns i128 in XMM0. Coerce to v2i64 to handle that.
      // Clang matches it for compatibility.
      return ABIArgInfo::getDirect(llvm::FixedVectorType::get(
          llvm::Type::getInt64Ty(getVMContext()), 2));

    default:
      break;
    }
  }

  if (Ty->isBitIntType()) {
    // MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is
    // not 1, 2, 4, or 8 bytes, must be passed by reference."
    // However, non-power-of-two bit-precise integers will be passed as 1, 2, 4,
    // or 8 bytes anyway as long as they fit in them, so we don't have to check
    // the power of 2.
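    // For example, a _BitInt(24) (which occupies a 4-byte slot under the rule
    // above) is handled directly, while a _BitInt(128) exceeds 8 bytes and is
    // passed indirectly below.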
    if (Width <= 64)
      return ABIArgInfo::getDirect();
    return ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
  }

  return ABIArgInfo::getDirect();
}

void WinX86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const {
  const unsigned CC = FI.getCallingConvention();
  bool IsVectorCall = CC == llvm::CallingConv::X86_VectorCall;
  bool IsRegCall = CC == llvm::CallingConv::X86_RegCall;

  // If __attribute__((sysv_abi)) is in use, use the SysV argument
  // classification rules.
  if (CC == llvm::CallingConv::X86_64_SysV) {
    X86_64ABIInfo SysVABIInfo(CGT, AVXLevel);
    SysVABIInfo.computeInfo(FI);
    return;
  }

  unsigned FreeSSERegs = 0;
  if (IsVectorCall) {
    // We can use up to 4 SSE return registers with vectorcall.
    FreeSSERegs = 4;
  } else if (IsRegCall) {
    // RegCall gives us 16 SSE registers.
    FreeSSERegs = 16;
  }

  if (!getCXXABI().classifyReturnType(FI))
    FI.getReturnInfo() = classify(FI.getReturnType(), FreeSSERegs, true,
                                  IsVectorCall, IsRegCall);

  if (IsVectorCall) {
    // We can use up to 6 SSE register parameters with vectorcall.
    FreeSSERegs = 6;
  } else if (IsRegCall) {
    // RegCall gives us 16 SSE registers; we can reuse the return registers.
    FreeSSERegs = 16;
  }

  unsigned ArgNum = 0;
  unsigned ZeroSSERegs = 0;
  for (auto &I : FI.arguments()) {
    // Vectorcall in x64 only permits the first 6 arguments to be passed as
    // XMM/YMM registers. After the sixth argument, pretend no vector
    // registers are left.
    unsigned *MaybeFreeSSERegs =
        (IsVectorCall && ArgNum >= 6) ? &ZeroSSERegs : &FreeSSERegs;
    I.info =
        classify(I.type, *MaybeFreeSSERegs, false, IsVectorCall, IsRegCall);
    ++ArgNum;
  }

  if (IsVectorCall) {
    // For vectorcall, assign aggregate HVAs to any free vector registers in a
    // second pass.
    for (auto &I : FI.arguments())
      I.info = reclassifyHvaArgForVectorCall(I.type, FreeSSERegs, I.info);
  }
}

RValue WinX86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                                   QualType Ty, AggValueSlot Slot) const {
  // MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is
  // not 1, 2, 4, or 8 bytes, must be passed by reference."
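  // Concretely, a 12-byte or 32-byte struct is passed by reference in the
  // Win64 variadic convention, so va_arg first loads a pointer from the
  // argument area and then dereferences it; an 8-byte struct is read from the
  // argument area directly.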
  uint64_t Width = getContext().getTypeSize(Ty);
  bool IsIndirect = Width > 64 || !llvm::isPowerOf2_64(Width);

  return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect,
                          CGF.getContext().getTypeInfoInChars(Ty),
                          CharUnits::fromQuantity(8),
                          /*allowHigherAlign*/ false, Slot);
}

std::unique_ptr<TargetCodeGenInfo> CodeGen::createX86_32TargetCodeGenInfo(
    CodeGenModule &CGM, bool DarwinVectorABI, bool Win32StructABI,
    unsigned NumRegisterParameters, bool SoftFloatABI) {
  bool RetSmallStructInRegABI = X86_32TargetCodeGenInfo::isStructReturnInRegABI(
      CGM.getTriple(), CGM.getCodeGenOpts());
  return std::make_unique<X86_32TargetCodeGenInfo>(
      CGM.getTypes(), DarwinVectorABI, RetSmallStructInRegABI, Win32StructABI,
      NumRegisterParameters, SoftFloatABI);
}

std::unique_ptr<TargetCodeGenInfo> CodeGen::createWinX86_32TargetCodeGenInfo(
    CodeGenModule &CGM, bool DarwinVectorABI, bool Win32StructABI,
    unsigned NumRegisterParameters) {
  bool RetSmallStructInRegABI = X86_32TargetCodeGenInfo::isStructReturnInRegABI(
      CGM.getTriple(), CGM.getCodeGenOpts());
  return std::make_unique<WinX86_32TargetCodeGenInfo>(
      CGM.getTypes(), DarwinVectorABI, RetSmallStructInRegABI, Win32StructABI,
      NumRegisterParameters);
}

std::unique_ptr<TargetCodeGenInfo>
CodeGen::createX86_64TargetCodeGenInfo(CodeGenModule &CGM,
                                       X86AVXABILevel AVXLevel) {
  return std::make_unique<X86_64TargetCodeGenInfo>(CGM.getTypes(), AVXLevel);
}

std::unique_ptr<TargetCodeGenInfo>
CodeGen::createWinX86_64TargetCodeGenInfo(CodeGenModule &CGM,
                                          X86AVXABILevel AVXLevel) {
  return std::make_unique<WinX86_64TargetCodeGenInfo>(CGM.getTypes(), AVXLevel);
}
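// A minimal usage sketch (illustrative only, not part of this file): callers
// that already have a CodeGenModule pick one of the factories above based on
// the target triple, roughly:
//
//   std::unique_ptr<TargetCodeGenInfo> Info;
//   if (Triple.getArch() == llvm::Triple::x86_64)
//     Info = Triple.isOSWindows()
//                ? CodeGen::createWinX86_64TargetCodeGenInfo(CGM, AVXLevel)
//                : CodeGen::createX86_64TargetCodeGenInfo(CGM, AVXLevel);
//
// The actual selection logic, along with the DarwinVectorABI / Win32StructABI
// / SoftFloatABI flags for the 32-bit variants, lives with CodeGenModule's
// target setup rather than in this file.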