//===-- Target.cpp --------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
//
//===----------------------------------------------------------------------===//

#include "flang/Optimizer/CodeGen/Target.h"
#include "flang/Optimizer/Builder/Todo.h"
#include "flang/Optimizer/Dialect/FIRType.h"
#include "flang/Optimizer/Dialect/Support/KindMapping.h"
#include "flang/Optimizer/Support/FatalError.h"
#include "flang/Optimizer/Support/Utils.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/TypeRange.h"
#include "llvm/ADT/TypeSwitch.h"

#define DEBUG_TYPE "flang-codegen-target"

using namespace fir;

namespace fir::details {
llvm::StringRef Attributes::getIntExtensionAttrName() const {
  // The attribute names are available via LLVM dialect interfaces
  // like getZExtAttrName(), getByValAttrName(), etc., so it is better
  // to use them than string literals.
  if (isZeroExt())
    return "llvm.zeroext";
  else if (isSignExt())
    return "llvm.signext";
  return {};
}
} // namespace fir::details
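
// Illustrative note (not exercised by this file directly): when one of these
// names is attached to a function argument, it is translated to the matching
// LLVM IR parameter attribute. Assuming an i16 argument marked with
// "llvm.signext", the lowered declaration would look roughly like:
//   declare void @f(i16 signext %a)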

// Reduce a REAL/float type to the floating point semantics.
static const llvm::fltSemantics &floatToSemantics(const KindMapping &kindMap,
                                                  mlir::Type type) {
  assert(isa_real(type));
  return mlir::cast<mlir::FloatType>(type).getFloatSemantics();
}

static void typeTodo(const llvm::fltSemantics *sem, mlir::Location loc,
                     const std::string &context) {
  if (sem == &llvm::APFloat::IEEEhalf()) {
    TODO(loc, "COMPLEX(KIND=2): " + context + " type");
  } else if (sem == &llvm::APFloat::BFloat()) {
    TODO(loc, "COMPLEX(KIND=3): " + context + " type");
  } else if (sem == &llvm::APFloat::x87DoubleExtended()) {
    TODO(loc, "COMPLEX(KIND=10): " + context + " type");
  } else {
    TODO(loc, "complex for this precision for " + context + " type");
  }
}

namespace {
template <typename S>
struct GenericTarget : public CodeGenSpecifics {
  using CodeGenSpecifics::CodeGenSpecifics;
  using AT = CodeGenSpecifics::Attributes;

  mlir::Type complexMemoryType(mlir::Type eleTy) const override {
    assert(fir::isa_real(eleTy));
    // Use a type that will be translated into LLVM as:
    // { t, t }   struct of 2 eleTy
    return mlir::TupleType::get(eleTy.getContext(),
                                mlir::TypeRange{eleTy, eleTy});
  }

  mlir::Type boxcharMemoryType(mlir::Type eleTy) const override {
    auto idxTy = mlir::IntegerType::get(eleTy.getContext(), S::defaultWidth);
    auto ptrTy = fir::ReferenceType::get(eleTy);
    // Use a type that will be translated into LLVM as:
    // { t*, index }
    return mlir::TupleType::get(eleTy.getContext(),
                                mlir::TypeRange{ptrTy, idxTy});
  }

  Marshalling boxcharArgumentType(mlir::Type eleTy, bool sret) const override {
    CodeGenSpecifics::Marshalling marshal;
    auto idxTy = mlir::IntegerType::get(eleTy.getContext(), S::defaultWidth);
    auto ptrTy = fir::ReferenceType::get(eleTy);
    marshal.emplace_back(ptrTy, AT{});
    // Return value arguments are grouped as a pair. Others are passed in a
    // split format with all pointers first (in the declared position) and all
    // LEN arguments appended after all of the dummy arguments.
    // NB: Other conventions/ABIs can/should be supported via options.
    marshal.emplace_back(idxTy, AT{/*alignment=*/0, /*byval=*/false,
                                   /*sret=*/sret, /*append=*/!sret});
    return marshal;
  }
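
  // Illustrative example (assumed lowering, following the comment above): a
  // procedure such as
  //   subroutine s(c1, n, c2)
  //     character(*) :: c1, c2
  //     integer :: n
  // keeps the character pointers in their declared positions and appends the
  // LEN values, giving an interface roughly equivalent to the C prototype:
  //   void s_(char *c1, int *n, char *c2, size_t c1_len, size_t c2_len);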

  CodeGenSpecifics::Marshalling
  structArgumentType(mlir::Location loc, fir::RecordType,
                     const Marshalling &) const override {
    TODO(loc, "passing VALUE BIND(C) derived type for this target");
  }

  CodeGenSpecifics::Marshalling
  structReturnType(mlir::Location loc, fir::RecordType ty) const override {
    TODO(loc, "returning BIND(C) derived type for this target");
  }

  CodeGenSpecifics::Marshalling
  integerArgumentType(mlir::Location loc,
                      mlir::IntegerType argTy) const override {
    CodeGenSpecifics::Marshalling marshal;
    AT::IntegerExtension intExt = AT::IntegerExtension::None;
    if (argTy.getWidth() < getCIntTypeWidth()) {
      // The isSigned() and isUnsigned() branches below are dead code
      // currently. If needed, we can generate calls with signed/unsigned
      // argument types to more precisely match the C side (e.g. for Fortran
      // runtime functions with 'unsigned short' arguments).
      if (argTy.isSigned())
        intExt = AT::IntegerExtension::Sign;
      else if (argTy.isUnsigned())
        intExt = AT::IntegerExtension::Zero;
      else if (argTy.isSignless()) {
        // Zero extend for 'i1' and sign extend for other types.
        if (argTy.getWidth() == 1)
          intExt = AT::IntegerExtension::Zero;
        else
          intExt = AT::IntegerExtension::Sign;
      }
    }

    marshal.emplace_back(argTy, AT{/*alignment=*/0, /*byval=*/false,
                                   /*sret=*/false, /*append=*/false,
                                   /*intExt=*/intExt});
    return marshal;
  }
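
  // For illustration: with the 32-bit C int width returned below, an i16
  // argument is tagged for sign extension and an i1 argument for zero
  // extension, so a call would lower to LLVM IR roughly like:
  //   declare void @f(i16 signext %a, i1 zeroext %b)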

  CodeGenSpecifics::Marshalling
  integerReturnType(mlir::Location loc,
                    mlir::IntegerType argTy) const override {
    return integerArgumentType(loc, argTy);
  }

  // The width of the 'int' type is 32 bits for almost all targets, except
  // for AVR and MSP430 (see the TargetInfo initializations
  // in clang/lib/Basic/Targets).
  unsigned char getCIntTypeWidth() const override { return 32; }
};
} // namespace

//===----------------------------------------------------------------------===//
// i386 (x86 32 bit) linux target specifics.
//===----------------------------------------------------------------------===//

namespace {
struct TargetI386 : public GenericTarget<TargetI386> {
  using GenericTarget::GenericTarget;

  static constexpr int defaultWidth = 32;

  CodeGenSpecifics::Marshalling
  complexArgumentType(mlir::Location, mlir::Type eleTy) const override {
    assert(fir::isa_real(eleTy));
    CodeGenSpecifics::Marshalling marshal;
    // Use a type that will be translated into LLVM as:
    // { t, t }   struct of 2 eleTy, byval, align 4
    auto structTy =
        mlir::TupleType::get(eleTy.getContext(), mlir::TypeRange{eleTy, eleTy});
    marshal.emplace_back(fir::ReferenceType::get(structTy),
                         AT{/*alignment=*/4, /*byval=*/true});
    return marshal;
  }

  CodeGenSpecifics::Marshalling
  complexReturnType(mlir::Location loc, mlir::Type eleTy) const override {
    assert(fir::isa_real(eleTy));
    CodeGenSpecifics::Marshalling marshal;
    const auto *sem = &floatToSemantics(kindMap, eleTy);
    if (sem == &llvm::APFloat::IEEEsingle()) {
      // i64   pack both floats in a 64-bit GPR
      marshal.emplace_back(mlir::IntegerType::get(eleTy.getContext(), 64),
                           AT{});
    } else if (sem == &llvm::APFloat::IEEEdouble()) {
      // Use a type that will be translated into LLVM as:
      // { t, t }   struct of 2 eleTy, sret, align 4
      auto structTy = mlir::TupleType::get(eleTy.getContext(),
                                           mlir::TypeRange{eleTy, eleTy});
      marshal.emplace_back(fir::ReferenceType::get(structTy),
                           AT{/*alignment=*/4, /*byval=*/false, /*sret=*/true});
    } else {
      typeTodo(sem, loc, "return");
    }
    return marshal;
  }
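
  // Illustrative summary of the cases above: on 32-bit x86 Linux a COMPLEX(4)
  // result comes back packed in one i64 (in eax/edx), while a COMPLEX(8)
  // result goes through a hidden sret pointer, roughly:
  //   declare i64 @ret_c4()
  //   declare void @ret_c8(ptr sret({ double, double }))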
};
} // namespace

//===----------------------------------------------------------------------===//
// i386 (x86 32 bit) Windows target specifics.
//===----------------------------------------------------------------------===//

namespace {
struct TargetI386Win : public GenericTarget<TargetI386Win> {
  using GenericTarget::GenericTarget;

  static constexpr int defaultWidth = 32;

  CodeGenSpecifics::Marshalling
  complexArgumentType(mlir::Location loc, mlir::Type eleTy) const override {
    CodeGenSpecifics::Marshalling marshal;
    // Use a type that will be translated into LLVM as:
    // { t, t }   struct of 2 eleTy, byval, align 4
    auto structTy =
        mlir::TupleType::get(eleTy.getContext(), mlir::TypeRange{eleTy, eleTy});
    marshal.emplace_back(fir::ReferenceType::get(structTy),
                         AT{/*align=*/4, /*byval=*/true});
    return marshal;
  }

  CodeGenSpecifics::Marshalling
  complexReturnType(mlir::Location loc, mlir::Type eleTy) const override {
    CodeGenSpecifics::Marshalling marshal;
    const auto *sem = &floatToSemantics(kindMap, eleTy);
    if (sem == &llvm::APFloat::IEEEsingle()) {
      // i64   pack both floats in a 64-bit GPR
      marshal.emplace_back(mlir::IntegerType::get(eleTy.getContext(), 64),
                           AT{});
    } else if (sem == &llvm::APFloat::IEEEdouble()) {
      // Use a type that will be translated into LLVM as:
      // { double, double }   struct of 2 double, sret, align 8
      marshal.emplace_back(
          fir::ReferenceType::get(mlir::TupleType::get(
              eleTy.getContext(), mlir::TypeRange{eleTy, eleTy})),
          AT{/*align=*/8, /*byval=*/false, /*sret=*/true});
    } else if (sem == &llvm::APFloat::IEEEquad()) {
      // Use a type that will be translated into LLVM as:
      // { fp128, fp128 }   struct of 2 fp128, sret, align 16
      marshal.emplace_back(
          fir::ReferenceType::get(mlir::TupleType::get(
              eleTy.getContext(), mlir::TypeRange{eleTy, eleTy})),
          AT{/*align=*/16, /*byval=*/false, /*sret=*/true});
    } else if (sem == &llvm::APFloat::x87DoubleExtended()) {
      // Use a type that will be translated into LLVM as:
      // { x86_fp80, x86_fp80 }   struct of 2 x86_fp80, sret, align 4
      marshal.emplace_back(
          fir::ReferenceType::get(mlir::TupleType::get(
              eleTy.getContext(), mlir::TypeRange{eleTy, eleTy})),
          AT{/*align=*/4, /*byval=*/false, /*sret=*/true});
    } else {
      typeTodo(sem, loc, "return");
    }
    return marshal;
  }
};
} // namespace

//===----------------------------------------------------------------------===//
// x86_64 (x86 64 bit) linux target specifics.
//===----------------------------------------------------------------------===//

namespace {
struct TargetX86_64 : public GenericTarget<TargetX86_64> {
  using GenericTarget::GenericTarget;

  static constexpr int defaultWidth = 64;

  CodeGenSpecifics::Marshalling
  complexArgumentType(mlir::Location loc, mlir::Type eleTy) const override {
    CodeGenSpecifics::Marshalling marshal;
    const auto *sem = &floatToSemantics(kindMap, eleTy);
    if (sem == &llvm::APFloat::IEEEsingle()) {
      // <2 x t>   vector of 2 eleTy
      marshal.emplace_back(fir::VectorType::get(2, eleTy), AT{});
    } else if (sem == &llvm::APFloat::IEEEdouble()) {
      // FIXME: In case of SSE register exhaustion, the ABI here may be
      // incorrect since LLVM may pass the real part via a register and the
      // imaginary part via the stack, while per the ABI both should be in
      // registers or both in memory. Register occupancy must be analyzed
      // here.
      // two distinct double arguments
      marshal.emplace_back(eleTy, AT{});
      marshal.emplace_back(eleTy, AT{});
    } else if (sem == &llvm::APFloat::x87DoubleExtended()) {
      // Use a type that will be translated into LLVM as:
      // { x86_fp80, x86_fp80 }   struct of 2 x86_fp80, byval, align 16
      marshal.emplace_back(
          fir::ReferenceType::get(mlir::TupleType::get(
              eleTy.getContext(), mlir::TypeRange{eleTy, eleTy})),
          AT{/*align=*/16, /*byval=*/true});
    } else if (sem == &llvm::APFloat::IEEEquad()) {
      // Use a type that will be translated into LLVM as:
      // { fp128, fp128 }   struct of 2 fp128, byval, align 16
      marshal.emplace_back(
          fir::ReferenceType::get(mlir::TupleType::get(
              eleTy.getContext(), mlir::TypeRange{eleTy, eleTy})),
          AT{/*align=*/16, /*byval=*/true});
    } else {
      typeTodo(sem, loc, "argument");
    }
    return marshal;
  }
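
  // Illustrative summary of the cases above: a COMPLEX(4) value argument
  // occupies one SSE register while a COMPLEX(8) argument occupies two,
  // roughly:
  //   declare void @arg_c4(<2 x float>)
  //   declare void @arg_c8(double, double)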

  CodeGenSpecifics::Marshalling
  complexReturnType(mlir::Location loc, mlir::Type eleTy) const override {
    CodeGenSpecifics::Marshalling marshal;
    const auto *sem = &floatToSemantics(kindMap, eleTy);
    if (sem == &llvm::APFloat::IEEEsingle()) {
      // <2 x t>   vector of 2 eleTy
      marshal.emplace_back(fir::VectorType::get(2, eleTy), AT{});
    } else if (sem == &llvm::APFloat::IEEEdouble()) {
      // Use a type that will be translated into LLVM as:
      // { double, double }   struct of 2 double
      marshal.emplace_back(mlir::TupleType::get(eleTy.getContext(),
                                                mlir::TypeRange{eleTy, eleTy}),
                           AT{});
    } else if (sem == &llvm::APFloat::x87DoubleExtended()) {
      // { x86_fp80, x86_fp80 }
      marshal.emplace_back(mlir::TupleType::get(eleTy.getContext(),
                                                mlir::TypeRange{eleTy, eleTy}),
                           AT{});
    } else if (sem == &llvm::APFloat::IEEEquad()) {
      // Use a type that will be translated into LLVM as:
      // { fp128, fp128 }   struct of 2 fp128, sret, align 16
      marshal.emplace_back(
          fir::ReferenceType::get(mlir::TupleType::get(
              eleTy.getContext(), mlir::TypeRange{eleTy, eleTy})),
          AT{/*align=*/16, /*byval=*/false, /*sret=*/true});
    } else {
      typeTodo(sem, loc, "return");
    }
    return marshal;
  }

  /// X86-64 argument classes from System V ABI version 1.0 section 3.2.3.
  enum ArgClass {
    Integer = 0,
    SSE,
    SSEUp,
    X87,
    X87Up,
    ComplexX87,
    NoClass,
    Memory
  };
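
  // Worked example (illustrative): for a derived type equivalent to the C
  // struct `{ int a; double b; }`, the first eightbyte (the int plus padding)
  // classifies as Integer and the second eightbyte (the double) as SSE, so
  // the value is passed in one GPR and one SSE register.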

  /// Classify an argument type or a field of an aggregate type argument.
  /// See System V ABI version 1.0 section 3.2.3.
  /// On return, Lo and Hi are set to the classes of the lower and upper
  /// eightbytes of the argument.
  /// If this is called for an aggregate field, the caller is responsible
  /// for doing the post-merge.
  void classify(mlir::Location loc, mlir::Type type, std::uint64_t byteOffset,
                ArgClass &Lo, ArgClass &Hi) const {
    Hi = Lo = ArgClass::NoClass;
    ArgClass &current = byteOffset < 8 ? Lo : Hi;
    // System V AMD64 ABI version 1.0 section 3.2.3.
    llvm::TypeSwitch<mlir::Type>(type)
        .template Case<mlir::IntegerType>([&](mlir::IntegerType intTy) {
          if (intTy.getWidth() == 128)
            Hi = Lo = ArgClass::Integer;
          else
            current = ArgClass::Integer;
        })
        .template Case<mlir::FloatType>([&](mlir::Type floatTy) {
          const auto *sem = &floatToSemantics(kindMap, floatTy);
          if (sem == &llvm::APFloat::x87DoubleExtended()) {
            Lo = ArgClass::X87;
            Hi = ArgClass::X87Up;
          } else if (sem == &llvm::APFloat::IEEEquad()) {
            Lo = ArgClass::SSE;
            Hi = ArgClass::SSEUp;
          } else {
            current = ArgClass::SSE;
          }
        })
        .template Case<mlir::ComplexType>([&](mlir::ComplexType cmplx) {
          const auto *sem = &floatToSemantics(kindMap, cmplx.getElementType());
          if (sem == &llvm::APFloat::x87DoubleExtended()) {
            current = ArgClass::ComplexX87;
          } else {
            fir::SequenceType::Shape shape{2};
            classifyArray(loc,
                          fir::SequenceType::get(shape, cmplx.getElementType()),
                          byteOffset, Lo, Hi);
          }
        })
        .template Case<fir::LogicalType>([&](fir::LogicalType logical) {
          if (kindMap.getLogicalBitsize(logical.getFKind()) == 128)
            Hi = Lo = ArgClass::Integer;
          else
            current = ArgClass::Integer;
        })
        .template Case<fir::CharacterType>(
            [&](fir::CharacterType character) { current = ArgClass::Integer; })
        .template Case<fir::SequenceType>([&](fir::SequenceType seqTy) {
          // Array component.
          classifyArray(loc, seqTy, byteOffset, Lo, Hi);
        })
        .template Case<fir::RecordType>([&](fir::RecordType recTy) {
          // Component that is a derived type.
          classifyStruct(loc, recTy, byteOffset, Lo, Hi);
        })
        .template Case<fir::VectorType>([&](fir::VectorType vecTy) {
          // An SSE eightbyte that was marshalled for a previous struct
          // argument.
          auto *sem = fir::isa_real(vecTy.getEleTy())
                          ? &floatToSemantics(kindMap, vecTy.getEleTy())
                          : nullptr;
          // Not expecting to hit this todo in standard code (it would
          // require some vector type extension).
          if (!(sem == &llvm::APFloat::IEEEsingle() && vecTy.getLen() <= 2) &&
              !(sem == &llvm::APFloat::IEEEhalf() && vecTy.getLen() <= 4))
            TODO(loc, "passing vector argument to C by value");
          current = SSE;
        })
        .Default([&](mlir::Type ty) {
          if (fir::conformsWithPassByRef(ty))
            current = ArgClass::Integer; // Pointers.
          else
            TODO(loc, "unsupported component type for BIND(C), VALUE derived "
                      "type argument");
        });
  }

  // Classify the fields of a derived type starting at \p byteOffset. Returns
  // the new offset. Post-merge is left to the caller.
  std::uint64_t classifyStruct(mlir::Location loc, fir::RecordType recTy,
                               std::uint64_t byteOffset, ArgClass &Lo,
                               ArgClass &Hi) const {
    for (auto component : recTy.getTypeList()) {
      if (byteOffset > 16) {
        // See 3.2.3 p. 1 and note 15. Note that when the offset is bigger
        // than 16 bytes here, it is not a single _m256 or _m512 entity
        // that could fit in AVX registers.
        Lo = Hi = ArgClass::Memory;
        return byteOffset;
      }
      mlir::Type compType = component.second;
      auto [compSize, compAlign] = fir::getTypeSizeAndAlignmentOrCrash(
          loc, compType, getDataLayout(), kindMap);
      byteOffset = llvm::alignTo(byteOffset, compAlign);
      ArgClass LoComp, HiComp;
      classify(loc, compType, byteOffset, LoComp, HiComp);
      Lo = mergeClass(Lo, LoComp);
      Hi = mergeClass(Hi, HiComp);
      byteOffset = byteOffset + llvm::alignTo(compSize, compAlign);
      if (Lo == ArgClass::Memory || Hi == ArgClass::Memory)
        return byteOffset;
    }
    return byteOffset;
  }

  // Classify the elements of a constant size array type starting at
  // \p byteOffset. Post-merge is left to the caller.
  void classifyArray(mlir::Location loc, fir::SequenceType seqTy,
                     std::uint64_t byteOffset, ArgClass &Lo,
                     ArgClass &Hi) const {
    mlir::Type eleTy = seqTy.getEleTy();
    const std::uint64_t arraySize = seqTy.getConstantArraySize();
    auto [eleSize, eleAlign] = fir::getTypeSizeAndAlignmentOrCrash(
        loc, eleTy, getDataLayout(), kindMap);
    std::uint64_t eleStorageSize = llvm::alignTo(eleSize, eleAlign);
    for (std::uint64_t i = 0; i < arraySize; ++i) {
      byteOffset = llvm::alignTo(byteOffset, eleAlign);
      if (byteOffset > 16) {
        // See 3.2.3 p. 1 and note 15. Same as in classifyStruct.
        Lo = Hi = ArgClass::Memory;
        return;
      }
      ArgClass LoComp, HiComp;
      classify(loc, eleTy, byteOffset, LoComp, HiComp);
      Lo = mergeClass(Lo, LoComp);
      Hi = mergeClass(Hi, HiComp);
      byteOffset = byteOffset + eleStorageSize;
      if (Lo == ArgClass::Memory || Hi == ArgClass::Memory)
        return;
    }
  }

  // Goes through the previously marshalled arguments and counts the
  // register occupancy to check if there are enough registers left.
  bool hasEnoughRegisters(mlir::Location loc, int neededIntRegisters,
                          int neededSSERegisters,
                          const Marshalling &previousArguments) const {
    int availIntRegisters = 6;
    int availSSERegisters = 8;
    for (auto typeAndAttr : previousArguments) {
      const auto &attr = std::get<Attributes>(typeAndAttr);
      if (attr.isByVal())
        continue; // Previous argument passed on the stack.
      ArgClass Lo, Hi;
      Lo = Hi = ArgClass::NoClass;
      classify(loc, std::get<mlir::Type>(typeAndAttr), 0, Lo, Hi);
      // Post-merge is not needed here since previous aggregate arguments
      // were marshalled into simpler arguments.
      if (Lo == ArgClass::Integer)
        --availIntRegisters;
      else if (Lo == ArgClass::SSE)
        --availSSERegisters;
      if (Hi == ArgClass::Integer)
        --availIntRegisters;
      else if (Hi == ArgClass::SSE)
        --availSSERegisters;
    }
    return availSSERegisters >= neededSSERegisters &&
           availIntRegisters >= neededIntRegisters;
  }

  /// Argument class merging as described in System V ABI 3.2.3 point 4.
  ArgClass mergeClass(ArgClass accum, ArgClass field) const {
    assert((accum != ArgClass::Memory && accum != ArgClass::ComplexX87) &&
           "Invalid accumulated classification during merge.");
    if (accum == field || field == ArgClass::NoClass)
      return accum;
    if (field == ArgClass::Memory)
      return ArgClass::Memory;
    if (accum == ArgClass::NoClass)
      return field;
    if (accum == ArgClass::Integer || field == ArgClass::Integer)
      return ArgClass::Integer;
    if (field == ArgClass::X87 || field == ArgClass::X87Up ||
        field == ArgClass::ComplexX87 || accum == ArgClass::X87 ||
        accum == ArgClass::X87Up)
      return ArgClass::Memory;
    return ArgClass::SSE;
  }

  /// Argument class post merging as described in System V ABI 3.2.3 point 5.
  void postMerge(std::uint64_t byteSize, ArgClass &Lo, ArgClass &Hi) const {
    if (Hi == ArgClass::Memory)
      Lo = ArgClass::Memory;
    if (Hi == ArgClass::X87Up && Lo != ArgClass::X87)
      Lo = ArgClass::Memory;
    if (byteSize > 16 && (Lo != ArgClass::SSE || Hi != ArgClass::SSEUp))
      Lo = ArgClass::Memory;
    if (Hi == ArgClass::SSEUp && Lo != ArgClass::SSE)
      Hi = ArgClass::SSE;
  }
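
  // Worked example (illustrative): a 24-byte struct of three doubles
  // classifies both its Lo and Hi eightbytes as SSE, but since its size
  // exceeds 16 bytes and Hi is not SSEUp, the third rule above demotes Lo to
  // Memory, i.e. the aggregate is passed on the stack.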

  /// When \p recTy is a one field record type that can be passed
  /// like the field on its own, returns the field type. Returns
  /// a null type otherwise.
  mlir::Type passAsFieldIfOneFieldStruct(fir::RecordType recTy,
                                         bool allowComplex = false) const {
    auto typeList = recTy.getTypeList();
    if (typeList.size() != 1)
      return {};
    mlir::Type fieldType = typeList[0].second;
    if (mlir::isa<mlir::FloatType, mlir::IntegerType, fir::LogicalType>(
            fieldType))
      return fieldType;
    if (allowComplex && mlir::isa<mlir::ComplexType>(fieldType))
      return fieldType;
    if (mlir::isa<fir::CharacterType>(fieldType)) {
      // Only CHARACTER(1) components are expected in BIND(C) contexts, which
      // are the only contexts where a derived type may be passed in
      // registers.
      assert(mlir::cast<fir::CharacterType>(fieldType).getLen() == 1 &&
             "fir.type value arg character components must have length 1");
      return fieldType;
    }
    // Complex field that needs to be split, or array.
    return {};
  }

  mlir::Type pickLLVMArgType(mlir::Location loc, mlir::MLIRContext *context,
                             ArgClass argClass,
                             std::uint64_t partByteSize) const {
    if (argClass == ArgClass::SSE) {
      if (partByteSize > 16)
        TODO(loc, "passing struct as a real > 128 bits in register");
      // Clang uses a vector type when several fp fields are marshalled
      // into a single SSE register (like <n x smallest fp field>).
      // It should make no difference from an ABI point of view to just
      // select an fp type of the right size, and it makes things simpler
      // here.
      if (partByteSize > 8)
        return mlir::FloatType::getF128(context);
      if (partByteSize > 4)
        return mlir::FloatType::getF64(context);
      if (partByteSize > 2)
        return mlir::FloatType::getF32(context);
      return mlir::FloatType::getF16(context);
    }
    assert(partByteSize <= 8 &&
           "expect integer part of aggregate argument to fit into eight bytes");
    if (partByteSize > 4)
      return mlir::IntegerType::get(context, 64);
    if (partByteSize > 2)
      return mlir::IntegerType::get(context, 32);
    if (partByteSize > 1)
      return mlir::IntegerType::get(context, 16);
    return mlir::IntegerType::get(context, 8);
  }
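
  // For illustration: an SSE eightbyte holding two floats is returned here
  // as f64 (where clang would use <2 x float>), and an Integer eightbyte
  // holding an i32 plus trailing padding comes back as i64.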

  /// Marshal a derived type passed by value like a C struct.
  CodeGenSpecifics::Marshalling
  structArgumentType(mlir::Location loc, fir::RecordType recTy,
                     const Marshalling &previousArguments) const override {
    std::uint64_t byteOffset = 0;
    ArgClass Lo, Hi;
    Lo = Hi = ArgClass::NoClass;
    byteOffset = classifyStruct(loc, recTy, byteOffset, Lo, Hi);
    postMerge(byteOffset, Lo, Hi);
    if (Lo == ArgClass::Memory || Lo == ArgClass::X87 ||
        Lo == ArgClass::ComplexX87)
      return passOnTheStack(loc, recTy, /*isResult=*/false);
    int neededIntRegisters = 0;
    int neededSSERegisters = 0;
    if (Lo == ArgClass::SSE)
      ++neededSSERegisters;
    else if (Lo == ArgClass::Integer)
      ++neededIntRegisters;
    if (Hi == ArgClass::SSE)
      ++neededSSERegisters;
    else if (Hi == ArgClass::Integer)
      ++neededIntRegisters;
    // A C struct should not be split into LLVM registers if LLVM codegen is
    // not able to later assign actual registers to all of them (struct
    // passing is all in registers or all on the stack).
    if (!hasEnoughRegisters(loc, neededIntRegisters, neededSSERegisters,
                            previousArguments))
      return passOnTheStack(loc, recTy, /*isResult=*/false);

    if (auto fieldType = passAsFieldIfOneFieldStruct(recTy)) {
      CodeGenSpecifics::Marshalling marshal;
      marshal.emplace_back(fieldType, AT{});
      return marshal;
    }
    if (Hi == ArgClass::NoClass || Hi == ArgClass::SSEUp) {
      // Pass a single integer or floating point argument.
      mlir::Type lowType =
          pickLLVMArgType(loc, recTy.getContext(), Lo, byteOffset);
      CodeGenSpecifics::Marshalling marshal;
      marshal.emplace_back(lowType, AT{});
      return marshal;
    }
    // Split into two integer or floating point arguments.
    // Note that for the first argument, this will always pick i64 or f64,
    // which may be bigger than needed if some struct padding ends the first
    // eightbyte (e.g. for `{i32, f64}`). This is valid from an x86-64 ABI
    // and semantics point of view, but it may not match the LLVM IR
    // interface clang would produce for the equivalent C code (the assembly
    // will still be compatible). This allows keeping the logic simpler here
    // since it avoids computing the "data" size of the Lo part.
    mlir::Type lowType = pickLLVMArgType(loc, recTy.getContext(), Lo, 8u);
    mlir::Type hiType =
        pickLLVMArgType(loc, recTy.getContext(), Hi, byteOffset - 8u);
    CodeGenSpecifics::Marshalling marshal;
    marshal.emplace_back(lowType, AT{});
    marshal.emplace_back(hiType, AT{});
    return marshal;
  }
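
  // Worked example (illustrative): for a BIND(C) derived type equivalent to
  // `struct { int a; double b; }`, classification yields Lo = Integer and
  // Hi = SSE, so the split above produces an i64 and an f64 argument,
  // roughly:
  //   declare void @f(i64, double)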

  CodeGenSpecifics::Marshalling
  structReturnType(mlir::Location loc, fir::RecordType recTy) const override {
    std::uint64_t byteOffset = 0;
    ArgClass Lo, Hi;
    Lo = Hi = ArgClass::NoClass;
    byteOffset = classifyStruct(loc, recTy, byteOffset, Lo, Hi);
    mlir::MLIRContext *context = recTy.getContext();
    postMerge(byteOffset, Lo, Hi);
    if (Lo == ArgClass::Memory)
      return passOnTheStack(loc, recTy, /*isResult=*/true);

    // Note that X87/ComplexX87 are passed in memory, but returned via the
    // %st0 and %st1 registers. Here, they are returned as fp80 or
    // {fp80, fp80} by passAsFieldIfOneFieldStruct, and LLVM will use the
    // expected registers.

    // Note that {_Complex long double} is not 100% clear from an ABI
    // perspective: the aggregate post-merge rules say it should be passed in
    // memory because it is bigger than two eightbytes. This has the odd
    // effect that a {_Complex long double} return is dealt with differently
    // than a plain _Complex long double return.

    if (auto fieldType =
            passAsFieldIfOneFieldStruct(recTy, /*allowComplex=*/true)) {
      if (auto complexType = mlir::dyn_cast<mlir::ComplexType>(fieldType))
        return complexReturnType(loc, complexType.getElementType());
      CodeGenSpecifics::Marshalling marshal;
      marshal.emplace_back(fieldType, AT{});
      return marshal;
    }

    if (Hi == ArgClass::NoClass || Hi == ArgClass::SSEUp) {
      // Return a single integer or floating point argument.
      mlir::Type lowType = pickLLVMArgType(loc, context, Lo, byteOffset);
      CodeGenSpecifics::Marshalling marshal;
      marshal.emplace_back(lowType, AT{});
      return marshal;
    }
    // Will be returned in two different registers. Generate {lowType, hiType}
    // for the LLVM IR result type.
    CodeGenSpecifics::Marshalling marshal;
    mlir::Type lowType = pickLLVMArgType(loc, context, Lo, 8u);
    mlir::Type hiType = pickLLVMArgType(loc, context, Hi, byteOffset - 8u);
    marshal.emplace_back(mlir::TupleType::get(context, {lowType, hiType}),
                         AT{});
    return marshal;
  }

  /// Marshal an argument that must be passed on the stack.
  CodeGenSpecifics::Marshalling
  passOnTheStack(mlir::Location loc, mlir::Type ty, bool isResult) const {
    CodeGenSpecifics::Marshalling marshal;
    auto sizeAndAlign =
        fir::getTypeSizeAndAlignmentOrCrash(loc, ty, getDataLayout(), kindMap);
    // The stack is always 8 byte aligned (note 14 in 3.2.3).
    unsigned short align =
        std::max(sizeAndAlign.second, static_cast<unsigned short>(8));
    marshal.emplace_back(fir::ReferenceType::get(ty),
                         AT{align, /*byval=*/!isResult, /*sret=*/isResult});
    return marshal;
  }
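
  // For illustration: an argument marshalled this way surfaces in LLVM IR as
  // a pointer carrying the byval attribute (or sret for a result), roughly:
  //   declare void @f(ptr byval({ i32, double }) align 8)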
};
} // namespace

//===----------------------------------------------------------------------===//
// x86_64 (x86 64 bit) Windows target specifics.
//===----------------------------------------------------------------------===//

namespace {
struct TargetX86_64Win : public GenericTarget<TargetX86_64Win> {
  using GenericTarget::GenericTarget;

  static constexpr int defaultWidth = 64;

  CodeGenSpecifics::Marshalling
  complexArgumentType(mlir::Location loc, mlir::Type eleTy) const override {
    CodeGenSpecifics::Marshalling marshal;
    const auto *sem = &floatToSemantics(kindMap, eleTy);
    if (sem == &llvm::APFloat::IEEEsingle()) {
      // i64   pack both floats in a 64-bit GPR
      marshal.emplace_back(mlir::IntegerType::get(eleTy.getContext(), 64),
                           AT{});
    } else if (sem == &llvm::APFloat::IEEEdouble()) {
      // Use a type that will be translated into LLVM as:
      // { double, double }   struct of 2 double, byval, align 8
      marshal.emplace_back(
          fir::ReferenceType::get(mlir::TupleType::get(
              eleTy.getContext(), mlir::TypeRange{eleTy, eleTy})),
          AT{/*align=*/8, /*byval=*/true});
    } else if (sem == &llvm::APFloat::IEEEquad() ||
               sem == &llvm::APFloat::x87DoubleExtended()) {
      // Use a type that will be translated into LLVM as:
      // { t, t }   struct of 2 eleTy, byval, align 16
      marshal.emplace_back(
          fir::ReferenceType::get(mlir::TupleType::get(
              eleTy.getContext(), mlir::TypeRange{eleTy, eleTy})),
          AT{/*align=*/16, /*byval=*/true});
    } else {
      typeTodo(sem, loc, "argument");
    }
    return marshal;
  }

  CodeGenSpecifics::Marshalling
  complexReturnType(mlir::Location loc, mlir::Type eleTy) const override {
    CodeGenSpecifics::Marshalling marshal;
    const auto *sem = &floatToSemantics(kindMap, eleTy);
    if (sem == &llvm::APFloat::IEEEsingle()) {
      // i64   pack both floats in a 64-bit GPR
      marshal.emplace_back(mlir::IntegerType::get(eleTy.getContext(), 64),
                           AT{});
    } else if (sem == &llvm::APFloat::IEEEdouble()) {
      // Use a type that will be translated into LLVM as:
      // { double, double }   struct of 2 double, sret, align 8
      marshal.emplace_back(
          fir::ReferenceType::get(mlir::TupleType::get(
              eleTy.getContext(), mlir::TypeRange{eleTy, eleTy})),
          AT{/*align=*/8, /*byval=*/false, /*sret=*/true});
    } else if (sem == &llvm::APFloat::IEEEquad() ||
               sem == &llvm::APFloat::x87DoubleExtended()) {
      // Use a type that will be translated into LLVM as:
      // { t, t }   struct of 2 eleTy, sret, align 16
      marshal.emplace_back(
          fir::ReferenceType::get(mlir::TupleType::get(
              eleTy.getContext(), mlir::TypeRange{eleTy, eleTy})),
          AT{/*align=*/16, /*byval=*/false, /*sret=*/true});
    } else {
      typeTodo(sem, loc, "return");
    }
    return marshal;
  }
};
} // namespace

//===----------------------------------------------------------------------===//
// AArch64 linux target specifics.
//===----------------------------------------------------------------------===//

namespace {
struct TargetAArch64 : public GenericTarget<TargetAArch64> {
  using GenericTarget::GenericTarget;

  static constexpr int defaultWidth = 64;

  CodeGenSpecifics::Marshalling
  complexArgumentType(mlir::Location loc, mlir::Type eleTy) const override {
    CodeGenSpecifics::Marshalling marshal;
    const auto *sem = &floatToSemantics(kindMap, eleTy);
    if (sem == &llvm::APFloat::IEEEsingle() ||
        sem == &llvm::APFloat::IEEEdouble() ||
        sem == &llvm::APFloat::IEEEquad()) {
      // [2 x t]   array of 2 eleTy
      marshal.emplace_back(fir::SequenceType::get({2}, eleTy), AT{});
    } else {
      typeTodo(sem, loc, "argument");
    }
    return marshal;
  }

  CodeGenSpecifics::Marshalling
  complexReturnType(mlir::Location loc, mlir::Type eleTy) const override {
    CodeGenSpecifics::Marshalling marshal;
    const auto *sem = &floatToSemantics(kindMap, eleTy);
    if (sem == &llvm::APFloat::IEEEsingle() ||
        sem == &llvm::APFloat::IEEEdouble() ||
        sem == &llvm::APFloat::IEEEquad()) {
      // Use a type that will be translated into LLVM as:
      // { t, t }   struct of 2 eleTy
      marshal.emplace_back(mlir::TupleType::get(eleTy.getContext(),
                                                mlir::TypeRange{eleTy, eleTy}),
                           AT{});
    } else {
      typeTodo(sem, loc, "return");
    }
    return marshal;
  }
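
  // Illustrative summary (assuming the usual AAPCS64 treatment of such
  // homogeneous aggregates): a COMPLEX(8) value argument becomes [2 x double]
  // and travels in two floating-point registers, roughly:
  //   declare void @f([2 x double])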
};
} // namespace

//===----------------------------------------------------------------------===//
// PPC64 (AIX 64 bit) target specifics.
//===----------------------------------------------------------------------===//

namespace {
struct TargetPPC64 : public GenericTarget<TargetPPC64> {
  using GenericTarget::GenericTarget;

  static constexpr int defaultWidth = 64;

  CodeGenSpecifics::Marshalling
  complexArgumentType(mlir::Location, mlir::Type eleTy) const override {
    CodeGenSpecifics::Marshalling marshal;
    // two distinct element type arguments (re, im)
    marshal.emplace_back(eleTy, AT{});
    marshal.emplace_back(eleTy, AT{});
    return marshal;
  }

  CodeGenSpecifics::Marshalling
  complexReturnType(mlir::Location, mlir::Type eleTy) const override {
    CodeGenSpecifics::Marshalling marshal;
    // Use a type that will be translated into LLVM as:
    // { t, t }   struct of 2 element type
    marshal.emplace_back(
        mlir::TupleType::get(eleTy.getContext(), mlir::TypeRange{eleTy, eleTy}),
        AT{});
    return marshal;
  }
};
} // namespace

//===----------------------------------------------------------------------===//
// PPC64le linux target specifics.
//===----------------------------------------------------------------------===//

namespace {
struct TargetPPC64le : public GenericTarget<TargetPPC64le> {
  using GenericTarget::GenericTarget;

  static constexpr int defaultWidth = 64;

  CodeGenSpecifics::Marshalling
  complexArgumentType(mlir::Location, mlir::Type eleTy) const override {
    CodeGenSpecifics::Marshalling marshal;
    // two distinct element type arguments (re, im)
    marshal.emplace_back(eleTy, AT{});
    marshal.emplace_back(eleTy, AT{});
    return marshal;
  }

  CodeGenSpecifics::Marshalling
  complexReturnType(mlir::Location, mlir::Type eleTy) const override {
    CodeGenSpecifics::Marshalling marshal;
    // Use a type that will be translated into LLVM as:
    // { t, t }   struct of 2 element type
    marshal.emplace_back(
        mlir::TupleType::get(eleTy.getContext(), mlir::TypeRange{eleTy, eleTy}),
        AT{});
    return marshal;
  }
};
} // namespace

//===----------------------------------------------------------------------===//
// sparc (sparc 32 bit) target specifics.
//===----------------------------------------------------------------------===//

namespace {
struct TargetSparc : public GenericTarget<TargetSparc> {
  using GenericTarget::GenericTarget;

  static constexpr int defaultWidth = 32;

  CodeGenSpecifics::Marshalling
  complexArgumentType(mlir::Location, mlir::Type eleTy) const override {
    assert(fir::isa_real(eleTy));
    CodeGenSpecifics::Marshalling marshal;
    // Use a type that will be translated into LLVM as:
    // { t, t }   struct of 2 eleTy
    auto structTy =
        mlir::TupleType::get(eleTy.getContext(), mlir::TypeRange{eleTy, eleTy});
    marshal.emplace_back(fir::ReferenceType::get(structTy), AT{});
    return marshal;
  }

  CodeGenSpecifics::Marshalling
  complexReturnType(mlir::Location loc, mlir::Type eleTy) const override {
    assert(fir::isa_real(eleTy));
    CodeGenSpecifics::Marshalling marshal;
    // Use a type that will be translated into LLVM as:
    // { t, t }   struct of 2 eleTy, byval
    auto structTy =
        mlir::TupleType::get(eleTy.getContext(), mlir::TypeRange{eleTy, eleTy});
    marshal.emplace_back(fir::ReferenceType::get(structTy),
                         AT{/*alignment=*/0, /*byval=*/true});
    return marshal;
  }
};
} // namespace

//===----------------------------------------------------------------------===//
// sparcv9 (sparc 64 bit) target specifics.
//===----------------------------------------------------------------------===//

namespace {
struct TargetSparcV9 : public GenericTarget<TargetSparcV9> {
  using GenericTarget::GenericTarget;

  static constexpr int defaultWidth = 64;

  CodeGenSpecifics::Marshalling
  complexArgumentType(mlir::Location loc, mlir::Type eleTy) const override {
    CodeGenSpecifics::Marshalling marshal;
    const auto *sem = &floatToSemantics(kindMap, eleTy);
    if (sem == &llvm::APFloat::IEEEsingle() ||
        sem == &llvm::APFloat::IEEEdouble()) {
      // two distinct float, double arguments
      marshal.emplace_back(eleTy, AT{});
      marshal.emplace_back(eleTy, AT{});
    } else if (sem == &llvm::APFloat::IEEEquad()) {
      // Use a type that will be translated into LLVM as:
      // { fp128, fp128 }   struct of 2 fp128, byval, align 16
      marshal.emplace_back(
          fir::ReferenceType::get(mlir::TupleType::get(
              eleTy.getContext(), mlir::TypeRange{eleTy, eleTy})),
          AT{/*align=*/16, /*byval=*/true});
    } else {
      typeTodo(sem, loc, "argument");
    }
    return marshal;
  }

  CodeGenSpecifics::Marshalling
  complexReturnType(mlir::Location loc, mlir::Type eleTy) const override {
    CodeGenSpecifics::Marshalling marshal;
    // Use a type that will be translated into LLVM as:
    // { eleTy, eleTy }   struct of 2 eleTy
    marshal.emplace_back(
        mlir::TupleType::get(eleTy.getContext(), mlir::TypeRange{eleTy, eleTy}),
        AT{});
    return marshal;
  }
};
} // namespace

//===----------------------------------------------------------------------===//
// RISCV64 linux target specifics.
//===----------------------------------------------------------------------===//

namespace {
struct TargetRISCV64 : public GenericTarget<TargetRISCV64> {
  using GenericTarget::GenericTarget;

  static constexpr int defaultWidth = 64;

  CodeGenSpecifics::Marshalling
  complexArgumentType(mlir::Location loc, mlir::Type eleTy) const override {
    CodeGenSpecifics::Marshalling marshal;
    const auto *sem = &floatToSemantics(kindMap, eleTy);
    if (sem == &llvm::APFloat::IEEEsingle() ||
        sem == &llvm::APFloat::IEEEdouble()) {
      // Two distinct element type arguments (re, im)
      marshal.emplace_back(eleTy, AT{});
      marshal.emplace_back(eleTy, AT{});
    } else {
      typeTodo(sem, loc, "argument");
    }
    return marshal;
  }

  CodeGenSpecifics::Marshalling
  complexReturnType(mlir::Location loc, mlir::Type eleTy) const override {
    CodeGenSpecifics::Marshalling marshal;
    const auto *sem = &floatToSemantics(kindMap, eleTy);
    if (sem == &llvm::APFloat::IEEEsingle() ||
        sem == &llvm::APFloat::IEEEdouble()) {
      // Use a type that will be translated into LLVM as:
      // { t, t }   struct of 2 eleTy, byval
      marshal.emplace_back(mlir::TupleType::get(eleTy.getContext(),
                                                mlir::TypeRange{eleTy, eleTy}),
                           AT{/*alignment=*/0, /*byval=*/true});
    } else {
      typeTodo(sem, loc, "return");
    }
    return marshal;
  }
};
} // namespace

//===----------------------------------------------------------------------===//
// AMDGPU linux target specifics.
//===----------------------------------------------------------------------===//

namespace {
struct TargetAMDGPU : public GenericTarget<TargetAMDGPU> {
  using GenericTarget::GenericTarget;

  // Default size (in bits) of the index type for strings.
  static constexpr int defaultWidth = 64;

  CodeGenSpecifics::Marshalling
  complexArgumentType(mlir::Location loc, mlir::Type eleTy) const override {
    CodeGenSpecifics::Marshalling marshal;
    TODO(loc, "handle complex argument types");
    return marshal;
  }

  CodeGenSpecifics::Marshalling
  complexReturnType(mlir::Location loc, mlir::Type eleTy) const override {
    CodeGenSpecifics::Marshalling marshal;
    TODO(loc, "handle complex return types");
    return marshal;
  }
};
} // namespace

//===----------------------------------------------------------------------===//
// NVPTX linux target specifics.
//===----------------------------------------------------------------------===//

namespace {
struct TargetNVPTX : public GenericTarget<TargetNVPTX> {
  using GenericTarget::GenericTarget;

  // Default size (in bits) of the index type for strings.
  static constexpr int defaultWidth = 64;

  CodeGenSpecifics::Marshalling
  complexArgumentType(mlir::Location loc, mlir::Type eleTy) const override {
    CodeGenSpecifics::Marshalling marshal;
    TODO(loc, "handle complex argument types");
    return marshal;
  }

  CodeGenSpecifics::Marshalling
  complexReturnType(mlir::Location loc, mlir::Type eleTy) const override {
    CodeGenSpecifics::Marshalling marshal;
    TODO(loc, "handle complex return types");
    return marshal;
  }
};
} // namespace

//===----------------------------------------------------------------------===//
// LoongArch64 linux target specifics.
//===----------------------------------------------------------------------===//

namespace {
struct TargetLoongArch64 : public GenericTarget<TargetLoongArch64> {
  using GenericTarget::GenericTarget;

  static constexpr int defaultWidth = 64;

  CodeGenSpecifics::Marshalling
  complexArgumentType(mlir::Location loc, mlir::Type eleTy) const override {
    CodeGenSpecifics::Marshalling marshal;
    const auto *sem = &floatToSemantics(kindMap, eleTy);
    if (sem == &llvm::APFloat::IEEEsingle() ||
        sem == &llvm::APFloat::IEEEdouble()) {
      // Two distinct element type arguments (re, im)
      marshal.emplace_back(eleTy, AT{});
      marshal.emplace_back(eleTy, AT{});
    } else {
      typeTodo(sem, loc, "argument");
    }
    return marshal;
  }

  CodeGenSpecifics::Marshalling
  complexReturnType(mlir::Location loc, mlir::Type eleTy) const override {
    CodeGenSpecifics::Marshalling marshal;
    const auto *sem = &floatToSemantics(kindMap, eleTy);
    if (sem == &llvm::APFloat::IEEEsingle() ||
        sem == &llvm::APFloat::IEEEdouble()) {
      // Use a type that will be translated into LLVM as:
      // { t, t }   struct of 2 eleTy, byval
      marshal.emplace_back(mlir::TupleType::get(eleTy.getContext(),
                                                mlir::TypeRange{eleTy, eleTy}),
                           AT{/*alignment=*/0, /*byval=*/true});
    } else {
      typeTodo(sem, loc, "return");
    }
    return marshal;
  }
};
} // namespace

// Instantiate the overloaded target instance based on the triple value.
// TODO: Add other targets to this file as needed.
std::unique_ptr<fir::CodeGenSpecifics>
fir::CodeGenSpecifics::get(mlir::MLIRContext *ctx, llvm::Triple &&trp,
                           KindMapping &&kindMap, llvm::StringRef targetCPU,
                           mlir::LLVM::TargetFeaturesAttr targetFeatures,
                           const mlir::DataLayout &dl) {
  switch (trp.getArch()) {
  default:
    break;
  case llvm::Triple::ArchType::x86:
    if (trp.isOSWindows())
      return std::make_unique<TargetI386Win>(ctx, std::move(trp),
                                             std::move(kindMap), targetCPU,
                                             targetFeatures, dl);
    else
      return std::make_unique<TargetI386>(ctx, std::move(trp),
                                          std::move(kindMap), targetCPU,
                                          targetFeatures, dl);
  case llvm::Triple::ArchType::x86_64:
    if (trp.isOSWindows())
      return std::make_unique<TargetX86_64Win>(ctx, std::move(trp),
                                               std::move(kindMap), targetCPU,
                                               targetFeatures, dl);
    else
      return std::make_unique<TargetX86_64>(ctx, std::move(trp),
                                            std::move(kindMap), targetCPU,
                                            targetFeatures, dl);
  case llvm::Triple::ArchType::aarch64:
    return std::make_unique<TargetAArch64>(
        ctx, std::move(trp), std::move(kindMap), targetCPU, targetFeatures, dl);
  case llvm::Triple::ArchType::ppc64:
    return std::make_unique<TargetPPC64>(
        ctx, std::move(trp), std::move(kindMap), targetCPU, targetFeatures, dl);
  case llvm::Triple::ArchType::ppc64le:
    return std::make_unique<TargetPPC64le>(
        ctx, std::move(trp), std::move(kindMap), targetCPU, targetFeatures, dl);
  case llvm::Triple::ArchType::sparc:
    return std::make_unique<TargetSparc>(
        ctx, std::move(trp), std::move(kindMap), targetCPU, targetFeatures, dl);
  case llvm::Triple::ArchType::sparcv9:
    return std::make_unique<TargetSparcV9>(
        ctx, std::move(trp), std::move(kindMap), targetCPU, targetFeatures, dl);
  case llvm::Triple::ArchType::riscv64:
    return std::make_unique<TargetRISCV64>(
        ctx, std::move(trp), std::move(kindMap), targetCPU, targetFeatures, dl);
  case llvm::Triple::ArchType::amdgcn:
    return std::make_unique<TargetAMDGPU>(
        ctx, std::move(trp), std::move(kindMap), targetCPU, targetFeatures, dl);
  case llvm::Triple::ArchType::nvptx64:
    return std::make_unique<TargetNVPTX>(
        ctx, std::move(trp), std::move(kindMap), targetCPU, targetFeatures, dl);
  case llvm::Triple::ArchType::loongarch64:
    return std::make_unique<TargetLoongArch64>(
        ctx, std::move(trp), std::move(kindMap), targetCPU, targetFeatures, dl);
  }
  TODO(mlir::UnknownLoc::get(ctx), "target not implemented");
}
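
// Illustrative usage (hypothetical caller context, e.g. while configuring
// codegen; `kindMap` and `dataLayout` are assumed to exist at the call site):
//   auto specifics = fir::CodeGenSpecifics::get(
//       ctx, llvm::Triple("x86_64-unknown-linux-gnu"), std::move(kindMap),
//       /*targetCPU=*/"generic", /*targetFeatures=*/{}, dataLayout);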

std::unique_ptr<fir::CodeGenSpecifics> fir::CodeGenSpecifics::get(
    mlir::MLIRContext *ctx, llvm::Triple &&trp, KindMapping &&kindMap,
    llvm::StringRef targetCPU, mlir::LLVM::TargetFeaturesAttr targetFeatures,
    const mlir::DataLayout &dl, llvm::StringRef tuneCPU) {
  std::unique_ptr<fir::CodeGenSpecifics> CGS = fir::CodeGenSpecifics::get(
      ctx, std::move(trp), std::move(kindMap), targetCPU, targetFeatures, dl);

  CGS->tuneCPU = tuneCPU;
  return CGS;
}