xref: /netbsd-src/external/apache2/llvm/dist/clang/utils/TableGen/MveEmitter.cpp (revision 181254a7b1bdde6873432bffef2d2decc4b5c22f)
1 //===- MveEmitter.cpp - Generate arm_mve.h for use with clang -*- C++ -*-=====//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This set of linked tablegen backends is responsible for emitting the bits
10 // and pieces that implement <arm_mve.h>, which is defined by the ACLE standard
11 // and provides a set of types and functions for (more or less) direct access
12 // to the MVE instruction set, including the scalar shifts as well as the
13 // vector instructions.
14 //
15 // MVE's standard intrinsic functions are unusual in that they have a system of
16 // polymorphism. For example, the function vaddq() can behave like vaddq_u16(),
17 // vaddq_f32(), vaddq_s8(), etc., depending on the types of the vector
18 // arguments you give it.
19 //
20 // This constrains the implementation strategies. The usual approach to making
21 // the user-facing functions polymorphic would be to either use
22 // __attribute__((overloadable)) to make a set of vaddq() functions that are
23 // all inline wrappers on the underlying clang builtins, or to define a single
24 // vaddq() macro which expands to an instance of _Generic.
25 //
26 // The inline-wrappers approach would work fine for most intrinsics, except for
27 // the ones that take an argument required to be a compile-time constant,
28 // because if you wrap an inline function around a call to a builtin, the
29 // constant nature of the argument is not passed through.
30 //
31 // The _Generic approach can be made to work with enough effort, but it takes a
32 // lot of machinery, because of the design feature of _Generic that even the
33 // untaken branches are required to pass all front-end validity checks such as
34 // type-correctness. You can work around that by nesting further _Generics all
35 // over the place to coerce things to the right type in untaken branches, but
36 // what you get out is complicated, hard to guarantee its correctness, and
37 // worst of all, gives _completely unreadable_ error messages if the user gets
38 // the types wrong for an intrinsic call.
39 //
40 // Therefore, my strategy is to introduce a new __attribute__ that allows a
41 // function to be mapped to a clang builtin even though it doesn't have the
42 // same name, and then declare all the user-facing MVE function names with that
43 // attribute, mapping each one directly to the clang builtin. And the
44 // polymorphic ones have __attribute__((overloadable)) as well. So once the
45 // compiler has resolved the overload, it knows the internal builtin ID of the
46 // selected function, and can check the immediate arguments against that; and
47 // if the user gets the types wrong in a call to a polymorphic intrinsic, they
48 // get a completely clear error message showing all the declarations of that
49 // function in the header file and explaining why each one doesn't fit their
50 // call.
51 //
52 // The downside of this is that if every clang builtin has to correspond
53 // exactly to a user-facing ACLE intrinsic, then you can't save work in the
54 // frontend by doing it in the header file: CGBuiltin.cpp has to do the entire
55 // job of converting an ACLE intrinsic call into LLVM IR. So the Tablegen
56 // description for an MVE intrinsic has to contain a full description of the
57 // sequence of IRBuilder calls that clang will need to make.
58 //
59 //===----------------------------------------------------------------------===//
60 
61 #include "llvm/ADT/APInt.h"
62 #include "llvm/ADT/StringRef.h"
63 #include "llvm/Support/Casting.h"
64 #include "llvm/Support/raw_ostream.h"
65 #include "llvm/TableGen/Error.h"
66 #include "llvm/TableGen/Record.h"
67 #include <cassert>
68 #include <cstddef>
69 #include <cstdint>
70 #include <list>
71 #include <map>
72 #include <memory>
73 #include <set>
74 #include <string>
75 #include <vector>
76 
77 using namespace llvm;
78 
79 namespace {
80 
81 class MveEmitter;
82 class Result;
83 
84 // -----------------------------------------------------------------------------
85 // A system of classes to represent all the types we'll need to deal with in
86 // the prototypes of intrinsics.
87 //
88 // Query methods include finding out the C name of a type; the "LLVM name" in
89 // the sense of a C++ code snippet that can be used in the codegen function;
90 // the suffix that represents the type in the ACLE intrinsic naming scheme
91 // (e.g. 's32' represents int32_t in intrinsics such as vaddq_s32); whether the
92 // type is floating-point related (hence should be under #ifdef in the MVE
93 // header so that it isn't included in integer-only MVE mode); and the type's
94 // size in bits. Not all subtypes support all these queries.
95 
96 class Type {
97 public:
98   enum class TypeKind {
99     // Void appears as a return type (for store intrinsics, which are pure
100     // side-effect). It's also used as the parameter type in the Tablegen
101     // when an intrinsic doesn't need to come in various suffixed forms like
102     // vfooq_s8,vfooq_u16,vfooq_f32.
103     Void,
104 
105     // Scalar is used for ordinary int and float types of all sizes.
106     Scalar,
107 
108     // Vector is used for anything that occupies exactly one MVE vector
109     // register, i.e. {uint,int,float}NxM_t.
110     Vector,
111 
112     // MultiVector is used for the {uint,int,float}NxMxK_t types used by the
113     // interleaving load/store intrinsics v{ld,st}{2,4}q.
114     MultiVector,
115 
116     // Predicate is used by all the predicated intrinsics. Its C
117     // representation is mve_pred16_t (which is just an alias for uint16_t).
118     // But we give more detail here, by indicating that a given predicate
119     // instruction is logically regarded as a vector of i1 containing the
120     // same number of lanes as the input vector type. So our Predicate type
121     // comes with a lane count, which we use to decide which kind of <n x i1>
122     // we'll invoke the pred_i2v IR intrinsic to translate it into.
123     Predicate,
124 
125     // Pointer is used for pointer types (obviously), and comes with a flag
126     // indicating whether it's a pointer to a const or mutable instance of
127     // the pointee type.
128     Pointer,
129   };
130 
131 private:
132   const TypeKind TKind;
133 
134 protected:
135   Type(TypeKind K) : TKind(K) {}
136 
137 public:
138   TypeKind typeKind() const { return TKind; }
139   virtual ~Type() = default;
140   virtual bool requiresFloat() const = 0;
141   virtual unsigned sizeInBits() const = 0;
142   virtual std::string cName() const = 0;
143   virtual std::string llvmName() const {
144     PrintFatalError("no LLVM type name available for type " + cName());
145   }
146   virtual std::string acleSuffix() const {
147     PrintFatalError("no ACLE suffix available for this type");
148   }
149 };
150 
// The three flavours of scalar that a ScalarType can describe.
enum class ScalarTypeKind {
  SignedInt,
  UnsignedInt,
  Float,
};
152 inline std::string toLetter(ScalarTypeKind kind) {
153   switch (kind) {
154   case ScalarTypeKind::SignedInt:
155     return "s";
156   case ScalarTypeKind::UnsignedInt:
157     return "u";
158   case ScalarTypeKind::Float:
159     return "f";
160   }
161   llvm_unreachable("Unhandled ScalarTypeKind enum");
162 }
163 inline std::string toCPrefix(ScalarTypeKind kind) {
164   switch (kind) {
165   case ScalarTypeKind::SignedInt:
166     return "int";
167   case ScalarTypeKind::UnsignedInt:
168     return "uint";
169   case ScalarTypeKind::Float:
170     return "float";
171   }
172   llvm_unreachable("Unhandled ScalarTypeKind enum");
173 }
174 
175 class VoidType : public Type {
176 public:
177   VoidType() : Type(TypeKind::Void) {}
178   unsigned sizeInBits() const override { return 0; }
179   bool requiresFloat() const override { return false; }
180   std::string cName() const override { return "void"; }
181 
182   static bool classof(const Type *T) { return T->typeKind() == TypeKind::Void; }
183   std::string acleSuffix() const override { return ""; }
184 };
185 
186 class PointerType : public Type {
187   const Type *Pointee;
188   bool Const;
189 
190 public:
191   PointerType(const Type *Pointee, bool Const)
192       : Type(TypeKind::Pointer), Pointee(Pointee), Const(Const) {}
193   unsigned sizeInBits() const override { return 32; }
194   bool requiresFloat() const override { return Pointee->requiresFloat(); }
195   std::string cName() const override {
196     std::string Name = Pointee->cName();
197 
198     // The syntax for a pointer in C is different when the pointee is
199     // itself a pointer. The MVE intrinsics don't contain any double
200     // pointers, so we don't need to worry about that wrinkle.
201     assert(!isa<PointerType>(Pointee) && "Pointer to pointer not supported");
202 
203     if (Const)
204       Name = "const " + Name;
205     return Name + " *";
206   }
207 
208   static bool classof(const Type *T) {
209     return T->typeKind() == TypeKind::Pointer;
210   }
211 };
212 
213 // Base class for all the types that have a name of the form
214 // [prefix][numbers]_t, like int32_t, uint16x8_t, float32x4x2_t.
215 //
216 // For this sub-hierarchy we invent a cNameBase() method which returns the
217 // whole name except for the trailing "_t", so that Vector and MultiVector can
218 // append an extra "x2" or whatever to their element type's cNameBase(). Then
219 // the main cName() query method puts "_t" on the end for the final type name.
220 
221 class CRegularNamedType : public Type {
222   using Type::Type;
223   virtual std::string cNameBase() const = 0;
224 
225 public:
226   std::string cName() const override { return cNameBase() + "_t"; }
227 };
228 
229 class ScalarType : public CRegularNamedType {
230   ScalarTypeKind Kind;
231   unsigned Bits;
232 
233 public:
234   ScalarType(const Record *Record) : CRegularNamedType(TypeKind::Scalar) {
235     Kind = StringSwitch<ScalarTypeKind>(Record->getValueAsString("kind"))
236                .Case("s", ScalarTypeKind::SignedInt)
237                .Case("u", ScalarTypeKind::UnsignedInt)
238                .Case("f", ScalarTypeKind::Float);
239     Bits = Record->getValueAsInt("size");
240   }
241   unsigned sizeInBits() const override { return Bits; }
242   ScalarTypeKind kind() const { return Kind; }
243   std::string suffix() const { return toLetter(Kind) + utostr(Bits); }
244   std::string cNameBase() const override {
245     return toCPrefix(Kind) + utostr(Bits);
246   }
247   std::string llvmName() const override {
248     if (Kind == ScalarTypeKind::Float) {
249       if (Bits == 16)
250         return "HalfTy";
251       if (Bits == 32)
252         return "FloatTy";
253       if (Bits == 64)
254         return "DoubleTy";
255       PrintFatalError("bad size for floating type");
256     }
257     return "Int" + utostr(Bits) + "Ty";
258   }
259   std::string acleSuffix() const override {
260     return "_" + toLetter(Kind) + utostr(Bits);
261   }
262   bool isInteger() const { return Kind != ScalarTypeKind::Float; }
263   bool requiresFloat() const override { return !isInteger(); }
264 
265   static bool classof(const Type *T) {
266     return T->typeKind() == TypeKind::Scalar;
267   }
268 };
269 
270 class VectorType : public CRegularNamedType {
271   const ScalarType *Element;
272   unsigned Lanes;
273 
274 public:
275   VectorType(const ScalarType *Element)
276       : CRegularNamedType(TypeKind::Vector), Element(Element) {
277     // MVE has a fixed 128-bit vector size
278     Lanes = 128 / Element->sizeInBits();
279   }
280   unsigned sizeInBits() const override { return 128; }
281   unsigned lanes() const { return Lanes; }
282   bool requiresFloat() const override { return Element->requiresFloat(); }
283   std::string cNameBase() const override {
284     return Element->cNameBase() + "x" + utostr(Lanes);
285   }
286   std::string llvmName() const override {
287     return "llvm::VectorType::get(" + Element->llvmName() + ", " +
288            utostr(Lanes) + ")";
289   }
290 
291   static bool classof(const Type *T) {
292     return T->typeKind() == TypeKind::Vector;
293   }
294 };
295 
296 class MultiVectorType : public CRegularNamedType {
297   const VectorType *Element;
298   unsigned Registers;
299 
300 public:
301   MultiVectorType(unsigned Registers, const VectorType *Element)
302       : CRegularNamedType(TypeKind::MultiVector), Element(Element),
303         Registers(Registers) {}
304   unsigned sizeInBits() const override {
305     return Registers * Element->sizeInBits();
306   }
307   unsigned registers() const { return Registers; }
308   bool requiresFloat() const override { return Element->requiresFloat(); }
309   std::string cNameBase() const override {
310     return Element->cNameBase() + "x" + utostr(Registers);
311   }
312 
313   // MultiVectorType doesn't override llvmName, because we don't expect to do
314   // automatic code generation for the MVE intrinsics that use it: the {vld2,
315   // vld4, vst2, vst4} family are the only ones that use these types, so it was
316   // easier to hand-write the codegen for dealing with these structs than to
317   // build in lots of extra automatic machinery that would only be used once.
318 
319   static bool classof(const Type *T) {
320     return T->typeKind() == TypeKind::MultiVector;
321   }
322 };
323 
324 class PredicateType : public CRegularNamedType {
325   unsigned Lanes;
326 
327 public:
328   PredicateType(unsigned Lanes)
329       : CRegularNamedType(TypeKind::Predicate), Lanes(Lanes) {}
330   unsigned sizeInBits() const override { return 16; }
331   std::string cNameBase() const override { return "mve_pred16"; }
332   bool requiresFloat() const override { return false; };
333   std::string llvmName() const override {
334     // Use <4 x i1> instead of <2 x i1> for two-lane vector types. See
335     // the comment in llvm/lib/Target/ARM/ARMInstrMVE.td for further
336     // explanation.
337     unsigned ModifiedLanes = (Lanes == 2 ? 4 : Lanes);
338 
339     return "llvm::VectorType::get(Builder.getInt1Ty(), " +
340            utostr(ModifiedLanes) + ")";
341   }
342 
343   static bool classof(const Type *T) {
344     return T->typeKind() == TypeKind::Predicate;
345   }
346 };
347 
348 // -----------------------------------------------------------------------------
349 // Class to facilitate merging together the code generation for many intrinsics
350 // by means of varying a few constant or type parameters.
351 //
352 // Most obviously, the intrinsics in a single parametrised family will have
353 // code generation sequences that only differ in a type or two, e.g. vaddq_s8
354 // and vaddq_u16 will look the same apart from putting a different vector type
355 // in the call to CGM.getIntrinsic(). But also, completely different intrinsics
356 // will often code-generate in the same way, with only a different choice of
357 // _which_ IR intrinsic they lower to (e.g. vaddq_m_s8 and vmulq_m_s8), but
358 // marshalling the arguments and return values of the IR intrinsic in exactly
359 // the same way. And others might differ only in some other kind of constant,
360 // such as a lane index.
361 //
362 // So, when we generate the IR-building code for all these intrinsics, we keep
363 // track of every value that could possibly be pulled out of the code and
364 // stored ahead of time in a local variable. Then we group together intrinsics
365 // by textual equivalence of the code that would result if _all_ those
366 // parameters were stored in local variables. That gives us maximal sets that
367 // can be implemented by a single piece of IR-building code by changing
368 // parameter values ahead of time.
369 //
370 // After we've done that, we do a second pass in which we only allocate _some_
371 // of the parameters into local variables, by tracking which ones have the same
372 // values as each other (so that a single variable can be reused) and which
373 // ones are the same across the whole set (so that no variable is needed at
374 // all).
375 //
376 // Hence the class below. Its allocParam method is invoked during code
377 // generation by every method of a Result subclass (see below) that wants to
378 // give it the opportunity to pull something out into a switchable parameter.
379 // It returns a variable name for the parameter, or (if it's being used in the
380 // second pass once we've decided that some parameters don't need to be stored
381 // in variables after all) it might just return the input expression unchanged.
382 
383 struct CodeGenParamAllocator {
384   // Accumulated during code generation
385   std::vector<std::string> *ParamTypes = nullptr;
386   std::vector<std::string> *ParamValues = nullptr;
387 
388   // Provided ahead of time in pass 2, to indicate which parameters are being
389   // assigned to what. This vector contains an entry for each call to
390   // allocParam expected during code gen (which we counted up in pass 1), and
391   // indicates the number of the parameter variable that should be returned, or
392   // -1 if this call shouldn't allocate a parameter variable at all.
393   //
394   // We rely on the recursive code generation working identically in passes 1
395   // and 2, so that the same list of calls to allocParam happen in the same
396   // order. That guarantees that the parameter numbers recorded in pass 1 will
397   // match the entries in this vector that store what MveEmitter::EmitBuiltinCG
398   // decided to do about each one in pass 2.
399   std::vector<int> *ParamNumberMap = nullptr;
400 
401   // Internally track how many things we've allocated
402   unsigned nparams = 0;
403 
404   std::string allocParam(StringRef Type, StringRef Value) {
405     unsigned ParamNumber;
406 
407     if (!ParamNumberMap) {
408       // In pass 1, unconditionally assign a new parameter variable to every
409       // value we're asked to process.
410       ParamNumber = nparams++;
411     } else {
412       // In pass 2, consult the map provided by the caller to find out which
413       // variable we should be keeping things in.
414       int MapValue = (*ParamNumberMap)[nparams++];
415       if (MapValue < 0)
416         return Value;
417       ParamNumber = MapValue;
418     }
419 
420     // If we've allocated a new parameter variable for the first time, store
421     // its type and value to be retrieved after codegen.
422     if (ParamTypes && ParamTypes->size() == ParamNumber)
423       ParamTypes->push_back(Type);
424     if (ParamValues && ParamValues->size() == ParamNumber)
425       ParamValues->push_back(Value);
426 
427     // Unimaginative naming scheme for parameter variables.
428     return "Param" + utostr(ParamNumber);
429   }
430 };
431 
432 // -----------------------------------------------------------------------------
433 // System of classes that represent all the intermediate values used during
434 // code-generation for an intrinsic.
435 //
436 // The base class 'Result' can represent a value of the LLVM type 'Value', or
437 // sometimes 'Address' (for loads/stores, including an alignment requirement).
438 //
439 // In the case where the Tablegen provides a value in the codegen dag as a
440 // plain integer literal, the Result object we construct here will be one that
441 // returns true from hasIntegerConstantValue(). This allows the generated C++
442 // code to use the constant directly in contexts which can take a literal
443 // integer, such as Builder.CreateExtractValue(thing, 1), without going to the
444 // effort of calling llvm::ConstantInt::get() and then pulling the constant
445 // back out of the resulting llvm::Value later.
446 
447 class Result {
448 public:
449   // Convenient shorthand for the pointer type we'll be using everywhere.
450   using Ptr = std::shared_ptr<Result>;
451 
452 private:
453   Ptr Predecessor;
454   std::string VarName;
455   bool VarNameUsed = false;
456   unsigned Visited = 0;
457 
458 public:
459   virtual ~Result() = default;
460   using Scope = std::map<std::string, Ptr>;
461   virtual void genCode(raw_ostream &OS, CodeGenParamAllocator &) const = 0;
462   virtual bool hasIntegerConstantValue() const { return false; }
463   virtual uint32_t integerConstantValue() const { return 0; }
464   virtual std::string typeName() const { return "Value *"; }
465 
466   // Mostly, when a code-generation operation has a dependency on prior
467   // operations, it's because it uses the output values of those operations as
468   // inputs. But there's one exception, which is the use of 'seq' in Tablegen
469   // to indicate that operations have to be performed in sequence regardless of
470   // whether they use each others' output values.
471   //
472   // So, the actual generation of code is done by depth-first search, using the
473   // prerequisites() method to get a list of all the other Results that have to
474   // be computed before this one. That method divides into the 'predecessor',
475   // set by setPredecessor() while processing a 'seq' dag node, and the list
476   // returned by 'morePrerequisites', which each subclass implements to return
477   // a list of the Results it uses as input to whatever its own computation is
478   // doing.
479 
480   virtual void morePrerequisites(std::vector<Ptr> &output) const {}
481   std::vector<Ptr> prerequisites() const {
482     std::vector<Ptr> ToRet;
483     if (Predecessor)
484       ToRet.push_back(Predecessor);
485     morePrerequisites(ToRet);
486     return ToRet;
487   }
488 
489   void setPredecessor(Ptr p) {
490     assert(!Predecessor);
491     Predecessor = p;
492   }
493 
494   // Each Result will be assigned a variable name in the output code, but not
495   // all those variable names will actually be used (e.g. the return value of
496   // Builder.CreateStore has void type, so nobody will want to refer to it). To
497   // prevent annoying compiler warnings, we track whether each Result's
498   // variable name was ever actually mentioned in subsequent statements, so
499   // that it can be left out of the final generated code.
500   std::string varname() {
501     VarNameUsed = true;
502     return VarName;
503   }
504   void setVarname(const StringRef s) { VarName = s; }
505   bool varnameUsed() const { return VarNameUsed; }
506 
507   // Code generation happens in multiple passes. This method tracks whether a
508   // Result has yet been visited in a given pass, without the need for a
509   // tedious loop in between passes that goes through and resets a 'visited'
510   // flag back to false: you just set Pass=1 the first time round, and Pass=2
511   // the second time.
512   bool needsVisiting(unsigned Pass) {
513     bool ToRet = Visited < Pass;
514     Visited = Pass;
515     return ToRet;
516   }
517 };
518 
519 // Result subclass that retrieves one of the arguments to the clang builtin
520 // function. In cases where the argument has pointer type, we call
521 // EmitPointerWithAlignment and store the result in a variable of type Address,
522 // so that load and store IR nodes can know the right alignment. Otherwise, we
523 // call EmitScalarExpr.
524 //
525 // There are aggregate parameters in the MVE intrinsics API, but we don't deal
526 // with them in this Tablegen back end: they only arise in the vld2q/vld4q and
527 // vst2q/vst4q family, which is few enough that we just write the code by hand
528 // for those in CGBuiltin.cpp.
529 class BuiltinArgResult : public Result {
530 public:
531   unsigned ArgNum;
532   bool AddressType;
533   BuiltinArgResult(unsigned ArgNum, bool AddressType)
534       : ArgNum(ArgNum), AddressType(AddressType) {}
535   void genCode(raw_ostream &OS, CodeGenParamAllocator &) const override {
536     OS << (AddressType ? "EmitPointerWithAlignment" : "EmitScalarExpr")
537        << "(E->getArg(" << ArgNum << "))";
538   }
539   std::string typeName() const override {
540     return AddressType ? "Address" : Result::typeName();
541   }
542 };
543 
544 // Result subclass for an integer literal appearing in Tablegen. This may need
545 // to be turned into an llvm::Value by means of llvm::ConstantInt::get(), or
546 // it may be used directly as an integer, depending on which IRBuilder method
547 // it's being passed to.
548 class IntLiteralResult : public Result {
549 public:
550   const ScalarType *IntegerType;
551   uint32_t IntegerValue;
552   IntLiteralResult(const ScalarType *IntegerType, uint32_t IntegerValue)
553       : IntegerType(IntegerType), IntegerValue(IntegerValue) {}
554   void genCode(raw_ostream &OS,
555                CodeGenParamAllocator &ParamAlloc) const override {
556     OS << "llvm::ConstantInt::get("
557        << ParamAlloc.allocParam("llvm::Type *", IntegerType->llvmName())
558        << ", ";
559     OS << ParamAlloc.allocParam(IntegerType->cName(), utostr(IntegerValue))
560        << ")";
561   }
562   bool hasIntegerConstantValue() const override { return true; }
563   uint32_t integerConstantValue() const override { return IntegerValue; }
564 };
565 
566 // Result subclass representing a cast between different integer types. We use
567 // our own ScalarType abstraction as the representation of the target type,
568 // which gives both size and signedness.
569 class IntCastResult : public Result {
570 public:
571   const ScalarType *IntegerType;
572   Ptr V;
573   IntCastResult(const ScalarType *IntegerType, Ptr V)
574       : IntegerType(IntegerType), V(V) {}
575   void genCode(raw_ostream &OS,
576                CodeGenParamAllocator &ParamAlloc) const override {
577     OS << "Builder.CreateIntCast(" << V->varname() << ", "
578        << ParamAlloc.allocParam("llvm::Type *", IntegerType->llvmName()) << ", "
579        << ParamAlloc.allocParam("bool",
580                                 IntegerType->kind() == ScalarTypeKind::SignedInt
581                                     ? "true"
582                                     : "false")
583        << ")";
584   }
585   void morePrerequisites(std::vector<Ptr> &output) const override {
586     output.push_back(V);
587   }
588 };
589 
590 // Result subclass representing a call to an IRBuilder method. Each IRBuilder
591 // method we want to use will have a Tablegen record giving the method name and
592 // describing any important details of how to call it, such as whether a
593 // particular argument should be an integer constant instead of an llvm::Value.
594 class IRBuilderResult : public Result {
595 public:
596   StringRef BuilderMethod;
597   std::vector<Ptr> Args;
598   std::set<unsigned> AddressArgs;
599   std::set<unsigned> IntConstantArgs;
600   IRBuilderResult(StringRef BuilderMethod, std::vector<Ptr> Args,
601                   std::set<unsigned> AddressArgs,
602                   std::set<unsigned> IntConstantArgs)
603       : BuilderMethod(BuilderMethod), Args(Args), AddressArgs(AddressArgs),
604         IntConstantArgs(IntConstantArgs) {}
605   void genCode(raw_ostream &OS,
606                CodeGenParamAllocator &ParamAlloc) const override {
607     OS << "Builder." << BuilderMethod << "(";
608     const char *Sep = "";
609     for (unsigned i = 0, e = Args.size(); i < e; ++i) {
610       Ptr Arg = Args[i];
611       if (IntConstantArgs.find(i) != IntConstantArgs.end()) {
612         assert(Arg->hasIntegerConstantValue());
613         OS << Sep
614            << ParamAlloc.allocParam("unsigned",
615                                     utostr(Arg->integerConstantValue()));
616       } else {
617         OS << Sep << Arg->varname();
618       }
619       Sep = ", ";
620     }
621     OS << ")";
622   }
623   void morePrerequisites(std::vector<Ptr> &output) const override {
624     for (unsigned i = 0, e = Args.size(); i < e; ++i) {
625       Ptr Arg = Args[i];
626       if (IntConstantArgs.find(i) != IntConstantArgs.end())
627         continue;
628       output.push_back(Arg);
629     }
630   }
631 };
632 
633 // Result subclass representing a call to an IR intrinsic, which we first have
634 // to look up using an Intrinsic::ID constant and an array of types.
635 class IRIntrinsicResult : public Result {
636 public:
637   std::string IntrinsicID;
638   std::vector<const Type *> ParamTypes;
639   std::vector<Ptr> Args;
640   IRIntrinsicResult(StringRef IntrinsicID, std::vector<const Type *> ParamTypes,
641                     std::vector<Ptr> Args)
642       : IntrinsicID(IntrinsicID), ParamTypes(ParamTypes), Args(Args) {}
643   void genCode(raw_ostream &OS,
644                CodeGenParamAllocator &ParamAlloc) const override {
645     std::string IntNo = ParamAlloc.allocParam(
646         "Intrinsic::ID", "Intrinsic::arm_mve_" + IntrinsicID);
647     OS << "Builder.CreateCall(CGM.getIntrinsic(" << IntNo;
648     if (!ParamTypes.empty()) {
649       OS << ", llvm::SmallVector<llvm::Type *, " << ParamTypes.size() << "> {";
650       const char *Sep = "";
651       for (auto T : ParamTypes) {
652         OS << Sep << ParamAlloc.allocParam("llvm::Type *", T->llvmName());
653         Sep = ", ";
654       }
655       OS << "}";
656     }
657     OS << "), llvm::SmallVector<Value *, " << Args.size() << "> {";
658     const char *Sep = "";
659     for (auto Arg : Args) {
660       OS << Sep << Arg->varname();
661       Sep = ", ";
662     }
663     OS << "})";
664   }
665   void morePrerequisites(std::vector<Ptr> &output) const override {
666     output.insert(output.end(), Args.begin(), Args.end());
667   }
668 };
669 
670 // -----------------------------------------------------------------------------
671 // Class that describes a single ACLE intrinsic.
672 //
673 // A Tablegen record will typically describe more than one ACLE intrinsic, by
674 // means of setting the 'list<Type> Params' field to a list of multiple
675 // parameter types, so as to define vaddq_{s8,u8,...,f16,f32} all in one go.
676 // We'll end up with one instance of ACLEIntrinsic for *each* parameter type,
677 // rather than a single one for all of them. Hence, the constructor takes both
678 // a Tablegen record and the current value of the parameter type.
679 
680 class ACLEIntrinsic {
681   // Structure documenting that one of the intrinsic's arguments is required to
682   // be a compile-time constant integer, and what constraints there are on its
683   // value. Used when generating Sema checking code.
684   struct ImmediateArg {
685     enum class BoundsType { ExplicitRange, UInt };
686     BoundsType boundsType;
687     int64_t i1, i2;
688     StringRef ExtraCheckType, ExtraCheckArgs;
689     const Type *ArgType;
690   };
691 
692   // For polymorphic intrinsics, FullName is the explicit name that uniquely
693   // identifies this variant of the intrinsic, and ShortName is the name it
694   // shares with at least one other intrinsic.
695   std::string ShortName, FullName;
696 
697   const Type *ReturnType;
698   std::vector<const Type *> ArgTypes;
699   std::map<unsigned, ImmediateArg> ImmediateArgs;
700   Result::Ptr Code;
701 
702   std::map<std::string, std::string> CustomCodeGenArgs;
703 
704   // Recursive function that does the internals of code generation.
705   void genCodeDfs(Result::Ptr V, std::list<Result::Ptr> &Used,
706                   unsigned Pass) const {
707     if (!V->needsVisiting(Pass))
708       return;
709 
710     for (Result::Ptr W : V->prerequisites())
711       genCodeDfs(W, Used, Pass);
712 
713     Used.push_back(V);
714   }
715 
716 public:
717   const std::string &shortName() const { return ShortName; }
718   const std::string &fullName() const { return FullName; }
719   const Type *returnType() const { return ReturnType; }
720   const std::vector<const Type *> &argTypes() const { return ArgTypes; }
721   bool requiresFloat() const {
722     if (ReturnType->requiresFloat())
723       return true;
724     for (const Type *T : ArgTypes)
725       if (T->requiresFloat())
726         return true;
727     return false;
728   }
729   bool polymorphic() const { return ShortName != FullName; }
730 
731   // External entry point for code generation, called from MveEmitter.
732   void genCode(raw_ostream &OS, CodeGenParamAllocator &ParamAlloc,
733                unsigned Pass) const {
734     if (!hasCode()) {
735       for (auto kv : CustomCodeGenArgs)
736         OS << "  " << kv.first << " = " << kv.second << ";\n";
737       OS << "  break; // custom code gen\n";
738       return;
739     }
740     std::list<Result::Ptr> Used;
741     genCodeDfs(Code, Used, Pass);
742 
743     unsigned varindex = 0;
744     for (Result::Ptr V : Used)
745       if (V->varnameUsed())
746         V->setVarname("Val" + utostr(varindex++));
747 
748     for (Result::Ptr V : Used) {
749       OS << "  ";
750       if (V == Used.back()) {
751         assert(!V->varnameUsed());
752         OS << "return "; // FIXME: what if the top-level thing is void?
753       } else if (V->varnameUsed()) {
754         std::string Type = V->typeName();
755         OS << V->typeName();
756         if (!StringRef(Type).endswith("*"))
757           OS << " ";
758         OS << V->varname() << " = ";
759       }
760       V->genCode(OS, ParamAlloc);
761       OS << ";\n";
762     }
763   }
764   bool hasCode() const { return Code != nullptr; }
765 
766   std::string genSema() const {
767     std::vector<std::string> SemaChecks;
768 
769     for (const auto &kv : ImmediateArgs) {
770       const ImmediateArg &IA = kv.second;
771 
772       llvm::APInt lo(128, 0), hi(128, 0);
773       switch (IA.boundsType) {
774       case ImmediateArg::BoundsType::ExplicitRange:
775         lo = IA.i1;
776         hi = IA.i2;
777         break;
778       case ImmediateArg::BoundsType::UInt:
779         lo = 0;
780         hi = IA.i1;
781         break;
782       }
783 
784       llvm::APInt typelo, typehi;
785       if (cast<ScalarType>(IA.ArgType)->kind() == ScalarTypeKind::UnsignedInt) {
786         typelo = llvm::APInt::getSignedMinValue(IA.ArgType->sizeInBits());
787         typehi = llvm::APInt::getSignedMaxValue(IA.ArgType->sizeInBits());
788       } else {
789         typelo = llvm::APInt::getMinValue(IA.ArgType->sizeInBits());
790         typehi = llvm::APInt::getMaxValue(IA.ArgType->sizeInBits());
791       }
792       typelo = typelo.sext(128);
793       typehi = typehi.sext(128);
794 
795       std::string Index = utostr(kv.first);
796 
797       if (lo.sle(typelo) && hi.sge(typehi))
798         SemaChecks.push_back("SemaBuiltinConstantArg(TheCall, " + Index + ")");
799       else
800         SemaChecks.push_back("SemaBuiltinConstantArgRange(TheCall, " + Index +
801                              ", 0x" + lo.toString(16, true) + ", 0x" +
802                              hi.toString(16, true) + ")");
803 
804       if (!IA.ExtraCheckType.empty()) {
805         std::string Suffix;
806         if (!IA.ExtraCheckArgs.empty())
807           Suffix = (Twine(", ") + IA.ExtraCheckArgs).str();
808         SemaChecks.push_back((Twine("SemaBuiltinConstantArg") +
809                               IA.ExtraCheckType + "(TheCall, " + Index +
810                               Suffix + ")")
811                                  .str());
812       }
813     }
814     if (SemaChecks.empty())
815       return "";
816     return (Twine("  return ") +
817             join(std::begin(SemaChecks), std::end(SemaChecks),
818                  " ||\n         ") +
819             ";\n")
820         .str();
821   }
822 
823   ACLEIntrinsic(MveEmitter &ME, Record *R, const Type *Param);
824 };
825 
826 // -----------------------------------------------------------------------------
827 // The top-level class that holds all the state from analyzing the entire
828 // Tablegen input.
829 
830 class MveEmitter {
831   // MveEmitter holds a collection of all the types we've instantiated.
832   VoidType Void;
833   std::map<std::string, std::unique_ptr<ScalarType>> ScalarTypes;
834   std::map<std::pair<ScalarTypeKind, unsigned>, std::unique_ptr<VectorType>>
835       VectorTypes;
836   std::map<std::pair<std::string, unsigned>, std::unique_ptr<MultiVectorType>>
837       MultiVectorTypes;
838   std::map<unsigned, std::unique_ptr<PredicateType>> PredicateTypes;
839   std::map<std::string, std::unique_ptr<PointerType>> PointerTypes;
840 
841   // And all the ACLEIntrinsic instances we've created.
842   std::map<std::string, std::unique_ptr<ACLEIntrinsic>> ACLEIntrinsics;
843 
844 public:
845   // Methods to create a Type object, or return the right existing one from the
846   // maps stored in this object.
847   const VoidType *getVoidType() { return &Void; }
848   const ScalarType *getScalarType(StringRef Name) {
849     return ScalarTypes[Name].get();
850   }
851   const ScalarType *getScalarType(Record *R) {
852     return getScalarType(R->getName());
853   }
854   const VectorType *getVectorType(const ScalarType *ST) {
855     std::pair<ScalarTypeKind, unsigned> key(ST->kind(), ST->sizeInBits());
856     if (VectorTypes.find(key) == VectorTypes.end())
857       VectorTypes[key] = std::make_unique<VectorType>(ST);
858     return VectorTypes[key].get();
859   }
860   const MultiVectorType *getMultiVectorType(unsigned Registers,
861                                             const VectorType *VT) {
862     std::pair<std::string, unsigned> key(VT->cNameBase(), Registers);
863     if (MultiVectorTypes.find(key) == MultiVectorTypes.end())
864       MultiVectorTypes[key] = std::make_unique<MultiVectorType>(Registers, VT);
865     return MultiVectorTypes[key].get();
866   }
867   const PredicateType *getPredicateType(unsigned Lanes) {
868     unsigned key = Lanes;
869     if (PredicateTypes.find(key) == PredicateTypes.end())
870       PredicateTypes[key] = std::make_unique<PredicateType>(Lanes);
871     return PredicateTypes[key].get();
872   }
873   const PointerType *getPointerType(const Type *T, bool Const) {
874     PointerType PT(T, Const);
875     std::string key = PT.cName();
876     if (PointerTypes.find(key) == PointerTypes.end())
877       PointerTypes[key] = std::make_unique<PointerType>(PT);
878     return PointerTypes[key].get();
879   }
880 
881   // Methods to construct a type from various pieces of Tablegen. These are
882   // always called in the context of setting up a particular ACLEIntrinsic, so
883   // there's always an ambient parameter type (because we're iterating through
884   // the Params list in the Tablegen record for the intrinsic), which is used
885   // to expand Tablegen classes like 'Vector' which mean something different in
886   // each member of a parametric family.
887   const Type *getType(Record *R, const Type *Param);
888   const Type *getType(DagInit *D, const Type *Param);
889   const Type *getType(Init *I, const Type *Param);
890 
891   // Functions that translate the Tablegen representation of an intrinsic's
892   // code generation into a collection of Value objects (which will then be
893   // reprocessed to read out the actual C++ code included by CGBuiltin.cpp).
894   Result::Ptr getCodeForDag(DagInit *D, const Result::Scope &Scope,
895                             const Type *Param);
896   Result::Ptr getCodeForDagArg(DagInit *D, unsigned ArgNum,
897                                const Result::Scope &Scope, const Type *Param);
898   Result::Ptr getCodeForArg(unsigned ArgNum, const Type *ArgType);
899 
900   // Constructor and top-level functions.
901 
902   MveEmitter(RecordKeeper &Records);
903 
904   void EmitHeader(raw_ostream &OS);
905   void EmitBuiltinDef(raw_ostream &OS);
906   void EmitBuiltinSema(raw_ostream &OS);
907   void EmitBuiltinCG(raw_ostream &OS);
908   void EmitBuiltinAliases(raw_ostream &OS);
909 };
910 
911 const Type *MveEmitter::getType(Init *I, const Type *Param) {
912   if (auto Dag = dyn_cast<DagInit>(I))
913     return getType(Dag, Param);
914   if (auto Def = dyn_cast<DefInit>(I))
915     return getType(Def->getDef(), Param);
916 
917   PrintFatalError("Could not convert this value into a type");
918 }
919 
920 const Type *MveEmitter::getType(Record *R, const Type *Param) {
921   if (R->isSubClassOf("Immediate"))
922     R = R->getValueAsDef("type"); // pass to subfield
923 
924   if (R->getName() == "Void")
925     return getVoidType();
926   if (R->isSubClassOf("PrimitiveType"))
927     return getScalarType(R);
928   if (R->isSubClassOf("ComplexType"))
929     return getType(R->getValueAsDag("spec"), Param);
930 
931   PrintFatalError(R->getLoc(), "Could not convert this record into a type");
932 }
933 
934 const Type *MveEmitter::getType(DagInit *D, const Type *Param) {
935   // The meat of the getType system: types in the Tablegen are represented by a
936   // dag whose operators select sub-cases of this function.
937 
938   Record *Op = cast<DefInit>(D->getOperator())->getDef();
939   if (!Op->isSubClassOf("ComplexTypeOp"))
940     PrintFatalError(
941         "Expected ComplexTypeOp as dag operator in type expression");
942 
943   if (Op->getName() == "CTO_Parameter") {
944     if (isa<VoidType>(Param))
945       PrintFatalError("Parametric type in unparametrised context");
946     return Param;
947   }
948 
949   if (Op->getName() == "CTO_Vec") {
950     const Type *Element = getType(D->getArg(0), Param);
951     return getVectorType(cast<ScalarType>(Element));
952   }
953 
954   if (Op->getName() == "CTO_Pred") {
955     const Type *Element = getType(D->getArg(0), Param);
956     return getPredicateType(128 / Element->sizeInBits());
957   }
958 
959   if (Op->isSubClassOf("CTO_Tuple")) {
960     unsigned Registers = Op->getValueAsInt("n");
961     const Type *Element = getType(D->getArg(0), Param);
962     return getMultiVectorType(Registers, cast<VectorType>(Element));
963   }
964 
965   if (Op->isSubClassOf("CTO_Pointer")) {
966     const Type *Pointee = getType(D->getArg(0), Param);
967     return getPointerType(Pointee, Op->getValueAsBit("const"));
968   }
969 
970   if (Op->isSubClassOf("CTO_Sign")) {
971     const ScalarType *ST = cast<ScalarType>(getType(D->getArg(0), Param));
972     ScalarTypeKind NewKind = Op->getValueAsBit("signed")
973                                  ? ScalarTypeKind::SignedInt
974                                  : ScalarTypeKind::UnsignedInt;
975     for (const auto &kv : ScalarTypes) {
976       const ScalarType *RT = kv.second.get();
977       if (RT->kind() == NewKind && RT->sizeInBits() == ST->sizeInBits())
978         return RT;
979     }
980     PrintFatalError("Cannot change sign of this type");
981   }
982 
983   PrintFatalError("Bad operator in type dag expression");
984 }
985 
986 Result::Ptr MveEmitter::getCodeForDag(DagInit *D, const Result::Scope &Scope,
987                                       const Type *Param) {
988   Record *Op = cast<DefInit>(D->getOperator())->getDef();
989 
990   if (Op->getName() == "seq") {
991     Result::Scope SubScope = Scope;
992     Result::Ptr PrevV = nullptr;
993     for (unsigned i = 0, e = D->getNumArgs(); i < e; ++i) {
994       // We don't use getCodeForDagArg here, because the argument name
995       // has different semantics in a seq
996       Result::Ptr V =
997           getCodeForDag(cast<DagInit>(D->getArg(i)), SubScope, Param);
998       StringRef ArgName = D->getArgNameStr(i);
999       if (!ArgName.empty())
1000         SubScope[ArgName] = V;
1001       if (PrevV)
1002         V->setPredecessor(PrevV);
1003       PrevV = V;
1004     }
1005     return PrevV;
1006   } else if (Op->isSubClassOf("Type")) {
1007     if (D->getNumArgs() != 1)
1008       PrintFatalError("Type casts should have exactly one argument");
1009     const Type *CastType = getType(Op, Param);
1010     Result::Ptr Arg = getCodeForDagArg(D, 0, Scope, Param);
1011     if (const auto *ST = dyn_cast<ScalarType>(CastType)) {
1012       if (!ST->requiresFloat()) {
1013         if (Arg->hasIntegerConstantValue())
1014           return std::make_shared<IntLiteralResult>(
1015               ST, Arg->integerConstantValue());
1016         else
1017           return std::make_shared<IntCastResult>(ST, Arg);
1018       }
1019     }
1020     PrintFatalError("Unsupported type cast");
1021   } else {
1022     std::vector<Result::Ptr> Args;
1023     for (unsigned i = 0, e = D->getNumArgs(); i < e; ++i)
1024       Args.push_back(getCodeForDagArg(D, i, Scope, Param));
1025     if (Op->isSubClassOf("IRBuilder")) {
1026       std::set<unsigned> AddressArgs;
1027       for (unsigned i : Op->getValueAsListOfInts("address_params"))
1028         AddressArgs.insert(i);
1029       std::set<unsigned> IntConstantArgs;
1030       for (unsigned i : Op->getValueAsListOfInts("int_constant_params"))
1031         IntConstantArgs.insert(i);
1032       return std::make_shared<IRBuilderResult>(
1033           Op->getValueAsString("func"), Args, AddressArgs, IntConstantArgs);
1034     } else if (Op->isSubClassOf("IRInt")) {
1035       std::vector<const Type *> ParamTypes;
1036       for (Record *RParam : Op->getValueAsListOfDefs("params"))
1037         ParamTypes.push_back(getType(RParam, Param));
1038       std::string IntName = Op->getValueAsString("intname");
1039       if (Op->getValueAsBit("appendKind"))
1040         IntName += "_" + toLetter(cast<ScalarType>(Param)->kind());
1041       return std::make_shared<IRIntrinsicResult>(IntName, ParamTypes, Args);
1042     } else {
1043       PrintFatalError("Unsupported dag node " + Op->getName());
1044     }
1045   }
1046 }
1047 
1048 Result::Ptr MveEmitter::getCodeForDagArg(DagInit *D, unsigned ArgNum,
1049                                          const Result::Scope &Scope,
1050                                          const Type *Param) {
1051   Init *Arg = D->getArg(ArgNum);
1052   StringRef Name = D->getArgNameStr(ArgNum);
1053 
1054   if (!Name.empty()) {
1055     if (!isa<UnsetInit>(Arg))
1056       PrintFatalError(
1057           "dag operator argument should not have both a value and a name");
1058     auto it = Scope.find(Name);
1059     if (it == Scope.end())
1060       PrintFatalError("unrecognized variable name '" + Name + "'");
1061     return it->second;
1062   }
1063 
1064   if (auto *II = dyn_cast<IntInit>(Arg))
1065     return std::make_shared<IntLiteralResult>(getScalarType("u32"),
1066                                               II->getValue());
1067 
1068   if (auto *DI = dyn_cast<DagInit>(Arg))
1069     return getCodeForDag(DI, Scope, Param);
1070 
1071   PrintFatalError("bad dag argument type for code generation");
1072 }
1073 
1074 Result::Ptr MveEmitter::getCodeForArg(unsigned ArgNum, const Type *ArgType) {
1075   Result::Ptr V =
1076       std::make_shared<BuiltinArgResult>(ArgNum, isa<PointerType>(ArgType));
1077 
1078   if (const auto *ST = dyn_cast<ScalarType>(ArgType)) {
1079     if (ST->isInteger() && ST->sizeInBits() < 32)
1080       V = std::make_shared<IntCastResult>(getScalarType("u32"), V);
1081   } else if (const auto *PT = dyn_cast<PredicateType>(ArgType)) {
1082     V = std::make_shared<IntCastResult>(getScalarType("u32"), V);
1083     V = std::make_shared<IRIntrinsicResult>(
1084         "pred_i2v", std::vector<const Type *>{PT}, std::vector<Result::Ptr>{V});
1085   }
1086 
1087   return V;
1088 }
1089 
1090 ACLEIntrinsic::ACLEIntrinsic(MveEmitter &ME, Record *R, const Type *Param)
1091     : ReturnType(ME.getType(R->getValueAsDef("ret"), Param)) {
1092   // Derive the intrinsic's full name, by taking the name of the
1093   // Tablegen record (or override) and appending the suffix from its
1094   // parameter type. (If the intrinsic is unparametrised, its
1095   // parameter type will be given as Void, which returns the empty
1096   // string for acleSuffix.)
1097   StringRef BaseName =
1098       (R->isSubClassOf("NameOverride") ? R->getValueAsString("basename")
1099                                        : R->getName());
1100   FullName = (Twine(BaseName) + Param->acleSuffix()).str();
1101 
1102   // Derive the intrinsic's polymorphic name, by removing components from the
1103   // full name as specified by its 'pnt' member ('polymorphic name type'),
1104   // which indicates how many type suffixes to remove, and any other piece of
1105   // the name that should be removed.
1106   Record *PolymorphicNameType = R->getValueAsDef("pnt");
1107   SmallVector<StringRef, 8> NameParts;
1108   StringRef(FullName).split(NameParts, '_');
1109   for (unsigned i = 0, e = PolymorphicNameType->getValueAsInt(
1110                            "NumTypeSuffixesToDiscard");
1111        i < e; ++i)
1112     NameParts.pop_back();
1113   if (!PolymorphicNameType->isValueUnset("ExtraSuffixToDiscard")) {
1114     StringRef ExtraSuffix =
1115         PolymorphicNameType->getValueAsString("ExtraSuffixToDiscard");
1116     auto it = NameParts.end();
1117     while (it != NameParts.begin()) {
1118       --it;
1119       if (*it == ExtraSuffix) {
1120         NameParts.erase(it);
1121         break;
1122       }
1123     }
1124   }
1125   ShortName = join(std::begin(NameParts), std::end(NameParts), "_");
1126 
1127   // Process the intrinsic's argument list.
1128   DagInit *ArgsDag = R->getValueAsDag("args");
1129   Result::Scope Scope;
1130   for (unsigned i = 0, e = ArgsDag->getNumArgs(); i < e; ++i) {
1131     Init *TypeInit = ArgsDag->getArg(i);
1132 
1133     // Work out the type of the argument, for use in the function prototype in
1134     // the header file.
1135     const Type *ArgType = ME.getType(TypeInit, Param);
1136     ArgTypes.push_back(ArgType);
1137 
1138     // The argument will usually have a name in the arguments dag, which goes
1139     // into the variable-name scope that the code gen will refer to.
1140     StringRef ArgName = ArgsDag->getArgNameStr(i);
1141     if (!ArgName.empty())
1142       Scope[ArgName] = ME.getCodeForArg(i, ArgType);
1143 
1144     // If the argument is a subclass of Immediate, record the details about
1145     // what values it can take, for Sema checking.
1146     if (auto TypeDI = dyn_cast<DefInit>(TypeInit)) {
1147       Record *TypeRec = TypeDI->getDef();
1148       if (TypeRec->isSubClassOf("Immediate")) {
1149         Record *Bounds = TypeRec->getValueAsDef("bounds");
1150         ImmediateArg &IA = ImmediateArgs[i];
1151         if (Bounds->isSubClassOf("IB_ConstRange")) {
1152           IA.boundsType = ImmediateArg::BoundsType::ExplicitRange;
1153           IA.i1 = Bounds->getValueAsInt("lo");
1154           IA.i2 = Bounds->getValueAsInt("hi");
1155         } else if (Bounds->getName() == "IB_UEltValue") {
1156           IA.boundsType = ImmediateArg::BoundsType::UInt;
1157           IA.i1 = Param->sizeInBits();
1158         } else if (Bounds->getName() == "IB_LaneIndex") {
1159           IA.boundsType = ImmediateArg::BoundsType::ExplicitRange;
1160           IA.i1 = 0;
1161           IA.i2 = 128 / Param->sizeInBits();
1162         } else if (Bounds->getName() == "IB_EltBit") {
1163           IA.boundsType = ImmediateArg::BoundsType::ExplicitRange;
1164           IA.i1 = Bounds->getValueAsInt("base");
1165           IA.i2 = IA.i1 + Param->sizeInBits() - 1;
1166         } else {
1167           PrintFatalError("unrecognised ImmediateBounds subclass");
1168         }
1169 
1170         IA.ArgType = ArgType;
1171 
1172         if (!TypeRec->isValueUnset("extra")) {
1173           IA.ExtraCheckType = TypeRec->getValueAsString("extra");
1174           if (!TypeRec->isValueUnset("extraarg"))
1175             IA.ExtraCheckArgs = TypeRec->getValueAsString("extraarg");
1176         }
1177       }
1178     }
1179   }
1180 
1181   // Finally, go through the codegen dag and translate it into a Result object
1182   // (with an arbitrary DAG of depended-on Results hanging off it).
1183   DagInit *CodeDag = R->getValueAsDag("codegen");
1184   Record *MainOp = cast<DefInit>(CodeDag->getOperator())->getDef();
1185   if (MainOp->isSubClassOf("CustomCodegen")) {
1186     // Or, if it's the special case of CustomCodegen, just accumulate
1187     // a list of parameters we're going to assign to variables before
1188     // breaking from the loop.
1189     CustomCodeGenArgs["CustomCodeGenType"] =
1190         (Twine("CustomCodeGen::") + MainOp->getValueAsString("type")).str();
1191     for (unsigned i = 0, e = CodeDag->getNumArgs(); i < e; ++i) {
1192       StringRef Name = CodeDag->getArgNameStr(i);
1193       if (Name.empty()) {
1194         PrintFatalError("Operands to CustomCodegen should have names");
1195       } else if (auto *II = dyn_cast<IntInit>(CodeDag->getArg(i))) {
1196         CustomCodeGenArgs[Name] = itostr(II->getValue());
1197       } else if (auto *SI = dyn_cast<StringInit>(CodeDag->getArg(i))) {
1198         CustomCodeGenArgs[Name] = SI->getValue();
1199       } else {
1200         PrintFatalError("Operands to CustomCodegen should be integers");
1201       }
1202     }
1203   } else {
1204     Code = ME.getCodeForDag(CodeDag, Scope, Param);
1205   }
1206 }
1207 
1208 MveEmitter::MveEmitter(RecordKeeper &Records) {
1209   // Construct the whole MveEmitter.
1210 
1211   // First, look up all the instances of PrimitiveType. This gives us the list
1212   // of vector typedefs we have to put in arm_mve.h, and also allows us to
1213   // collect all the useful ScalarType instances into a big list so that we can
1214   // use it for operations such as 'find the unsigned version of this signed
1215   // integer type'.
1216   for (Record *R : Records.getAllDerivedDefinitions("PrimitiveType"))
1217     ScalarTypes[R->getName()] = std::make_unique<ScalarType>(R);
1218 
1219   // Now go through the instances of Intrinsic, and for each one, iterate
1220   // through its list of type parameters making an ACLEIntrinsic for each one.
1221   for (Record *R : Records.getAllDerivedDefinitions("Intrinsic")) {
1222     for (Record *RParam : R->getValueAsListOfDefs("params")) {
1223       const Type *Param = getType(RParam, getVoidType());
1224       auto Intrinsic = std::make_unique<ACLEIntrinsic>(*this, R, Param);
1225       ACLEIntrinsics[Intrinsic->fullName()] = std::move(Intrinsic);
1226     }
1227   }
1228 }
1229 
1230 /// A wrapper on raw_string_ostream that contains its own buffer rather than
1231 /// having to point it at one elsewhere. (In other words, it works just like
1232 /// std::ostringstream; also, this makes it convenient to declare a whole array
1233 /// of them at once.)
1234 ///
1235 /// We have to set this up using multiple inheritance, to ensure that the
1236 /// string member has been constructed before raw_string_ostream's constructor
1237 /// is given a pointer to it.
1238 class string_holder {
1239 protected:
1240   std::string S;
1241 };
1242 class raw_self_contained_string_ostream : private string_holder,
1243                                           public raw_string_ostream {
1244 public:
1245   raw_self_contained_string_ostream()
1246       : string_holder(), raw_string_ostream(S) {}
1247 };
1248 
1249 void MveEmitter::EmitHeader(raw_ostream &OS) {
1250   // Accumulate pieces of the header file that will be enabled under various
1251   // different combinations of #ifdef. The index into parts[] is made up of
1252   // the following bit flags.
1253   constexpr unsigned Float = 1;
1254   constexpr unsigned UseUserNamespace = 2;
1255 
1256   constexpr unsigned NumParts = 4;
1257   raw_self_contained_string_ostream parts[NumParts];
1258 
1259   // Write typedefs for all the required vector types, and a few scalar
1260   // types that don't already have the name we want them to have.
1261 
1262   parts[0] << "typedef uint16_t mve_pred16_t;\n";
1263   parts[Float] << "typedef __fp16 float16_t;\n"
1264                   "typedef float float32_t;\n";
1265   for (const auto &kv : ScalarTypes) {
1266     const ScalarType *ST = kv.second.get();
1267     raw_ostream &OS = parts[ST->requiresFloat() ? Float : 0];
1268     const VectorType *VT = getVectorType(ST);
1269 
1270     OS << "typedef __attribute__((neon_vector_type(" << VT->lanes() << "))) "
1271        << ST->cName() << " " << VT->cName() << ";\n";
1272 
1273     // Every vector type also comes with a pair of multi-vector types for
1274     // the VLD2 and VLD4 instructions.
1275     for (unsigned n = 2; n <= 4; n += 2) {
1276       const MultiVectorType *MT = getMultiVectorType(n, VT);
1277       OS << "typedef struct { " << VT->cName() << " val[" << n << "]; } "
1278          << MT->cName() << ";\n";
1279     }
1280   }
1281   parts[0] << "\n";
1282   parts[Float] << "\n";
1283 
1284   // Write declarations for all the intrinsics.
1285 
1286   for (const auto &kv : ACLEIntrinsics) {
1287     const ACLEIntrinsic &Int = *kv.second;
1288 
1289     // We generate each intrinsic twice, under its full unambiguous
1290     // name and its shorter polymorphic name (if the latter exists).
1291     for (bool Polymorphic : {false, true}) {
1292       if (Polymorphic && !Int.polymorphic())
1293         continue;
1294 
1295       // We also generate each intrinsic under a name like __arm_vfooq
1296       // (which is in C language implementation namespace, so it's
1297       // safe to define in any conforming user program) and a shorter
1298       // one like vfooq (which is in user namespace, so a user might
1299       // reasonably have used it for something already). If so, they
1300       // can #define __ARM_MVE_PRESERVE_USER_NAMESPACE before
1301       // including the header, which will suppress the shorter names
1302       // and leave only the implementation-namespace ones. Then they
1303       // have to write __arm_vfooq everywhere, of course.
1304 
1305       for (bool UserNamespace : {false, true}) {
1306         raw_ostream &OS = parts[(Int.requiresFloat() ? Float : 0) |
1307                                 (UserNamespace ? UseUserNamespace : 0)];
1308 
1309         // Make the name of the function in this declaration.
1310 
1311         std::string FunctionName =
1312             Polymorphic ? Int.shortName() : Int.fullName();
1313         if (!UserNamespace)
1314           FunctionName = "__arm_" + FunctionName;
1315 
1316         // Make strings for the types involved in the function's
1317         // prototype.
1318 
1319         std::string RetTypeName = Int.returnType()->cName();
1320         if (!StringRef(RetTypeName).endswith("*"))
1321           RetTypeName += " ";
1322 
1323         std::vector<std::string> ArgTypeNames;
1324         for (const Type *ArgTypePtr : Int.argTypes())
1325           ArgTypeNames.push_back(ArgTypePtr->cName());
1326         std::string ArgTypesString =
1327             join(std::begin(ArgTypeNames), std::end(ArgTypeNames), ", ");
1328 
1329         // Emit the actual declaration. All these functions are
1330         // declared 'static inline' without a body, which is fine
1331         // provided clang recognizes them as builtins, and has the
1332         // effect that this type signature is used in place of the one
1333         // that Builtins.def didn't provide. That's how we can get
1334         // structure types that weren't defined until this header was
1335         // included to be part of the type signature of a builtin that
1336         // was known to clang already.
1337         //
1338         // The declarations use __attribute__(__clang_arm_mve_alias),
1339         // so that each function declared will be recognized as the
1340         // appropriate MVE builtin in spite of its user-facing name.
1341         //
1342         // (That's better than making them all wrapper functions,
1343         // partly because it avoids any compiler error message citing
1344         // the wrapper function definition instead of the user's code,
1345         // and mostly because some MVE intrinsics have arguments
1346         // required to be compile-time constants, and that property
1347         // can't be propagated through a wrapper function. It can be
1348         // propagated through a macro, but macros can't be overloaded
1349         // on argument types very easily - you have to use _Generic,
1350         // which makes error messages very confusing when the user
1351         // gets it wrong.)
1352         //
1353         // Finally, the polymorphic versions of the intrinsics are
1354         // also defined with __attribute__(overloadable), so that when
1355         // the same name is defined with several type signatures, the
1356         // right thing happens. Each one of the overloaded
1357         // declarations is given a different builtin id, which
1358         // has exactly the effect we want: first clang resolves the
1359         // overload to the right function, then it knows which builtin
1360         // it's referring to, and then the Sema checking for that
1361         // builtin can check further things like the constant
1362         // arguments.
1363         //
1364         // One more subtlety is the newline just before the return
1365         // type name. That's a cosmetic tweak to make the error
1366         // messages legible if the user gets the types wrong in a call
1367         // to a polymorphic function: this way, clang will print just
1368         // the _final_ line of each declaration in the header, to show
1369         // the type signatures that would have been legal. So all the
1370         // confusing machinery with __attribute__ is left out of the
1371         // error message, and the user sees something that's more or
1372         // less self-documenting: "here's a list of actually readable
1373         // type signatures for vfooq(), and here's why each one didn't
1374         // match your call".
1375 
1376         OS << "static __inline__ __attribute__(("
1377            << (Polymorphic ? "overloadable, " : "")
1378            << "__clang_arm_mve_alias(__builtin_arm_mve_" << Int.fullName()
1379            << ")))\n"
1380            << RetTypeName << FunctionName << "(" << ArgTypesString << ");\n";
1381       }
1382     }
1383   }
1384   for (auto &part : parts)
1385     part << "\n";
1386 
1387   // Now we've finished accumulating bits and pieces into the parts[] array.
1388   // Put it all together to write the final output file.
1389 
1390   OS << "/*===---- arm_mve.h - ARM MVE intrinsics "
1391         "-----------------------------------===\n"
1392         " *\n"
1393         " *\n"
1394         " * Part of the LLVM Project, under the Apache License v2.0 with LLVM "
1395         "Exceptions.\n"
1396         " * See https://llvm.org/LICENSE.txt for license information.\n"
1397         " * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception\n"
1398         " *\n"
1399         " *===-------------------------------------------------------------"
1400         "----"
1401         "------===\n"
1402         " */\n"
1403         "\n"
1404         "#ifndef __ARM_MVE_H\n"
1405         "#define __ARM_MVE_H\n"
1406         "\n"
1407         "#if !__ARM_FEATURE_MVE\n"
1408         "#error \"MVE support not enabled\"\n"
1409         "#endif\n"
1410         "\n"
1411         "#include <stdint.h>\n"
1412         "\n";
1413 
1414   for (size_t i = 0; i < NumParts; ++i) {
1415     std::vector<std::string> conditions;
1416     if (i & Float)
1417       conditions.push_back("(__ARM_FEATURE_MVE & 2)");
1418     if (i & UseUserNamespace)
1419       conditions.push_back("(!defined __ARM_MVE_PRESERVE_USER_NAMESPACE)");
1420 
1421     std::string condition =
1422         join(std::begin(conditions), std::end(conditions), " && ");
1423     if (!condition.empty())
1424       OS << "#if " << condition << "\n\n";
1425     OS << parts[i].str();
1426     if (!condition.empty())
1427       OS << "#endif /* " << condition << " */\n\n";
1428   }
1429 
1430   OS << "#endif /* __ARM_MVE_H */\n";
1431 }
1432 
1433 void MveEmitter::EmitBuiltinDef(raw_ostream &OS) {
1434   for (const auto &kv : ACLEIntrinsics) {
1435     const ACLEIntrinsic &Int = *kv.second;
1436     OS << "TARGET_HEADER_BUILTIN(__builtin_arm_mve_" << Int.fullName()
1437        << ", \"\", \"n\", \"arm_mve.h\", ALL_LANGUAGES, \"\")\n";
1438   }
1439 
1440   std::set<std::string> ShortNamesSeen;
1441 
1442   for (const auto &kv : ACLEIntrinsics) {
1443     const ACLEIntrinsic &Int = *kv.second;
1444     if (Int.polymorphic()) {
1445       StringRef Name = Int.shortName();
1446       if (ShortNamesSeen.find(Name) == ShortNamesSeen.end()) {
1447         OS << "BUILTIN(__builtin_arm_mve_" << Name << ", \"vi.\", \"nt\")\n";
1448         ShortNamesSeen.insert(Name);
1449       }
1450     }
1451   }
1452 }
1453 
1454 void MveEmitter::EmitBuiltinSema(raw_ostream &OS) {
1455   std::map<std::string, std::set<std::string>> Checks;
1456 
1457   for (const auto &kv : ACLEIntrinsics) {
1458     const ACLEIntrinsic &Int = *kv.second;
1459     std::string Check = Int.genSema();
1460     if (!Check.empty())
1461       Checks[Check].insert(Int.fullName());
1462   }
1463 
1464   for (const auto &kv : Checks) {
1465     for (StringRef Name : kv.second)
1466       OS << "case ARM::BI__builtin_arm_mve_" << Name << ":\n";
1467     OS << kv.first;
1468   }
1469 }
1470 
1471 // Machinery for the grouping of intrinsics by similar codegen.
1472 //
1473 // The general setup is that 'MergeableGroup' stores the things that a set of
1474 // similarly shaped intrinsics have in common: the text of their code
1475 // generation, and the number and type of their parameter variables.
1476 // MergeableGroup is the key in a std::map whose value is a set of
1477 // OutputIntrinsic, which stores the ways in which a particular intrinsic
1478 // specializes the MergeableGroup's generic description: the function name and
1479 // the _values_ of the parameter variables.
1480 
// A vector<string> equipped with a strict weak ordering, so that it can serve
// as a map key or a set element. The particular order is not meaningful; it
// only has to be deterministic. Shorter vectors sort before longer ones, and
// vectors of equal length are ordered by their first differing element.
struct ComparableStringVector : std::vector<std::string> {
  bool operator<(const ComparableStringVector &rhs) const {
    using Base = std::vector<std::string>;

    if (size() < rhs.size())
      return true;
    if (rhs.size() < size())
      return false;

    // Equal lengths: the base class's lexicographic comparison decides on the
    // first position where the two vectors differ, and reports 'not less'
    // when they are identical.
    return static_cast<const Base &>(*this) < static_cast<const Base &>(rhs);
  }
};
1494 
1495 struct OutputIntrinsic {
1496   const ACLEIntrinsic *Int;
1497   std::string Name;
1498   ComparableStringVector ParamValues;
1499   bool operator<(const OutputIntrinsic &rhs) const {
1500     if (Name != rhs.Name)
1501       return Name < rhs.Name;
1502     return ParamValues < rhs.ParamValues;
1503   }
1504 };
1505 struct MergeableGroup {
1506   std::string Code;
1507   ComparableStringVector ParamTypes;
1508   bool operator<(const MergeableGroup &rhs) const {
1509     if (Code != rhs.Code)
1510       return Code < rhs.Code;
1511     return ParamTypes < rhs.ParamTypes;
1512   }
1513 };
1514 
1515 void MveEmitter::EmitBuiltinCG(raw_ostream &OS) {
1516   // Pass 1: generate code for all the intrinsics as if every type or constant
1517   // that can possibly be abstracted out into a parameter variable will be.
1518   // This identifies the sets of intrinsics we'll group together into a single
1519   // piece of code generation.
1520 
1521   std::map<MergeableGroup, std::set<OutputIntrinsic>> MergeableGroupsPrelim;
1522 
1523   for (const auto &kv : ACLEIntrinsics) {
1524     const ACLEIntrinsic &Int = *kv.second;
1525 
1526     MergeableGroup MG;
1527     OutputIntrinsic OI;
1528 
1529     OI.Int = &Int;
1530     OI.Name = Int.fullName();
1531     CodeGenParamAllocator ParamAllocPrelim{&MG.ParamTypes, &OI.ParamValues};
1532     raw_string_ostream OS(MG.Code);
1533     Int.genCode(OS, ParamAllocPrelim, 1);
1534     OS.flush();
1535 
1536     MergeableGroupsPrelim[MG].insert(OI);
1537   }
1538 
1539   // Pass 2: for each of those groups, optimize the parameter variable set by
1540   // eliminating 'parameters' that are the same for all intrinsics in the
1541   // group, and merging together pairs of parameter variables that take the
1542   // same values as each other for all intrinsics in the group.
1543 
1544   std::map<MergeableGroup, std::set<OutputIntrinsic>> MergeableGroups;
1545 
1546   for (const auto &kv : MergeableGroupsPrelim) {
1547     const MergeableGroup &MG = kv.first;
1548     std::vector<int> ParamNumbers;
1549     std::map<ComparableStringVector, int> ParamNumberMap;
1550 
1551     // Loop over the parameters for this group.
1552     for (size_t i = 0, e = MG.ParamTypes.size(); i < e; ++i) {
1553       // Is this parameter the same for all intrinsics in the group?
1554       const OutputIntrinsic &OI_first = *kv.second.begin();
1555       bool Constant = all_of(kv.second, [&](const OutputIntrinsic &OI) {
1556         return OI.ParamValues[i] == OI_first.ParamValues[i];
1557       });
1558 
1559       // If so, record it as -1, meaning 'no parameter variable needed'. Then
1560       // the corresponding call to allocParam in pass 2 will not generate a
1561       // variable at all, and just use the value inline.
1562       if (Constant) {
1563         ParamNumbers.push_back(-1);
1564         continue;
1565       }
1566 
1567       // Otherwise, make a list of the values this parameter takes for each
1568       // intrinsic, and see if that value vector matches anything we already
1569       // have. We also record the parameter type, so that we don't accidentally
1570       // match up two parameter variables with different types. (Not that
1571       // there's much chance of them having textually equivalent values, but in
1572       // _principle_ it could happen.)
1573       ComparableStringVector key;
1574       key.push_back(MG.ParamTypes[i]);
1575       for (const auto &OI : kv.second)
1576         key.push_back(OI.ParamValues[i]);
1577 
1578       auto Found = ParamNumberMap.find(key);
1579       if (Found != ParamNumberMap.end()) {
1580         // Yes, an existing parameter variable can be reused for this.
1581         ParamNumbers.push_back(Found->second);
1582         continue;
1583       }
1584 
1585       // No, we need a new parameter variable.
1586       int ExistingIndex = ParamNumberMap.size();
1587       ParamNumberMap[key] = ExistingIndex;
1588       ParamNumbers.push_back(ExistingIndex);
1589     }
1590 
1591     // Now we're ready to do the pass 2 code generation, which will emit the
1592     // reduced set of parameter variables we've just worked out.
1593 
1594     for (const auto &OI_prelim : kv.second) {
1595       const ACLEIntrinsic *Int = OI_prelim.Int;
1596 
1597       MergeableGroup MG;
1598       OutputIntrinsic OI;
1599 
1600       OI.Int = OI_prelim.Int;
1601       OI.Name = OI_prelim.Name;
1602       CodeGenParamAllocator ParamAlloc{&MG.ParamTypes, &OI.ParamValues,
1603                                        &ParamNumbers};
1604       raw_string_ostream OS(MG.Code);
1605       Int->genCode(OS, ParamAlloc, 2);
1606       OS.flush();
1607 
1608       MergeableGroups[MG].insert(OI);
1609     }
1610   }
1611 
1612   // Output the actual C++ code.
1613 
1614   for (const auto &kv : MergeableGroups) {
1615     const MergeableGroup &MG = kv.first;
1616 
1617     // List of case statements in the main switch on BuiltinID, and an open
1618     // brace.
1619     const char *prefix = "";
1620     for (const auto &OI : kv.second) {
1621       OS << prefix << "case ARM::BI__builtin_arm_mve_" << OI.Name << ":";
1622       prefix = "\n";
1623     }
1624     OS << " {\n";
1625 
1626     if (!MG.ParamTypes.empty()) {
1627       // If we've got some parameter variables, then emit their declarations...
1628       for (size_t i = 0, e = MG.ParamTypes.size(); i < e; ++i) {
1629         StringRef Type = MG.ParamTypes[i];
1630         OS << "  " << Type;
1631         if (!Type.endswith("*"))
1632           OS << " ";
1633         OS << " Param" << utostr(i) << ";\n";
1634       }
1635 
1636       // ... and an inner switch on BuiltinID that will fill them in with each
1637       // individual intrinsic's values.
1638       OS << "  switch (BuiltinID) {\n";
1639       for (const auto &OI : kv.second) {
1640         OS << "  case ARM::BI__builtin_arm_mve_" << OI.Name << ":\n";
1641         for (size_t i = 0, e = MG.ParamTypes.size(); i < e; ++i)
1642           OS << "    Param" << utostr(i) << " = " << OI.ParamValues[i] << ";\n";
1643         OS << "    break;\n";
1644       }
1645       OS << "  }\n";
1646     }
1647 
1648     // And finally, output the code, and close the outer pair of braces. (The
1649     // code will always end with a 'return' statement, so we need not insert a
1650     // 'break' here.)
1651     OS << MG.Code << "}\n";
1652   }
1653 }
1654 
1655 void MveEmitter::EmitBuiltinAliases(raw_ostream &OS) {
1656   for (const auto &kv : ACLEIntrinsics) {
1657     const ACLEIntrinsic &Int = *kv.second;
1658     OS << "case ARM::BI__builtin_arm_mve_" << Int.fullName() << ":\n"
1659        << "  return AliasName == \"" << Int.fullName() << "\"";
1660     if (Int.polymorphic())
1661       OS << " || AliasName == \"" << Int.shortName() << "\"";
1662     OS << ";\n";
1663   }
1664 }
1665 
1666 } // namespace
1667 
1668 namespace clang {
1669 
1670 void EmitMveHeader(RecordKeeper &Records, raw_ostream &OS) {
1671   MveEmitter(Records).EmitHeader(OS);
1672 }
1673 
1674 void EmitMveBuiltinDef(RecordKeeper &Records, raw_ostream &OS) {
1675   MveEmitter(Records).EmitBuiltinDef(OS);
1676 }
1677 
1678 void EmitMveBuiltinSema(RecordKeeper &Records, raw_ostream &OS) {
1679   MveEmitter(Records).EmitBuiltinSema(OS);
1680 }
1681 
1682 void EmitMveBuiltinCG(RecordKeeper &Records, raw_ostream &OS) {
1683   MveEmitter(Records).EmitBuiltinCG(OS);
1684 }
1685 
1686 void EmitMveBuiltinAliases(RecordKeeper &Records, raw_ostream &OS) {
1687   MveEmitter(Records).EmitBuiltinAliases(OS);
1688 }
1689 
1690 } // end namespace clang
1691