xref: /minix3/external/bsd/llvm/dist/clang/utils/TableGen/NeonEmitter.cpp (revision 0b98e8aad89f2bd4ba80b523d73cf29e9dd82ce1)
1 //===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This tablegen backend is responsible for emitting arm_neon.h, which includes
11 // a declaration and definition of each function specified by the ARM NEON
12 // compiler interface.  See ARM document DUI0348B.
13 //
14 // Each NEON instruction is implemented in terms of 1 or more functions which
15 // are suffixed with the element type of the input vectors.  Functions may be
16 // implemented in terms of generic vector operations such as +, *, -, etc. or
17 // by calling a __builtin_-prefixed function which will be handled by clang's
18 // CodeGen library.
19 //
20 // Additional validation code can be generated by this file when runHeader() is
21 // called, rather than the normal run() entry point.  A complete set of tests
22 // for Neon intrinsics can be generated by calling the runTests() entry point.
23 //
24 //===----------------------------------------------------------------------===//
25 
26 #include "llvm/ADT/DenseMap.h"
27 #include "llvm/ADT/SmallString.h"
28 #include "llvm/ADT/SmallVector.h"
29 #include "llvm/ADT/StringExtras.h"
30 #include "llvm/ADT/StringMap.h"
31 #include "llvm/Support/ErrorHandling.h"
32 #include "llvm/TableGen/Error.h"
33 #include "llvm/TableGen/Record.h"
34 #include "llvm/TableGen/TableGenBackend.h"
35 #include <string>
36 using namespace llvm;
37 
/// OpKind - The operation kinds known to this emitter.  NeonEmitter's
/// constructor maps each "OP_*" string onto one of these enumerators.  The
/// enumerators rely on implicit numbering, so their relative order is part of
/// the definition.
enum OpKind {
  OpNone, OpUnavailable,

  // Add* / Sub* kinds.
  OpAdd, OpAddl, OpAddlHi, OpAddw, OpAddwHi,
  OpSub, OpSubl, OpSublHi, OpSubw, OpSubwHi,

  // Mul* / Mla* / Mls* kinds.
  OpMul, OpMla, OpMlal, OpMullHi, OpMlalHi, OpMls, OpMlsl, OpMlslHi,

  // *N kinds.
  OpMulN, OpMlaN, OpMlsN, OpMlalN, OpMlslN,

  // *Lane kinds.
  OpMulLane, OpMulXLane, OpMullLane, OpMullHiLane,
  OpMlaLane, OpMlsLane, OpMlalLane, OpMlalHiLane,
  OpMlslLane, OpMlslHiLane,
  OpQDMullLane, OpQDMullHiLane,
  OpQDMlalLane, OpQDMlalHiLane,
  OpQDMlslLane, OpQDMlslHiLane,
  OpQDMulhLane, OpQRDMulhLane,
  OpFMSLane, OpFMSLaneQ,

  // Trn / Zip / Uzp kinds.
  OpTrn1, OpZip1, OpUzp1, OpTrn2, OpZip2, OpUzp2,

  // Comparison kinds.
  OpEq, OpGe, OpLe, OpGt, OpLt,

  // Negate and bitwise kinds.
  OpNeg, OpNot, OpAnd, OpOr, OpXor, OpAndNot, OpOrNot,

  // Cast / concat / dup / half-selection kinds.
  OpCast, OpConcat, OpDup, OpDupLane, OpHi, OpLo, OpSelect,

  // Rev* kinds.
  OpRev16, OpRev32, OpRev64,

  // *Hi narrowing / conversion kinds.
  OpXtnHi, OpSqxtunHi, OpQxtnHi, OpFcvtnHi, OpFcvtlHi, OpFcvtxnHi,

  OpReinterpret,

  OpAddhnHi, OpRAddhnHi, OpSubhnHi, OpRSubhnHi,

  // Abd* / Aba* kinds.
  OpAbdl, OpAbdlHi, OpAba, OpAbal, OpAbalHi,

  OpQDMullHi, OpQDMlalHi, OpQDMlslHi,

  OpDiv, OpLongHi, OpNarrowHi, OpMovlHi,

  // Copy*Lane kinds.
  OpCopyLane, OpCopyQLane, OpCopyLaneQ,

  // Scalar*Lane kinds.
  OpScalarMulLane, OpScalarMulLaneQ,
  OpScalarMulXLane, OpScalarMulXLaneQ,
  OpScalarVMulXLane, OpScalarVMulXLaneQ,
  OpScalarQDMullLane, OpScalarQDMullLaneQ,
  OpScalarQDMulHiLane, OpScalarQDMulHiLaneQ,
  OpScalarQRDMulHiLane, OpScalarQRDMulHiLaneQ
};
151 
/// ClassKind - How an instruction's name suffix is formed.  NeonEmitter's
/// constructor associates the arm_neon.td instruction classes with these.
enum ClassKind {
  ClassNone,

  /// Generic integer instruction, e.g., "i8" suffix.
  ClassI,

  /// Signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix.
  ClassS,

  /// Width-specific instruction, e.g., "8" suffix.
  ClassW,

  /// Bitcast arguments with enum argument to specify type.
  ClassB,

  /// Logical instructions which are op instructions but we need to not emit
  /// any suffix for in our tests.
  ClassL,

  /// Instructions which we do not test since they are not TRUE instructions.
  ClassNoTest
};
164 
/// NeonTypeFlags - Packs an element type together with "unsigned" and "quad"
/// bits to identify the types for overloaded Neon builtins.  The encoding must
/// be kept in sync with the flags in include/clang/Basic/TargetBuiltins.h.
namespace {
class NeonTypeFlags {
  enum {
    EltTypeMask = 0xf,    // mask for the element-type bits
    UnsignedFlag = 0x10,  // set when the type is unsigned
    QuadFlag = 0x20       // set when the type is a quad vector
  };

  /// The packed value: an EltType in the low bits OR'ed with the flags above.
  uint32_t Flags;

public:
  /// Element types; the enumerator value is stored directly in the low bits.
  enum EltType {
    Int8, Int16, Int32, Int64,
    Poly8, Poly16, Poly64,
    Float16, Float32, Float64
  };

  /// Wrap an already-encoded flag word.
  NeonTypeFlags(unsigned F) : Flags(F) {}

  /// Encode an element type plus its unsigned/quad modifiers.
  NeonTypeFlags(EltType ET, bool IsUnsigned, bool IsQuad)
      : Flags(static_cast<uint32_t>(ET) |
              (IsUnsigned ? static_cast<uint32_t>(UnsignedFlag) : 0u) |
              (IsQuad ? static_cast<uint32_t>(QuadFlag) : 0u)) {}

  /// \returns the packed flag word.
  uint32_t getFlags() const { return Flags; }
};
} // end anonymous namespace
202 
203 namespace {
204 class NeonEmitter {
205   RecordKeeper &Records;
206   StringMap<OpKind> OpMap;
207   DenseMap<Record*, ClassKind> ClassMap;
208 
209 public:
210   NeonEmitter(RecordKeeper &R) : Records(R) {
211     OpMap["OP_NONE"]  = OpNone;
212     OpMap["OP_UNAVAILABLE"] = OpUnavailable;
213     OpMap["OP_ADD"]   = OpAdd;
214     OpMap["OP_ADDL"]  = OpAddl;
215     OpMap["OP_ADDLHi"] = OpAddlHi;
216     OpMap["OP_ADDW"]  = OpAddw;
217     OpMap["OP_ADDWHi"] = OpAddwHi;
218     OpMap["OP_SUB"]   = OpSub;
219     OpMap["OP_SUBL"]  = OpSubl;
220     OpMap["OP_SUBLHi"] = OpSublHi;
221     OpMap["OP_SUBW"]  = OpSubw;
222     OpMap["OP_SUBWHi"] = OpSubwHi;
223     OpMap["OP_MUL"]   = OpMul;
224     OpMap["OP_MLA"]   = OpMla;
225     OpMap["OP_MLAL"]  = OpMlal;
226     OpMap["OP_MULLHi"]  = OpMullHi;
227     OpMap["OP_MLALHi"]  = OpMlalHi;
228     OpMap["OP_MLS"]   = OpMls;
229     OpMap["OP_MLSL"]  = OpMlsl;
230     OpMap["OP_MLSLHi"] = OpMlslHi;
231     OpMap["OP_MUL_N"] = OpMulN;
232     OpMap["OP_MLA_N"] = OpMlaN;
233     OpMap["OP_MLS_N"] = OpMlsN;
234     OpMap["OP_MLAL_N"] = OpMlalN;
235     OpMap["OP_MLSL_N"] = OpMlslN;
236     OpMap["OP_MUL_LN"]= OpMulLane;
237     OpMap["OP_MULX_LN"]= OpMulXLane;
238     OpMap["OP_MULL_LN"] = OpMullLane;
239     OpMap["OP_MULLHi_LN"] = OpMullHiLane;
240     OpMap["OP_MLA_LN"]= OpMlaLane;
241     OpMap["OP_MLS_LN"]= OpMlsLane;
242     OpMap["OP_MLAL_LN"] = OpMlalLane;
243     OpMap["OP_MLALHi_LN"] = OpMlalHiLane;
244     OpMap["OP_MLSL_LN"] = OpMlslLane;
245     OpMap["OP_MLSLHi_LN"] = OpMlslHiLane;
246     OpMap["OP_QDMULL_LN"] = OpQDMullLane;
247     OpMap["OP_QDMULLHi_LN"] = OpQDMullHiLane;
248     OpMap["OP_QDMLAL_LN"] = OpQDMlalLane;
249     OpMap["OP_QDMLALHi_LN"] = OpQDMlalHiLane;
250     OpMap["OP_QDMLSL_LN"] = OpQDMlslLane;
251     OpMap["OP_QDMLSLHi_LN"] = OpQDMlslHiLane;
252     OpMap["OP_QDMULH_LN"] = OpQDMulhLane;
253     OpMap["OP_QRDMULH_LN"] = OpQRDMulhLane;
254     OpMap["OP_FMS_LN"] = OpFMSLane;
255     OpMap["OP_FMS_LNQ"] = OpFMSLaneQ;
256     OpMap["OP_TRN1"]  = OpTrn1;
257     OpMap["OP_ZIP1"]  = OpZip1;
258     OpMap["OP_UZP1"]  = OpUzp1;
259     OpMap["OP_TRN2"]  = OpTrn2;
260     OpMap["OP_ZIP2"]  = OpZip2;
261     OpMap["OP_UZP2"]  = OpUzp2;
262     OpMap["OP_EQ"]    = OpEq;
263     OpMap["OP_GE"]    = OpGe;
264     OpMap["OP_LE"]    = OpLe;
265     OpMap["OP_GT"]    = OpGt;
266     OpMap["OP_LT"]    = OpLt;
267     OpMap["OP_NEG"]   = OpNeg;
268     OpMap["OP_NOT"]   = OpNot;
269     OpMap["OP_AND"]   = OpAnd;
270     OpMap["OP_OR"]    = OpOr;
271     OpMap["OP_XOR"]   = OpXor;
272     OpMap["OP_ANDN"]  = OpAndNot;
273     OpMap["OP_ORN"]   = OpOrNot;
274     OpMap["OP_CAST"]  = OpCast;
275     OpMap["OP_CONC"]  = OpConcat;
276     OpMap["OP_HI"]    = OpHi;
277     OpMap["OP_LO"]    = OpLo;
278     OpMap["OP_DUP"]   = OpDup;
279     OpMap["OP_DUP_LN"] = OpDupLane;
280     OpMap["OP_SEL"]   = OpSelect;
281     OpMap["OP_REV16"] = OpRev16;
282     OpMap["OP_REV32"] = OpRev32;
283     OpMap["OP_REV64"] = OpRev64;
284     OpMap["OP_XTN"] = OpXtnHi;
285     OpMap["OP_SQXTUN"] = OpSqxtunHi;
286     OpMap["OP_QXTN"] = OpQxtnHi;
287     OpMap["OP_VCVT_NA_HI"] = OpFcvtnHi;
288     OpMap["OP_VCVT_EX_HI"] = OpFcvtlHi;
289     OpMap["OP_VCVTX_HI"] = OpFcvtxnHi;
290     OpMap["OP_REINT"] = OpReinterpret;
291     OpMap["OP_ADDHNHi"] = OpAddhnHi;
292     OpMap["OP_RADDHNHi"] = OpRAddhnHi;
293     OpMap["OP_SUBHNHi"] = OpSubhnHi;
294     OpMap["OP_RSUBHNHi"] = OpRSubhnHi;
295     OpMap["OP_ABDL"]  = OpAbdl;
296     OpMap["OP_ABDLHi"] = OpAbdlHi;
297     OpMap["OP_ABA"]   = OpAba;
298     OpMap["OP_ABAL"]  = OpAbal;
299     OpMap["OP_ABALHi"] = OpAbalHi;
300     OpMap["OP_QDMULLHi"] = OpQDMullHi;
301     OpMap["OP_QDMLALHi"] = OpQDMlalHi;
302     OpMap["OP_QDMLSLHi"] = OpQDMlslHi;
303     OpMap["OP_DIV"] = OpDiv;
304     OpMap["OP_LONG_HI"] = OpLongHi;
305     OpMap["OP_NARROW_HI"] = OpNarrowHi;
306     OpMap["OP_MOVL_HI"] = OpMovlHi;
307     OpMap["OP_COPY_LN"] = OpCopyLane;
308     OpMap["OP_COPYQ_LN"] = OpCopyQLane;
309     OpMap["OP_COPY_LNQ"] = OpCopyLaneQ;
310     OpMap["OP_SCALAR_MUL_LN"]= OpScalarMulLane;
311     OpMap["OP_SCALAR_MUL_LNQ"]= OpScalarMulLaneQ;
312     OpMap["OP_SCALAR_MULX_LN"]= OpScalarMulXLane;
313     OpMap["OP_SCALAR_MULX_LNQ"]= OpScalarMulXLaneQ;
314     OpMap["OP_SCALAR_VMULX_LN"]= OpScalarVMulXLane;
315     OpMap["OP_SCALAR_VMULX_LNQ"]= OpScalarVMulXLaneQ;
316     OpMap["OP_SCALAR_QDMULL_LN"] = OpScalarQDMullLane;
317     OpMap["OP_SCALAR_QDMULL_LNQ"] = OpScalarQDMullLaneQ;
318     OpMap["OP_SCALAR_QDMULH_LN"] = OpScalarQDMulHiLane;
319     OpMap["OP_SCALAR_QDMULH_LNQ"] = OpScalarQDMulHiLaneQ;
320     OpMap["OP_SCALAR_QRDMULH_LN"] = OpScalarQRDMulHiLane;
321     OpMap["OP_SCALAR_QRDMULH_LNQ"] = OpScalarQRDMulHiLaneQ;
322 
323 
324     Record *SI = R.getClass("SInst");
325     Record *II = R.getClass("IInst");
326     Record *WI = R.getClass("WInst");
327     Record *SOpI = R.getClass("SOpInst");
328     Record *IOpI = R.getClass("IOpInst");
329     Record *WOpI = R.getClass("WOpInst");
330     Record *LOpI = R.getClass("LOpInst");
331     Record *NoTestOpI = R.getClass("NoTestOpInst");
332 
333     ClassMap[SI] = ClassS;
334     ClassMap[II] = ClassI;
335     ClassMap[WI] = ClassW;
336     ClassMap[SOpI] = ClassS;
337     ClassMap[IOpI] = ClassI;
338     ClassMap[WOpI] = ClassW;
339     ClassMap[LOpI] = ClassL;
340     ClassMap[NoTestOpI] = ClassNoTest;
341   }
342 
343   // run - Emit arm_neon.h.inc
344   void run(raw_ostream &o);
345 
346   // runHeader - Emit all the __builtin prototypes used in arm_neon.h
347   void runHeader(raw_ostream &o);
348 
349   // runTests - Emit tests for all the Neon intrinsics.
350   void runTests(raw_ostream &o);
351 
352 private:
353   void emitIntrinsic(raw_ostream &OS, Record *R,
354                      StringMap<ClassKind> &EmittedMap);
355   void genBuiltinsDef(raw_ostream &OS, StringMap<ClassKind> &A64IntrinsicMap,
356                       bool isA64GenBuiltinDef);
357   void genOverloadTypeCheckCode(raw_ostream &OS,
358                                 StringMap<ClassKind> &A64IntrinsicMap,
359                                 bool isA64TypeCheck);
360   void genIntrinsicRangeCheckCode(raw_ostream &OS,
361                                   StringMap<ClassKind> &A64IntrinsicMap,
362                                   bool isA64RangeCheck);
363   void genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap,
364                      bool isA64TestGen);
365 };
366 } // end anonymous namespace
367 
368 /// ParseTypes - break down a string such as "fQf" into a vector of StringRefs,
369 /// which each StringRef representing a single type declared in the string.
370 /// for "fQf" we would end up with 2 StringRefs, "f", and "Qf", representing
371 /// 2xfloat and 4xfloat respectively.
372 static void ParseTypes(Record *r, std::string &s,
373                        SmallVectorImpl<StringRef> &TV) {
374   const char *data = s.data();
375   int len = 0;
376 
377   for (unsigned i = 0, e = s.size(); i != e; ++i, ++len) {
378     if (data[len] == 'P' || data[len] == 'Q' || data[len] == 'U'
379                          || data[len] == 'H' || data[len] == 'S')
380       continue;
381 
382     switch (data[len]) {
383       case 'c':
384       case 's':
385       case 'i':
386       case 'l':
387       case 'h':
388       case 'f':
389       case 'd':
390         break;
391       default:
392         PrintFatalError(r->getLoc(),
393                       "Unexpected letter: " + std::string(data + len, 1));
394     }
395     TV.push_back(StringRef(data, len + 1));
396     data += len + 1;
397     len = -1;
398   }
399 }
400 
401 /// Widen - Convert a type code into the next wider type.  char -> short,
402 /// short -> int, etc.
403 static char Widen(const char t) {
404   switch (t) {
405     case 'c':
406       return 's';
407     case 's':
408       return 'i';
409     case 'i':
410       return 'l';
411     case 'h':
412       return 'f';
413     case 'f':
414       return 'd';
415     default:
416       PrintFatalError("unhandled type in widen!");
417   }
418 }
419 
420 /// Narrow - Convert a type code into the next smaller type.  short -> char,
421 /// float -> half float, etc.
422 static char Narrow(const char t) {
423   switch (t) {
424     case 's':
425       return 'c';
426     case 'i':
427       return 's';
428     case 'l':
429       return 'i';
430     case 'f':
431       return 'h';
432     case 'd':
433       return 'f';
434     default:
435       PrintFatalError("unhandled type in narrow!");
436   }
437 }
438 
439 static std::string GetNarrowTypestr(StringRef ty)
440 {
441   std::string s;
442   for (size_t i = 0, end = ty.size(); i < end; i++) {
443     switch (ty[i]) {
444       case 's':
445         s += 'c';
446         break;
447       case 'i':
448         s += 's';
449         break;
450       case 'l':
451         s += 'i';
452         break;
453       default:
454         s += ty[i];
455         break;
456     }
457   }
458 
459   return s;
460 }
461 
462 /// For a particular StringRef, return the base type code, and whether it has
463 /// the quad-vector, polynomial, or unsigned modifiers set.
464 static char ClassifyType(StringRef ty, bool &quad, bool &poly, bool &usgn) {
465   unsigned off = 0;
466   // ignore scalar.
467   if (ty[off] == 'S') {
468     ++off;
469   }
470   // remember quad.
471   if (ty[off] == 'Q' || ty[off] == 'H') {
472     quad = true;
473     ++off;
474   }
475 
476   // remember poly.
477   if (ty[off] == 'P') {
478     poly = true;
479     ++off;
480   }
481 
482   // remember unsigned.
483   if (ty[off] == 'U') {
484     usgn = true;
485     ++off;
486   }
487 
488   // base type to get the type string for.
489   return ty[off];
490 }
491 
/// ModType - Transform a type code and its modifiers based on a mod code. The
/// mod code definitions may be found at the top of arm_neon.td.
///
/// \param mod   the modifier character selecting the transformation.
/// \param type  the base type code; passed by value, the transformed code is
///              the return value.
/// \param quad  in/out: may be set or cleared depending on \p mod.
/// \param poly  in/out: may be cleared depending on \p mod.
/// \param usgn  in/out: may be set or cleared depending on \p mod.
/// \param scal  out: set to true by mods that produce a scalar; never cleared
///              here.
/// \param cnst  out: set to true only by mod 'c'; never cleared here.
/// \param pntr  out: set to true only by mods 'c' and 'p'; never cleared here.
/// \returns the (possibly changed) type code.  A mod not listed in the switch
/// leaves the type code and all flags untouched.
///
/// Mods that widen or narrow go through Widen()/Narrow(), which raise a fatal
/// error for type codes they do not handle.
static char ModType(const char mod, char type, bool &quad, bool &poly,
                    bool &usgn, bool &scal, bool &cnst, bool &pntr) {
  switch (mod) {
    case 't':
      // A polynomial type becomes the matching unsigned type.
      if (poly) {
        poly = false;
        usgn = true;
      }
      break;
    case 'b':
      scal = true;
      // Intentional fall through: 'b' is the scalar form of 'u'.
    case 'u':
      // Unsigned integer; float/double map to the same-width integer code.
      usgn = true;
      poly = false;
      if (type == 'f')
        type = 'i';
      if (type == 'd')
        type = 'l';
      break;
    case '$':
      scal = true;
      // Intentional fall through: '$' is the scalar form of 'x'.
    case 'x':
      // Signed integer; float/double map to the same-width integer code.
      usgn = false;
      poly = false;
      if (type == 'f')
        type = 'i';
      if (type == 'd')
        type = 'l';
      break;
    case 'o':
      // Scalar with type code 'd'.
      scal = true;
      type = 'd';
      usgn = false;
      break;
    case 'y':
      scal = true;
      // Intentional fall through: 'y' is the scalar form of 'f'.
    case 'f':
      // Type code 'f'; coming from 'h' also sets quad.
      if (type == 'h')
        quad = true;
      type = 'f';
      usgn = false;
      break;
    case 'g':
      quad = false;
      break;
    case 'B':
    case 'C':
    case 'D':
    case 'j':
      quad = true;
      break;
    case 'w':
      // Widen the element and set quad.
      type = Widen(type);
      quad = true;
      break;
    case 'n':
      // Widen the element; quad is left as it was.
      type = Widen(type);
      break;
    case 'i':
      type = 'i';
      scal = true;
      break;
    case 'l':
      type = 'l';
      scal = true;
      usgn = true;
      break;
    case 'z':
      // Scalar of the next narrower type.
      type = Narrow(type);
      scal = true;
      break;
    case 'r':
      // Scalar of the next wider type.
      type = Widen(type);
      scal = true;
      break;
    case 's':
    case 'a':
      scal = true;
      break;
    case 'k':
      quad = true;
      break;
    case 'c':
      cnst = true;
      // Intentional fall through: 'c' is the const form of 'p'.
    case 'p':
      pntr = true;
      scal = true;
      break;
    case 'h':
      // Narrow the element; a result of 'h' also clears quad.
      type = Narrow(type);
      if (type == 'h')
        quad = false;
      break;
    case 'q':
      // Narrow the element and set quad.
      type = Narrow(type);
      quad = true;
      break;
    case 'e':
      // Narrow the element and make it unsigned.
      type = Narrow(type);
      usgn = true;
      break;
    case 'm':
      // Narrow the element and clear quad.
      type = Narrow(type);
      quad = false;
      break;
    default:
      break;
  }
  return type;
}
604 
/// IsMultiVecProto - True when the prototype character \p p is one of
/// '2', '3', '4', 'B', 'C' or 'D'.
static bool IsMultiVecProto(const char p) {
  switch (p) {
  case '2':
  case '3':
  case '4':
  case 'B':
  case 'C':
  case 'D':
    return true;
  default:
    return false;
  }
}
608 
609 /// TypeString - for a modifier and type, generate the name of the typedef for
610 /// that type.  QUc -> uint8x8_t.
611 static std::string TypeString(const char mod, StringRef typestr) {
612   bool quad = false;
613   bool poly = false;
614   bool usgn = false;
615   bool scal = false;
616   bool cnst = false;
617   bool pntr = false;
618 
619   if (mod == 'v')
620     return "void";
621   if (mod == 'i')
622     return "int";
623 
624   // base type to get the type string for.
625   char type = ClassifyType(typestr, quad, poly, usgn);
626 
627   // Based on the modifying character, change the type and width if necessary.
628   type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
629 
630   SmallString<128> s;
631 
632   if (usgn)
633     s.push_back('u');
634 
635   switch (type) {
636     case 'c':
637       s += poly ? "poly8" : "int8";
638       if (scal)
639         break;
640       s += quad ? "x16" : "x8";
641       break;
642     case 's':
643       s += poly ? "poly16" : "int16";
644       if (scal)
645         break;
646       s += quad ? "x8" : "x4";
647       break;
648     case 'i':
649       s += "int32";
650       if (scal)
651         break;
652       s += quad ? "x4" : "x2";
653       break;
654     case 'l':
655       s += (poly && !usgn)? "poly64" : "int64";
656       if (scal)
657         break;
658       s += quad ? "x2" : "x1";
659       break;
660     case 'h':
661       s += "float16";
662       if (scal)
663         break;
664       s += quad ? "x8" : "x4";
665       break;
666     case 'f':
667       s += "float32";
668       if (scal)
669         break;
670       s += quad ? "x4" : "x2";
671       break;
672     case 'd':
673       s += "float64";
674       if (scal)
675         break;
676       s += quad ? "x2" : "x1";
677       break;
678 
679     default:
680       PrintFatalError("unhandled type!");
681   }
682 
683   if (mod == '2' || mod == 'B')
684     s += "x2";
685   if (mod == '3' || mod == 'C')
686     s += "x3";
687   if (mod == '4' || mod == 'D')
688     s += "x4";
689 
690   // Append _t, finishing the type string typedef type.
691   s += "_t";
692 
693   if (cnst)
694     s += " const";
695 
696   if (pntr)
697     s += " *";
698 
699   return s.str();
700 }
701 
702 /// BuiltinTypeString - for a modifier and type, generate the clang
703 /// BuiltinsARM.def prototype code for the function.  See the top of clang's
704 /// Builtins.def for a description of the type strings.
705 static std::string BuiltinTypeString(const char mod, StringRef typestr,
706                                      ClassKind ck, bool ret) {
707   bool quad = false;
708   bool poly = false;
709   bool usgn = false;
710   bool scal = false;
711   bool cnst = false;
712   bool pntr = false;
713 
714   if (mod == 'v')
715     return "v"; // void
716   if (mod == 'i')
717     return "i"; // int
718 
719   // base type to get the type string for.
720   char type = ClassifyType(typestr, quad, poly, usgn);
721 
722   // Based on the modifying character, change the type and width if necessary.
723   type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
724 
725   // All pointers are void* pointers.  Change type to 'v' now.
726   if (pntr) {
727     usgn = false;
728     poly = false;
729     type = 'v';
730   }
731   // Treat half-float ('h') types as unsigned short ('s') types.
732   if (type == 'h') {
733     type = 's';
734     usgn = true;
735   }
736   usgn = usgn | poly | ((ck == ClassI || ck == ClassW) &&
737                          scal && type != 'f' && type != 'd');
738 
739   if (scal) {
740     SmallString<128> s;
741 
742     if (usgn)
743       s.push_back('U');
744     else if (type == 'c')
745       s.push_back('S'); // make chars explicitly signed
746 
747     if (type == 'l') // 64-bit long
748       s += "LLi";
749     else
750       s.push_back(type);
751 
752     if (cnst)
753       s.push_back('C');
754     if (pntr)
755       s.push_back('*');
756     return s.str();
757   }
758 
759   // Since the return value must be one type, return a vector type of the
760   // appropriate width which we will bitcast.  An exception is made for
761   // returning structs of 2, 3, or 4 vectors which are returned in a sret-like
762   // fashion, storing them to a pointer arg.
763   if (ret) {
764     if (IsMultiVecProto(mod))
765       return "vv*"; // void result with void* first argument
766     if (mod == 'f' || (ck != ClassB && type == 'f'))
767       return quad ? "V4f" : "V2f";
768     if (ck != ClassB && type == 'd')
769       return quad ? "V2d" : "V1d";
770     if (ck != ClassB && type == 's')
771       return quad ? "V8s" : "V4s";
772     if (ck != ClassB && type == 'i')
773       return quad ? "V4i" : "V2i";
774     if (ck != ClassB && type == 'l')
775       return quad ? "V2LLi" : "V1LLi";
776 
777     return quad ? "V16Sc" : "V8Sc";
778   }
779 
780   // Non-return array types are passed as individual vectors.
781   if (mod == '2' || mod == 'B')
782     return quad ? "V16ScV16Sc" : "V8ScV8Sc";
783   if (mod == '3' || mod == 'C')
784     return quad ? "V16ScV16ScV16Sc" : "V8ScV8ScV8Sc";
785   if (mod == '4' || mod == 'D')
786     return quad ? "V16ScV16ScV16ScV16Sc" : "V8ScV8ScV8ScV8Sc";
787 
788   if (mod == 'f' || (ck != ClassB && type == 'f'))
789     return quad ? "V4f" : "V2f";
790   if (ck != ClassB && type == 'd')
791     return quad ? "V2d" : "V1d";
792   if (ck != ClassB && type == 's')
793     return quad ? "V8s" : "V4s";
794   if (ck != ClassB && type == 'i')
795     return quad ? "V4i" : "V2i";
796   if (ck != ClassB && type == 'l')
797     return quad ? "V2LLi" : "V1LLi";
798 
799   return quad ? "V16Sc" : "V8Sc";
800 }
801 
802 /// InstructionTypeCode - Computes the ARM argument character code and
803 /// quad status for a specific type string and ClassKind.
804 static void InstructionTypeCode(const StringRef &typeStr,
805                                 const ClassKind ck,
806                                 bool &quad,
807                                 std::string &typeCode) {
808   bool poly = false;
809   bool usgn = false;
810   char type = ClassifyType(typeStr, quad, poly, usgn);
811 
812   switch (type) {
813   case 'c':
814     switch (ck) {
815     case ClassS: typeCode = poly ? "p8" : usgn ? "u8" : "s8"; break;
816     case ClassI: typeCode = "i8"; break;
817     case ClassW: typeCode = "8"; break;
818     default: break;
819     }
820     break;
821   case 's':
822     switch (ck) {
823     case ClassS: typeCode = poly ? "p16" : usgn ? "u16" : "s16"; break;
824     case ClassI: typeCode = "i16"; break;
825     case ClassW: typeCode = "16"; break;
826     default: break;
827     }
828     break;
829   case 'i':
830     switch (ck) {
831     case ClassS: typeCode = usgn ? "u32" : "s32"; break;
832     case ClassI: typeCode = "i32"; break;
833     case ClassW: typeCode = "32"; break;
834     default: break;
835     }
836     break;
837   case 'l':
838     switch (ck) {
839     case ClassS: typeCode = poly ? "p64" : usgn ? "u64" : "s64"; break;
840     case ClassI: typeCode = "i64"; break;
841     case ClassW: typeCode = "64"; break;
842     default: break;
843     }
844     break;
845   case 'h':
846     switch (ck) {
847     case ClassS:
848     case ClassI: typeCode = "f16"; break;
849     case ClassW: typeCode = "16"; break;
850     default: break;
851     }
852     break;
853   case 'f':
854     switch (ck) {
855     case ClassS:
856     case ClassI: typeCode = "f32"; break;
857     case ClassW: typeCode = "32"; break;
858     default: break;
859     }
860     break;
861   case 'd':
862     switch (ck) {
863     case ClassS:
864     case ClassI:
865       typeCode += "f64";
866       break;
867     case ClassW:
868       PrintFatalError("unhandled type!");
869     default:
870       break;
871     }
872     break;
873   default:
874     PrintFatalError("unhandled type!");
875   }
876 }
877 
878 static char Insert_BHSD_Suffix(StringRef typestr){
879   unsigned off = 0;
880   if(typestr[off++] == 'S'){
881     while(typestr[off] == 'Q' || typestr[off] == 'H'||
882           typestr[off] == 'P' || typestr[off] == 'U')
883       ++off;
884     switch (typestr[off]){
885     default  : break;
886     case 'c' : return 'b';
887     case 's' : return 'h';
888     case 'i' :
889     case 'f' : return 's';
890     case 'l' :
891     case 'd' : return 'd';
892     }
893   }
894   return 0;
895 }
896 
/// endsWith_xN - True when \p name is longer than three characters and ends
/// in "_x2", "_x3" or "_x4".
static bool endsWith_xN(std::string const &name) {
  const std::string::size_type Len = name.length();
  if (Len <= 3)
    return false;

  const char Digit = name[Len - 1];
  return name[Len - 3] == '_' && name[Len - 2] == 'x' &&
         Digit >= '2' && Digit <= '4';
}
906 
907 /// MangleName - Append a type or width suffix to a base neon function name,
908 /// and insert a 'q' in the appropriate location if type string starts with 'Q'.
909 /// E.g. turn "vst2_lane" into "vst2q_lane_f32", etc.
910 /// Insert proper 'b' 'h' 's' 'd' if prefix 'S' is used.
911 static std::string MangleName(const std::string &name, StringRef typestr,
912                               ClassKind ck) {
913   if (name == "vcvt_f32_f16" || name == "vcvt_f32_f64")
914     return name;
915 
916   bool quad = false;
917   std::string typeCode = "";
918 
919   InstructionTypeCode(typestr, ck, quad, typeCode);
920 
921   std::string s = name;
922 
923   if (typeCode.size() > 0) {
924     // If the name is end with _xN (N = 2,3,4), insert the typeCode before _xN.
925     if (endsWith_xN(s))
926       s.insert(s.length() - 3, "_" + typeCode);
927     else
928       s += "_" + typeCode;
929   }
930 
931   if (ck == ClassB)
932     s += "_v";
933 
934   // Insert a 'q' before the first '_' character so that it ends up before
935   // _lane or _n on vector-scalar operations.
936   if (typestr.find("Q") != StringRef::npos) {
937       size_t pos = s.find('_');
938       s = s.insert(pos, "q");
939   }
940   char ins = Insert_BHSD_Suffix(typestr);
941   if(ins){
942     size_t pos = s.find('_');
943     s = s.insert(pos, &ins, 1);
944   }
945 
946   return s;
947 }
948 
949 static void PreprocessInstruction(const StringRef &Name,
950                                   const std::string &InstName,
951                                   std::string &Prefix,
952                                   bool &HasNPostfix,
953                                   bool &HasLanePostfix,
954                                   bool &HasDupPostfix,
955                                   bool &IsSpecialVCvt,
956                                   size_t &TBNumber) {
957   // All of our instruction name fields from arm_neon.td are of the form
958   //   <instructionname>_...
959   // Thus we grab our instruction name via computation of said Prefix.
960   const size_t PrefixEnd = Name.find_first_of('_');
961   // If InstName is passed in, we use that instead of our name Prefix.
962   Prefix = InstName.size() == 0? Name.slice(0, PrefixEnd).str() : InstName;
963 
964   const StringRef Postfix = Name.slice(PrefixEnd, Name.size());
965 
966   HasNPostfix = Postfix.count("_n");
967   HasLanePostfix = Postfix.count("_lane");
968   HasDupPostfix = Postfix.count("_dup");
969   IsSpecialVCvt = Postfix.size() != 0 && Name.count("vcvt");
970 
971   if (InstName.compare("vtbl") == 0 ||
972       InstName.compare("vtbx") == 0) {
973     // If we have a vtblN/vtbxN instruction, use the instruction's ASCII
974     // encoding to get its true value.
975     TBNumber = Name[Name.size()-1] - 48;
976   }
977 }
978 
979 /// GenerateRegisterCheckPatternsForLoadStores - Given a bunch of data we have
980 /// extracted, generate a FileCheck pattern for a Load Or Store
981 static void
982 GenerateRegisterCheckPatternForLoadStores(const StringRef &NameRef,
983                                           const std::string& OutTypeCode,
984                                           const bool &IsQuad,
985                                           const bool &HasDupPostfix,
986                                           const bool &HasLanePostfix,
987                                           const size_t Count,
988                                           std::string &RegisterSuffix) {
989   const bool IsLDSTOne = NameRef.count("vld1") || NameRef.count("vst1");
990   // If N == 3 || N == 4 and we are dealing with a quad instruction, Clang
991   // will output a series of v{ld,st}1s, so we have to handle it specially.
992   if ((Count == 3 || Count == 4) && IsQuad) {
993     RegisterSuffix += "{";
994     for (size_t i = 0; i < Count; i++) {
995       RegisterSuffix += "d{{[0-9]+}}";
996       if (HasDupPostfix) {
997         RegisterSuffix += "[]";
998       }
999       if (HasLanePostfix) {
1000         RegisterSuffix += "[{{[0-9]+}}]";
1001       }
1002       if (i < Count-1) {
1003         RegisterSuffix += ", ";
1004       }
1005     }
1006     RegisterSuffix += "}";
1007   } else {
1008 
1009     // Handle normal loads and stores.
1010     RegisterSuffix += "{";
1011     for (size_t i = 0; i < Count; i++) {
1012       RegisterSuffix += "d{{[0-9]+}}";
1013       if (HasDupPostfix) {
1014         RegisterSuffix += "[]";
1015       }
1016       if (HasLanePostfix) {
1017         RegisterSuffix += "[{{[0-9]+}}]";
1018       }
1019       if (IsQuad && !HasLanePostfix) {
1020         RegisterSuffix += ", d{{[0-9]+}}";
1021         if (HasDupPostfix) {
1022           RegisterSuffix += "[]";
1023         }
1024       }
1025       if (i < Count-1) {
1026         RegisterSuffix += ", ";
1027       }
1028     }
1029     RegisterSuffix += "}, [r{{[0-9]+}}";
1030 
1031     // We only include the alignment hint if we have a vld1.*64 or
1032     // a dup/lane instruction.
1033     if (IsLDSTOne) {
1034       if ((HasLanePostfix || HasDupPostfix) && OutTypeCode != "8") {
1035         RegisterSuffix += ":" + OutTypeCode;
1036       }
1037     }
1038 
1039     RegisterSuffix += "]";
1040   }
1041 }
1042 
1043 static bool HasNPostfixAndScalarArgs(const StringRef &NameRef,
1044                                      const bool &HasNPostfix) {
1045   return (NameRef.count("vmla") ||
1046           NameRef.count("vmlal") ||
1047           NameRef.count("vmlsl") ||
1048           NameRef.count("vmull") ||
1049           NameRef.count("vqdmlal") ||
1050           NameRef.count("vqdmlsl") ||
1051           NameRef.count("vqdmulh") ||
1052           NameRef.count("vqdmull") ||
1053           NameRef.count("vqrdmulh")) && HasNPostfix;
1054 }
1055 
1056 static bool IsFiveOperandLaneAccumulator(const StringRef &NameRef,
1057                                          const bool &HasLanePostfix) {
1058   return (NameRef.count("vmla") ||
1059           NameRef.count("vmls") ||
1060           NameRef.count("vmlal") ||
1061           NameRef.count("vmlsl") ||
1062           (NameRef.count("vmul") && NameRef.size() == 3)||
1063           NameRef.count("vqdmlal") ||
1064           NameRef.count("vqdmlsl") ||
1065           NameRef.count("vqdmulh") ||
1066           NameRef.count("vqrdmulh")) && HasLanePostfix;
1067 }
1068 
1069 static bool IsSpecialLaneMultiply(const StringRef &NameRef,
1070                                   const bool &HasLanePostfix,
1071                                   const bool &IsQuad) {
1072   const bool IsVMulOrMulh = (NameRef.count("vmul") || NameRef.count("mulh"))
1073                                && IsQuad;
1074   const bool IsVMull = NameRef.count("mull") && !IsQuad;
1075   return (IsVMulOrMulh || IsVMull) && HasLanePostfix;
1076 }
1077 
1078 static void NormalizeProtoForRegisterPatternCreation(const std::string &Name,
1079                                                      const std::string &Proto,
1080                                                      const bool &HasNPostfix,
1081                                                      const bool &IsQuad,
1082                                                      const bool &HasLanePostfix,
1083                                                      const bool &HasDupPostfix,
1084                                                      std::string &NormedProto) {
1085   // Handle generic case.
1086   const StringRef NameRef(Name);
1087   for (size_t i = 0, end = Proto.size(); i < end; i++) {
1088     switch (Proto[i]) {
1089     case 'u':
1090     case 'f':
1091     case 'd':
1092     case 's':
1093     case 'x':
1094     case 't':
1095     case 'n':
1096       NormedProto += IsQuad? 'q' : 'd';
1097       break;
1098     case 'w':
1099     case 'k':
1100       NormedProto += 'q';
1101       break;
1102     case 'g':
1103     case 'j':
1104     case 'h':
1105     case 'e':
1106       NormedProto += 'd';
1107       break;
1108     case 'i':
1109       NormedProto += HasLanePostfix? 'a' : 'i';
1110       break;
1111     case 'a':
1112       if (HasLanePostfix) {
1113         NormedProto += 'a';
1114       } else if (HasNPostfixAndScalarArgs(NameRef, HasNPostfix)) {
1115         NormedProto += IsQuad? 'q' : 'd';
1116       } else {
1117         NormedProto += 'i';
1118       }
1119       break;
1120     }
1121   }
1122 
1123   // Handle Special Cases.
1124   const bool IsNotVExt = !NameRef.count("vext");
1125   const bool IsVPADAL = NameRef.count("vpadal");
1126   const bool Is5OpLaneAccum = IsFiveOperandLaneAccumulator(NameRef,
1127                                                            HasLanePostfix);
1128   const bool IsSpecialLaneMul = IsSpecialLaneMultiply(NameRef, HasLanePostfix,
1129                                                       IsQuad);
1130 
1131   if (IsSpecialLaneMul) {
1132     // If
1133     NormedProto[2] = NormedProto[3];
1134     NormedProto.erase(3);
1135   } else if (NormedProto.size() == 4 &&
1136              NormedProto[0] == NormedProto[1] &&
1137              IsNotVExt) {
1138     // If NormedProto.size() == 4 and the first two proto characters are the
1139     // same, ignore the first.
1140     NormedProto = NormedProto.substr(1, 3);
1141   } else if (Is5OpLaneAccum) {
1142     // If we have a 5 op lane accumulator operation, we take characters 1,2,4
1143     std::string tmp = NormedProto.substr(1,2);
1144     tmp += NormedProto[4];
1145     NormedProto = tmp;
1146   } else if (IsVPADAL) {
1147     // If we have VPADAL, ignore the first character.
1148     NormedProto = NormedProto.substr(0, 2);
1149   } else if (NameRef.count("vdup") && NormedProto.size() > 2) {
1150     // If our instruction is a dup instruction, keep only the first and
1151     // last characters.
1152     std::string tmp = "";
1153     tmp += NormedProto[0];
1154     tmp += NormedProto[NormedProto.size()-1];
1155     NormedProto = tmp;
1156   }
1157 }
1158 
1159 /// GenerateRegisterCheckPatterns - Given a bunch of data we have
1160 /// extracted, generate a FileCheck pattern to check that an
1161 /// instruction's arguments are correct.
1162 static void GenerateRegisterCheckPattern(const std::string &Name,
1163                                          const std::string &Proto,
1164                                          const std::string &OutTypeCode,
1165                                          const bool &HasNPostfix,
1166                                          const bool &IsQuad,
1167                                          const bool &HasLanePostfix,
1168                                          const bool &HasDupPostfix,
1169                                          const size_t &TBNumber,
1170                                          std::string &RegisterSuffix) {
1171 
1172   RegisterSuffix = "";
1173 
1174   const StringRef NameRef(Name);
1175   const StringRef ProtoRef(Proto);
1176 
1177   if ((NameRef.count("vdup") || NameRef.count("vmov")) && HasNPostfix) {
1178     return;
1179   }
1180 
1181   const bool IsLoadStore = NameRef.count("vld") || NameRef.count("vst");
1182   const bool IsTBXOrTBL = NameRef.count("vtbl") || NameRef.count("vtbx");
1183 
1184   if (IsLoadStore) {
1185     // Grab N value from  v{ld,st}N using its ascii representation.
1186     const size_t Count = NameRef[3] - 48;
1187 
1188     GenerateRegisterCheckPatternForLoadStores(NameRef, OutTypeCode, IsQuad,
1189                                               HasDupPostfix, HasLanePostfix,
1190                                               Count, RegisterSuffix);
1191   } else if (IsTBXOrTBL) {
1192     RegisterSuffix += "d{{[0-9]+}}, {";
1193     for (size_t i = 0; i < TBNumber-1; i++) {
1194       RegisterSuffix += "d{{[0-9]+}}, ";
1195     }
1196     RegisterSuffix += "d{{[0-9]+}}}, d{{[0-9]+}}";
1197   } else {
1198     // Handle a normal instruction.
1199     if (NameRef.count("vget") || NameRef.count("vset"))
1200       return;
1201 
1202     // We first normalize our proto, since we only need to emit 4
1203     // different types of checks, yet have more than 4 proto types
1204     // that map onto those 4 patterns.
1205     std::string NormalizedProto("");
1206     NormalizeProtoForRegisterPatternCreation(Name, Proto, HasNPostfix, IsQuad,
1207                                              HasLanePostfix, HasDupPostfix,
1208                                              NormalizedProto);
1209 
1210     for (size_t i = 0, end = NormalizedProto.size(); i < end; i++) {
1211       const char &c = NormalizedProto[i];
1212       switch (c) {
1213       case 'q':
1214         RegisterSuffix += "q{{[0-9]+}}, ";
1215         break;
1216 
1217       case 'd':
1218         RegisterSuffix += "d{{[0-9]+}}, ";
1219         break;
1220 
1221       case 'i':
1222         RegisterSuffix += "#{{[0-9]+}}, ";
1223         break;
1224 
1225       case 'a':
1226         RegisterSuffix += "d{{[0-9]+}}[{{[0-9]}}], ";
1227         break;
1228       }
1229     }
1230 
1231     // Remove extra ", ".
1232     RegisterSuffix = RegisterSuffix.substr(0, RegisterSuffix.size()-2);
1233   }
1234 }
1235 
1236 /// GenerateChecksForIntrinsic - Given a specific instruction name +
1237 /// typestr + class kind, generate the proper set of FileCheck
1238 /// Patterns to check for. We could just return a string, but instead
1239 /// use a vector since it provides us with the extra flexibility of
1240 /// emitting multiple checks, which comes in handy for certain cases
1241 /// like mla where we want to check for 2 different instructions.
1242 static void GenerateChecksForIntrinsic(const std::string &Name,
1243                                        const std::string &Proto,
1244                                        StringRef &OutTypeStr,
1245                                        StringRef &InTypeStr,
1246                                        ClassKind Ck,
1247                                        const std::string &InstName,
1248                                        bool IsHiddenLOp,
1249                                        std::vector<std::string>& Result) {
1250 
1251   // If Ck is a ClassNoTest instruction, just return so no test is
1252   // emitted.
1253   if(Ck == ClassNoTest)
1254     return;
1255 
1256   if (Name == "vcvt_f32_f16") {
1257     Result.push_back("vcvt.f32.f16");
1258     return;
1259   }
1260 
1261 
1262   // Now we preprocess our instruction given the data we have to get the
1263   // data that we need.
1264   // Create a StringRef for String Manipulation of our Name.
1265   const StringRef NameRef(Name);
1266   // Instruction Prefix.
1267   std::string Prefix;
1268   // The type code for our out type string.
1269   std::string OutTypeCode;
1270   // To handle our different cases, we need to check for different postfixes.
1271   // Is our instruction a quad instruction.
1272   bool IsQuad = false;
1273   // Our instruction is of the form <instructionname>_n.
1274   bool HasNPostfix = false;
1275   // Our instruction is of the form <instructionname>_lane.
1276   bool HasLanePostfix = false;
1277   // Our instruction is of the form <instructionname>_dup.
1278   bool HasDupPostfix  = false;
1279   // Our instruction is a vcvt instruction which requires special handling.
1280   bool IsSpecialVCvt = false;
1281   // If we have a vtbxN or vtblN instruction, this is set to N.
1282   size_t TBNumber = -1;
1283   // Register Suffix
1284   std::string RegisterSuffix;
1285 
1286   PreprocessInstruction(NameRef, InstName, Prefix,
1287                         HasNPostfix, HasLanePostfix, HasDupPostfix,
1288                         IsSpecialVCvt, TBNumber);
1289 
1290   InstructionTypeCode(OutTypeStr, Ck, IsQuad, OutTypeCode);
1291   GenerateRegisterCheckPattern(Name, Proto, OutTypeCode, HasNPostfix, IsQuad,
1292                                HasLanePostfix, HasDupPostfix, TBNumber,
1293                                RegisterSuffix);
1294 
1295   // In the following section, we handle a bunch of special cases. You can tell
1296   // a special case by the fact we are returning early.
1297 
1298   // If our instruction is a logical instruction without postfix or a
1299   // hidden LOp just return the current Prefix.
1300   if (Ck == ClassL || IsHiddenLOp) {
1301     Result.push_back(Prefix + " " + RegisterSuffix);
1302     return;
1303   }
1304 
1305   // If we have a vmov, due to the many different cases, some of which
1306   // vary within the different intrinsics generated for a single
1307   // instruction type, just output a vmov. (e.g. given an instruction
1308   // A, A.u32 might be vmov and A.u8 might be vmov.8).
1309   //
1310   // FIXME: Maybe something can be done about this. The two cases that we care
1311   // about are vmov as an LType and vmov as a WType.
1312   if (Prefix == "vmov") {
1313     Result.push_back(Prefix + " " + RegisterSuffix);
1314     return;
1315   }
1316 
1317   // In the following section, we handle special cases.
1318 
1319   if (OutTypeCode == "64") {
1320     // If we have a 64 bit vdup/vext and are handling an uint64x1_t
1321     // type, the intrinsic will be optimized away, so just return
1322     // nothing.  On the other hand if we are handling an uint64x2_t
1323     // (i.e. quad instruction), vdup/vmov instructions should be
1324     // emitted.
1325     if (Prefix == "vdup" || Prefix == "vext") {
1326       if (IsQuad) {
1327         Result.push_back("{{vmov|vdup}}");
1328       }
1329       return;
1330     }
1331 
1332     // v{st,ld}{2,3,4}_{u,s}64 emit v{st,ld}1.64 instructions with
1333     // multiple register operands.
1334     bool MultiLoadPrefix = Prefix == "vld2" || Prefix == "vld3"
1335                             || Prefix == "vld4";
1336     bool MultiStorePrefix = Prefix == "vst2" || Prefix == "vst3"
1337                             || Prefix == "vst4";
1338     if (MultiLoadPrefix || MultiStorePrefix) {
1339       Result.push_back(NameRef.slice(0, 3).str() + "1.64");
1340       return;
1341     }
1342 
1343     // v{st,ld}1_{lane,dup}_{u64,s64} use vldr/vstr/vmov/str instead of
1344     // emitting said instructions. So return a check for
1345     // vldr/vstr/vmov/str instead.
1346     if (HasLanePostfix || HasDupPostfix) {
1347       if (Prefix == "vst1") {
1348         Result.push_back("{{str|vstr|vmov}}");
1349         return;
1350       } else if (Prefix == "vld1") {
1351         Result.push_back("{{ldr|vldr|vmov}}");
1352         return;
1353       }
1354     }
1355   }
1356 
1357   // vzip.32/vuzp.32 are the same instruction as vtrn.32 and are
1358   // sometimes disassembled as vtrn.32. We use a regex to handle both
1359   // cases.
1360   if ((Prefix == "vzip" || Prefix == "vuzp") && OutTypeCode == "32") {
1361     Result.push_back("{{vtrn|" + Prefix + "}}.32 " + RegisterSuffix);
1362     return;
1363   }
1364 
1365   // Currently on most ARM processors, we do not use vmla/vmls for
1366   // quad floating point operations. Instead we output vmul + vadd. So
1367   // check if we have one of those instructions and just output a
1368   // check for vmul.
1369   if (OutTypeCode == "f32") {
1370     if (Prefix == "vmls") {
1371       Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix);
1372       Result.push_back("vsub." + OutTypeCode);
1373       return;
1374     } else if (Prefix == "vmla") {
1375       Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix);
1376       Result.push_back("vadd." + OutTypeCode);
1377       return;
1378     }
1379   }
1380 
1381   // If we have vcvt, get the input type from the instruction name
1382   // (which should be of the form instname_inputtype) and append it
1383   // before the output type.
1384   if (Prefix == "vcvt") {
1385     const std::string inTypeCode = NameRef.substr(NameRef.find_last_of("_")+1);
1386     Prefix += "." + inTypeCode;
1387   }
1388 
1389   // Append output type code to get our final mangled instruction.
1390   Prefix += "." + OutTypeCode;
1391 
1392   Result.push_back(Prefix + " " + RegisterSuffix);
1393 }
1394 
/// UseMacro - Decide from the prototype string whether an intrinsic has to be
/// defined as a preprocessor macro rather than as an inline function.
///
/// A macro is required when the prototype contains:
///  - 'i' (immediate): so that SemaChecking can range check the immediate
///    passed by the user;
///  - 'p' or 'c' (pointer / const pointer): so that aligned attributes on the
///    pointer type are not hidden.
static bool UseMacro(const std::string &proto) {
  return proto.find_first_of("ipc") != std::string::npos;
}
1412 
/// MacroArgUsedDirectly - For an intrinsic defined as a macro, report whether
/// argument i is referenced directly in the expansion instead of first being
/// copied into a local temporary. This holds for constant ints ('i'),
/// pointers ('p') and const pointers ('c').
static bool MacroArgUsedDirectly(const std::string &proto, unsigned i) {
  switch (proto[i]) {
  case 'i':
  case 'p':
  case 'c':
    return true;
  default:
    return false;
  }
}
1420 
1421 // Generate the string "(argtype a, argtype b, ...)"
1422 static std::string GenArgs(const std::string &proto, StringRef typestr,
1423                            const std::string &name) {
1424   bool define = UseMacro(proto);
1425   char arg = 'a';
1426 
1427   std::string s;
1428   s += "(";
1429 
1430   for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
1431     if (define) {
1432       // Some macro arguments are used directly instead of being assigned
1433       // to local temporaries; prepend an underscore prefix to make their
1434       // names consistent with the local temporaries.
1435       if (MacroArgUsedDirectly(proto, i))
1436         s += "__";
1437     } else {
1438       s += TypeString(proto[i], typestr) + " __";
1439     }
1440     s.push_back(arg);
1441     //To avoid argument being multiple defined, add extra number for renaming.
1442     if (name == "vcopy_lane" || name == "vcopy_laneq")
1443       s.push_back('1');
1444     if ((i + 1) < e)
1445       s += ", ";
1446   }
1447 
1448   s += ")";
1449   return s;
1450 }
1451 
1452 // Macro arguments are not type-checked like inline function arguments, so
1453 // assign them to local temporaries to get the right type checking.
1454 static std::string GenMacroLocals(const std::string &proto, StringRef typestr,
1455                                   const std::string &name ) {
1456   char arg = 'a';
1457   std::string s;
1458   bool generatedLocal = false;
1459 
1460   for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
1461     // Do not create a temporary for an immediate argument.
1462     // That would defeat the whole point of using a macro!
1463     if (MacroArgUsedDirectly(proto, i))
1464       continue;
1465     generatedLocal = true;
1466     bool extranumber = false;
1467     if (name == "vcopy_lane" || name == "vcopy_laneq")
1468       extranumber = true;
1469 
1470     s += TypeString(proto[i], typestr) + " __";
1471     s.push_back(arg);
1472     if(extranumber)
1473       s.push_back('1');
1474     s += " = (";
1475     s.push_back(arg);
1476     if(extranumber)
1477       s.push_back('1');
1478     s += "); ";
1479   }
1480 
1481   if (generatedLocal)
1482     s += "\\\n  ";
1483   return s;
1484 }
1485 
1486 // Use the vmovl builtin to sign-extend or zero-extend a vector.
1487 static std::string Extend(StringRef typestr, const std::string &a, bool h=0) {
1488   std::string s, high;
1489   high = h ? "_high" : "";
1490   s = MangleName("vmovl" + high, typestr, ClassS);
1491   s += "(" + a + ")";
1492   return s;
1493 }
1494 
1495 // Get the high 64-bit part of a vector
1496 static std::string GetHigh(const std::string &a, StringRef typestr) {
1497   std::string s;
1498   s = MangleName("vget_high", typestr, ClassS);
1499   s += "(" + a + ")";
1500   return s;
1501 }
1502 
1503 // Gen operation with two operands and get high 64-bit for both of two operands.
1504 static std::string Gen2OpWith2High(StringRef typestr,
1505                                    const std::string &op,
1506                                    const std::string &a,
1507                                    const std::string &b) {
1508   std::string s;
1509   std::string Op1 = GetHigh(a, typestr);
1510   std::string Op2 = GetHigh(b, typestr);
1511   s = MangleName(op, typestr, ClassS);
1512   s += "(" + Op1 + ", " + Op2 + ");";
1513   return s;
1514 }
1515 
1516 // Gen operation with three operands and get high 64-bit of the latter
1517 // two operands.
1518 static std::string Gen3OpWith2High(StringRef typestr,
1519                                    const std::string &op,
1520                                    const std::string &a,
1521                                    const std::string &b,
1522                                    const std::string &c) {
1523   std::string s;
1524   std::string Op1 = GetHigh(b, typestr);
1525   std::string Op2 = GetHigh(c, typestr);
1526   s = MangleName(op, typestr, ClassS);
1527   s += "(" + a + ", " + Op1 + ", " + Op2 + ");";
1528   return s;
1529 }
1530 
1531 // Gen combine operation by putting a on low 64-bit, and b on high 64-bit.
1532 static std::string GenCombine(std::string typestr,
1533                               const std::string &a,
1534                               const std::string &b) {
1535   std::string s;
1536   s = MangleName("vcombine", typestr, ClassS);
1537   s += "(" + a + ", " + b + ")";
1538   return s;
1539 }
1540 
1541 static std::string Duplicate(unsigned nElts, StringRef typestr,
1542                              const std::string &a) {
1543   std::string s;
1544 
1545   s = "(" + TypeString('d', typestr) + "){ ";
1546   for (unsigned i = 0; i != nElts; ++i) {
1547     s += a;
1548     if ((i + 1) < nElts)
1549       s += ", ";
1550   }
1551   s += " }";
1552 
1553   return s;
1554 }
1555 
// Build a __builtin_shufflevector call that takes vec as both inputs and
// lists the index expression lane nElts times:
// "__builtin_shufflevector(vec, vec, lane, ..., lane)".
static std::string SplatLane(unsigned nElts, const std::string &vec,
                             const std::string &lane) {
  std::string Shuffle("__builtin_shufflevector(");
  Shuffle += vec;
  Shuffle += ", ";
  Shuffle += vec;

  // One ", <lane>" per result element.
  const std::string Index = ", " + lane;
  for (unsigned Remaining = nElts; Remaining != 0; --Remaining)
    Shuffle += Index;

  Shuffle += ")";
  return Shuffle;
}
1564 
1565 static std::string RemoveHigh(const std::string &name) {
1566   std::string s = name;
1567   std::size_t found = s.find("_high_");
1568   if (found == std::string::npos)
1569     PrintFatalError("name should contain \"_high_\" for high intrinsics");
1570   s.replace(found, 5, "");
1571   return s;
1572 }
1573 
1574 static unsigned GetNumElements(StringRef typestr, bool &quad) {
1575   quad = false;
1576   bool dummy = false;
1577   char type = ClassifyType(typestr, quad, dummy, dummy);
1578   unsigned nElts = 0;
1579   switch (type) {
1580   case 'c': nElts = 8; break;
1581   case 's': nElts = 4; break;
1582   case 'i': nElts = 2; break;
1583   case 'l': nElts = 1; break;
1584   case 'h': nElts = 4; break;
1585   case 'f': nElts = 2; break;
1586   case 'd':
1587     nElts = 1;
1588     break;
1589   default:
1590     PrintFatalError("unhandled type!");
1591   }
1592   if (quad) nElts <<= 1;
1593   return nElts;
1594 }
1595 
1596 // Generate the definition for this intrinsic, e.g. "a + b" for OpAdd.
1597 static std::string GenOpString(const std::string &name, OpKind op,
1598                                const std::string &proto, StringRef typestr) {
1599   bool quad;
1600   unsigned nElts = GetNumElements(typestr, quad);
1601   bool define = UseMacro(proto);
1602 
1603   std::string ts = TypeString(proto[0], typestr);
1604   std::string s;
1605   if (!define) {
1606     s = "return ";
1607   }
1608 
1609   switch(op) {
1610   case OpAdd:
1611     s += "__a + __b;";
1612     break;
1613   case OpAddl:
1614     s += Extend(typestr, "__a") + " + " + Extend(typestr, "__b") + ";";
1615     break;
1616   case OpAddlHi:
1617     s += Extend(typestr, "__a", 1) + " + " + Extend(typestr, "__b", 1) + ";";
1618     break;
1619   case OpAddw:
1620     s += "__a + " + Extend(typestr, "__b") + ";";
1621     break;
1622   case OpAddwHi:
1623     s += "__a + " + Extend(typestr, "__b", 1) + ";";
1624     break;
1625   case OpSub:
1626     s += "__a - __b;";
1627     break;
1628   case OpSubl:
1629     s += Extend(typestr, "__a") + " - " + Extend(typestr, "__b") + ";";
1630     break;
1631   case OpSublHi:
1632     s += Extend(typestr, "__a", 1) + " - " + Extend(typestr, "__b", 1) + ";";
1633     break;
1634   case OpSubw:
1635     s += "__a - " + Extend(typestr, "__b") + ";";
1636     break;
1637   case OpSubwHi:
1638     s += "__a - " + Extend(typestr, "__b", 1) + ";";
1639     break;
1640   case OpMulN:
1641     s += "__a * " + Duplicate(nElts, typestr, "__b") + ";";
1642     break;
1643   case OpMulLane:
1644     s += "__a * " + SplatLane(nElts, "__b", "__c") + ";";
1645     break;
1646   case OpMulXLane:
1647     s += MangleName("vmulx", typestr, ClassS) + "(__a, " +
1648       SplatLane(nElts, "__b", "__c") + ");";
1649     break;
1650   case OpMul:
1651     s += "__a * __b;";
1652     break;
1653   case OpMullLane:
1654     s += MangleName("vmull", typestr, ClassS) + "(__a, " +
1655       SplatLane(nElts, "__b", "__c") + ");";
1656     break;
1657   case OpMullHiLane:
1658     s += MangleName("vmull", typestr, ClassS) + "(" +
1659       GetHigh("__a", typestr) + ", " + SplatLane(nElts, "__b", "__c") + ");";
1660     break;
1661   case OpMlaN:
1662     s += "__a + (__b * " + Duplicate(nElts, typestr, "__c") + ");";
1663     break;
1664   case OpMlaLane:
1665     s += "__a + (__b * " + SplatLane(nElts, "__c", "__d") + ");";
1666     break;
1667   case OpMla:
1668     s += "__a + (__b * __c);";
1669     break;
1670   case OpMlalN:
1671     s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
1672       Duplicate(nElts, typestr, "__c") + ");";
1673     break;
1674   case OpMlalLane:
1675     s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
1676       SplatLane(nElts, "__c", "__d") + ");";
1677     break;
1678   case OpMlalHiLane:
1679     s += "__a + " + MangleName("vmull", typestr, ClassS) + "(" +
1680       GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
1681     break;
1682   case OpMlal:
1683     s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
1684     break;
1685   case OpMullHi:
1686     s += Gen2OpWith2High(typestr, "vmull", "__a", "__b");
1687     break;
1688   case OpMlalHi:
1689     s += Gen3OpWith2High(typestr, "vmlal", "__a", "__b", "__c");
1690     break;
1691   case OpMlsN:
1692     s += "__a - (__b * " + Duplicate(nElts, typestr, "__c") + ");";
1693     break;
1694   case OpMlsLane:
1695     s += "__a - (__b * " + SplatLane(nElts, "__c", "__d") + ");";
1696     break;
1697   case OpFMSLane:
1698     s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
1699     s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
1700     s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n  ";
1701     s += MangleName("vfma_lane", typestr, ClassS) + "(__a1, __b1, -__c1, __d);";
1702     break;
1703   case OpFMSLaneQ:
1704     s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
1705     s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
1706     s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n  ";
1707     s += MangleName("vfma_laneq", typestr, ClassS) + "(__a1, __b1, -__c1, __d);";
1708     break;
1709   case OpMls:
1710     s += "__a - (__b * __c);";
1711     break;
1712   case OpMlslN:
1713     s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
1714       Duplicate(nElts, typestr, "__c") + ");";
1715     break;
1716   case OpMlslLane:
1717     s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
1718       SplatLane(nElts, "__c", "__d") + ");";
1719     break;
1720   case OpMlslHiLane:
1721     s += "__a - " + MangleName("vmull", typestr, ClassS) + "(" +
1722       GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
1723     break;
1724   case OpMlsl:
1725     s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
1726     break;
1727   case OpMlslHi:
1728     s += Gen3OpWith2High(typestr, "vmlsl", "__a", "__b", "__c");
1729     break;
1730   case OpQDMullLane:
1731     s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
1732       SplatLane(nElts, "__b", "__c") + ");";
1733     break;
1734   case OpQDMullHiLane:
1735     s += MangleName("vqdmull", typestr, ClassS) + "(" +
1736       GetHigh("__a", typestr) + ", " + SplatLane(nElts, "__b", "__c") + ");";
1737     break;
1738   case OpQDMlalLane:
1739     s += MangleName("vqdmlal", typestr, ClassS) + "(__a, __b, " +
1740       SplatLane(nElts, "__c", "__d") + ");";
1741     break;
1742   case OpQDMlalHiLane:
1743     s += MangleName("vqdmlal", typestr, ClassS) + "(__a, " +
1744       GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
1745     break;
1746   case OpQDMlslLane:
1747     s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, __b, " +
1748       SplatLane(nElts, "__c", "__d") + ");";
1749     break;
1750   case OpQDMlslHiLane:
1751     s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, " +
1752       GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
1753     break;
1754   case OpQDMulhLane:
1755     s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " +
1756       SplatLane(nElts, "__b", "__c") + ");";
1757     break;
1758   case OpQRDMulhLane:
1759     s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " +
1760       SplatLane(nElts, "__b", "__c") + ");";
1761     break;
1762   case OpEq:
1763     s += "(" + ts + ")(__a == __b);";
1764     break;
1765   case OpGe:
1766     s += "(" + ts + ")(__a >= __b);";
1767     break;
1768   case OpLe:
1769     s += "(" + ts + ")(__a <= __b);";
1770     break;
1771   case OpGt:
1772     s += "(" + ts + ")(__a > __b);";
1773     break;
1774   case OpLt:
1775     s += "(" + ts + ")(__a < __b);";
1776     break;
1777   case OpNeg:
1778     s += " -__a;";
1779     break;
1780   case OpNot:
1781     s += " ~__a;";
1782     break;
1783   case OpAnd:
1784     s += "__a & __b;";
1785     break;
1786   case OpOr:
1787     s += "__a | __b;";
1788     break;
1789   case OpXor:
1790     s += "__a ^ __b;";
1791     break;
1792   case OpAndNot:
1793     s += "__a & ~__b;";
1794     break;
1795   case OpOrNot:
1796     s += "__a | ~__b;";
1797     break;
1798   case OpCast:
1799     s += "(" + ts + ")__a;";
1800     break;
1801   case OpConcat:
1802     s += "(" + ts + ")__builtin_shufflevector((int64x1_t)__a";
1803     s += ", (int64x1_t)__b, 0, 1);";
1804     break;
1805   case OpHi:
1806     // nElts is for the result vector, so the source is twice that number.
1807     s += "__builtin_shufflevector(__a, __a";
1808     for (unsigned i = nElts; i < nElts * 2; ++i)
1809       s += ", " + utostr(i);
1810     s+= ");";
1811     break;
1812   case OpLo:
1813     s += "__builtin_shufflevector(__a, __a";
1814     for (unsigned i = 0; i < nElts; ++i)
1815       s += ", " + utostr(i);
1816     s+= ");";
1817     break;
1818   case OpDup:
1819     s += Duplicate(nElts, typestr, "__a") + ";";
1820     break;
1821   case OpDupLane:
1822     s += SplatLane(nElts, "__a", "__b") + ";";
1823     break;
1824   case OpSelect:
1825     // ((0 & 1) | (~0 & 2))
1826     s += "(" + ts + ")";
1827     ts = TypeString(proto[1], typestr);
1828     s += "((__a & (" + ts + ")__b) | ";
1829     s += "(~__a & (" + ts + ")__c));";
1830     break;
1831   case OpRev16:
1832     s += "__builtin_shufflevector(__a, __a";
1833     for (unsigned i = 2; i <= nElts; i += 2)
1834       for (unsigned j = 0; j != 2; ++j)
1835         s += ", " + utostr(i - j - 1);
1836     s += ");";
1837     break;
1838   case OpRev32: {
1839     unsigned WordElts = nElts >> (1 + (int)quad);
1840     s += "__builtin_shufflevector(__a, __a";
1841     for (unsigned i = WordElts; i <= nElts; i += WordElts)
1842       for (unsigned j = 0; j != WordElts; ++j)
1843         s += ", " + utostr(i - j - 1);
1844     s += ");";
1845     break;
1846   }
1847   case OpRev64: {
1848     unsigned DblWordElts = nElts >> (int)quad;
1849     s += "__builtin_shufflevector(__a, __a";
1850     for (unsigned i = DblWordElts; i <= nElts; i += DblWordElts)
1851       for (unsigned j = 0; j != DblWordElts; ++j)
1852         s += ", " + utostr(i - j - 1);
1853     s += ");";
1854     break;
1855   }
1856   case OpXtnHi: {
1857     s = TypeString(proto[1], typestr) + " __a1 = " +
1858         MangleName("vmovn", typestr, ClassS) + "(__b);\n  " +
1859         "return __builtin_shufflevector(__a, __a1";
1860     for (unsigned i = 0; i < nElts * 4; ++i)
1861       s += ", " + utostr(i);
1862     s += ");";
1863     break;
1864   }
1865   case OpSqxtunHi: {
1866     s = TypeString(proto[1], typestr) + " __a1 = " +
1867         MangleName("vqmovun", typestr, ClassS) + "(__b);\n  " +
1868         "return __builtin_shufflevector(__a, __a1";
1869     for (unsigned i = 0; i < nElts * 4; ++i)
1870       s += ", " + utostr(i);
1871     s += ");";
1872     break;
1873   }
1874   case OpQxtnHi: {
1875     s = TypeString(proto[1], typestr) + " __a1 = " +
1876         MangleName("vqmovn", typestr, ClassS) + "(__b);\n  " +
1877         "return __builtin_shufflevector(__a, __a1";
1878     for (unsigned i = 0; i < nElts * 4; ++i)
1879       s += ", " + utostr(i);
1880     s += ");";
1881     break;
1882   }
1883   case OpFcvtnHi: {
1884     std::string FName = (nElts == 1) ? "vcvt_f32" : "vcvt_f16";
1885     s = TypeString(proto[1], typestr) + " __a1 = " +
1886         MangleName(FName, typestr, ClassS) + "(__b);\n  " +
1887         "return __builtin_shufflevector(__a, __a1";
1888     for (unsigned i = 0; i < nElts * 4; ++i)
1889       s += ", " + utostr(i);
1890     s += ");";
1891     break;
1892   }
1893   case OpFcvtlHi: {
1894     std::string FName = (nElts == 2) ? "vcvt_f64" : "vcvt_f32";
1895     s = TypeString('d', typestr) + " __a1 = " + GetHigh("__a", typestr) +
1896         ";\n  return " + MangleName(FName, typestr, ClassS) + "(__a1);";
1897     break;
1898   }
1899   case OpFcvtxnHi: {
1900     s = TypeString(proto[1], typestr) + " __a1 = " +
1901         MangleName("vcvtx_f32", typestr, ClassS) + "(__b);\n  " +
1902         "return __builtin_shufflevector(__a, __a1";
1903     for (unsigned i = 0; i < nElts * 4; ++i)
1904       s += ", " + utostr(i);
1905     s += ");";
1906     break;
1907   }
1908   case OpUzp1:
1909     s += "__builtin_shufflevector(__a, __b";
1910     for (unsigned i = 0; i < nElts; i++)
1911       s += ", " + utostr(2*i);
1912     s += ");";
1913     break;
1914   case OpUzp2:
1915     s += "__builtin_shufflevector(__a, __b";
1916     for (unsigned i = 0; i < nElts; i++)
1917       s += ", " + utostr(2*i+1);
1918     s += ");";
1919     break;
1920   case OpZip1:
1921     s += "__builtin_shufflevector(__a, __b";
1922     for (unsigned i = 0; i < (nElts/2); i++)
1923        s += ", " + utostr(i) + ", " + utostr(i+nElts);
1924     s += ");";
1925     break;
1926   case OpZip2:
1927     s += "__builtin_shufflevector(__a, __b";
1928     for (unsigned i = nElts/2; i < nElts; i++)
1929        s += ", " + utostr(i) + ", " + utostr(i+nElts);
1930     s += ");";
1931     break;
1932   case OpTrn1:
1933     s += "__builtin_shufflevector(__a, __b";
1934     for (unsigned i = 0; i < (nElts/2); i++)
1935        s += ", " + utostr(2*i) + ", " + utostr(2*i+nElts);
1936     s += ");";
1937     break;
1938   case OpTrn2:
1939     s += "__builtin_shufflevector(__a, __b";
1940     for (unsigned i = 0; i < (nElts/2); i++)
1941        s += ", " + utostr(2*i+1) + ", " + utostr(2*i+1+nElts);
1942     s += ");";
1943     break;
1944   case OpAbdl: {
1945     std::string abd = MangleName("vabd", typestr, ClassS) + "(__a, __b)";
1946     if (typestr[0] != 'U') {
1947       // vabd results are always unsigned and must be zero-extended.
1948       std::string utype = "U" + typestr.str();
1949       s += "(" + TypeString(proto[0], typestr) + ")";
1950       abd = "(" + TypeString('d', utype) + ")" + abd;
1951       s += Extend(utype, abd) + ";";
1952     } else {
1953       s += Extend(typestr, abd) + ";";
1954     }
1955     break;
1956   }
1957   case OpAbdlHi:
1958     s += Gen2OpWith2High(typestr, "vabdl", "__a", "__b");
1959     break;
1960   case OpAddhnHi: {
1961     std::string addhn = MangleName("vaddhn", typestr, ClassS) + "(__b, __c)";
1962     s += GenCombine(GetNarrowTypestr(typestr), "__a", addhn);
1963     s += ";";
1964     break;
1965   }
1966   case OpRAddhnHi: {
1967     std::string raddhn = MangleName("vraddhn", typestr, ClassS) + "(__b, __c)";
1968     s += GenCombine(GetNarrowTypestr(typestr), "__a", raddhn);
1969     s += ";";
1970     break;
1971   }
1972   case OpSubhnHi: {
1973     std::string subhn = MangleName("vsubhn", typestr, ClassS) + "(__b, __c)";
1974     s += GenCombine(GetNarrowTypestr(typestr), "__a", subhn);
1975     s += ";";
1976     break;
1977   }
1978   case OpRSubhnHi: {
1979     std::string rsubhn = MangleName("vrsubhn", typestr, ClassS) + "(__b, __c)";
1980     s += GenCombine(GetNarrowTypestr(typestr), "__a", rsubhn);
1981     s += ";";
1982     break;
1983   }
1984   case OpAba:
1985     s += "__a + " + MangleName("vabd", typestr, ClassS) + "(__b, __c);";
1986     break;
1987   case OpAbal:
1988     s += "__a + " + MangleName("vabdl", typestr, ClassS) + "(__b, __c);";
1989     break;
1990   case OpAbalHi:
1991     s += Gen3OpWith2High(typestr, "vabal", "__a", "__b", "__c");
1992     break;
1993   case OpQDMullHi:
1994     s += Gen2OpWith2High(typestr, "vqdmull", "__a", "__b");
1995     break;
1996   case OpQDMlalHi:
1997     s += Gen3OpWith2High(typestr, "vqdmlal", "__a", "__b", "__c");
1998     break;
1999   case OpQDMlslHi:
2000     s += Gen3OpWith2High(typestr, "vqdmlsl", "__a", "__b", "__c");
2001     break;
2002   case OpDiv:
2003     s += "__a / __b;";
2004     break;
2005   case OpMovlHi: {
2006     s = TypeString(proto[1], typestr.drop_front()) + " __a1 = " +
2007         MangleName("vget_high", typestr, ClassS) + "(__a);\n  " + s;
2008     s += "(" + ts + ")" + MangleName("vshll_n", typestr, ClassS);
2009     s += "(__a1, 0);";
2010     break;
2011   }
2012   case OpLongHi: {
2013     // Another local variable __a1 is needed for calling a Macro,
2014     // or using __a will have naming conflict when Macro expanding.
2015     s += TypeString(proto[1], typestr.drop_front()) + " __a1 = " +
2016          MangleName("vget_high", typestr, ClassS) + "(__a); \\\n";
2017     s += "  (" + ts + ")" + MangleName(RemoveHigh(name), typestr, ClassS) +
2018          "(__a1, __b);";
2019     break;
2020   }
2021   case OpNarrowHi: {
2022     s += "(" + ts + ")" + MangleName("vcombine", typestr, ClassS) + "(__a, " +
2023          MangleName(RemoveHigh(name), typestr, ClassS) + "(__b, __c));";
2024     break;
2025   }
2026   case OpCopyLane: {
2027     s += TypeString('s', typestr) + " __c2 = " +
2028          MangleName("vget_lane", typestr, ClassS) + "(__c1, __d1); \\\n  " +
2029          MangleName("vset_lane", typestr, ClassS) + "(__c2, __a1, __b1);";
2030     break;
2031   }
2032   case OpCopyQLane: {
2033     std::string typeCode = "";
2034     InstructionTypeCode(typestr, ClassS, quad, typeCode);
2035     s += TypeString('s', typestr) + " __c2 = vget_lane_" + typeCode +
2036          "(__c1, __d1); \\\n  vsetq_lane_" + typeCode + "(__c2, __a1, __b1);";
2037     break;
2038   }
2039   case OpCopyLaneQ: {
2040     std::string typeCode = "";
2041     InstructionTypeCode(typestr, ClassS, quad, typeCode);
2042     s += TypeString('s', typestr) + " __c2 = vgetq_lane_" + typeCode +
2043          "(__c1, __d1); \\\n  vset_lane_" + typeCode + "(__c2, __a1, __b1);";
2044     break;
2045   }
2046   case OpScalarMulLane: {
2047     std::string typeCode = "";
2048     InstructionTypeCode(typestr, ClassS, quad, typeCode);
2049     s += TypeString('s', typestr) + " __d1 = vget_lane_" + typeCode +
2050       "(__b, __c);\\\n  __a * __d1;";
2051     break;
2052   }
2053   case OpScalarMulLaneQ: {
2054     std::string typeCode = "";
2055     InstructionTypeCode(typestr, ClassS, quad, typeCode);
2056         s += TypeString('s', typestr) + " __d1 = vgetq_lane_" + typeCode +
2057           "(__b, __c);\\\n  __a * __d1;";
2058     break;
2059   }
2060   case OpScalarMulXLane: {
2061     bool dummy = false;
2062     char type = ClassifyType(typestr, dummy, dummy, dummy);
2063     if (type == 'f') type = 's';
2064     std::string typeCode = "";
2065     InstructionTypeCode(typestr, ClassS, quad, typeCode);
2066     s += TypeString('s', typestr) + " __d1 = vget_lane_" + typeCode +
2067       "(__b, __c);\\\n  vmulx" + type + "_" +
2068       typeCode +  "(__a, __d1);";
2069     break;
2070   }
2071   case OpScalarMulXLaneQ: {
2072     bool dummy = false;
2073     char type = ClassifyType(typestr, dummy, dummy, dummy);
2074     if (type == 'f') type = 's';
2075     std::string typeCode = "";
2076     InstructionTypeCode(typestr, ClassS, quad, typeCode);
2077     s += TypeString('s', typestr) + " __d1 = vgetq_lane_" +
2078       typeCode + "(__b, __c);\\\n  vmulx" + type +
2079       "_" + typeCode +  "(__a, __d1);";
2080     break;
2081   }
2082 
2083   case OpScalarVMulXLane: {
2084     bool dummy = false;
2085     char type = ClassifyType(typestr, dummy, dummy, dummy);
2086     if (type == 'f') type = 's';
2087     std::string typeCode = "";
2088     InstructionTypeCode(typestr, ClassS, quad, typeCode);
2089     s += TypeString('s', typestr) + " __d1 = vget_lane_" +
2090       typeCode + "(__a, 0);\\\n" +
2091       "  " + TypeString('s', typestr) + " __e1 = vget_lane_" +
2092       typeCode + "(__b, __c);\\\n" +
2093       "  " + TypeString('s', typestr) + " __f1 = vmulx" + type + "_" +
2094       typeCode + "(__d1, __e1);\\\n" +
2095       "  " + TypeString('d', typestr) + " __g1;\\\n" +
2096       "  vset_lane_" + typeCode + "(__f1, __g1, __c);";
2097     break;
2098   }
2099 
2100   case OpScalarVMulXLaneQ: {
2101     bool dummy = false;
2102     char type = ClassifyType(typestr, dummy, dummy, dummy);
2103     if (type == 'f') type = 's';
2104     std::string typeCode = "";
2105     InstructionTypeCode(typestr, ClassS, quad, typeCode);
2106     s += TypeString('s', typestr) + " __d1 = vget_lane_" +
2107       typeCode + "(__a, 0);\\\n" +
2108       "  " + TypeString('s', typestr) + " __e1 = vgetq_lane_" +
2109       typeCode + "(__b, __c);\\\n" +
2110       "  " + TypeString('s', typestr) + " __f1 = vmulx" + type + "_" +
2111       typeCode + "(__d1, __e1);\\\n" +
2112       "  " + TypeString('d', typestr) + " __g1;\\\n" +
2113       "  vset_lane_" + typeCode + "(__f1, __g1, 0);";
2114     break;
2115   }
2116   case OpScalarQDMullLane: {
2117     std::string typeCode = "";
2118     InstructionTypeCode(typestr, ClassS, quad, typeCode);
2119     s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
2120     "vget_lane_" + typeCode + "(b, __c));";
2121     break;
2122   }
2123   case OpScalarQDMullLaneQ: {
2124     std::string typeCode = "";
2125     InstructionTypeCode(typestr, ClassS, quad, typeCode);
2126     s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
2127     "vgetq_lane_" + typeCode + "(b, __c));";
2128     break;
2129   }
2130   case OpScalarQDMulHiLane: {
2131     std::string typeCode = "";
2132     InstructionTypeCode(typestr, ClassS, quad, typeCode);
2133     s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " +
2134     "vget_lane_" + typeCode + "(__b, __c));";
2135     break;
2136   }
2137   case OpScalarQDMulHiLaneQ: {
2138     std::string typeCode = "";
2139     InstructionTypeCode(typestr, ClassS, quad, typeCode);
2140     s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " +
2141     "vgetq_lane_" + typeCode + "(__b, __c));";
2142     break;
2143   }
2144   case OpScalarQRDMulHiLane: {
2145     std::string typeCode = "";
2146     InstructionTypeCode(typestr, ClassS, quad, typeCode);
2147     s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " +
2148     "vget_lane_" + typeCode + "(__b, __c));";
2149     break;
2150   }
2151   case OpScalarQRDMulHiLaneQ: {
2152     std::string typeCode = "";
2153     InstructionTypeCode(typestr, ClassS, quad, typeCode);
2154     s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " +
2155     "vgetq_lane_" + typeCode + "(__b, __c));";
2156     break;
2157   }
2158   default:
2159     PrintFatalError("unknown OpKind!");
2160   }
2161   return s;
2162 }
2163 
2164 static unsigned GetNeonEnum(const std::string &proto, StringRef typestr) {
2165   unsigned mod = proto[0];
2166 
2167   if (mod == 'v' || mod == 'f')
2168     mod = proto[1];
2169 
2170   bool quad = false;
2171   bool poly = false;
2172   bool usgn = false;
2173   bool scal = false;
2174   bool cnst = false;
2175   bool pntr = false;
2176 
2177   // Base type to get the type string for.
2178   char type = ClassifyType(typestr, quad, poly, usgn);
2179 
2180   // Based on the modifying character, change the type and width if necessary.
2181   type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
2182 
2183   NeonTypeFlags::EltType ET;
2184   switch (type) {
2185     case 'c':
2186       ET = poly ? NeonTypeFlags::Poly8 : NeonTypeFlags::Int8;
2187       break;
2188     case 's':
2189       ET = poly ? NeonTypeFlags::Poly16 : NeonTypeFlags::Int16;
2190       break;
2191     case 'i':
2192       ET = NeonTypeFlags::Int32;
2193       break;
2194     case 'l':
2195       ET = poly ? NeonTypeFlags::Poly64 : NeonTypeFlags::Int64;
2196       break;
2197     case 'h':
2198       ET = NeonTypeFlags::Float16;
2199       break;
2200     case 'f':
2201       ET = NeonTypeFlags::Float32;
2202       break;
2203     case 'd':
2204       ET = NeonTypeFlags::Float64;
2205       break;
2206     default:
2207       PrintFatalError("unhandled type!");
2208   }
2209   NeonTypeFlags Flags(ET, usgn, quad && proto[1] != 'g');
2210   return Flags.getFlags();
2211 }
2212 
/// Return true if the prototype string \p proto contains the modifier
/// character 's' or 'r' anywhere (including the return-type position).
/// Callers in this file use this to decide whether a prototype has a scalar
/// operand.
///
/// The prototype is taken by const reference so that no copy of the string is
/// made on each call.
static bool ProtoHasScalar(const std::string &proto) {
  return proto.find_first_of("sr") != std::string::npos;
}
2218 
2219 // Generate the definition for this intrinsic, e.g. __builtin_neon_cls(a)
2220 static std::string GenBuiltin(const std::string &name, const std::string &proto,
2221                               StringRef typestr, ClassKind ck) {
2222   std::string s;
2223 
2224   // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit
2225   // sret-like argument.
2226   bool sret = IsMultiVecProto(proto[0]);
2227 
2228   bool define = UseMacro(proto);
2229 
2230   // Check if the prototype has a scalar operand with the type of the vector
2231   // elements.  If not, bitcasting the args will take care of arg checking.
2232   // The actual signedness etc. will be taken care of with special enums.
2233   if (!ProtoHasScalar(proto))
2234     ck = ClassB;
2235 
2236   if (proto[0] != 'v') {
2237     std::string ts = TypeString(proto[0], typestr);
2238 
2239     if (define) {
2240       if (sret)
2241         s += ts + " r; ";
2242       else
2243         s += "(" + ts + ")";
2244     } else if (sret) {
2245       s += ts + " r; ";
2246     } else {
2247       s += "return (" + ts + ")";
2248     }
2249   }
2250 
2251   bool splat = proto.find('a') != std::string::npos;
2252 
2253   s += "__builtin_neon_";
2254   if (splat) {
2255     // Call the non-splat builtin: chop off the "_n" suffix from the name.
2256     std::string vname(name, 0, name.size()-2);
2257     s += MangleName(vname, typestr, ck);
2258   } else {
2259     s += MangleName(name, typestr, ck);
2260   }
2261   s += "(";
2262 
2263   // Pass the address of the return variable as the first argument to sret-like
2264   // builtins.
2265   if (sret)
2266     s += "&r, ";
2267 
2268   char arg = 'a';
2269   for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
2270     std::string args = std::string(&arg, 1);
2271 
2272     // Use the local temporaries instead of the macro arguments.
2273     args = "__" + args;
2274 
2275     bool argQuad = false;
2276     bool argPoly = false;
2277     bool argUsgn = false;
2278     bool argScalar = false;
2279     bool dummy = false;
2280     char argType = ClassifyType(typestr, argQuad, argPoly, argUsgn);
2281     argType = ModType(proto[i], argType, argQuad, argPoly, argUsgn, argScalar,
2282                       dummy, dummy);
2283 
2284     // Handle multiple-vector values specially, emitting each subvector as an
2285     // argument to the __builtin.
2286     unsigned NumOfVec = 0;
2287     if (proto[i] >= '2' && proto[i] <= '4') {
2288       NumOfVec = proto[i] - '0';
2289     } else if (proto[i] >= 'B' && proto[i] <= 'D') {
2290       NumOfVec = proto[i] - 'A' + 1;
2291     }
2292 
2293     if (NumOfVec > 0) {
2294       // Check if an explicit cast is needed.
2295       if (argType != 'c' || argPoly || argUsgn)
2296         args = (argQuad ? "(int8x16_t)" : "(int8x8_t)") + args;
2297 
2298       for (unsigned vi = 0, ve = NumOfVec; vi != ve; ++vi) {
2299         s += args + ".val[" + utostr(vi) + "]";
2300         if ((vi + 1) < ve)
2301           s += ", ";
2302       }
2303       if ((i + 1) < e)
2304         s += ", ";
2305 
2306       continue;
2307     }
2308 
2309     if (splat && (i + 1) == e)
2310       args = Duplicate(GetNumElements(typestr, argQuad), typestr, args);
2311 
2312     // Check if an explicit cast is needed.
2313     if ((splat || !argScalar) &&
2314         ((ck == ClassB && argType != 'c') || argPoly || argUsgn)) {
2315       std::string argTypeStr = "c";
2316       if (ck != ClassB)
2317         argTypeStr = argType;
2318       if (argQuad)
2319         argTypeStr = "Q" + argTypeStr;
2320       args = "(" + TypeString('d', argTypeStr) + ")" + args;
2321     }
2322 
2323     s += args;
2324     if ((i + 1) < e)
2325       s += ", ";
2326   }
2327 
2328   // Extra constant integer to hold type class enum for this function, e.g. s8
2329   if (ck == ClassB)
2330     s += ", " + utostr(GetNeonEnum(proto, typestr));
2331 
2332   s += ");";
2333 
2334   if (proto[0] != 'v' && sret) {
2335     if (define)
2336       s += " r;";
2337     else
2338       s += " return r;";
2339   }
2340   return s;
2341 }
2342 
2343 static std::string GenBuiltinDef(const std::string &name,
2344                                  const std::string &proto,
2345                                  StringRef typestr, ClassKind ck) {
2346   std::string s("BUILTIN(__builtin_neon_");
2347 
2348   // If all types are the same size, bitcasting the args will take care
2349   // of arg checking.  The actual signedness etc. will be taken care of with
2350   // special enums.
2351   if (!ProtoHasScalar(proto))
2352     ck = ClassB;
2353 
2354   s += MangleName(name, typestr, ck);
2355   s += ", \"";
2356 
2357   for (unsigned i = 0, e = proto.size(); i != e; ++i)
2358     s += BuiltinTypeString(proto[i], typestr, ck, i == 0);
2359 
2360   // Extra constant integer to hold type class enum for this function, e.g. s8
2361   if (ck == ClassB)
2362     s += "i";
2363 
2364   s += "\", \"n\")";
2365   return s;
2366 }
2367 
2368 static std::string GenIntrinsic(const std::string &name,
2369                                 const std::string &proto,
2370                                 StringRef outTypeStr, StringRef inTypeStr,
2371                                 OpKind kind, ClassKind classKind) {
2372   assert(!proto.empty() && "");
2373   bool define = UseMacro(proto) && kind != OpUnavailable;
2374   std::string s;
2375 
2376   // static always inline + return type
2377   if (define)
2378     s += "#define ";
2379   else
2380     s += "__ai " + TypeString(proto[0], outTypeStr) + " ";
2381 
2382   // Function name with type suffix
2383   std::string mangledName = MangleName(name, outTypeStr, ClassS);
2384   if (outTypeStr != inTypeStr) {
2385     // If the input type is different (e.g., for vreinterpret), append a suffix
2386     // for the input type.  String off a "Q" (quad) prefix so that MangleName
2387     // does not insert another "q" in the name.
2388     unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
2389     StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
2390     mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
2391   }
2392   s += mangledName;
2393 
2394   // Function arguments
2395   s += GenArgs(proto, inTypeStr, name);
2396 
2397   // Definition.
2398   if (define) {
2399     s += " __extension__ ({ \\\n  ";
2400     s += GenMacroLocals(proto, inTypeStr, name);
2401   } else if (kind == OpUnavailable) {
2402     s += " __attribute__((unavailable));\n";
2403     return s;
2404   } else
2405     s += " {\n  ";
2406 
2407   if (kind != OpNone)
2408     s += GenOpString(name, kind, proto, outTypeStr);
2409   else
2410     s += GenBuiltin(name, proto, outTypeStr, classKind);
2411   if (define)
2412     s += " })";
2413   else
2414     s += " }";
2415   s += "\n";
2416   return s;
2417 }
2418 
2419 /// run - Read the records in arm_neon.td and output arm_neon.h.  arm_neon.h
2420 /// is comprised of type definitions and function declarations.
2421 void NeonEmitter::run(raw_ostream &OS) {
2422   OS <<
2423     "/*===---- arm_neon.h - ARM Neon intrinsics ------------------------------"
2424     "---===\n"
2425     " *\n"
2426     " * Permission is hereby granted, free of charge, to any person obtaining "
2427     "a copy\n"
2428     " * of this software and associated documentation files (the \"Software\"),"
2429     " to deal\n"
2430     " * in the Software without restriction, including without limitation the "
2431     "rights\n"
2432     " * to use, copy, modify, merge, publish, distribute, sublicense, "
2433     "and/or sell\n"
2434     " * copies of the Software, and to permit persons to whom the Software is\n"
2435     " * furnished to do so, subject to the following conditions:\n"
2436     " *\n"
2437     " * The above copyright notice and this permission notice shall be "
2438     "included in\n"
2439     " * all copies or substantial portions of the Software.\n"
2440     " *\n"
2441     " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
2442     "EXPRESS OR\n"
2443     " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
2444     "MERCHANTABILITY,\n"
2445     " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
2446     "SHALL THE\n"
2447     " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
2448     "OTHER\n"
2449     " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
2450     "ARISING FROM,\n"
2451     " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
2452     "DEALINGS IN\n"
2453     " * THE SOFTWARE.\n"
2454     " *\n"
2455     " *===--------------------------------------------------------------------"
2456     "---===\n"
2457     " */\n\n";
2458 
2459   OS << "#ifndef __ARM_NEON_H\n";
2460   OS << "#define __ARM_NEON_H\n\n";
2461 
2462   OS << "#if !defined(__ARM_NEON__) && !defined(__ARM_NEON)\n";
2463   OS << "#error \"NEON support not enabled\"\n";
2464   OS << "#endif\n\n";
2465 
2466   OS << "#include <stdint.h>\n\n";
2467 
2468   // Emit NEON-specific scalar typedefs.
2469   OS << "typedef float float32_t;\n";
2470   OS << "typedef __fp16 float16_t;\n";
2471 
2472   OS << "#ifdef __aarch64__\n";
2473   OS << "typedef double float64_t;\n";
2474   OS << "#endif\n\n";
2475 
2476   // For now, signedness of polynomial types depends on target
2477   OS << "#ifdef __aarch64__\n";
2478   OS << "typedef uint8_t poly8_t;\n";
2479   OS << "typedef uint16_t poly16_t;\n";
2480   OS << "typedef uint64_t poly64_t;\n";
2481   OS << "#else\n";
2482   OS << "typedef int8_t poly8_t;\n";
2483   OS << "typedef int16_t poly16_t;\n";
2484   OS << "#endif\n";
2485 
2486   // Emit Neon vector typedefs.
2487   std::string TypedefTypes(
2488       "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPsPlQPl");
2489   SmallVector<StringRef, 24> TDTypeVec;
2490   ParseTypes(0, TypedefTypes, TDTypeVec);
2491 
2492   // Emit vector typedefs.
2493   bool isA64 = false;
2494   bool preinsert;
2495   bool postinsert;
2496   for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
2497     bool dummy, quad = false, poly = false;
2498     char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
2499     preinsert = false;
2500     postinsert = false;
2501 
2502     if (type == 'd' || (type == 'l' && poly)) {
2503       preinsert = isA64? false: true;
2504       isA64 = true;
2505     } else {
2506       postinsert = isA64? true: false;
2507       isA64 = false;
2508     }
2509     if (postinsert)
2510       OS << "#endif\n";
2511     if (preinsert)
2512       OS << "#ifdef __aarch64__\n";
2513 
2514     if (poly)
2515       OS << "typedef __attribute__((neon_polyvector_type(";
2516     else
2517       OS << "typedef __attribute__((neon_vector_type(";
2518 
2519     unsigned nElts = GetNumElements(TDTypeVec[i], quad);
2520     OS << utostr(nElts) << "))) ";
2521     if (nElts < 10)
2522       OS << " ";
2523 
2524     OS << TypeString('s', TDTypeVec[i]);
2525     OS << " " << TypeString('d', TDTypeVec[i]) << ";\n";
2526 
2527   }
2528   postinsert = isA64? true: false;
2529   if (postinsert)
2530     OS << "#endif\n";
2531   OS << "\n";
2532 
2533   // Emit struct typedefs.
2534   isA64 = false;
2535   for (unsigned vi = 2; vi != 5; ++vi) {
2536     for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
2537       bool dummy, quad = false, poly = false;
2538       char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
2539       preinsert = false;
2540       postinsert = false;
2541 
2542       if (type == 'd' || (type == 'l' && poly)) {
2543         preinsert = isA64? false: true;
2544         isA64 = true;
2545       } else {
2546         postinsert = isA64? true: false;
2547         isA64 = false;
2548       }
2549       if (postinsert)
2550         OS << "#endif\n";
2551       if (preinsert)
2552         OS << "#ifdef __aarch64__\n";
2553 
2554       std::string ts = TypeString('d', TDTypeVec[i]);
2555       std::string vs = TypeString('0' + vi, TDTypeVec[i]);
2556       OS << "typedef struct " << vs << " {\n";
2557       OS << "  " << ts << " val";
2558       OS << "[" << utostr(vi) << "]";
2559       OS << ";\n} ";
2560       OS << vs << ";\n";
2561       OS << "\n";
2562     }
2563   }
2564   postinsert = isA64? true: false;
2565   if (postinsert)
2566     OS << "#endif\n";
2567   OS << "\n";
2568 
2569   OS<<"#define __ai static inline __attribute__((__always_inline__, __nodebug__))\n\n";
2570 
2571   std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");
2572 
2573   StringMap<ClassKind> EmittedMap;
2574 
2575   // Emit vmovl, vmull and vabd intrinsics first so they can be used by other
2576   // intrinsics.  (Some of the saturating multiply instructions are also
2577   // used to implement the corresponding "_lane" variants, but tablegen
2578   // sorts the records into alphabetical order so that the "_lane" variants
2579   // come after the intrinsics they use.)
2580   emitIntrinsic(OS, Records.getDef("VMOVL"), EmittedMap);
2581   emitIntrinsic(OS, Records.getDef("VMULL"), EmittedMap);
2582   emitIntrinsic(OS, Records.getDef("VABD"), EmittedMap);
2583   emitIntrinsic(OS, Records.getDef("VABDL"), EmittedMap);
2584 
2585   // ARM intrinsics must be emitted before AArch64 intrinsics to ensure
2586   // common intrinsics appear only once in the output stream.
2587   // The check for uniquiness is done in emitIntrinsic.
2588   // Emit ARM intrinsics.
2589   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2590     Record *R = RV[i];
2591 
2592     // Skip AArch64 intrinsics; they will be emitted at the end.
2593     bool isA64 = R->getValueAsBit("isA64");
2594     if (isA64)
2595       continue;
2596 
2597     if (R->getName() != "VMOVL" && R->getName() != "VMULL" &&
2598         R->getName() != "VABD")
2599       emitIntrinsic(OS, R, EmittedMap);
2600   }
2601 
2602   // Emit AArch64-specific intrinsics.
2603   OS << "#ifdef __aarch64__\n";
2604 
2605   emitIntrinsic(OS, Records.getDef("VMOVL_HIGH"), EmittedMap);
2606   emitIntrinsic(OS, Records.getDef("VMULL_HIGH"), EmittedMap);
2607   emitIntrinsic(OS, Records.getDef("VABDL_HIGH"), EmittedMap);
2608 
2609   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2610     Record *R = RV[i];
2611 
2612     // Skip ARM intrinsics already included above.
2613     bool isA64 = R->getValueAsBit("isA64");
2614     if (!isA64)
2615       continue;
2616 
2617     // Skip crypto temporarily, and will emit them all together at the end.
2618     bool isCrypto = R->getValueAsBit("isCrypto");
2619     if (isCrypto)
2620       continue;
2621 
2622     emitIntrinsic(OS, R, EmittedMap);
2623   }
2624 
2625   OS << "#ifdef __ARM_FEATURE_CRYPTO\n";
2626 
2627   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2628     Record *R = RV[i];
2629 
2630     // Skip crypto temporarily, and will emit them all together at the end.
2631     bool isCrypto = R->getValueAsBit("isCrypto");
2632     if (!isCrypto)
2633       continue;
2634 
2635     emitIntrinsic(OS, R, EmittedMap);
2636   }
2637 
2638   OS << "#endif\n\n";
2639 
2640   OS << "#endif\n\n";
2641 
2642   OS << "#undef __ai\n\n";
2643   OS << "#endif /* __ARM_NEON_H */\n";
2644 }
2645 
2646 /// emitIntrinsic - Write out the arm_neon.h header file definitions for the
2647 /// intrinsics specified by record R checking for intrinsic uniqueness.
2648 void NeonEmitter::emitIntrinsic(raw_ostream &OS, Record *R,
2649                                 StringMap<ClassKind> &EmittedMap) {
2650   std::string name = R->getValueAsString("Name");
2651   std::string Proto = R->getValueAsString("Prototype");
2652   std::string Types = R->getValueAsString("Types");
2653 
2654   SmallVector<StringRef, 16> TypeVec;
2655   ParseTypes(R, Types, TypeVec);
2656 
2657   OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];
2658 
2659   ClassKind classKind = ClassNone;
2660   if (R->getSuperClasses().size() >= 2)
2661     classKind = ClassMap[R->getSuperClasses()[1]];
2662   if (classKind == ClassNone && kind == OpNone)
2663     PrintFatalError(R->getLoc(), "Builtin has no class kind");
2664 
2665   for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
2666     if (kind == OpReinterpret) {
2667       bool outQuad = false;
2668       bool dummy = false;
2669       (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
2670       for (unsigned srcti = 0, srcte = TypeVec.size();
2671            srcti != srcte; ++srcti) {
2672         bool inQuad = false;
2673         (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
2674         if (srcti == ti || inQuad != outQuad)
2675           continue;
2676         std::string s = GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[srcti],
2677                                      OpCast, ClassS);
2678         if (EmittedMap.count(s))
2679           continue;
2680         EmittedMap[s] = ClassS;
2681         OS << s;
2682       }
2683     } else {
2684       std::string s =
2685           GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[ti], kind, classKind);
2686       if (EmittedMap.count(s))
2687         continue;
2688       EmittedMap[s] = classKind;
2689       OS << s;
2690     }
2691   }
2692   OS << "\n";
2693 }
2694 
2695 static unsigned RangeFromType(const char mod, StringRef typestr) {
2696   // base type to get the type string for.
2697   bool quad = false, dummy = false;
2698   char type = ClassifyType(typestr, quad, dummy, dummy);
2699   type = ModType(mod, type, quad, dummy, dummy, dummy, dummy, dummy);
2700 
2701   switch (type) {
2702     case 'c':
2703       return (8 << (int)quad) - 1;
2704     case 'h':
2705     case 's':
2706       return (4 << (int)quad) - 1;
2707     case 'f':
2708     case 'i':
2709       return (2 << (int)quad) - 1;
2710     case 'd':
2711     case 'l':
2712       return (1 << (int)quad) - 1;
2713     default:
2714       PrintFatalError("unhandled type!");
2715   }
2716 }
2717 
2718 static unsigned RangeScalarShiftImm(const char mod, StringRef typestr) {
2719   // base type to get the type string for.
2720   bool dummy = false;
2721   char type = ClassifyType(typestr, dummy, dummy, dummy);
2722   type = ModType(mod, type, dummy, dummy, dummy, dummy, dummy, dummy);
2723 
2724   switch (type) {
2725     case 'c':
2726       return 7;
2727     case 'h':
2728     case 's':
2729       return 15;
2730     case 'f':
2731     case 'i':
2732       return 31;
2733     case 'd':
2734     case 'l':
2735       return 63;
2736     default:
2737       PrintFatalError("unhandled type!");
2738   }
2739 }
2740 
2741 /// Generate the ARM and AArch64 intrinsic range checking code for
2742 /// shift/lane immediates, checking for unique declarations.
2743 void
2744 NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS,
2745                                         StringMap<ClassKind> &A64IntrinsicMap,
2746                                         bool isA64RangeCheck) {
2747   std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
2748   StringMap<OpKind> EmittedMap;
2749 
2750   // Generate the intrinsic range checking code for shift/lane immediates.
2751   if (isA64RangeCheck)
2752     OS << "#ifdef GET_NEON_AARCH64_IMMEDIATE_CHECK\n";
2753   else
2754     OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
2755 
2756   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2757     Record *R = RV[i];
2758 
2759     OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
2760     if (k != OpNone)
2761       continue;
2762 
2763     std::string name = R->getValueAsString("Name");
2764     std::string Proto = R->getValueAsString("Prototype");
2765     std::string Types = R->getValueAsString("Types");
2766     std::string Rename = name + "@" + Proto;
2767 
2768     // Functions with 'a' (the splat code) in the type prototype should not get
2769     // their own builtin as they use the non-splat variant.
2770     if (Proto.find('a') != std::string::npos)
2771       continue;
2772 
2773     // Functions which do not have an immediate do not need to have range
2774     // checking code emitted.
2775     size_t immPos = Proto.find('i');
2776     if (immPos == std::string::npos)
2777       continue;
2778 
2779     SmallVector<StringRef, 16> TypeVec;
2780     ParseTypes(R, Types, TypeVec);
2781 
2782     if (R->getSuperClasses().size() < 2)
2783       PrintFatalError(R->getLoc(), "Builtin has no class kind");
2784 
2785     ClassKind ck = ClassMap[R->getSuperClasses()[1]];
2786 
2787     // Do not include AArch64 range checks if not generating code for AArch64.
2788     bool isA64 = R->getValueAsBit("isA64");
2789     if (!isA64RangeCheck && isA64)
2790       continue;
2791 
2792     // Include ARM range checks in AArch64 but only if ARM intrinsics are not
2793     // redefined by AArch64 to handle new types.
2794     if (isA64RangeCheck && !isA64 && A64IntrinsicMap.count(Rename)) {
2795       ClassKind &A64CK = A64IntrinsicMap[Rename];
2796       if (A64CK == ck && ck != ClassNone)
2797         continue;
2798     }
2799 
2800     for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
2801       std::string namestr, shiftstr, rangestr;
2802 
2803       if (R->getValueAsBit("isVCVT_N")) {
2804         // VCVT between floating- and fixed-point values takes an immediate
2805         // in the range [1, 32] for f32, or [1, 64] for f64.
2806         ck = ClassB;
2807         if (name.find("32") != std::string::npos)
2808           rangestr = "l = 1; u = 31"; // upper bound = l + u
2809         else if (name.find("64") != std::string::npos)
2810           rangestr = "l = 1; u = 63";
2811         else
2812           PrintFatalError(R->getLoc(),
2813               "Fixed point convert name should contains \"32\" or \"64\"");
2814 
2815       } else if (R->getValueAsBit("isScalarShift")) {
2816         // Right shifts have an 'r' in the name, left shifts do not.  Convert
2817         // instructions have the same bounds and right shifts.
2818         if (name.find('r') != std::string::npos ||
2819             name.find("cvt") != std::string::npos)
2820           rangestr = "l = 1; ";
2821 
2822         rangestr += "u = " +
2823           utostr(RangeScalarShiftImm(Proto[immPos - 1], TypeVec[ti]));
2824       } else if (!ProtoHasScalar(Proto)) {
2825         // Builtins which are overloaded by type will need to have their upper
2826         // bound computed at Sema time based on the type constant.
2827         ck = ClassB;
2828         if (R->getValueAsBit("isShift")) {
2829           shiftstr = ", true";
2830 
2831           // Right shifts have an 'r' in the name, left shifts do not.
2832           if (name.find('r') != std::string::npos)
2833             rangestr = "l = 1; ";
2834         }
2835         rangestr += "u = RFT(TV" + shiftstr + ")";
2836       } else {
2837         // The immediate generally refers to a lane in the preceding argument.
2838         assert(immPos > 0 && "unexpected immediate operand");
2839         rangestr =
2840             "u = " + utostr(RangeFromType(Proto[immPos - 1], TypeVec[ti]));
2841       }
2842       // Make sure cases appear only once by uniquing them in a string map.
2843       namestr = MangleName(name, TypeVec[ti], ck);
2844       if (EmittedMap.count(namestr))
2845         continue;
2846       EmittedMap[namestr] = OpNone;
2847 
2848       // Calculate the index of the immediate that should be range checked.
2849       unsigned immidx = 0;
2850 
2851       // Builtins that return a struct of multiple vectors have an extra
2852       // leading arg for the struct return.
2853       if (IsMultiVecProto(Proto[0]))
2854         ++immidx;
2855 
2856       // Add one to the index for each argument until we reach the immediate
2857       // to be checked.  Structs of vectors are passed as multiple arguments.
2858       for (unsigned ii = 1, ie = Proto.size(); ii != ie; ++ii) {
2859         switch (Proto[ii]) {
2860         default:
2861           immidx += 1;
2862           break;
2863         case '2':
2864         case 'B':
2865           immidx += 2;
2866           break;
2867         case '3':
2868         case 'C':
2869           immidx += 3;
2870           break;
2871         case '4':
2872         case 'D':
2873           immidx += 4;
2874           break;
2875         case 'i':
2876           ie = ii + 1;
2877           break;
2878         }
2879       }
2880       if (isA64RangeCheck)
2881         OS << "case AArch64::BI__builtin_neon_";
2882       else
2883         OS << "case ARM::BI__builtin_neon_";
2884       OS << MangleName(name, TypeVec[ti], ck) << ": i = " << immidx << "; "
2885          << rangestr << "; break;\n";
2886     }
2887   }
2888   OS << "#endif\n\n";
2889 }
2890 
2891 /// Generate the ARM and AArch64 overloaded type checking code for
2892 /// SemaChecking.cpp, checking for unique builtin declarations.
2893 void
2894 NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
2895                                       StringMap<ClassKind> &A64IntrinsicMap,
2896                                       bool isA64TypeCheck) {
2897   std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
2898   StringMap<OpKind> EmittedMap;
2899 
2900   // Generate the overloaded type checking code for SemaChecking.cpp
2901   if (isA64TypeCheck)
2902     OS << "#ifdef GET_NEON_AARCH64_OVERLOAD_CHECK\n";
2903   else
2904     OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";
2905 
2906   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2907     Record *R = RV[i];
2908     OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
2909     if (k != OpNone)
2910       continue;
2911 
2912     std::string Proto = R->getValueAsString("Prototype");
2913     std::string Types = R->getValueAsString("Types");
2914     std::string name = R->getValueAsString("Name");
2915     std::string Rename = name + "@" + Proto;
2916 
2917     // Functions with 'a' (the splat code) in the type prototype should not get
2918     // their own builtin as they use the non-splat variant.
2919     if (Proto.find('a') != std::string::npos)
2920       continue;
2921 
2922     // Functions which have a scalar argument cannot be overloaded, no need to
2923     // check them if we are emitting the type checking code.
2924     if (ProtoHasScalar(Proto))
2925       continue;
2926 
2927     SmallVector<StringRef, 16> TypeVec;
2928     ParseTypes(R, Types, TypeVec);
2929 
2930     if (R->getSuperClasses().size() < 2)
2931       PrintFatalError(R->getLoc(), "Builtin has no class kind");
2932 
2933     // Do not include AArch64 type checks if not generating code for AArch64.
2934     bool isA64 = R->getValueAsBit("isA64");
2935     if (!isA64TypeCheck && isA64)
2936       continue;
2937 
2938     // Include ARM  type check in AArch64 but only if ARM intrinsics
2939     // are not redefined in AArch64 to handle new types, e.g. "vabd" is a SIntr
2940     // redefined in AArch64 to handle an additional 2 x f64 type.
2941     ClassKind ck = ClassMap[R->getSuperClasses()[1]];
2942     if (isA64TypeCheck && !isA64 && A64IntrinsicMap.count(Rename)) {
2943       ClassKind &A64CK = A64IntrinsicMap[Rename];
2944       if (A64CK == ck && ck != ClassNone)
2945         continue;
2946     }
2947 
2948     int si = -1, qi = -1;
2949     uint64_t mask = 0, qmask = 0;
2950     for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
2951       // Generate the switch case(s) for this builtin for the type validation.
2952       bool quad = false, poly = false, usgn = false;
2953       (void) ClassifyType(TypeVec[ti], quad, poly, usgn);
2954 
2955       if (quad) {
2956         qi = ti;
2957         qmask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
2958       } else {
2959         si = ti;
2960         mask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
2961       }
2962     }
2963 
2964     // Check if the builtin function has a pointer or const pointer argument.
2965     int PtrArgNum = -1;
2966     bool HasConstPtr = false;
2967     for (unsigned arg = 1, arge = Proto.size(); arg != arge; ++arg) {
2968       char ArgType = Proto[arg];
2969       if (ArgType == 'c') {
2970         HasConstPtr = true;
2971         PtrArgNum = arg - 1;
2972         break;
2973       }
2974       if (ArgType == 'p') {
2975         PtrArgNum = arg - 1;
2976         break;
2977       }
2978     }
2979     // For sret builtins, adjust the pointer argument index.
2980     if (PtrArgNum >= 0 && IsMultiVecProto(Proto[0]))
2981       PtrArgNum += 1;
2982 
2983     // Omit type checking for the pointer arguments of vld1_lane, vld1_dup,
2984     // and vst1_lane intrinsics.  Using a pointer to the vector element
2985     // type with one of those operations causes codegen to select an aligned
2986     // load/store instruction.  If you want an unaligned operation,
2987     // the pointer argument needs to have less alignment than element type,
2988     // so just accept any pointer type.
2989     if (name == "vld1_lane" || name == "vld1_dup" || name == "vst1_lane") {
2990       PtrArgNum = -1;
2991       HasConstPtr = false;
2992     }
2993 
2994     if (mask) {
2995       if (isA64TypeCheck)
2996         OS << "case AArch64::BI__builtin_neon_";
2997       else
2998         OS << "case ARM::BI__builtin_neon_";
2999       OS << MangleName(name, TypeVec[si], ClassB) << ": mask = "
3000          << "0x" << utohexstr(mask) << "ULL";
3001       if (PtrArgNum >= 0)
3002         OS << "; PtrArgNum = " << PtrArgNum;
3003       if (HasConstPtr)
3004         OS << "; HasConstPtr = true";
3005       OS << "; break;\n";
3006     }
3007     if (qmask) {
3008       if (isA64TypeCheck)
3009         OS << "case AArch64::BI__builtin_neon_";
3010       else
3011         OS << "case ARM::BI__builtin_neon_";
3012       OS << MangleName(name, TypeVec[qi], ClassB) << ": mask = "
3013          << "0x" << utohexstr(qmask) << "ULL";
3014       if (PtrArgNum >= 0)
3015         OS << "; PtrArgNum = " << PtrArgNum;
3016       if (HasConstPtr)
3017         OS << "; HasConstPtr = true";
3018       OS << "; break;\n";
3019     }
3020   }
3021   OS << "#endif\n\n";
3022 }
3023 
3024 /// genBuiltinsDef: Generate the BuiltinsARM.def and  BuiltinsAArch64.def
3025 /// declaration of builtins, checking for unique builtin declarations.
3026 void NeonEmitter::genBuiltinsDef(raw_ostream &OS,
3027                                  StringMap<ClassKind> &A64IntrinsicMap,
3028                                  bool isA64GenBuiltinDef) {
3029   std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
3030   StringMap<OpKind> EmittedMap;
3031 
3032   // Generate BuiltinsARM.def and BuiltinsAArch64.def
3033   if (isA64GenBuiltinDef)
3034     OS << "#ifdef GET_NEON_AARCH64_BUILTINS\n";
3035   else
3036     OS << "#ifdef GET_NEON_BUILTINS\n";
3037 
3038   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
3039     Record *R = RV[i];
3040     OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
3041     if (k != OpNone)
3042       continue;
3043 
3044     std::string Proto = R->getValueAsString("Prototype");
3045     std::string name = R->getValueAsString("Name");
3046     std::string Rename = name + "@" + Proto;
3047 
3048     // Functions with 'a' (the splat code) in the type prototype should not get
3049     // their own builtin as they use the non-splat variant.
3050     if (Proto.find('a') != std::string::npos)
3051       continue;
3052 
3053     std::string Types = R->getValueAsString("Types");
3054     SmallVector<StringRef, 16> TypeVec;
3055     ParseTypes(R, Types, TypeVec);
3056 
3057     if (R->getSuperClasses().size() < 2)
3058       PrintFatalError(R->getLoc(), "Builtin has no class kind");
3059 
3060     ClassKind ck = ClassMap[R->getSuperClasses()[1]];
3061 
3062     // Do not include AArch64 BUILTIN() macros if not generating
3063     // code for AArch64
3064     bool isA64 = R->getValueAsBit("isA64");
3065     if (!isA64GenBuiltinDef && isA64)
3066       continue;
3067 
3068     // Include ARM  BUILTIN() macros  in AArch64 but only if ARM intrinsics
3069     // are not redefined in AArch64 to handle new types, e.g. "vabd" is a SIntr
3070     // redefined in AArch64 to handle an additional 2 x f64 type.
3071     if (isA64GenBuiltinDef && !isA64 && A64IntrinsicMap.count(Rename)) {
3072       ClassKind &A64CK = A64IntrinsicMap[Rename];
3073       if (A64CK == ck && ck != ClassNone)
3074         continue;
3075     }
3076 
3077     for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
3078       // Generate the declaration for this builtin, ensuring
3079       // that each unique BUILTIN() macro appears only once in the output
3080       // stream.
3081       std::string bd = GenBuiltinDef(name, Proto, TypeVec[ti], ck);
3082       if (EmittedMap.count(bd))
3083         continue;
3084 
3085       EmittedMap[bd] = OpNone;
3086       OS << bd << "\n";
3087     }
3088   }
3089   OS << "#endif\n\n";
3090 }
3091 
3092 /// runHeader - Emit a file with sections defining:
3093 /// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def.
3094 /// 2. the SemaChecking code for the type overload checking.
3095 /// 3. the SemaChecking code for validation of intrinsic immediate arguments.
3096 void NeonEmitter::runHeader(raw_ostream &OS) {
3097   std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
3098 
3099   // build a map of AArch64 intriniscs to be used in uniqueness checks.
3100   StringMap<ClassKind> A64IntrinsicMap;
3101   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
3102     Record *R = RV[i];
3103 
3104     bool isA64 = R->getValueAsBit("isA64");
3105     if (!isA64)
3106       continue;
3107 
3108     ClassKind CK = ClassNone;
3109     if (R->getSuperClasses().size() >= 2)
3110       CK = ClassMap[R->getSuperClasses()[1]];
3111 
3112     std::string Name = R->getValueAsString("Name");
3113     std::string Proto = R->getValueAsString("Prototype");
3114     std::string Rename = Name + "@" + Proto;
3115     if (A64IntrinsicMap.count(Rename))
3116       continue;
3117     A64IntrinsicMap[Rename] = CK;
3118   }
3119 
3120   // Generate BuiltinsARM.def for ARM
3121   genBuiltinsDef(OS, A64IntrinsicMap, false);
3122 
3123   // Generate BuiltinsAArch64.def for AArch64
3124   genBuiltinsDef(OS, A64IntrinsicMap, true);
3125 
3126   // Generate ARM overloaded type checking code for SemaChecking.cpp
3127   genOverloadTypeCheckCode(OS, A64IntrinsicMap, false);
3128 
3129   // Generate AArch64 overloaded type checking code for SemaChecking.cpp
3130   genOverloadTypeCheckCode(OS, A64IntrinsicMap, true);
3131 
3132   // Generate ARM range checking code for shift/lane immediates.
3133   genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, false);
3134 
3135   // Generate the AArch64 range checking code for shift/lane immediates.
3136   genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, true);
3137 }
3138 
3139 /// GenTest - Write out a test for the intrinsic specified by the name and
3140 /// type strings, including the embedded patterns for FileCheck to match.
3141 static std::string GenTest(const std::string &name,
3142                            const std::string &proto,
3143                            StringRef outTypeStr, StringRef inTypeStr,
3144                            bool isShift, bool isHiddenLOp,
3145                            ClassKind ck, const std::string &InstName,
3146                            bool isA64,
3147                            std::string & testFuncProto) {
3148   assert(!proto.empty() && "");
3149   std::string s;
3150 
3151   // Function name with type suffix
3152   std::string mangledName = MangleName(name, outTypeStr, ClassS);
3153   if (outTypeStr != inTypeStr) {
3154     // If the input type is different (e.g., for vreinterpret), append a suffix
3155     // for the input type.  String off a "Q" (quad) prefix so that MangleName
3156     // does not insert another "q" in the name.
3157     unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
3158     StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
3159     mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
3160   }
3161 
3162   // todo: GenerateChecksForIntrinsic does not generate CHECK
3163   // for aarch64 instructions yet
3164   std::vector<std::string> FileCheckPatterns;
3165   if (!isA64) {
3166 	GenerateChecksForIntrinsic(name, proto, outTypeStr, inTypeStr, ck, InstName,
3167 							   isHiddenLOp, FileCheckPatterns);
3168 	s+= "// CHECK_ARM: test_" + mangledName + "\n";
3169   }
3170   s += "// CHECK_AARCH64: test_" + mangledName + "\n";
3171 
3172   // Emit the FileCheck patterns.
3173   // If for any reason we do not want to emit a check, mangledInst
3174   // will be the empty string.
3175   if (FileCheckPatterns.size()) {
3176     for (std::vector<std::string>::const_iterator i = FileCheckPatterns.begin(),
3177                                                   e = FileCheckPatterns.end();
3178          i != e;
3179          ++i) {
3180       s += "// CHECK_ARM: " + *i + "\n";
3181     }
3182   }
3183 
3184   // Emit the start of the test function.
3185 
3186   testFuncProto = TypeString(proto[0], outTypeStr) + " test_" + mangledName + "(";
3187   char arg = 'a';
3188   std::string comma;
3189   for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
3190     // Do not create arguments for values that must be immediate constants.
3191     if (proto[i] == 'i')
3192       continue;
3193     testFuncProto += comma + TypeString(proto[i], inTypeStr) + " ";
3194     testFuncProto.push_back(arg);
3195     comma = ", ";
3196   }
3197   testFuncProto += ")";
3198 
3199   s+= testFuncProto;
3200   s+= " {\n  ";
3201 
3202   if (proto[0] != 'v')
3203     s += "return ";
3204   s += mangledName + "(";
3205   arg = 'a';
3206   for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
3207     if (proto[i] == 'i') {
3208       // For immediate operands, test the maximum value.
3209       if (isShift)
3210         s += "1"; // FIXME
3211       else
3212         // The immediate generally refers to a lane in the preceding argument.
3213         s += utostr(RangeFromType(proto[i-1], inTypeStr));
3214     } else {
3215       s.push_back(arg);
3216     }
3217     if ((i + 1) < e)
3218       s += ", ";
3219   }
3220   s += ");\n}\n\n";
3221   return s;
3222 }
3223 
3224 /// Write out all intrinsic tests for the specified target, checking
3225 /// for intrinsic test uniqueness.
3226 void NeonEmitter::genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap,
3227                                 bool isA64GenTest) {
3228   if (isA64GenTest)
3229 	OS << "#ifdef __aarch64__\n";
3230 
3231   std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
3232   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
3233     Record *R = RV[i];
3234     std::string name = R->getValueAsString("Name");
3235     std::string Proto = R->getValueAsString("Prototype");
3236     std::string Types = R->getValueAsString("Types");
3237     bool isShift = R->getValueAsBit("isShift");
3238     std::string InstName = R->getValueAsString("InstName");
3239     bool isHiddenLOp = R->getValueAsBit("isHiddenLInst");
3240     bool isA64 = R->getValueAsBit("isA64");
3241 
3242     // do not include AArch64 intrinsic test if not generating
3243     // code for AArch64
3244     if (!isA64GenTest && isA64)
3245       continue;
3246 
3247     SmallVector<StringRef, 16> TypeVec;
3248     ParseTypes(R, Types, TypeVec);
3249 
3250     ClassKind ck = ClassMap[R->getSuperClasses()[1]];
3251     OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];
3252     if (kind == OpUnavailable)
3253       continue;
3254     for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
3255       if (kind == OpReinterpret) {
3256         bool outQuad = false;
3257         bool dummy = false;
3258         (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
3259         for (unsigned srcti = 0, srcte = TypeVec.size();
3260              srcti != srcte; ++srcti) {
3261           bool inQuad = false;
3262           (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
3263           if (srcti == ti || inQuad != outQuad)
3264             continue;
3265 		  std::string testFuncProto;
3266           std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[srcti],
3267                                   isShift, isHiddenLOp, ck, InstName, isA64,
3268 								  testFuncProto);
3269           if (EmittedMap.count(testFuncProto))
3270             continue;
3271           EmittedMap[testFuncProto] = kind;
3272           OS << s << "\n";
3273         }
3274       } else {
3275 		std::string testFuncProto;
3276         std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[ti], isShift,
3277                                 isHiddenLOp, ck, InstName, isA64, testFuncProto);
3278         if (EmittedMap.count(testFuncProto))
3279           continue;
3280         EmittedMap[testFuncProto] = kind;
3281         OS << s << "\n";
3282       }
3283     }
3284   }
3285 
3286   if (isA64GenTest)
3287 	OS << "#endif\n";
3288 }
3289 /// runTests - Write out a complete set of tests for all of the Neon
3290 /// intrinsics.
3291 void NeonEmitter::runTests(raw_ostream &OS) {
3292   OS << "// RUN: %clang_cc1 -triple thumbv7s-apple-darwin -target-abi "
3293         "apcs-gnu\\\n"
3294         "// RUN:  -target-cpu swift -ffreestanding -Os -S -o - %s\\\n"
3295         "// RUN:  | FileCheck %s -check-prefix=CHECK_ARM\n"
3296 		"\n"
3297 	    "// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \\\n"
3298 	    "// RUN -target-feature +neon  -ffreestanding -S -o - %s \\\n"
3299 	    "// RUN:  | FileCheck %s -check-prefix=CHECK_AARCH64\n"
3300         "\n"
3301         "// REQUIRES: long_tests\n"
3302         "\n"
3303         "#include <arm_neon.h>\n"
3304         "\n";
3305 
3306   // ARM tests must be emitted before AArch64 tests to ensure
3307   // tests for intrinsics that are common to ARM and AArch64
3308   // appear only once in the output stream.
3309   // The check for uniqueness is done in genTargetTest.
3310   StringMap<OpKind> EmittedMap;
3311 
3312   genTargetTest(OS, EmittedMap, false);
3313 
3314   genTargetTest(OS, EmittedMap, true);
3315 }
3316 
3317 namespace clang {
3318 void EmitNeon(RecordKeeper &Records, raw_ostream &OS) {
3319   NeonEmitter(Records).run(OS);
3320 }
3321 void EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) {
3322   NeonEmitter(Records).runHeader(OS);
3323 }
3324 void EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) {
3325   NeonEmitter(Records).runTests(OS);
3326 }
3327 } // End namespace clang
3328