//===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This tablegen backend is responsible for emitting arm_neon.h, which includes
// a declaration and definition of each function specified by the ARM NEON
// compiler interface. See ARM document DUI0348B.
//
// Each NEON instruction is implemented in terms of 1 or more functions which
// are suffixed with the element type of the input vectors. Functions may be
// implemented in terms of generic vector operations such as +, *, -, etc. or
// by calling a __builtin_-prefixed function which will be handled by clang's
// CodeGen library.
//
// Additional validation code can be generated by this file when runHeader() is
// called, rather than the normal run() entry point. A complete set of tests
// for Neon intrinsics can be generated by calling the runTests() entry point.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/TableGenBackend.h"
#include <string>
using namespace llvm;

/// OpKind - enumerates every pseudo-op an intrinsic record may name via its
/// OP_* operand in arm_neon.td. Entries correspond one-to-one with the
/// OpMap["OP_..."] initializers in the NeonEmitter constructor below; keep the
/// two lists in sync when adding an operation.
enum OpKind {
  OpNone,
  OpUnavailable,
  OpAdd,
  OpAddl,
  OpAddlHi,
  OpAddw,
  OpAddwHi,
  OpSub,
  OpSubl,
  OpSublHi,
  OpSubw,
  OpSubwHi,
  OpMul,
  OpMla,
  OpMlal,
  OpMullHi,
  OpMlalHi,
  OpMls,
  OpMlsl,
  OpMlslHi,
  OpMulN,
  OpMlaN,
  OpMlsN,
  OpMlalN,
  OpMlslN,
  OpMulLane,
  OpMulXLane,
  OpMullLane,
  OpMullHiLane,
  OpMlaLane,
  OpMlsLane,
  OpMlalLane,
  OpMlalHiLane,
  OpMlslLane,
  OpMlslHiLane,
  OpQDMullLane,
  OpQDMullHiLane,
  OpQDMlalLane,
  OpQDMlalHiLane,
  OpQDMlslLane,
  OpQDMlslHiLane,
  OpQDMulhLane,
  OpQRDMulhLane,
  OpFMSLane,
  OpFMSLaneQ,
  OpTrn1,
  OpZip1,
  OpUzp1,
  OpTrn2,
  OpZip2,
  OpUzp2,
  OpEq,
  OpGe,
  OpLe,
  OpGt,
  OpLt,
  OpNeg,
  OpNot,
  OpAnd,
  OpOr,
  OpXor,
  OpAndNot,
  OpOrNot,
  OpCast,
  OpConcat,
  OpDup,
  OpDupLane,
  OpHi,
  OpLo,
  OpSelect,
  OpRev16,
  OpRev32,
  OpRev64,
  OpXtnHi,
  OpSqxtunHi,
  OpQxtnHi,
  OpFcvtnHi,
  OpFcvtlHi,
  OpFcvtxnHi,
  OpReinterpret,
  OpAddhnHi,
  OpRAddhnHi,
  OpSubhnHi,
  OpRSubhnHi,
  OpAbdl,
  OpAbdlHi,
  OpAba,
  OpAbal,
  OpAbalHi,
  OpQDMullHi,
  OpQDMlalHi,
  OpQDMlslHi,
  OpDiv,
  OpLongHi,
  OpNarrowHi,
  OpMovlHi,
  OpCopyLane,
  OpCopyQLane,
  OpCopyLaneQ,
  OpScalarMulLane,
  OpScalarMulLaneQ,
  OpScalarMulXLane,
  OpScalarMulXLaneQ,
  OpScalarVMulXLane,
  OpScalarVMulXLaneQ,
  OpScalarQDMullLane,
  OpScalarQDMullLaneQ,
  OpScalarQDMulHiLane,
  OpScalarQDMulHiLaneQ,
  OpScalarQRDMulHiLane,
  OpScalarQRDMulHiLaneQ
};

/// ClassKind - kinds of intrinsic-name suffixing; determines how a base name
/// like "vadd" is mangled into "vadd_s8", "vaddq_u32", etc.
enum ClassKind {
  ClassNone,
  ClassI,     // generic integer instruction, e.g., "i8" suffix
  ClassS,     // signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix
  ClassW,     // width-specific instruction, e.g., "8" suffix
  ClassB,     // bitcast arguments with enum argument to specify type
  ClassL,     // Logical instructions which are op instructions
              // but we need to not emit any suffix for in our
              // tests.
  ClassNoTest // Instructions which we do not test since they are
              // not TRUE instructions.
};

/// NeonTypeFlags - Flags to identify the types for overloaded Neon
/// builtins.  These must be kept in sync with the flags in
/// include/clang/Basic/TargetBuiltins.h.
namespace {
class NeonTypeFlags {
  // Bit layout of Flags: low nibble is the EltType, then one bit each for
  // unsigned and quad (128-bit) — mirroring TargetBuiltins.h.
  enum {
    EltTypeMask = 0xf,
    UnsignedFlag = 0x10,
    QuadFlag = 0x20
  };
  uint32_t Flags;

public:
  enum EltType {
    Int8,
    Int16,
    Int32,
    Int64,
    Poly8,
    Poly16,
    Poly64,
    Float16,
    Float32,
    Float64
  };

  NeonTypeFlags(unsigned F) : Flags(F) {}
  NeonTypeFlags(EltType ET, bool IsUnsigned, bool IsQuad) : Flags(ET) {
    if (IsUnsigned)
      Flags |= UnsignedFlag;
    if (IsQuad)
      Flags |= QuadFlag;
  }

  uint32_t getFlags() const { return Flags; }
};
} // end anonymous namespace

namespace {
/// NeonEmitter - the backend proper.  Holds two lookup tables built once in
/// the constructor: OpMap translates the OP_* operand strings found in
/// arm_neon.td records into OpKind values, and ClassMap translates each
/// instruction record class into its suffixing ClassKind.
class NeonEmitter {
  RecordKeeper &Records;
  StringMap<OpKind> OpMap;
  DenseMap<Record*, ClassKind> ClassMap;

public:
  NeonEmitter(RecordKeeper &R) : Records(R) {
    // Pseudo-op name -> OpKind.  Must cover every OP_* used in arm_neon.td.
    OpMap["OP_NONE"] = OpNone;
    OpMap["OP_UNAVAILABLE"] = OpUnavailable;
    OpMap["OP_ADD"] = OpAdd;
    OpMap["OP_ADDL"] = OpAddl;
    OpMap["OP_ADDLHi"] = OpAddlHi;
    OpMap["OP_ADDW"] = OpAddw;
    OpMap["OP_ADDWHi"] = OpAddwHi;
    OpMap["OP_SUB"] = OpSub;
    OpMap["OP_SUBL"] = OpSubl;
    OpMap["OP_SUBLHi"] = OpSublHi;
    OpMap["OP_SUBW"] = OpSubw;
    OpMap["OP_SUBWHi"] = OpSubwHi;
    OpMap["OP_MUL"] = OpMul;
    OpMap["OP_MLA"] = OpMla;
    OpMap["OP_MLAL"] = OpMlal;
    OpMap["OP_MULLHi"] = OpMullHi;
    OpMap["OP_MLALHi"] = OpMlalHi;
    OpMap["OP_MLS"] = OpMls;
    OpMap["OP_MLSL"] = OpMlsl;
    OpMap["OP_MLSLHi"] = OpMlslHi;
    OpMap["OP_MUL_N"] = OpMulN;
    OpMap["OP_MLA_N"] = OpMlaN;
    OpMap["OP_MLS_N"] = OpMlsN;
    OpMap["OP_MLAL_N"] = OpMlalN;
    OpMap["OP_MLSL_N"] = OpMlslN;
    OpMap["OP_MUL_LN"] = OpMulLane;
    OpMap["OP_MULX_LN"] = OpMulXLane;
    OpMap["OP_MULL_LN"] = OpMullLane;
    OpMap["OP_MULLHi_LN"] = OpMullHiLane;
    OpMap["OP_MLA_LN"] = OpMlaLane;
    OpMap["OP_MLS_LN"] = OpMlsLane;
    OpMap["OP_MLAL_LN"] = OpMlalLane;
    OpMap["OP_MLALHi_LN"] = OpMlalHiLane;
    OpMap["OP_MLSL_LN"] = OpMlslLane;
    OpMap["OP_MLSLHi_LN"] = OpMlslHiLane;
    OpMap["OP_QDMULL_LN"] = OpQDMullLane;
    OpMap["OP_QDMULLHi_LN"] = OpQDMullHiLane;
    OpMap["OP_QDMLAL_LN"] = OpQDMlalLane;
    OpMap["OP_QDMLALHi_LN"] = OpQDMlalHiLane;
    OpMap["OP_QDMLSL_LN"] = OpQDMlslLane;
    OpMap["OP_QDMLSLHi_LN"] = OpQDMlslHiLane;
    OpMap["OP_QDMULH_LN"] = OpQDMulhLane;
    OpMap["OP_QRDMULH_LN"] = OpQRDMulhLane;
    OpMap["OP_FMS_LN"] = OpFMSLane;
    OpMap["OP_FMS_LNQ"] = OpFMSLaneQ;
    OpMap["OP_TRN1"] = OpTrn1;
    OpMap["OP_ZIP1"] = OpZip1;
    OpMap["OP_UZP1"] = OpUzp1;
    OpMap["OP_TRN2"] = OpTrn2;
    OpMap["OP_ZIP2"] = OpZip2;
    OpMap["OP_UZP2"] = OpUzp2;
    OpMap["OP_EQ"] = OpEq;
    OpMap["OP_GE"] = OpGe;
    OpMap["OP_LE"] = OpLe;
    OpMap["OP_GT"] = OpGt;
    OpMap["OP_LT"] = OpLt;
    OpMap["OP_NEG"] = OpNeg;
    OpMap["OP_NOT"] = OpNot;
    OpMap["OP_AND"] = OpAnd;
    OpMap["OP_OR"] = OpOr;
    OpMap["OP_XOR"] = OpXor;
    OpMap["OP_ANDN"] = OpAndNot;
    OpMap["OP_ORN"] = OpOrNot;
    OpMap["OP_CAST"] = OpCast;
    OpMap["OP_CONC"] = OpConcat;
    OpMap["OP_HI"] = OpHi;
    OpMap["OP_LO"] = OpLo;
    OpMap["OP_DUP"] = OpDup;
    OpMap["OP_DUP_LN"] = OpDupLane;
    OpMap["OP_SEL"] = OpSelect;
    OpMap["OP_REV16"] = OpRev16;
    OpMap["OP_REV32"] = OpRev32;
    OpMap["OP_REV64"] = OpRev64;
    OpMap["OP_XTN"] = OpXtnHi;
    OpMap["OP_SQXTUN"] = OpSqxtunHi;
    OpMap["OP_QXTN"] = OpQxtnHi;
    OpMap["OP_VCVT_NA_HI"] = OpFcvtnHi;
    OpMap["OP_VCVT_EX_HI"] = OpFcvtlHi;
    OpMap["OP_VCVTX_HI"] = OpFcvtxnHi;
    OpMap["OP_REINT"] = OpReinterpret;
    OpMap["OP_ADDHNHi"] = OpAddhnHi;
    OpMap["OP_RADDHNHi"] = OpRAddhnHi;
    OpMap["OP_SUBHNHi"] = OpSubhnHi;
    OpMap["OP_RSUBHNHi"] = OpRSubhnHi;
    OpMap["OP_ABDL"] = OpAbdl;
    OpMap["OP_ABDLHi"] = OpAbdlHi;
    OpMap["OP_ABA"] = OpAba;
    OpMap["OP_ABAL"] = OpAbal;
    OpMap["OP_ABALHi"] = OpAbalHi;
    OpMap["OP_QDMULLHi"] = OpQDMullHi;
    OpMap["OP_QDMLALHi"] = OpQDMlalHi;
    OpMap["OP_QDMLSLHi"] = OpQDMlslHi;
    OpMap["OP_DIV"] = OpDiv;
    OpMap["OP_LONG_HI"] = OpLongHi;
    OpMap["OP_NARROW_HI"] = OpNarrowHi;
    OpMap["OP_MOVL_HI"] = OpMovlHi;
    OpMap["OP_COPY_LN"] = OpCopyLane;
    OpMap["OP_COPYQ_LN"] = OpCopyQLane;
    OpMap["OP_COPY_LNQ"] = OpCopyLaneQ;
    OpMap["OP_SCALAR_MUL_LN"] = OpScalarMulLane;
    OpMap["OP_SCALAR_MUL_LNQ"] = OpScalarMulLaneQ;
    OpMap["OP_SCALAR_MULX_LN"] = OpScalarMulXLane;
    OpMap["OP_SCALAR_MULX_LNQ"] = OpScalarMulXLaneQ;
    OpMap["OP_SCALAR_VMULX_LN"] = OpScalarVMulXLane;
    OpMap["OP_SCALAR_VMULX_LNQ"] = OpScalarVMulXLaneQ;
    OpMap["OP_SCALAR_QDMULL_LN"] = OpScalarQDMullLane;
    OpMap["OP_SCALAR_QDMULL_LNQ"] = OpScalarQDMullLaneQ;
    OpMap["OP_SCALAR_QDMULH_LN"] = OpScalarQDMulHiLane;
    OpMap["OP_SCALAR_QDMULH_LNQ"] = OpScalarQDMulHiLaneQ;
    OpMap["OP_SCALAR_QRDMULH_LN"] = OpScalarQRDMulHiLane;
    OpMap["OP_SCALAR_QRDMULH_LNQ"] = OpScalarQRDMulHiLaneQ;

    // Record class -> suffixing kind for every instruction class declared in
    // arm_neon.td.
    Record *SI = R.getClass("SInst");
    Record *II = R.getClass("IInst");
    Record *WI = R.getClass("WInst");
    Record *SOpI = R.getClass("SOpInst");
    Record *IOpI = R.getClass("IOpInst");
    Record *WOpI = R.getClass("WOpInst");
    Record *LOpI = R.getClass("LOpInst");
    Record *NoTestOpI = R.getClass("NoTestOpInst");

    ClassMap[SI] = ClassS;
    ClassMap[II] = ClassI;
    ClassMap[WI] = ClassW;
    ClassMap[SOpI] = ClassS;
    ClassMap[IOpI] = ClassI;
    ClassMap[WOpI] = ClassW;
    ClassMap[LOpI] = ClassL;
    ClassMap[NoTestOpI] = ClassNoTest;
  }

  // run - Emit arm_neon.h.inc
  void run(raw_ostream &o);

  // runHeader - Emit all the __builtin prototypes used in arm_neon.h
  void runHeader(raw_ostream &o);

  // runTests - Emit tests for all the Neon intrinsics.
  void runTests(raw_ostream &o);

private:
  void emitIntrinsic(raw_ostream &OS, Record *R,
                     StringMap<ClassKind> &EmittedMap);
  void genBuiltinsDef(raw_ostream &OS, StringMap<ClassKind> &A64IntrinsicMap,
                      bool isA64GenBuiltinDef);
  void genOverloadTypeCheckCode(raw_ostream &OS,
                                StringMap<ClassKind> &A64IntrinsicMap,
                                bool isA64TypeCheck);
  void genIntrinsicRangeCheckCode(raw_ostream &OS,
                                  StringMap<ClassKind> &A64IntrinsicMap,
                                  bool isA64RangeCheck);
  void genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap,
                     bool isA64TestGen);
};
} // end anonymous namespace

/// ParseTypes - break down a string such as "fQf" into a vector of StringRefs,
/// with each StringRef representing a single type declared in the string.
/// for "fQf" we would end up with 2 StringRefs, "f", and "Qf", representing
/// 2xfloat and 4xfloat respectively.
372 static void ParseTypes(Record *r, std::string &s, 373 SmallVectorImpl<StringRef> &TV) { 374 const char *data = s.data(); 375 int len = 0; 376 377 for (unsigned i = 0, e = s.size(); i != e; ++i, ++len) { 378 if (data[len] == 'P' || data[len] == 'Q' || data[len] == 'U' 379 || data[len] == 'H' || data[len] == 'S') 380 continue; 381 382 switch (data[len]) { 383 case 'c': 384 case 's': 385 case 'i': 386 case 'l': 387 case 'h': 388 case 'f': 389 case 'd': 390 break; 391 default: 392 PrintFatalError(r->getLoc(), 393 "Unexpected letter: " + std::string(data + len, 1)); 394 } 395 TV.push_back(StringRef(data, len + 1)); 396 data += len + 1; 397 len = -1; 398 } 399 } 400 401 /// Widen - Convert a type code into the next wider type. char -> short, 402 /// short -> int, etc. 403 static char Widen(const char t) { 404 switch (t) { 405 case 'c': 406 return 's'; 407 case 's': 408 return 'i'; 409 case 'i': 410 return 'l'; 411 case 'h': 412 return 'f'; 413 case 'f': 414 return 'd'; 415 default: 416 PrintFatalError("unhandled type in widen!"); 417 } 418 } 419 420 /// Narrow - Convert a type code into the next smaller type. short -> char, 421 /// float -> half float, etc. 422 static char Narrow(const char t) { 423 switch (t) { 424 case 's': 425 return 'c'; 426 case 'i': 427 return 's'; 428 case 'l': 429 return 'i'; 430 case 'f': 431 return 'h'; 432 case 'd': 433 return 'f'; 434 default: 435 PrintFatalError("unhandled type in narrow!"); 436 } 437 } 438 439 static std::string GetNarrowTypestr(StringRef ty) 440 { 441 std::string s; 442 for (size_t i = 0, end = ty.size(); i < end; i++) { 443 switch (ty[i]) { 444 case 's': 445 s += 'c'; 446 break; 447 case 'i': 448 s += 's'; 449 break; 450 case 'l': 451 s += 'i'; 452 break; 453 default: 454 s += ty[i]; 455 break; 456 } 457 } 458 459 return s; 460 } 461 462 /// For a particular StringRef, return the base type code, and whether it has 463 /// the quad-vector, polynomial, or unsigned modifiers set. 
static char ClassifyType(StringRef ty, bool &quad, bool &poly, bool &usgn) {
  unsigned off = 0;
  // ignore scalar.
  if (ty[off] == 'S') {
    ++off;
  }
  // remember quad ('Q' and 'H' both mark a 128-bit vector here).
  if (ty[off] == 'Q' || ty[off] == 'H') {
    quad = true;
    ++off;
  }

  // remember poly.
  if (ty[off] == 'P') {
    poly = true;
    ++off;
  }

  // remember unsigned.
  if (ty[off] == 'U') {
    usgn = true;
    ++off;
  }

  // base type to get the type string for.
  return ty[off];
}

/// ModType - Transform a type code and its modifiers based on a mod code. The
/// mod code definitions may be found at the top of arm_neon.td.
/// Note: several cases below fall through intentionally (e.g. 'b' is the
/// scalar form of 'u', '$' the scalar form of 'x', 'y' the scalar form of
/// 'f', and 'c' a const variant of 'p').
static char ModType(const char mod, char type, bool &quad, bool &poly,
                    bool &usgn, bool &scal, bool &cnst, bool &pntr) {
  switch (mod) {
  case 't':
    // Force poly types to their unsigned-integer equivalent.
    if (poly) {
      poly = false;
      usgn = true;
    }
    break;
  case 'b':
    scal = true;
    // FALLTHROUGH: 'b' is scalar 'u'.
  case 'u':
    usgn = true;
    poly = false;
    if (type == 'f')
      type = 'i';
    if (type == 'd')
      type = 'l';
    break;
  case '$':
    scal = true;
    // FALLTHROUGH: '$' is scalar 'x'.
  case 'x':
    usgn = false;
    poly = false;
    if (type == 'f')
      type = 'i';
    if (type == 'd')
      type = 'l';
    break;
  case 'o':
    scal = true;
    type = 'd';
    usgn = false;
    break;
  case 'y':
    scal = true;
    // FALLTHROUGH: 'y' is scalar 'f'.
  case 'f':
    if (type == 'h')
      quad = true;
    type = 'f';
    usgn = false;
    break;
  case 'g':
    quad = false;
    break;
  case 'B':
  case 'C':
  case 'D':
  case 'j':
    quad = true;
    break;
  case 'w':
    type = Widen(type);
    quad = true;
    break;
  case 'n':
    type = Widen(type);
    break;
  case 'i':
    type = 'i';
    scal = true;
    break;
  case 'l':
    type = 'l';
    scal = true;
    usgn = true;
    break;
  case 'z':
    type = Narrow(type);
    scal = true;
    break;
  case 'r':
    type = Widen(type);
    scal = true;
    break;
  case 's':
  case 'a':
    scal = true;
    break;
  case 'k':
    quad = true;
    break;
  case 'c':
    cnst = true;
    // FALLTHROUGH: 'c' is a const 'p'.
  case 'p':
    pntr = true;
    scal = true;
    break;
  case 'h':
    type = Narrow(type);
    if (type == 'h')
      quad = false;
    break;
  case 'q':
    type = Narrow(type);
    quad = true;
    break;
  case 'e':
    type = Narrow(type);
    usgn = true;
    break;
  case 'm':
    type = Narrow(type);
    quad = false;
    break;
  default:
    break;
  }
  return type;
}

/// IsMultiVecProto - true for the proto codes that denote a struct of 2, 3 or
/// 4 vectors ('2'..'4' and their 'B'..'D' quad counterparts).
static bool IsMultiVecProto(const char p) {
  return ((p >= '2' && p <= '4') || (p >= 'B' && p <= 'D'));
}

/// TypeString - for a modifier and type, generate the name of the typedef for
/// that type.  QUc -> uint8x8_t.
static std::string TypeString(const char mod, StringRef typestr) {
  bool quad = false;
  bool poly = false;
  bool usgn = false;
  bool scal = false;
  bool cnst = false;
  bool pntr = false;

  if (mod == 'v')
    return "void";
  if (mod == 'i')
    return "int";

  // base type to get the type string for.
  char type = ClassifyType(typestr, quad, poly, usgn);

  // Based on the modifying character, change the type and width if necessary.
  type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);

  SmallString<128> s;

  if (usgn)
    s.push_back('u');

  // Element name plus lane count; scalar types stop after the element name.
  switch (type) {
  case 'c':
    s += poly ? "poly8" : "int8";
    if (scal)
      break;
    s += quad ? "x16" : "x8";
    break;
  case 's':
    s += poly ? "poly16" : "int16";
    if (scal)
      break;
    s += quad ? "x8" : "x4";
    break;
  case 'i':
    s += "int32";
    if (scal)
      break;
    s += quad ? "x4" : "x2";
    break;
  case 'l':
    s += (poly && !usgn)? "poly64" : "int64";
    if (scal)
      break;
    s += quad ? "x2" : "x1";
    break;
  case 'h':
    s += "float16";
    if (scal)
      break;
    s += quad ? "x8" : "x4";
    break;
  case 'f':
    s += "float32";
    if (scal)
      break;
    s += quad ? "x4" : "x2";
    break;
  case 'd':
    s += "float64";
    if (scal)
      break;
    s += quad ? "x2" : "x1";
    break;

  default:
    PrintFatalError("unhandled type!");
  }

  // Multi-vector (struct) types get an extra xN component.
  if (mod == '2' || mod == 'B')
    s += "x2";
  if (mod == '3' || mod == 'C')
    s += "x3";
  if (mod == '4' || mod == 'D')
    s += "x4";

  // Append _t, finishing the type string typedef type.
  s += "_t";

  if (cnst)
    s += " const";

  if (pntr)
    s += " *";

  return s.str();
}

/// BuiltinTypeString - for a modifier and type, generate the clang
/// BuiltinsARM.def prototype code for the function.  See the top of clang's
/// Builtins.def for a description of the type strings.
static std::string BuiltinTypeString(const char mod, StringRef typestr,
                                     ClassKind ck, bool ret) {
  bool quad = false;
  bool poly = false;
  bool usgn = false;
  bool scal = false;
  bool cnst = false;
  bool pntr = false;

  if (mod == 'v')
    return "v"; // void
  if (mod == 'i')
    return "i"; // int

  // base type to get the type string for.
  char type = ClassifyType(typestr, quad, poly, usgn);

  // Based on the modifying character, change the type and width if necessary.
  type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);

  // All pointers are void* pointers.  Change type to 'v' now.
  if (pntr) {
    usgn = false;
    poly = false;
    type = 'v';
  }
  // Treat half-float ('h') types as unsigned short ('s') types.
  if (type == 'h') {
    type = 's';
    usgn = true;
  }
  // Overloaded (ClassI/ClassW) integer scalars are emitted as unsigned.
  usgn = usgn | poly | ((ck == ClassI || ck == ClassW) &&
                        scal && type != 'f' && type != 'd');

  if (scal) {
    SmallString<128> s;

    if (usgn)
      s.push_back('U');
    else if (type == 'c')
      s.push_back('S'); // make chars explicitly signed

    if (type == 'l') // 64-bit long
      s += "LLi";
    else
      s.push_back(type);

    if (cnst)
      s.push_back('C');
    if (pntr)
      s.push_back('*');
    return s.str();
  }

  // Since the return value must be one type, return a vector type of the
  // appropriate width which we will bitcast.  An exception is made for
  // returning structs of 2, 3, or 4 vectors which are returned in a sret-like
  // fashion, storing them to a pointer arg.
  if (ret) {
    if (IsMultiVecProto(mod))
      return "vv*"; // void result with void* first argument
    if (mod == 'f' || (ck != ClassB && type == 'f'))
      return quad ? "V4f" : "V2f";
    if (ck != ClassB && type == 'd')
      return quad ? "V2d" : "V1d";
    if (ck != ClassB && type == 's')
      return quad ? "V8s" : "V4s";
    if (ck != ClassB && type == 'i')
      return quad ? "V4i" : "V2i";
    if (ck != ClassB && type == 'l')
      return quad ? "V2LLi" : "V1LLi";

    // ClassB (and any remaining case) is expressed as a vector of signed
    // chars of the right total width.
    return quad ? "V16Sc" : "V8Sc";
  }

  // Non-return array types are passed as individual vectors.
  if (mod == '2' || mod == 'B')
    return quad ? "V16ScV16Sc" : "V8ScV8Sc";
  if (mod == '3' || mod == 'C')
    return quad ? "V16ScV16ScV16Sc" : "V8ScV8ScV8Sc";
  if (mod == '4' || mod == 'D')
    return quad ? "V16ScV16ScV16ScV16Sc" : "V8ScV8ScV8ScV8Sc";

  if (mod == 'f' || (ck != ClassB && type == 'f'))
    return quad ? "V4f" : "V2f";
  if (ck != ClassB && type == 'd')
    return quad ? "V2d" : "V1d";
  if (ck != ClassB && type == 's')
    return quad ? "V8s" : "V4s";
  if (ck != ClassB && type == 'i')
    return quad ? "V4i" : "V2i";
  if (ck != ClassB && type == 'l')
    return quad ? "V2LLi" : "V1LLi";

  return quad ? "V16Sc" : "V8Sc";
}

/// InstructionTypeCode - Computes the ARM argument character code and
/// quad status for a specific type string and ClassKind.
/// \p quad is set via ClassifyType; \p typeCode receives e.g. "s8", "i32",
/// "f64" or "" when the ClassKind carries no suffix.
static void InstructionTypeCode(const StringRef &typeStr,
                                const ClassKind ck,
                                bool &quad,
                                std::string &typeCode) {
  bool poly = false;
  bool usgn = false;
  char type = ClassifyType(typeStr, quad, poly, usgn);

  switch (type) {
  case 'c':
    switch (ck) {
    case ClassS: typeCode = poly ? "p8" : usgn ? "u8" : "s8"; break;
    case ClassI: typeCode = "i8"; break;
    case ClassW: typeCode = "8"; break;
    default: break;
    }
    break;
  case 's':
    switch (ck) {
    case ClassS: typeCode = poly ? "p16" : usgn ? "u16" : "s16"; break;
    case ClassI: typeCode = "i16"; break;
    case ClassW: typeCode = "16"; break;
    default: break;
    }
    break;
  case 'i':
    switch (ck) {
    case ClassS: typeCode = usgn ? "u32" : "s32"; break;
    case ClassI: typeCode = "i32"; break;
    case ClassW: typeCode = "32"; break;
    default: break;
    }
    break;
  case 'l':
    switch (ck) {
    case ClassS: typeCode = poly ? "p64" : usgn ? "u64" : "s64"; break;
    case ClassI: typeCode = "i64"; break;
    case ClassW: typeCode = "64"; break;
    default: break;
    }
    break;
  case 'h':
    switch (ck) {
    case ClassS:
    case ClassI: typeCode = "f16"; break;
    case ClassW: typeCode = "16"; break;
    default: break;
    }
    break;
  case 'f':
    switch (ck) {
    case ClassS:
    case ClassI: typeCode = "f32"; break;
    case ClassW: typeCode = "32"; break;
    default: break;
    }
    break;
  case 'd':
    switch (ck) {
    case ClassS:
    case ClassI:
      typeCode += "f64";
      break;
    case ClassW:
      // No width-suffixed f64 instructions exist.
      PrintFatalError("unhandled type!");
    default:
      break;
    }
    break;
  default:
    PrintFatalError("unhandled type!");
  }
}

/// Insert_BHSD_Suffix - For a scalar type string (leading 'S'), return the
/// AArch64 scalar register suffix letter ('b', 'h', 's' or 'd') matching the
/// element width; returns 0 for non-scalar type strings.
static char Insert_BHSD_Suffix(StringRef typestr){
  unsigned off = 0;
  if(typestr[off++] == 'S'){
    // Skip any modifier characters between 'S' and the base type.
    while(typestr[off] == 'Q' || typestr[off] == 'H'||
          typestr[off] == 'P' || typestr[off] == 'U')
      ++off;
    switch (typestr[off]){
    default  : break;
    case 'c' : return 'b';
    case 's' : return 'h';
    case 'i' :
    case 'f' : return 's';
    case 'l' :
    case 'd' : return 'd';
    }
  }
  return 0;
}

/// endsWith_xN - true if \p name ends with "_x2", "_x3" or "_x4".
static bool endsWith_xN(std::string const &name) {
  if (name.length() > 3) {
    if (name.compare(name.length() - 3, 3, "_x2") == 0 ||
        name.compare(name.length() - 3, 3, "_x3") == 0 ||
        name.compare(name.length() - 3, 3, "_x4") == 0)
      return true;
  }
  return false;
}

/// MangleName - Append a type or width suffix to a base neon function name,
/// and insert a 'q' in the appropriate location if type string starts with 'Q'.
/// E.g. turn "vst2_lane" into "vst2q_lane_f32", etc.
/// Insert proper 'b' 'h' 's' 'd' if prefix 'S' is used.
static std::string MangleName(const std::string &name, StringRef typestr,
                              ClassKind ck) {
  // These two conversion intrinsics keep their literal names unmangled.
  if (name == "vcvt_f32_f16" || name == "vcvt_f32_f64")
    return name;

  bool quad = false;
  std::string typeCode = "";

  InstructionTypeCode(typestr, ck, quad, typeCode);

  std::string s = name;

  if (typeCode.size() > 0) {
    // If the name is end with _xN (N = 2,3,4), insert the typeCode before _xN.
    if (endsWith_xN(s))
      s.insert(s.length() - 3, "_" + typeCode);
    else
      s += "_" + typeCode;
  }

  if (ck == ClassB)
    s += "_v";

  // Insert a 'q' before the first '_' character so that it ends up before
  // _lane or _n on vector-scalar operations.
  // NOTE(review): s.find('_') is assumed to succeed here; a name with no '_'
  // at this point would make insert() throw — TODO confirm all callers
  // guarantee a suffix has been appended.
  if (typestr.find("Q") != StringRef::npos) {
    size_t pos = s.find('_');
    s = s.insert(pos, "q");
  }
  // Likewise insert the scalar 'b'/'h'/'s'/'d' suffix before the first '_'.
  char ins = Insert_BHSD_Suffix(typestr);
  if(ins){
    size_t pos = s.find('_');
    s = s.insert(pos, &ins, 1);
  }

  return s;
}

/// PreprocessInstruction - derive the instruction Prefix and the postfix
/// feature flags (_n / _lane / _dup, special vcvt) from an intrinsic record
/// name; for vtblN/vtbxN, also extract N into \p TBNumber.
static void PreprocessInstruction(const StringRef &Name,
                                  const std::string &InstName,
                                  std::string &Prefix,
                                  bool &HasNPostfix,
                                  bool &HasLanePostfix,
                                  bool &HasDupPostfix,
                                  bool &IsSpecialVCvt,
                                  size_t &TBNumber) {
  // All of our instruction name fields from arm_neon.td are of the form
  //   <instructionname>_...
  // Thus we grab our instruction name via computation of said Prefix.
  const size_t PrefixEnd = Name.find_first_of('_');
  // If InstName is passed in, we use that instead of our name Prefix.
  Prefix = InstName.size() == 0? Name.slice(0, PrefixEnd).str() : InstName;

  const StringRef Postfix = Name.slice(PrefixEnd, Name.size());

  HasNPostfix = Postfix.count("_n");
  HasLanePostfix = Postfix.count("_lane");
  HasDupPostfix = Postfix.count("_dup");
  IsSpecialVCvt = Postfix.size() != 0 && Name.count("vcvt");

  if (InstName.compare("vtbl") == 0 ||
      InstName.compare("vtbx") == 0) {
    // If we have a vtblN/vtbxN instruction, use the instruction's ASCII
    // encoding to get its true value (48 == '0').
    TBNumber = Name[Name.size()-1] - 48;
  }
}

/// GenerateRegisterCheckPatternsForLoadStores - Given a bunch of data we have
/// extracted, generate a FileCheck pattern for a Load Or Store
static void
GenerateRegisterCheckPatternForLoadStores(const StringRef &NameRef,
                                          const std::string& OutTypeCode,
                                          const bool &IsQuad,
                                          const bool &HasDupPostfix,
                                          const bool &HasLanePostfix,
                                          const size_t Count,
                                          std::string &RegisterSuffix) {
  const bool IsLDSTOne = NameRef.count("vld1") || NameRef.count("vst1");
  // If N == 3 || N == 4 and we are dealing with a quad instruction, Clang
  // will output a series of v{ld,st}1s, so we have to handle it specially.
  if ((Count == 3 || Count == 4) && IsQuad) {
    RegisterSuffix += "{";
    for (size_t i = 0; i < Count; i++) {
      RegisterSuffix += "d{{[0-9]+}}";
      if (HasDupPostfix) {
        RegisterSuffix += "[]";
      }
      if (HasLanePostfix) {
        RegisterSuffix += "[{{[0-9]+}}]";
      }
      if (i < Count-1) {
        RegisterSuffix += ", ";
      }
    }
    RegisterSuffix += "}";
  } else {

    // Handle normal loads and stores.
    RegisterSuffix += "{";
    for (size_t i = 0; i < Count; i++) {
      RegisterSuffix += "d{{[0-9]+}}";
      if (HasDupPostfix) {
        RegisterSuffix += "[]";
      }
      if (HasLanePostfix) {
        RegisterSuffix += "[{{[0-9]+}}]";
      }
      // Quad (non-lane) forms use a second D register per element.
      if (IsQuad && !HasLanePostfix) {
        RegisterSuffix += ", d{{[0-9]+}}";
        if (HasDupPostfix) {
          RegisterSuffix += "[]";
        }
      }
      if (i < Count-1) {
        RegisterSuffix += ", ";
      }
    }
    RegisterSuffix += "}, [r{{[0-9]+}}";

    // We only include the alignment hint if we have a vld1.*64 or
    // a dup/lane instruction.
    if (IsLDSTOne) {
      if ((HasLanePostfix || HasDupPostfix) && OutTypeCode != "8") {
        RegisterSuffix += ":" + OutTypeCode;
      }
    }

    RegisterSuffix += "]";
  }
}

/// HasNPostfixAndScalarArgs - true for the multiply-class intrinsics whose
/// _n variants take a scalar argument.
static bool HasNPostfixAndScalarArgs(const StringRef &NameRef,
                                     const bool &HasNPostfix) {
  return (NameRef.count("vmla") ||
          NameRef.count("vmlal") ||
          NameRef.count("vmlsl") ||
          NameRef.count("vmull") ||
          NameRef.count("vqdmlal") ||
          NameRef.count("vqdmlsl") ||
          NameRef.count("vqdmulh") ||
          NameRef.count("vqdmull") ||
          NameRef.count("vqrdmulh")) && HasNPostfix;
}

/// IsFiveOperandLaneAccumulator - true for the _lane multiply-accumulate
/// family whose checked instruction takes five operands.
static bool IsFiveOperandLaneAccumulator(const StringRef &NameRef,
                                         const bool &HasLanePostfix) {
  return (NameRef.count("vmla") ||
          NameRef.count("vmls") ||
          NameRef.count("vmlal") ||
          NameRef.count("vmlsl") ||
          (NameRef.count("vmul") && NameRef.size() == 3)||
          NameRef.count("vqdmlal") ||
          NameRef.count("vqdmlsl") ||
          NameRef.count("vqdmulh") ||
          NameRef.count("vqrdmulh")) && HasLanePostfix;
}

/// IsSpecialLaneMultiply - true for _lane multiplies that need their
/// normalized proto collapsed (quad vmul/mulh, or non-quad mull).
static bool IsSpecialLaneMultiply(const StringRef &NameRef,
                                  const bool &HasLanePostfix,
                                  const bool &IsQuad) {
  const bool IsVMulOrMulh = (NameRef.count("vmul") || NameRef.count("mulh"))
                            && IsQuad;
  const bool IsVMull = NameRef.count("mull") && !IsQuad;
  return (IsVMulOrMulh || IsVMull) && HasLanePostfix;
}

/// NormalizeProtoForRegisterPatternCreation - map each proto character onto
/// one of four operand pattern codes ('q' = Q register, 'd' = D register,
/// 'i' = immediate, 'a' = lane access), then collapse the result for the
/// special instruction shapes handled below.
static void NormalizeProtoForRegisterPatternCreation(const std::string &Name,
                                                     const std::string &Proto,
                                                     const bool &HasNPostfix,
                                                     const bool &IsQuad,
                                                     const bool &HasLanePostfix,
                                                     const bool &HasDupPostfix,
                                                     std::string &NormedProto) {
  // Handle generic case.
  const StringRef NameRef(Name);
  for (size_t i = 0, end = Proto.size(); i < end; i++) {
    switch (Proto[i]) {
    case 'u':
    case 'f':
    case 'd':
    case 's':
    case 'x':
    case 't':
    case 'n':
      NormedProto += IsQuad? 'q' : 'd';
      break;
    case 'w':
    case 'k':
      NormedProto += 'q';
      break;
    case 'g':
    case 'j':
    case 'h':
    case 'e':
      NormedProto += 'd';
      break;
    case 'i':
      NormedProto += HasLanePostfix? 'a' : 'i';
      break;
    case 'a':
      if (HasLanePostfix) {
        NormedProto += 'a';
      } else if (HasNPostfixAndScalarArgs(NameRef, HasNPostfix)) {
        NormedProto += IsQuad? 'q' : 'd';
      } else {
        NormedProto += 'i';
      }
      break;
    }
  }

  // Handle Special Cases.
  const bool IsNotVExt = !NameRef.count("vext");
  const bool IsVPADAL = NameRef.count("vpadal");
  const bool Is5OpLaneAccum = IsFiveOperandLaneAccumulator(NameRef,
                                                           HasLanePostfix);
  const bool IsSpecialLaneMul = IsSpecialLaneMultiply(NameRef, HasLanePostfix,
                                                      IsQuad);

  if (IsSpecialLaneMul) {
    // If we have a special lane multiply, drop the third pattern code and
    // shift the fourth into its place, truncating to three operands.
    NormedProto[2] = NormedProto[3];
    NormedProto.erase(3);
  } else if (NormedProto.size() == 4 &&
             NormedProto[0] == NormedProto[1] &&
             IsNotVExt) {
    // If NormedProto.size() == 4 and the first two proto characters are the
    // same, ignore the first.
    NormedProto = NormedProto.substr(1, 3);
  } else if (Is5OpLaneAccum) {
    // If we have a 5 op lane accumulator operation, we take characters 1,2,4
    std::string tmp = NormedProto.substr(1,2);
    tmp += NormedProto[4];
    NormedProto = tmp;
  } else if (IsVPADAL) {
    // If we have VPADAL, ignore the first character.
    NormedProto = NormedProto.substr(0, 2);
  } else if (NameRef.count("vdup") && NormedProto.size() > 2) {
    // If our instruction is a dup instruction, keep only the first and
    // last characters.
    std::string tmp = "";
    tmp += NormedProto[0];
    tmp += NormedProto[NormedProto.size()-1];
    NormedProto = tmp;
  }
}

/// GenerateRegisterCheckPatterns - Given a bunch of data we have
/// extracted, generate a FileCheck pattern to check that an
/// instruction's arguments are correct.
static void GenerateRegisterCheckPattern(const std::string &Name,
                                         const std::string &Proto,
                                         const std::string &OutTypeCode,
                                         const bool &HasNPostfix,
                                         const bool &IsQuad,
                                         const bool &HasLanePostfix,
                                         const bool &HasDupPostfix,
                                         const size_t &TBNumber,
                                         std::string &RegisterSuffix) {

  RegisterSuffix = "";

  const StringRef NameRef(Name);
  const StringRef ProtoRef(Proto);

  // vdup/vmov _n forms get no register pattern.
  if ((NameRef.count("vdup") || NameRef.count("vmov")) && HasNPostfix) {
    return;
  }

  const bool IsLoadStore = NameRef.count("vld") || NameRef.count("vst");
  const bool IsTBXOrTBL = NameRef.count("vtbl") || NameRef.count("vtbx");

  if (IsLoadStore) {
    // Grab N value from v{ld,st}N using its ascii representation (48 == '0').
    const size_t Count = NameRef[3] - 48;

    GenerateRegisterCheckPatternForLoadStores(NameRef, OutTypeCode, IsQuad,
                                              HasDupPostfix, HasLanePostfix,
                                              Count, RegisterSuffix);
  } else if (IsTBXOrTBL) {
    // vtblN/vtbxN: destination, a list of N table registers, and an index
    // register.
    RegisterSuffix += "d{{[0-9]+}}, {";
    for (size_t i = 0; i < TBNumber-1; i++) {
      RegisterSuffix += "d{{[0-9]+}}, ";
    }
    RegisterSuffix += "d{{[0-9]+}}}, d{{[0-9]+}}";
  } else {
    // Handle a normal instruction.
    if (NameRef.count("vget") || NameRef.count("vset"))
      return;

    // We first normalize our proto, since we only need to emit 4
    // different types of checks, yet have more than 4 proto types
    // that map onto those 4 patterns.
    std::string NormalizedProto("");
    NormalizeProtoForRegisterPatternCreation(Name, Proto, HasNPostfix, IsQuad,
                                             HasLanePostfix, HasDupPostfix,
                                             NormalizedProto);

    for (size_t i = 0, end = NormalizedProto.size(); i < end; i++) {
      const char &c = NormalizedProto[i];
      switch (c) {
      case 'q':
        RegisterSuffix += "q{{[0-9]+}}, ";
        break;

      case 'd':
        RegisterSuffix += "d{{[0-9]+}}, ";
        break;

      case 'i':
        RegisterSuffix += "#{{[0-9]+}}, ";
        break;

      case 'a':
        RegisterSuffix += "d{{[0-9]+}}[{{[0-9]}}], ";
        break;
      }
    }

    // Remove extra ", ".
    RegisterSuffix = RegisterSuffix.substr(0, RegisterSuffix.size()-2);
  }
}

/// GenerateChecksForIntrinsic - Given a specific instruction name +
/// typestr + class kind, generate the proper set of FileCheck
/// Patterns to check for. We could just return a string, but instead
/// use a vector since it provides us with the extra flexibility of
/// emitting multiple checks, which comes in handy for certain cases
/// like mla where we want to check for 2 different instructions.
static void GenerateChecksForIntrinsic(const std::string &Name,
                                       const std::string &Proto,
                                       StringRef &OutTypeStr,
                                       StringRef &InTypeStr,
                                       ClassKind Ck,
                                       const std::string &InstName,
                                       bool IsHiddenLOp,
                                       std::vector<std::string>& Result) {

  // If Ck is a ClassNoTest instruction, just return so no test is
  // emitted.
  if(Ck == ClassNoTest)
    return;

  // vcvt_f32_f16 is checked by name alone; its operand forms do not fit
  // the generic patterns built below.
  if (Name == "vcvt_f32_f16") {
    Result.push_back("vcvt.f32.f16");
    return;
  }


  // Now we preprocess our instruction given the data we have to get the
  // data that we need.
  // Create a StringRef for String Manipulation of our Name.
  const StringRef NameRef(Name);
  // Instruction Prefix.
  std::string Prefix;
  // The type code for our out type string.
  std::string OutTypeCode;
  // To handle our different cases, we need to check for different postfixes.
  // Is our instruction a quad instruction.
  bool IsQuad = false;
  // Our instruction is of the form <instructionname>_n.
  bool HasNPostfix = false;
  // Our instruction is of the form <instructionname>_lane.
  bool HasLanePostfix = false;
  // Our instruction is of the form <instructionname>_dup.
  bool HasDupPostfix = false;
  // Our instruction is a vcvt instruction which requires special handling.
  bool IsSpecialVCvt = false;
  // If we have a vtbxN or vtblN instruction, this is set to N.
  // Initialized with -1 (i.e. SIZE_MAX) as an "unset" sentinel.
  size_t TBNumber = -1;
  // Register Suffix
  std::string RegisterSuffix;

  PreprocessInstruction(NameRef, InstName, Prefix,
                        HasNPostfix, HasLanePostfix, HasDupPostfix,
                        IsSpecialVCvt, TBNumber);

  InstructionTypeCode(OutTypeStr, Ck, IsQuad, OutTypeCode);
  GenerateRegisterCheckPattern(Name, Proto, OutTypeCode, HasNPostfix, IsQuad,
                               HasLanePostfix, HasDupPostfix, TBNumber,
                               RegisterSuffix);

  // In the following section, we handle a bunch of special cases. You can tell
  // a special case by the fact we are returning early.

  // If our instruction is a logical instruction without postfix or a
  // hidden LOp just return the current Prefix.
  if (Ck == ClassL || IsHiddenLOp) {
    Result.push_back(Prefix + " " + RegisterSuffix);
    return;
  }

  // If we have a vmov, due to the many different cases, some of which
  // vary within the different intrinsics generated for a single
  // instruction type, just output a vmov. (e.g. given an instruction
  // A, A.u32 might be vmov and A.u8 might be vmov.8).
  //
  // FIXME: Maybe something can be done about this. The two cases that we care
  // about are vmov as an LType and vmov as a WType.
  if (Prefix == "vmov") {
    Result.push_back(Prefix + " " + RegisterSuffix);
    return;
  }

  // In the following section, we handle special cases.

  if (OutTypeCode == "64") {
    // If we have a 64 bit vdup/vext and are handling an uint64x1_t
    // type, the intrinsic will be optimized away, so just return
    // nothing. On the other hand if we are handling an uint64x2_t
    // (i.e. quad instruction), vdup/vmov instructions should be
    // emitted.
    if (Prefix == "vdup" || Prefix == "vext") {
      if (IsQuad) {
        Result.push_back("{{vmov|vdup}}");
      }
      return;
    }

    // v{st,ld}{2,3,4}_{u,s}64 emit v{st,ld}1.64 instructions with
    // multiple register operands.
    bool MultiLoadPrefix = Prefix == "vld2" || Prefix == "vld3"
                            || Prefix == "vld4";
    bool MultiStorePrefix = Prefix == "vst2" || Prefix == "vst3"
                            || Prefix == "vst4";
    if (MultiLoadPrefix || MultiStorePrefix) {
      // slice(0, 3) keeps "vld"/"vst"; the N is replaced with "1.64".
      Result.push_back(NameRef.slice(0, 3).str() + "1.64");
      return;
    }

    // v{st,ld}1_{lane,dup}_{u64,s64} use vldr/vstr/vmov/str instead of
    // emitting said instructions. So return a check for
    // vldr/vstr/vmov/str instead.
    if (HasLanePostfix || HasDupPostfix) {
      if (Prefix == "vst1") {
        Result.push_back("{{str|vstr|vmov}}");
        return;
      } else if (Prefix == "vld1") {
        Result.push_back("{{ldr|vldr|vmov}}");
        return;
      }
    }
  }

  // vzip.32/vuzp.32 are the same instruction as vtrn.32 and are
  // sometimes disassembled as vtrn.32. We use a regex to handle both
  // cases.
  if ((Prefix == "vzip" || Prefix == "vuzp") && OutTypeCode == "32") {
    Result.push_back("{{vtrn|" + Prefix + "}}.32 " + RegisterSuffix);
    return;
  }

  // Currently on most ARM processors, we do not use vmla/vmls for
  // quad floating point operations. Instead we output vmul + vadd. So
  // check if we have one of those instructions and just output a
  // check for vmul.
  if (OutTypeCode == "f32") {
    if (Prefix == "vmls") {
      Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix);
      Result.push_back("vsub." + OutTypeCode);
      return;
    } else if (Prefix == "vmla") {
      Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix);
      Result.push_back("vadd." + OutTypeCode);
      return;
    }
  }

  // If we have vcvt, get the input type from the instruction name
  // (which should be of the form instname_inputtype) and append it
  // before the output type.
  if (Prefix == "vcvt") {
    const std::string inTypeCode = NameRef.substr(NameRef.find_last_of("_")+1);
    Prefix += "." + inTypeCode;
  }

  // Append output type code to get our final mangled instruction.
  Prefix += "." + OutTypeCode;

  Result.push_back(Prefix + " " + RegisterSuffix);
}

/// UseMacro - Examine the prototype string to determine if the intrinsic
/// should be defined as a preprocessor macro instead of an inline function.
1397 static bool UseMacro(const std::string &proto) { 1398 // If this builtin takes an immediate argument, we need to #define it rather 1399 // than use a standard declaration, so that SemaChecking can range check 1400 // the immediate passed by the user. 1401 if (proto.find('i') != std::string::npos) 1402 return true; 1403 1404 // Pointer arguments need to use macros to avoid hiding aligned attributes 1405 // from the pointer type. 1406 if (proto.find('p') != std::string::npos || 1407 proto.find('c') != std::string::npos) 1408 return true; 1409 1410 return false; 1411 } 1412 1413 /// MacroArgUsedDirectly - Return true if argument i for an intrinsic that is 1414 /// defined as a macro should be accessed directly instead of being first 1415 /// assigned to a local temporary. 1416 static bool MacroArgUsedDirectly(const std::string &proto, unsigned i) { 1417 // True for constant ints (i), pointers (p) and const pointers (c). 1418 return (proto[i] == 'i' || proto[i] == 'p' || proto[i] == 'c'); 1419 } 1420 1421 // Generate the string "(argtype a, argtype b, ...)" 1422 static std::string GenArgs(const std::string &proto, StringRef typestr, 1423 const std::string &name) { 1424 bool define = UseMacro(proto); 1425 char arg = 'a'; 1426 1427 std::string s; 1428 s += "("; 1429 1430 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) { 1431 if (define) { 1432 // Some macro arguments are used directly instead of being assigned 1433 // to local temporaries; prepend an underscore prefix to make their 1434 // names consistent with the local temporaries. 1435 if (MacroArgUsedDirectly(proto, i)) 1436 s += "__"; 1437 } else { 1438 s += TypeString(proto[i], typestr) + " __"; 1439 } 1440 s.push_back(arg); 1441 //To avoid argument being multiple defined, add extra number for renaming. 
1442 if (name == "vcopy_lane" || name == "vcopy_laneq") 1443 s.push_back('1'); 1444 if ((i + 1) < e) 1445 s += ", "; 1446 } 1447 1448 s += ")"; 1449 return s; 1450 } 1451 1452 // Macro arguments are not type-checked like inline function arguments, so 1453 // assign them to local temporaries to get the right type checking. 1454 static std::string GenMacroLocals(const std::string &proto, StringRef typestr, 1455 const std::string &name ) { 1456 char arg = 'a'; 1457 std::string s; 1458 bool generatedLocal = false; 1459 1460 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) { 1461 // Do not create a temporary for an immediate argument. 1462 // That would defeat the whole point of using a macro! 1463 if (MacroArgUsedDirectly(proto, i)) 1464 continue; 1465 generatedLocal = true; 1466 bool extranumber = false; 1467 if (name == "vcopy_lane" || name == "vcopy_laneq") 1468 extranumber = true; 1469 1470 s += TypeString(proto[i], typestr) + " __"; 1471 s.push_back(arg); 1472 if(extranumber) 1473 s.push_back('1'); 1474 s += " = ("; 1475 s.push_back(arg); 1476 if(extranumber) 1477 s.push_back('1'); 1478 s += "); "; 1479 } 1480 1481 if (generatedLocal) 1482 s += "\\\n "; 1483 return s; 1484 } 1485 1486 // Use the vmovl builtin to sign-extend or zero-extend a vector. 1487 static std::string Extend(StringRef typestr, const std::string &a, bool h=0) { 1488 std::string s, high; 1489 high = h ? "_high" : ""; 1490 s = MangleName("vmovl" + high, typestr, ClassS); 1491 s += "(" + a + ")"; 1492 return s; 1493 } 1494 1495 // Get the high 64-bit part of a vector 1496 static std::string GetHigh(const std::string &a, StringRef typestr) { 1497 std::string s; 1498 s = MangleName("vget_high", typestr, ClassS); 1499 s += "(" + a + ")"; 1500 return s; 1501 } 1502 1503 // Gen operation with two operands and get high 64-bit for both of two operands. 
1504 static std::string Gen2OpWith2High(StringRef typestr, 1505 const std::string &op, 1506 const std::string &a, 1507 const std::string &b) { 1508 std::string s; 1509 std::string Op1 = GetHigh(a, typestr); 1510 std::string Op2 = GetHigh(b, typestr); 1511 s = MangleName(op, typestr, ClassS); 1512 s += "(" + Op1 + ", " + Op2 + ");"; 1513 return s; 1514 } 1515 1516 // Gen operation with three operands and get high 64-bit of the latter 1517 // two operands. 1518 static std::string Gen3OpWith2High(StringRef typestr, 1519 const std::string &op, 1520 const std::string &a, 1521 const std::string &b, 1522 const std::string &c) { 1523 std::string s; 1524 std::string Op1 = GetHigh(b, typestr); 1525 std::string Op2 = GetHigh(c, typestr); 1526 s = MangleName(op, typestr, ClassS); 1527 s += "(" + a + ", " + Op1 + ", " + Op2 + ");"; 1528 return s; 1529 } 1530 1531 // Gen combine operation by putting a on low 64-bit, and b on high 64-bit. 1532 static std::string GenCombine(std::string typestr, 1533 const std::string &a, 1534 const std::string &b) { 1535 std::string s; 1536 s = MangleName("vcombine", typestr, ClassS); 1537 s += "(" + a + ", " + b + ")"; 1538 return s; 1539 } 1540 1541 static std::string Duplicate(unsigned nElts, StringRef typestr, 1542 const std::string &a) { 1543 std::string s; 1544 1545 s = "(" + TypeString('d', typestr) + "){ "; 1546 for (unsigned i = 0; i != nElts; ++i) { 1547 s += a; 1548 if ((i + 1) < nElts) 1549 s += ", "; 1550 } 1551 s += " }"; 1552 1553 return s; 1554 } 1555 1556 static std::string SplatLane(unsigned nElts, const std::string &vec, 1557 const std::string &lane) { 1558 std::string s = "__builtin_shufflevector(" + vec + ", " + vec; 1559 for (unsigned i = 0; i < nElts; ++i) 1560 s += ", " + lane; 1561 s += ")"; 1562 return s; 1563 } 1564 1565 static std::string RemoveHigh(const std::string &name) { 1566 std::string s = name; 1567 std::size_t found = s.find("_high_"); 1568 if (found == std::string::npos) 1569 PrintFatalError("name should 
contain \"_high_\" for high intrinsics"); 1570 s.replace(found, 5, ""); 1571 return s; 1572 } 1573 1574 static unsigned GetNumElements(StringRef typestr, bool &quad) { 1575 quad = false; 1576 bool dummy = false; 1577 char type = ClassifyType(typestr, quad, dummy, dummy); 1578 unsigned nElts = 0; 1579 switch (type) { 1580 case 'c': nElts = 8; break; 1581 case 's': nElts = 4; break; 1582 case 'i': nElts = 2; break; 1583 case 'l': nElts = 1; break; 1584 case 'h': nElts = 4; break; 1585 case 'f': nElts = 2; break; 1586 case 'd': 1587 nElts = 1; 1588 break; 1589 default: 1590 PrintFatalError("unhandled type!"); 1591 } 1592 if (quad) nElts <<= 1; 1593 return nElts; 1594 } 1595 1596 // Generate the definition for this intrinsic, e.g. "a + b" for OpAdd. 1597 static std::string GenOpString(const std::string &name, OpKind op, 1598 const std::string &proto, StringRef typestr) { 1599 bool quad; 1600 unsigned nElts = GetNumElements(typestr, quad); 1601 bool define = UseMacro(proto); 1602 1603 std::string ts = TypeString(proto[0], typestr); 1604 std::string s; 1605 if (!define) { 1606 s = "return "; 1607 } 1608 1609 switch(op) { 1610 case OpAdd: 1611 s += "__a + __b;"; 1612 break; 1613 case OpAddl: 1614 s += Extend(typestr, "__a") + " + " + Extend(typestr, "__b") + ";"; 1615 break; 1616 case OpAddlHi: 1617 s += Extend(typestr, "__a", 1) + " + " + Extend(typestr, "__b", 1) + ";"; 1618 break; 1619 case OpAddw: 1620 s += "__a + " + Extend(typestr, "__b") + ";"; 1621 break; 1622 case OpAddwHi: 1623 s += "__a + " + Extend(typestr, "__b", 1) + ";"; 1624 break; 1625 case OpSub: 1626 s += "__a - __b;"; 1627 break; 1628 case OpSubl: 1629 s += Extend(typestr, "__a") + " - " + Extend(typestr, "__b") + ";"; 1630 break; 1631 case OpSublHi: 1632 s += Extend(typestr, "__a", 1) + " - " + Extend(typestr, "__b", 1) + ";"; 1633 break; 1634 case OpSubw: 1635 s += "__a - " + Extend(typestr, "__b") + ";"; 1636 break; 1637 case OpSubwHi: 1638 s += "__a - " + Extend(typestr, "__b", 1) + ";"; 1639 
break; 1640 case OpMulN: 1641 s += "__a * " + Duplicate(nElts, typestr, "__b") + ";"; 1642 break; 1643 case OpMulLane: 1644 s += "__a * " + SplatLane(nElts, "__b", "__c") + ";"; 1645 break; 1646 case OpMulXLane: 1647 s += MangleName("vmulx", typestr, ClassS) + "(__a, " + 1648 SplatLane(nElts, "__b", "__c") + ");"; 1649 break; 1650 case OpMul: 1651 s += "__a * __b;"; 1652 break; 1653 case OpMullLane: 1654 s += MangleName("vmull", typestr, ClassS) + "(__a, " + 1655 SplatLane(nElts, "__b", "__c") + ");"; 1656 break; 1657 case OpMullHiLane: 1658 s += MangleName("vmull", typestr, ClassS) + "(" + 1659 GetHigh("__a", typestr) + ", " + SplatLane(nElts, "__b", "__c") + ");"; 1660 break; 1661 case OpMlaN: 1662 s += "__a + (__b * " + Duplicate(nElts, typestr, "__c") + ");"; 1663 break; 1664 case OpMlaLane: 1665 s += "__a + (__b * " + SplatLane(nElts, "__c", "__d") + ");"; 1666 break; 1667 case OpMla: 1668 s += "__a + (__b * __c);"; 1669 break; 1670 case OpMlalN: 1671 s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " + 1672 Duplicate(nElts, typestr, "__c") + ");"; 1673 break; 1674 case OpMlalLane: 1675 s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " + 1676 SplatLane(nElts, "__c", "__d") + ");"; 1677 break; 1678 case OpMlalHiLane: 1679 s += "__a + " + MangleName("vmull", typestr, ClassS) + "(" + 1680 GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");"; 1681 break; 1682 case OpMlal: 1683 s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, __c);"; 1684 break; 1685 case OpMullHi: 1686 s += Gen2OpWith2High(typestr, "vmull", "__a", "__b"); 1687 break; 1688 case OpMlalHi: 1689 s += Gen3OpWith2High(typestr, "vmlal", "__a", "__b", "__c"); 1690 break; 1691 case OpMlsN: 1692 s += "__a - (__b * " + Duplicate(nElts, typestr, "__c") + ");"; 1693 break; 1694 case OpMlsLane: 1695 s += "__a - (__b * " + SplatLane(nElts, "__c", "__d") + ");"; 1696 break; 1697 case OpFMSLane: 1698 s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n 
"; 1699 s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n "; 1700 s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n "; 1701 s += MangleName("vfma_lane", typestr, ClassS) + "(__a1, __b1, -__c1, __d);"; 1702 break; 1703 case OpFMSLaneQ: 1704 s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n "; 1705 s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n "; 1706 s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n "; 1707 s += MangleName("vfma_laneq", typestr, ClassS) + "(__a1, __b1, -__c1, __d);"; 1708 break; 1709 case OpMls: 1710 s += "__a - (__b * __c);"; 1711 break; 1712 case OpMlslN: 1713 s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " + 1714 Duplicate(nElts, typestr, "__c") + ");"; 1715 break; 1716 case OpMlslLane: 1717 s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " + 1718 SplatLane(nElts, "__c", "__d") + ");"; 1719 break; 1720 case OpMlslHiLane: 1721 s += "__a - " + MangleName("vmull", typestr, ClassS) + "(" + 1722 GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");"; 1723 break; 1724 case OpMlsl: 1725 s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, __c);"; 1726 break; 1727 case OpMlslHi: 1728 s += Gen3OpWith2High(typestr, "vmlsl", "__a", "__b", "__c"); 1729 break; 1730 case OpQDMullLane: 1731 s += MangleName("vqdmull", typestr, ClassS) + "(__a, " + 1732 SplatLane(nElts, "__b", "__c") + ");"; 1733 break; 1734 case OpQDMullHiLane: 1735 s += MangleName("vqdmull", typestr, ClassS) + "(" + 1736 GetHigh("__a", typestr) + ", " + SplatLane(nElts, "__b", "__c") + ");"; 1737 break; 1738 case OpQDMlalLane: 1739 s += MangleName("vqdmlal", typestr, ClassS) + "(__a, __b, " + 1740 SplatLane(nElts, "__c", "__d") + ");"; 1741 break; 1742 case OpQDMlalHiLane: 1743 s += MangleName("vqdmlal", typestr, ClassS) + "(__a, " + 1744 GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");"; 1745 break; 1746 case OpQDMlslLane: 1747 s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, __b, 
" + 1748 SplatLane(nElts, "__c", "__d") + ");"; 1749 break; 1750 case OpQDMlslHiLane: 1751 s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, " + 1752 GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");"; 1753 break; 1754 case OpQDMulhLane: 1755 s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " + 1756 SplatLane(nElts, "__b", "__c") + ");"; 1757 break; 1758 case OpQRDMulhLane: 1759 s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " + 1760 SplatLane(nElts, "__b", "__c") + ");"; 1761 break; 1762 case OpEq: 1763 s += "(" + ts + ")(__a == __b);"; 1764 break; 1765 case OpGe: 1766 s += "(" + ts + ")(__a >= __b);"; 1767 break; 1768 case OpLe: 1769 s += "(" + ts + ")(__a <= __b);"; 1770 break; 1771 case OpGt: 1772 s += "(" + ts + ")(__a > __b);"; 1773 break; 1774 case OpLt: 1775 s += "(" + ts + ")(__a < __b);"; 1776 break; 1777 case OpNeg: 1778 s += " -__a;"; 1779 break; 1780 case OpNot: 1781 s += " ~__a;"; 1782 break; 1783 case OpAnd: 1784 s += "__a & __b;"; 1785 break; 1786 case OpOr: 1787 s += "__a | __b;"; 1788 break; 1789 case OpXor: 1790 s += "__a ^ __b;"; 1791 break; 1792 case OpAndNot: 1793 s += "__a & ~__b;"; 1794 break; 1795 case OpOrNot: 1796 s += "__a | ~__b;"; 1797 break; 1798 case OpCast: 1799 s += "(" + ts + ")__a;"; 1800 break; 1801 case OpConcat: 1802 s += "(" + ts + ")__builtin_shufflevector((int64x1_t)__a"; 1803 s += ", (int64x1_t)__b, 0, 1);"; 1804 break; 1805 case OpHi: 1806 // nElts is for the result vector, so the source is twice that number. 
1807 s += "__builtin_shufflevector(__a, __a"; 1808 for (unsigned i = nElts; i < nElts * 2; ++i) 1809 s += ", " + utostr(i); 1810 s+= ");"; 1811 break; 1812 case OpLo: 1813 s += "__builtin_shufflevector(__a, __a"; 1814 for (unsigned i = 0; i < nElts; ++i) 1815 s += ", " + utostr(i); 1816 s+= ");"; 1817 break; 1818 case OpDup: 1819 s += Duplicate(nElts, typestr, "__a") + ";"; 1820 break; 1821 case OpDupLane: 1822 s += SplatLane(nElts, "__a", "__b") + ";"; 1823 break; 1824 case OpSelect: 1825 // ((0 & 1) | (~0 & 2)) 1826 s += "(" + ts + ")"; 1827 ts = TypeString(proto[1], typestr); 1828 s += "((__a & (" + ts + ")__b) | "; 1829 s += "(~__a & (" + ts + ")__c));"; 1830 break; 1831 case OpRev16: 1832 s += "__builtin_shufflevector(__a, __a"; 1833 for (unsigned i = 2; i <= nElts; i += 2) 1834 for (unsigned j = 0; j != 2; ++j) 1835 s += ", " + utostr(i - j - 1); 1836 s += ");"; 1837 break; 1838 case OpRev32: { 1839 unsigned WordElts = nElts >> (1 + (int)quad); 1840 s += "__builtin_shufflevector(__a, __a"; 1841 for (unsigned i = WordElts; i <= nElts; i += WordElts) 1842 for (unsigned j = 0; j != WordElts; ++j) 1843 s += ", " + utostr(i - j - 1); 1844 s += ");"; 1845 break; 1846 } 1847 case OpRev64: { 1848 unsigned DblWordElts = nElts >> (int)quad; 1849 s += "__builtin_shufflevector(__a, __a"; 1850 for (unsigned i = DblWordElts; i <= nElts; i += DblWordElts) 1851 for (unsigned j = 0; j != DblWordElts; ++j) 1852 s += ", " + utostr(i - j - 1); 1853 s += ");"; 1854 break; 1855 } 1856 case OpXtnHi: { 1857 s = TypeString(proto[1], typestr) + " __a1 = " + 1858 MangleName("vmovn", typestr, ClassS) + "(__b);\n " + 1859 "return __builtin_shufflevector(__a, __a1"; 1860 for (unsigned i = 0; i < nElts * 4; ++i) 1861 s += ", " + utostr(i); 1862 s += ");"; 1863 break; 1864 } 1865 case OpSqxtunHi: { 1866 s = TypeString(proto[1], typestr) + " __a1 = " + 1867 MangleName("vqmovun", typestr, ClassS) + "(__b);\n " + 1868 "return __builtin_shufflevector(__a, __a1"; 1869 for (unsigned i = 0; i < 
nElts * 4; ++i) 1870 s += ", " + utostr(i); 1871 s += ");"; 1872 break; 1873 } 1874 case OpQxtnHi: { 1875 s = TypeString(proto[1], typestr) + " __a1 = " + 1876 MangleName("vqmovn", typestr, ClassS) + "(__b);\n " + 1877 "return __builtin_shufflevector(__a, __a1"; 1878 for (unsigned i = 0; i < nElts * 4; ++i) 1879 s += ", " + utostr(i); 1880 s += ");"; 1881 break; 1882 } 1883 case OpFcvtnHi: { 1884 std::string FName = (nElts == 1) ? "vcvt_f32" : "vcvt_f16"; 1885 s = TypeString(proto[1], typestr) + " __a1 = " + 1886 MangleName(FName, typestr, ClassS) + "(__b);\n " + 1887 "return __builtin_shufflevector(__a, __a1"; 1888 for (unsigned i = 0; i < nElts * 4; ++i) 1889 s += ", " + utostr(i); 1890 s += ");"; 1891 break; 1892 } 1893 case OpFcvtlHi: { 1894 std::string FName = (nElts == 2) ? "vcvt_f64" : "vcvt_f32"; 1895 s = TypeString('d', typestr) + " __a1 = " + GetHigh("__a", typestr) + 1896 ";\n return " + MangleName(FName, typestr, ClassS) + "(__a1);"; 1897 break; 1898 } 1899 case OpFcvtxnHi: { 1900 s = TypeString(proto[1], typestr) + " __a1 = " + 1901 MangleName("vcvtx_f32", typestr, ClassS) + "(__b);\n " + 1902 "return __builtin_shufflevector(__a, __a1"; 1903 for (unsigned i = 0; i < nElts * 4; ++i) 1904 s += ", " + utostr(i); 1905 s += ");"; 1906 break; 1907 } 1908 case OpUzp1: 1909 s += "__builtin_shufflevector(__a, __b"; 1910 for (unsigned i = 0; i < nElts; i++) 1911 s += ", " + utostr(2*i); 1912 s += ");"; 1913 break; 1914 case OpUzp2: 1915 s += "__builtin_shufflevector(__a, __b"; 1916 for (unsigned i = 0; i < nElts; i++) 1917 s += ", " + utostr(2*i+1); 1918 s += ");"; 1919 break; 1920 case OpZip1: 1921 s += "__builtin_shufflevector(__a, __b"; 1922 for (unsigned i = 0; i < (nElts/2); i++) 1923 s += ", " + utostr(i) + ", " + utostr(i+nElts); 1924 s += ");"; 1925 break; 1926 case OpZip2: 1927 s += "__builtin_shufflevector(__a, __b"; 1928 for (unsigned i = nElts/2; i < nElts; i++) 1929 s += ", " + utostr(i) + ", " + utostr(i+nElts); 1930 s += ");"; 1931 break; 1932 
case OpTrn1: 1933 s += "__builtin_shufflevector(__a, __b"; 1934 for (unsigned i = 0; i < (nElts/2); i++) 1935 s += ", " + utostr(2*i) + ", " + utostr(2*i+nElts); 1936 s += ");"; 1937 break; 1938 case OpTrn2: 1939 s += "__builtin_shufflevector(__a, __b"; 1940 for (unsigned i = 0; i < (nElts/2); i++) 1941 s += ", " + utostr(2*i+1) + ", " + utostr(2*i+1+nElts); 1942 s += ");"; 1943 break; 1944 case OpAbdl: { 1945 std::string abd = MangleName("vabd", typestr, ClassS) + "(__a, __b)"; 1946 if (typestr[0] != 'U') { 1947 // vabd results are always unsigned and must be zero-extended. 1948 std::string utype = "U" + typestr.str(); 1949 s += "(" + TypeString(proto[0], typestr) + ")"; 1950 abd = "(" + TypeString('d', utype) + ")" + abd; 1951 s += Extend(utype, abd) + ";"; 1952 } else { 1953 s += Extend(typestr, abd) + ";"; 1954 } 1955 break; 1956 } 1957 case OpAbdlHi: 1958 s += Gen2OpWith2High(typestr, "vabdl", "__a", "__b"); 1959 break; 1960 case OpAddhnHi: { 1961 std::string addhn = MangleName("vaddhn", typestr, ClassS) + "(__b, __c)"; 1962 s += GenCombine(GetNarrowTypestr(typestr), "__a", addhn); 1963 s += ";"; 1964 break; 1965 } 1966 case OpRAddhnHi: { 1967 std::string raddhn = MangleName("vraddhn", typestr, ClassS) + "(__b, __c)"; 1968 s += GenCombine(GetNarrowTypestr(typestr), "__a", raddhn); 1969 s += ";"; 1970 break; 1971 } 1972 case OpSubhnHi: { 1973 std::string subhn = MangleName("vsubhn", typestr, ClassS) + "(__b, __c)"; 1974 s += GenCombine(GetNarrowTypestr(typestr), "__a", subhn); 1975 s += ";"; 1976 break; 1977 } 1978 case OpRSubhnHi: { 1979 std::string rsubhn = MangleName("vrsubhn", typestr, ClassS) + "(__b, __c)"; 1980 s += GenCombine(GetNarrowTypestr(typestr), "__a", rsubhn); 1981 s += ";"; 1982 break; 1983 } 1984 case OpAba: 1985 s += "__a + " + MangleName("vabd", typestr, ClassS) + "(__b, __c);"; 1986 break; 1987 case OpAbal: 1988 s += "__a + " + MangleName("vabdl", typestr, ClassS) + "(__b, __c);"; 1989 break; 1990 case OpAbalHi: 1991 s += 
Gen3OpWith2High(typestr, "vabal", "__a", "__b", "__c"); 1992 break; 1993 case OpQDMullHi: 1994 s += Gen2OpWith2High(typestr, "vqdmull", "__a", "__b"); 1995 break; 1996 case OpQDMlalHi: 1997 s += Gen3OpWith2High(typestr, "vqdmlal", "__a", "__b", "__c"); 1998 break; 1999 case OpQDMlslHi: 2000 s += Gen3OpWith2High(typestr, "vqdmlsl", "__a", "__b", "__c"); 2001 break; 2002 case OpDiv: 2003 s += "__a / __b;"; 2004 break; 2005 case OpMovlHi: { 2006 s = TypeString(proto[1], typestr.drop_front()) + " __a1 = " + 2007 MangleName("vget_high", typestr, ClassS) + "(__a);\n " + s; 2008 s += "(" + ts + ")" + MangleName("vshll_n", typestr, ClassS); 2009 s += "(__a1, 0);"; 2010 break; 2011 } 2012 case OpLongHi: { 2013 // Another local variable __a1 is needed for calling a Macro, 2014 // or using __a will have naming conflict when Macro expanding. 2015 s += TypeString(proto[1], typestr.drop_front()) + " __a1 = " + 2016 MangleName("vget_high", typestr, ClassS) + "(__a); \\\n"; 2017 s += " (" + ts + ")" + MangleName(RemoveHigh(name), typestr, ClassS) + 2018 "(__a1, __b);"; 2019 break; 2020 } 2021 case OpNarrowHi: { 2022 s += "(" + ts + ")" + MangleName("vcombine", typestr, ClassS) + "(__a, " + 2023 MangleName(RemoveHigh(name), typestr, ClassS) + "(__b, __c));"; 2024 break; 2025 } 2026 case OpCopyLane: { 2027 s += TypeString('s', typestr) + " __c2 = " + 2028 MangleName("vget_lane", typestr, ClassS) + "(__c1, __d1); \\\n " + 2029 MangleName("vset_lane", typestr, ClassS) + "(__c2, __a1, __b1);"; 2030 break; 2031 } 2032 case OpCopyQLane: { 2033 std::string typeCode = ""; 2034 InstructionTypeCode(typestr, ClassS, quad, typeCode); 2035 s += TypeString('s', typestr) + " __c2 = vget_lane_" + typeCode + 2036 "(__c1, __d1); \\\n vsetq_lane_" + typeCode + "(__c2, __a1, __b1);"; 2037 break; 2038 } 2039 case OpCopyLaneQ: { 2040 std::string typeCode = ""; 2041 InstructionTypeCode(typestr, ClassS, quad, typeCode); 2042 s += TypeString('s', typestr) + " __c2 = vgetq_lane_" + typeCode + 2043 "(__c1, 
__d1); \\\n vset_lane_" + typeCode + "(__c2, __a1, __b1);"; 2044 break; 2045 } 2046 case OpScalarMulLane: { 2047 std::string typeCode = ""; 2048 InstructionTypeCode(typestr, ClassS, quad, typeCode); 2049 s += TypeString('s', typestr) + " __d1 = vget_lane_" + typeCode + 2050 "(__b, __c);\\\n __a * __d1;"; 2051 break; 2052 } 2053 case OpScalarMulLaneQ: { 2054 std::string typeCode = ""; 2055 InstructionTypeCode(typestr, ClassS, quad, typeCode); 2056 s += TypeString('s', typestr) + " __d1 = vgetq_lane_" + typeCode + 2057 "(__b, __c);\\\n __a * __d1;"; 2058 break; 2059 } 2060 case OpScalarMulXLane: { 2061 bool dummy = false; 2062 char type = ClassifyType(typestr, dummy, dummy, dummy); 2063 if (type == 'f') type = 's'; 2064 std::string typeCode = ""; 2065 InstructionTypeCode(typestr, ClassS, quad, typeCode); 2066 s += TypeString('s', typestr) + " __d1 = vget_lane_" + typeCode + 2067 "(__b, __c);\\\n vmulx" + type + "_" + 2068 typeCode + "(__a, __d1);"; 2069 break; 2070 } 2071 case OpScalarMulXLaneQ: { 2072 bool dummy = false; 2073 char type = ClassifyType(typestr, dummy, dummy, dummy); 2074 if (type == 'f') type = 's'; 2075 std::string typeCode = ""; 2076 InstructionTypeCode(typestr, ClassS, quad, typeCode); 2077 s += TypeString('s', typestr) + " __d1 = vgetq_lane_" + 2078 typeCode + "(__b, __c);\\\n vmulx" + type + 2079 "_" + typeCode + "(__a, __d1);"; 2080 break; 2081 } 2082 2083 case OpScalarVMulXLane: { 2084 bool dummy = false; 2085 char type = ClassifyType(typestr, dummy, dummy, dummy); 2086 if (type == 'f') type = 's'; 2087 std::string typeCode = ""; 2088 InstructionTypeCode(typestr, ClassS, quad, typeCode); 2089 s += TypeString('s', typestr) + " __d1 = vget_lane_" + 2090 typeCode + "(__a, 0);\\\n" + 2091 " " + TypeString('s', typestr) + " __e1 = vget_lane_" + 2092 typeCode + "(__b, __c);\\\n" + 2093 " " + TypeString('s', typestr) + " __f1 = vmulx" + type + "_" + 2094 typeCode + "(__d1, __e1);\\\n" + 2095 " " + TypeString('d', typestr) + " __g1;\\\n" + 2096 " 
vset_lane_" + typeCode + "(__f1, __g1, __c);"; 2097 break; 2098 } 2099 2100 case OpScalarVMulXLaneQ: { 2101 bool dummy = false; 2102 char type = ClassifyType(typestr, dummy, dummy, dummy); 2103 if (type == 'f') type = 's'; 2104 std::string typeCode = ""; 2105 InstructionTypeCode(typestr, ClassS, quad, typeCode); 2106 s += TypeString('s', typestr) + " __d1 = vget_lane_" + 2107 typeCode + "(__a, 0);\\\n" + 2108 " " + TypeString('s', typestr) + " __e1 = vgetq_lane_" + 2109 typeCode + "(__b, __c);\\\n" + 2110 " " + TypeString('s', typestr) + " __f1 = vmulx" + type + "_" + 2111 typeCode + "(__d1, __e1);\\\n" + 2112 " " + TypeString('d', typestr) + " __g1;\\\n" + 2113 " vset_lane_" + typeCode + "(__f1, __g1, 0);"; 2114 break; 2115 } 2116 case OpScalarQDMullLane: { 2117 std::string typeCode = ""; 2118 InstructionTypeCode(typestr, ClassS, quad, typeCode); 2119 s += MangleName("vqdmull", typestr, ClassS) + "(__a, " + 2120 "vget_lane_" + typeCode + "(b, __c));"; 2121 break; 2122 } 2123 case OpScalarQDMullLaneQ: { 2124 std::string typeCode = ""; 2125 InstructionTypeCode(typestr, ClassS, quad, typeCode); 2126 s += MangleName("vqdmull", typestr, ClassS) + "(__a, " + 2127 "vgetq_lane_" + typeCode + "(b, __c));"; 2128 break; 2129 } 2130 case OpScalarQDMulHiLane: { 2131 std::string typeCode = ""; 2132 InstructionTypeCode(typestr, ClassS, quad, typeCode); 2133 s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " + 2134 "vget_lane_" + typeCode + "(__b, __c));"; 2135 break; 2136 } 2137 case OpScalarQDMulHiLaneQ: { 2138 std::string typeCode = ""; 2139 InstructionTypeCode(typestr, ClassS, quad, typeCode); 2140 s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " + 2141 "vgetq_lane_" + typeCode + "(__b, __c));"; 2142 break; 2143 } 2144 case OpScalarQRDMulHiLane: { 2145 std::string typeCode = ""; 2146 InstructionTypeCode(typestr, ClassS, quad, typeCode); 2147 s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " + 2148 "vget_lane_" + typeCode + "(__b, __c));"; 2149 break; 2150 } 
2151 case OpScalarQRDMulHiLaneQ: { 2152 std::string typeCode = ""; 2153 InstructionTypeCode(typestr, ClassS, quad, typeCode); 2154 s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " + 2155 "vgetq_lane_" + typeCode + "(__b, __c));"; 2156 break; 2157 } 2158 default: 2159 PrintFatalError("unknown OpKind!"); 2160 } 2161 return s; 2162 } 2163 2164 static unsigned GetNeonEnum(const std::string &proto, StringRef typestr) { 2165 unsigned mod = proto[0]; 2166 2167 if (mod == 'v' || mod == 'f') 2168 mod = proto[1]; 2169 2170 bool quad = false; 2171 bool poly = false; 2172 bool usgn = false; 2173 bool scal = false; 2174 bool cnst = false; 2175 bool pntr = false; 2176 2177 // Base type to get the type string for. 2178 char type = ClassifyType(typestr, quad, poly, usgn); 2179 2180 // Based on the modifying character, change the type and width if necessary. 2181 type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr); 2182 2183 NeonTypeFlags::EltType ET; 2184 switch (type) { 2185 case 'c': 2186 ET = poly ? NeonTypeFlags::Poly8 : NeonTypeFlags::Int8; 2187 break; 2188 case 's': 2189 ET = poly ? NeonTypeFlags::Poly16 : NeonTypeFlags::Int16; 2190 break; 2191 case 'i': 2192 ET = NeonTypeFlags::Int32; 2193 break; 2194 case 'l': 2195 ET = poly ? NeonTypeFlags::Poly64 : NeonTypeFlags::Int64; 2196 break; 2197 case 'h': 2198 ET = NeonTypeFlags::Float16; 2199 break; 2200 case 'f': 2201 ET = NeonTypeFlags::Float32; 2202 break; 2203 case 'd': 2204 ET = NeonTypeFlags::Float64; 2205 break; 2206 default: 2207 PrintFatalError("unhandled type!"); 2208 } 2209 NeonTypeFlags Flags(ET, usgn, quad && proto[1] != 'g'); 2210 return Flags.getFlags(); 2211 } 2212 2213 static bool ProtoHasScalar(const std::string proto) 2214 { 2215 return (proto.find('s') != std::string::npos 2216 || proto.find('r') != std::string::npos); 2217 } 2218 2219 // Generate the definition for this intrinsic, e.g. 
__builtin_neon_cls(a)
/// GenBuiltin - Generate the body of an intrinsic that is implemented by a
/// call to a __builtin_neon_* function: casts/marshals the arguments, appends
/// the type-class enum for overloaded builtins, and handles sret-style
/// multi-vector returns.
static std::string GenBuiltin(const std::string &name, const std::string &proto,
                              StringRef typestr, ClassKind ck) {
  std::string s;

  // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit
  // sret-like argument.
  bool sret = IsMultiVecProto(proto[0]);

  bool define = UseMacro(proto);

  // Check if the prototype has a scalar operand with the type of the vector
  // elements. If not, bitcasting the args will take care of arg checking.
  // The actual signedness etc. will be taken care of with special enums.
  if (!ProtoHasScalar(proto))
    ck = ClassB;

  // Emit the return handling: declare a result temp for sret builtins,
  // otherwise cast (and, outside a macro, "return") the builtin's value.
  if (proto[0] != 'v') {
    std::string ts = TypeString(proto[0], typestr);

    if (define) {
      if (sret)
        s += ts + " r; ";
      else
        s += "(" + ts + ")";
    } else if (sret) {
      s += ts + " r; ";
    } else {
      s += "return (" + ts + ")";
    }
  }

  bool splat = proto.find('a') != std::string::npos;

  s += "__builtin_neon_";
  if (splat) {
    // Call the non-splat builtin: chop off the "_n" suffix from the name.
    std::string vname(name, 0, name.size()-2);
    s += MangleName(vname, typestr, ck);
  } else {
    s += MangleName(name, typestr, ck);
  }
  s += "(";

  // Pass the address of the return variable as the first argument to sret-like
  // builtins.
  if (sret)
    s += "&r, ";

  // Walk the argument prototype characters, naming arguments a, b, c, ...
  char arg = 'a';
  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
    std::string args = std::string(&arg, 1);

    // Use the local temporaries instead of the macro arguments.
    args = "__" + args;

    bool argQuad = false;
    bool argPoly = false;
    bool argUsgn = false;
    bool argScalar = false;
    bool dummy = false;
    char argType = ClassifyType(typestr, argQuad, argPoly, argUsgn);
    argType = ModType(proto[i], argType, argQuad, argPoly, argUsgn, argScalar,
                      dummy, dummy);

    // Handle multiple-vector values specially, emitting each subvector as an
    // argument to the __builtin.
    unsigned NumOfVec = 0;
    if (proto[i] >= '2' && proto[i] <= '4') {
      NumOfVec = proto[i] - '0';
    } else if (proto[i] >= 'B' && proto[i] <= 'D') {
      NumOfVec = proto[i] - 'A' + 1;
    }

    if (NumOfVec > 0) {
      // Check if an explicit cast is needed.
      if (argType != 'c' || argPoly || argUsgn)
        args = (argQuad ? "(int8x16_t)" : "(int8x8_t)") + args;

      // Emit each member of the struct-of-vectors as a separate argument.
      for (unsigned vi = 0, ve = NumOfVec; vi != ve; ++vi) {
        s += args + ".val[" + utostr(vi) + "]";
        if ((vi + 1) < ve)
          s += ", ";
      }
      if ((i + 1) < e)
        s += ", ";

      continue;
    }

    // For "_n" splat variants the trailing scalar is broadcast into a vector.
    if (splat && (i + 1) == e)
      args = Duplicate(GetNumElements(typestr, argQuad), typestr, args);

    // Check if an explicit cast is needed.
    if ((splat || !argScalar) &&
        ((ck == ClassB && argType != 'c') || argPoly || argUsgn)) {
      std::string argTypeStr = "c";
      if (ck != ClassB)
        argTypeStr = argType;
      if (argQuad)
        argTypeStr = "Q" + argTypeStr;
      args = "(" + TypeString('d', argTypeStr) + ")" + args;
    }

    s += args;
    if ((i + 1) < e)
      s += ", ";
  }

  // Extra constant integer to hold type class enum for this function, e.g. s8
  if (ck == ClassB)
    s += ", " + utostr(GetNeonEnum(proto, typestr));

  s += ");";

  // For sret builtins the value lives in "r"; yield it as the result.
  if (proto[0] != 'v' && sret) {
    if (define)
      s += " r;";
    else
      s += " return r;";
  }
  return s;
}

/// GenBuiltinDef - Generate one BUILTIN(__builtin_neon_..., "...", "n") line
/// for the Builtins*.def files, encoding the argument types per prototype.
static std::string GenBuiltinDef(const std::string &name,
                                 const std::string &proto,
                                 StringRef typestr, ClassKind ck) {
  std::string s("BUILTIN(__builtin_neon_");

  // If all types are the same size, bitcasting the args will take care
  // of arg checking. The actual signedness etc. will be taken care of with
  // special enums.
  if (!ProtoHasScalar(proto))
    ck = ClassB;

  s += MangleName(name, typestr, ck);
  s += ", \"";

  for (unsigned i = 0, e = proto.size(); i != e; ++i)
    s += BuiltinTypeString(proto[i], typestr, ck, i == 0);

  // Extra constant integer to hold type class enum for this function, e.g. s8
  if (ck == ClassB)
    s += "i";

  s += "\", \"n\")";
  return s;
}

/// GenIntrinsic - Generate the complete header definition (either an __ai
/// inline function or a #define macro) for one intrinsic/type combination.
static std::string GenIntrinsic(const std::string &name,
                                const std::string &proto,
                                StringRef outTypeStr, StringRef inTypeStr,
                                OpKind kind, ClassKind classKind) {
  assert(!proto.empty() && "");
  // Unavailable intrinsics are declared as plain functions even if the proto
  // would otherwise require a macro.
  bool define = UseMacro(proto) && kind != OpUnavailable;
  std::string s;

  // static always inline + return type
  if (define)
    s += "#define ";
  else
    s += "__ai " + TypeString(proto[0], outTypeStr) + " ";

  // Function name with type suffix
  std::string mangledName = MangleName(name, outTypeStr, ClassS);
  if (outTypeStr != inTypeStr) {
    // If the input type is different (e.g., for vreinterpret), append a suffix
    // for the input type. String off a "Q" (quad) prefix so that MangleName
    // does not insert another "q" in the name.
    unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 
1 : 0);
    StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
    mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
  }
  s += mangledName;

  // Function arguments
  s += GenArgs(proto, inTypeStr, name);

  // Definition.
  if (define) {
    // Macros wrap the body in a GNU statement expression.
    s += " __extension__ ({ \\\n  ";
    s += GenMacroLocals(proto, inTypeStr, name);
  } else if (kind == OpUnavailable) {
    s += " __attribute__((unavailable));\n";
    return s;
  } else
    s += " {\n  ";

  // Body: either an open-coded operation or a __builtin_neon_* call.
  if (kind != OpNone)
    s += GenOpString(name, kind, proto, outTypeStr);
  else
    s += GenBuiltin(name, proto, outTypeStr, classKind);
  if (define)
    s += " })";
  else
    s += " }";
  s += "\n";
  return s;
}

/// run - Read the records in arm_neon.td and output arm_neon.h.  arm_neon.h
/// is comprised of type definitions and function declarations.
void NeonEmitter::run(raw_ostream &OS) {
  // Emit the MIT-style license header of the generated file.
  OS << 
    "/*===---- arm_neon.h - ARM Neon intrinsics ------------------------------"
    "---===\n"
    " *\n"
    " * Permission is hereby granted, free of charge, to any person obtaining "
    "a copy\n"
    " * of this software and associated documentation files (the \"Software\"),"
    " to deal\n"
    " * in the Software without restriction, including without limitation the "
    "rights\n"
    " * to use, copy, modify, merge, publish, distribute, sublicense, "
    "and/or sell\n"
    " * copies of the Software, and to permit persons to whom the Software is\n"
    " * furnished to do so, subject to the following conditions:\n"
    " *\n"
    " * The above copyright notice and this permission notice shall be "
    "included in\n"
    " * all copies or substantial portions of the Software.\n"
    " *\n"
    " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
    "EXPRESS OR\n"
    " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
    "MERCHANTABILITY,\n"
    " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
    "SHALL THE\n"
    " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
    "OTHER\n"
    " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
    "ARISING FROM,\n"
    " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
    "DEALINGS IN\n"
    " * THE SOFTWARE.\n"
    " *\n"
    " *===--------------------------------------------------------------------"
    "---===\n"
    " */\n\n";

  // Include guard and feature guard for the generated header.
  OS << "#ifndef __ARM_NEON_H\n";
  OS << "#define __ARM_NEON_H\n\n";

  OS << "#if !defined(__ARM_NEON__) && !defined(__ARM_NEON)\n";
  OS << "#error \"NEON support not enabled\"\n";
  OS << "#endif\n\n";

  OS << "#include <stdint.h>\n\n";

  // Emit NEON-specific scalar typedefs.
  OS << "typedef float float32_t;\n";
  OS << "typedef __fp16 float16_t;\n";

  // float64_t only exists on AArch64.
  OS << "#ifdef __aarch64__\n";
  OS << "typedef double float64_t;\n";
  OS << "#endif\n\n";

  // For now, signedness of polynomial types depends on target
  OS << "#ifdef __aarch64__\n";
  OS << "typedef uint8_t poly8_t;\n";
  OS << "typedef uint16_t poly16_t;\n";
  OS << "typedef uint64_t poly64_t;\n";
  OS << "#else\n";
  OS << "typedef int8_t poly8_t;\n";
  OS << "typedef int16_t poly16_t;\n";
  OS << "#endif\n";

  // Emit Neon vector typedefs.  The string encodes every element type in both
  // 64-bit and quad (Q-prefixed) forms.
  std::string TypedefTypes(
    "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPsPlQPl");
  SmallVector<StringRef, 24> TDTypeVec;
  ParseTypes(0, TypedefTypes, TDTypeVec);

  // Emit vector typedefs.
  // Track whether we are currently inside an "#ifdef __aarch64__" region so
  // AArch64-only types (f64 and poly64) get guarded exactly once.
  bool isA64 = false;
  bool preinsert;
  bool postinsert;
  for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
    bool dummy, quad = false, poly = false;
    char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
    preinsert = false;
    postinsert = false;

    // f64 and poly64 vectors are AArch64-only: open the guard when entering
    // such a run of types, close it when leaving.
    if (type == 'd' || (type == 'l' && poly)) {
      preinsert = isA64? false: true;
      isA64 = true;
    } else {
      postinsert = isA64? true: false;
      isA64 = false;
    }
    if (postinsert)
      OS << "#endif\n";
    if (preinsert)
      OS << "#ifdef __aarch64__\n";

    if (poly)
      OS << "typedef __attribute__((neon_polyvector_type(";
    else
      OS << "typedef __attribute__((neon_vector_type(";

    unsigned nElts = GetNumElements(TDTypeVec[i], quad);
    OS << utostr(nElts) << "))) ";
    // Extra space keeps single-digit counts column-aligned with two-digit.
    if (nElts < 10)
      OS << " ";

    OS << TypeString('s', TDTypeVec[i]);
    OS << " " << TypeString('d', TDTypeVec[i]) << ";\n";

  }
  // Close a still-open __aarch64__ guard after the last typedef.
  postinsert = isA64? true: false;
  if (postinsert)
    OS << "#endif\n";
  OS << "\n";

  // Emit struct typedefs (the NxM "val[]" structs for ld/st multiple).
  isA64 = false;
  for (unsigned vi = 2; vi != 5; ++vi) {
    for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
      bool dummy, quad = false, poly = false;
      char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
      preinsert = false;
      postinsert = false;

      // Same __aarch64__ guard tracking as for the plain vector typedefs.
      if (type == 'd' || (type == 'l' && poly)) {
        preinsert = isA64? false: true;
        isA64 = true;
      } else {
        postinsert = isA64? true: false;
        isA64 = false;
      }
      if (postinsert)
        OS << "#endif\n";
      if (preinsert)
        OS << "#ifdef __aarch64__\n";

      std::string ts = TypeString('d', TDTypeVec[i]);
      std::string vs = TypeString('0' + vi, TDTypeVec[i]);
      OS << "typedef struct " << vs << " {\n";
      OS << "  " << ts << " val";
      OS << "[" << utostr(vi) << "]";
      OS << ";\n} ";
      OS << vs << ";\n";
      OS << "\n";
    }
  }
  postinsert = isA64? true: false;
  if (postinsert)
    OS << "#endif\n";
  OS << "\n";

  OS<<"#define __ai static inline __attribute__((__always_inline__, __nodebug__))\n\n";

  std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");

  StringMap<ClassKind> EmittedMap;

  // Emit vmovl, vmull and vabd intrinsics first so they can be used by other
  // intrinsics.  (Some of the saturating multiply instructions are also
  // used to implement the corresponding "_lane" variants, but tablegen
  // sorts the records into alphabetical order so that the "_lane" variants
  // come after the intrinsics they use.)
  emitIntrinsic(OS, Records.getDef("VMOVL"), EmittedMap);
  emitIntrinsic(OS, Records.getDef("VMULL"), EmittedMap);
  emitIntrinsic(OS, Records.getDef("VABD"), EmittedMap);
  emitIntrinsic(OS, Records.getDef("VABDL"), EmittedMap);

  // ARM intrinsics must be emitted before AArch64 intrinsics to ensure
  // common intrinsics appear only once in the output stream.
  // The check for uniqueness is done in emitIntrinsic.
  // Emit ARM intrinsics.
  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];

    // Skip AArch64 intrinsics; they will be emitted at the end.
    bool isA64 = R->getValueAsBit("isA64");
    if (isA64)
      continue;

    // These three were already emitted ahead of the loop above.
    if (R->getName() != "VMOVL" && R->getName() != "VMULL" &&
        R->getName() != "VABD")
      emitIntrinsic(OS, R, EmittedMap);
  }

  // Emit AArch64-specific intrinsics.
  OS << "#ifdef __aarch64__\n";

  // Emit the "_high" helpers first; other AArch64 intrinsics build on them.
  emitIntrinsic(OS, Records.getDef("VMOVL_HIGH"), EmittedMap);
  emitIntrinsic(OS, Records.getDef("VMULL_HIGH"), EmittedMap);
  emitIntrinsic(OS, Records.getDef("VABDL_HIGH"), EmittedMap);

  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];

    // Skip ARM intrinsics already included above.
    bool isA64 = R->getValueAsBit("isA64");
    if (!isA64)
      continue;

    // Skip crypto temporarily, and will emit them all together at the end.
    bool isCrypto = R->getValueAsBit("isCrypto");
    if (isCrypto)
      continue;

    emitIntrinsic(OS, R, EmittedMap);
  }

  // Crypto intrinsics get their own feature guard inside the __aarch64__ one.
  OS << "#ifdef __ARM_FEATURE_CRYPTO\n";

  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];

    // Skip crypto temporarily, and will emit them all together at the end.
    bool isCrypto = R->getValueAsBit("isCrypto");
    if (!isCrypto)
      continue;

    emitIntrinsic(OS, R, EmittedMap);
  }

  OS << "#endif\n\n";   // __ARM_FEATURE_CRYPTO

  OS << "#endif\n\n";   // __aarch64__

  OS << "#undef __ai\n\n";
  OS << "#endif /* __ARM_NEON_H */\n";
}

/// emitIntrinsic - Write out the arm_neon.h header file definitions for the
/// intrinsics specified by record R checking for intrinsic uniqueness.
void NeonEmitter::emitIntrinsic(raw_ostream &OS, Record *R,
                                StringMap<ClassKind> &EmittedMap) {
  std::string name = R->getValueAsString("Name");
  std::string Proto = R->getValueAsString("Prototype");
  std::string Types = R->getValueAsString("Types");

  SmallVector<StringRef, 16> TypeVec;
  ParseTypes(R, Types, TypeVec);

  OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];

  // The class kind comes from the record's immediate superclass (e.g. SInst);
  // it is only required when the intrinsic is builtin-based (OpNone).
  ClassKind classKind = ClassNone;
  if (R->getSuperClasses().size() >= 2)
    classKind = ClassMap[R->getSuperClasses()[1]];
  if (classKind == ClassNone && kind == OpNone)
    PrintFatalError(R->getLoc(), "Builtin has no class kind");

  for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
    if (kind == OpReinterpret) {
      // vreinterpret: emit one cast definition for every same-width
      // (quad matches quad) source/destination type pair.
      bool outQuad = false;
      bool dummy = false;
      (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
      for (unsigned srcti = 0, srcte = TypeVec.size();
           srcti != srcte; ++srcti) {
        bool inQuad = false;
        (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
        if (srcti == ti || inQuad != outQuad)
          continue;
        std::string s = GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[srcti],
                                     OpCast, ClassS);
        // Unique on the full generated text so ARM/AArch64 overlaps are
        // emitted only once.
        if (EmittedMap.count(s))
          continue;
        EmittedMap[s] = ClassS;
        OS << s;
      }
    } else {
      std::string s =
          GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[ti], kind, classKind);
      if (EmittedMap.count(s))
        continue;
      EmittedMap[s] = classKind;
      OS << s;
    }
  }
  OS << "\n";
}

/// RangeFromType - Return the maximum valid lane index (nElts - 1) for the
/// vector type described by the prototype modifier and type string.
static unsigned RangeFromType(const char mod, StringRef typestr) {
  // base type to get the type string for.
  bool quad = false, dummy = false;
  char type = ClassifyType(typestr, quad, dummy, dummy);
  type = ModType(mod, type, quad, dummy, dummy, dummy, dummy, dummy);

  // Lane count is 8/4/2/1 for 64-bit vectors, doubled for quad vectors; the
  // upper bound of a lane immediate is count - 1.
  switch (type) {
    case 'c':
      return (8 << (int)quad) - 1;
    case 'h':
    case 's':
      return (4 << (int)quad) - 1;
    case 'f':
    case 'i':
      return (2 << (int)quad) - 1;
    case 'd':
    case 'l':
      return (1 << (int)quad) - 1;
    default:
      PrintFatalError("unhandled type!");
  }
}

/// RangeScalarShiftImm - Return the maximum shift amount (bit width - 1) for
/// a scalar shift immediate of the given prototype modifier and type string.
static unsigned RangeScalarShiftImm(const char mod, StringRef typestr) {
  // base type to get the type string for.
  bool dummy = false;
  char type = ClassifyType(typestr, dummy, dummy, dummy);
  type = ModType(mod, type, dummy, dummy, dummy, dummy, dummy, dummy);

  switch (type) {
    case 'c':
      return 7;
    case 'h':
    case 's':
      return 15;
    case 'f':
    case 'i':
      return 31;
    case 'd':
    case 'l':
      return 63;
    default:
      PrintFatalError("unhandled type!");
  }
}

/// Generate the ARM and AArch64 intrinsic range checking code for
/// shift/lane immediates, checking for unique declarations.
void
NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS,
                                        StringMap<ClassKind> &A64IntrinsicMap,
                                        bool isA64RangeCheck) {
  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
  StringMap<OpKind> EmittedMap;

  // Generate the intrinsic range checking code for shift/lane immediates.
  if (isA64RangeCheck)
    OS << "#ifdef GET_NEON_AARCH64_IMMEDIATE_CHECK\n";
  else
    OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";

  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];

    // Only builtin-based intrinsics (OpNone) take a raw immediate to check.
    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
    if (k != OpNone)
      continue;

    std::string name = R->getValueAsString("Name");
    std::string Proto = R->getValueAsString("Prototype");
    std::string Types = R->getValueAsString("Types");
    // name@proto uniquely identifies an intrinsic signature across targets.
    std::string Rename = name + "@" + Proto;

    // Functions with 'a' (the splat code) in the type prototype should not get
    // their own builtin as they use the non-splat variant.
    if (Proto.find('a') != std::string::npos)
      continue;

    // Functions which do not have an immediate do not need to have range
    // checking code emitted.
    size_t immPos = Proto.find('i');
    if (immPos == std::string::npos)
      continue;

    SmallVector<StringRef, 16> TypeVec;
    ParseTypes(R, Types, TypeVec);

    if (R->getSuperClasses().size() < 2)
      PrintFatalError(R->getLoc(), "Builtin has no class kind");

    ClassKind ck = ClassMap[R->getSuperClasses()[1]];

    // Do not include AArch64 range checks if not generating code for AArch64.
    bool isA64 = R->getValueAsBit("isA64");
    if (!isA64RangeCheck && isA64)
      continue;

    // Include ARM range checks in AArch64 but only if ARM intrinsics are not
    // redefined by AArch64 to handle new types.
    if (isA64RangeCheck && !isA64 && A64IntrinsicMap.count(Rename)) {
      ClassKind &A64CK = A64IntrinsicMap[Rename];
      if (A64CK == ck && ck != ClassNone)
        continue;
    }

    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
      std::string namestr, shiftstr, rangestr;

      if (R->getValueAsBit("isVCVT_N")) {
        // VCVT between floating- and fixed-point values takes an immediate
        // in the range [1, 32] for f32, or [1, 64] for f64.
        ck = ClassB;
        if (name.find("32") != std::string::npos)
          rangestr = "l = 1; u = 31"; // upper bound = l + u
        else if (name.find("64") != std::string::npos)
          rangestr = "l = 1; u = 63";
        else
          PrintFatalError(R->getLoc(),
              "Fixed point convert name should contains \"32\" or \"64\"");

      } else if (R->getValueAsBit("isScalarShift")) {
        // Right shifts have an 'r' in the name, left shifts do not.  Convert
        // instructions have the same bounds and right shifts.
        if (name.find('r') != std::string::npos ||
            name.find("cvt") != std::string::npos)
          rangestr = "l = 1; ";

        rangestr += "u = " +
          utostr(RangeScalarShiftImm(Proto[immPos - 1], TypeVec[ti]));
      } else if (!ProtoHasScalar(Proto)) {
        // Builtins which are overloaded by type will need to have their upper
        // bound computed at Sema time based on the type constant.
        ck = ClassB;
        if (R->getValueAsBit("isShift")) {
          shiftstr = ", true";

          // Right shifts have an 'r' in the name, left shifts do not.
          if (name.find('r') != std::string::npos)
            rangestr = "l = 1; ";
        }
        rangestr += "u = RFT(TV" + shiftstr + ")";
      } else {
        // The immediate generally refers to a lane in the preceding argument.
        assert(immPos > 0 && "unexpected immediate operand");
        rangestr =
            "u = " + utostr(RangeFromType(Proto[immPos - 1], TypeVec[ti]));
      }
      // Make sure cases appear only once by uniquing them in a string map.
      namestr = MangleName(name, TypeVec[ti], ck);
      if (EmittedMap.count(namestr))
        continue;
      EmittedMap[namestr] = OpNone;

      // Calculate the index of the immediate that should be range checked.
      unsigned immidx = 0;

      // Builtins that return a struct of multiple vectors have an extra
      // leading arg for the struct return.
      if (IsMultiVecProto(Proto[0]))
        ++immidx;

      // Add one to the index for each argument until we reach the immediate
      // to be checked.  Structs of vectors are passed as multiple arguments.
      for (unsigned ii = 1, ie = Proto.size(); ii != ie; ++ii) {
        switch (Proto[ii]) {
        default:
          immidx += 1;
          break;
        case '2':
        case 'B':
          immidx += 2;
          break;
        case '3':
        case 'C':
          immidx += 3;
          break;
        case '4':
        case 'D':
          immidx += 4;
          break;
        case 'i':
          // Found the immediate; terminate the loop after this iteration.
          ie = ii + 1;
          break;
        }
      }
      // Emit the switch case consumed by SemaChecking.cpp: sets the argument
      // index and the allowed [l, u] range, then breaks.
      if (isA64RangeCheck)
        OS << "case AArch64::BI__builtin_neon_";
      else
        OS << "case ARM::BI__builtin_neon_";
      OS << MangleName(name, TypeVec[ti], ck) << ": i = " << immidx << "; "
         << rangestr << "; break;\n";
    }
  }
  OS << "#endif\n\n";
}

/// Generate the ARM and AArch64 overloaded type checking code for
/// SemaChecking.cpp, checking for unique builtin declarations.
void
NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
                                      StringMap<ClassKind> &A64IntrinsicMap,
                                      bool isA64TypeCheck) {
  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
  StringMap<OpKind> EmittedMap;

  // Generate the overloaded type checking code for SemaChecking.cpp
  if (isA64TypeCheck)
    OS << "#ifdef GET_NEON_AARCH64_OVERLOAD_CHECK\n";
  else
    OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";

  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];
    // Only builtin-based intrinsics (OpNone) have an overloaded builtin.
    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
    if (k != OpNone)
      continue;

    std::string Proto = R->getValueAsString("Prototype");
    std::string Types = R->getValueAsString("Types");
    std::string name = R->getValueAsString("Name");
    // name@proto uniquely identifies an intrinsic signature across targets.
    std::string Rename = name + "@" + Proto;

    // Functions with 'a' (the splat code) in the type prototype should not get
    // their own builtin as they use the non-splat variant.
    if (Proto.find('a') != std::string::npos)
      continue;

    // Functions which have a scalar argument cannot be overloaded, no need to
    // check them if we are emitting the type checking code.
    if (ProtoHasScalar(Proto))
      continue;

    SmallVector<StringRef, 16> TypeVec;
    ParseTypes(R, Types, TypeVec);

    if (R->getSuperClasses().size() < 2)
      PrintFatalError(R->getLoc(), "Builtin has no class kind");

    // Do not include AArch64 type checks if not generating code for AArch64.
    bool isA64 = R->getValueAsBit("isA64");
    if (!isA64TypeCheck && isA64)
      continue;

    // Include ARM type check in AArch64 but only if ARM intrinsics
    // are not redefined in AArch64 to handle new types, e.g. "vabd" is a SIntr
    // redefined in AArch64 to handle an additional 2 x f64 type.
    ClassKind ck = ClassMap[R->getSuperClasses()[1]];
    if (isA64TypeCheck && !isA64 && A64IntrinsicMap.count(Rename)) {
      ClassKind &A64CK = A64IntrinsicMap[Rename];
      if (A64CK == ck && ck != ClassNone)
        continue;
    }

    // Build one bit mask of valid NeonTypeFlags per vector width: "mask" for
    // 64-bit types (last index in si), "qmask" for 128-bit types (qi).
    int si = -1, qi = -1;
    uint64_t mask = 0, qmask = 0;
    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
      // Generate the switch case(s) for this builtin for the type validation.
      bool quad = false, poly = false, usgn = false;
      (void) ClassifyType(TypeVec[ti], quad, poly, usgn);

      if (quad) {
        qi = ti;
        qmask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
      } else {
        si = ti;
        mask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
      }
    }

    // Check if the builtin function has a pointer or const pointer argument.
    int PtrArgNum = -1;
    bool HasConstPtr = false;
    for (unsigned arg = 1, arge = Proto.size(); arg != arge; ++arg) {
      char ArgType = Proto[arg];
      if (ArgType == 'c') {
        HasConstPtr = true;
        PtrArgNum = arg - 1;
        break;
      }
      if (ArgType == 'p') {
        PtrArgNum = arg - 1;
        break;
      }
    }
    // For sret builtins, adjust the pointer argument index.
    if (PtrArgNum >= 0 && IsMultiVecProto(Proto[0]))
      PtrArgNum += 1;

    // Omit type checking for the pointer arguments of vld1_lane, vld1_dup,
    // and vst1_lane intrinsics.  Using a pointer to the vector element
    // type with one of those operations causes codegen to select an aligned
    // load/store instruction.  If you want an unaligned operation,
    // the pointer argument needs to have less alignment than element type,
    // so just accept any pointer type.
    if (name == "vld1_lane" || name == "vld1_dup" || name == "vst1_lane") {
      PtrArgNum = -1;
      HasConstPtr = false;
    }

    // Emit one case per vector width that this builtin supports.
    if (mask) {
      if (isA64TypeCheck)
        OS << "case AArch64::BI__builtin_neon_";
      else
        OS << "case ARM::BI__builtin_neon_";
      OS << MangleName(name, TypeVec[si], ClassB) << ": mask = "
         << "0x" << utohexstr(mask) << "ULL";
      if (PtrArgNum >= 0)
        OS << "; PtrArgNum = " << PtrArgNum;
      if (HasConstPtr)
        OS << "; HasConstPtr = true";
      OS << "; break;\n";
    }
    if (qmask) {
      if (isA64TypeCheck)
        OS << "case AArch64::BI__builtin_neon_";
      else
        OS << "case ARM::BI__builtin_neon_";
      OS << MangleName(name, TypeVec[qi], ClassB) << ": mask = "
         << "0x" << utohexstr(qmask) << "ULL";
      if (PtrArgNum >= 0)
        OS << "; PtrArgNum = " << PtrArgNum;
      if (HasConstPtr)
        OS << "; HasConstPtr = true";
      OS << "; break;\n";
    }
  }
  OS << "#endif\n\n";
}

/// genBuiltinsDef: Generate the BuiltinsARM.def and  BuiltinsAArch64.def
/// declaration of builtins, checking for unique builtin declarations.
void NeonEmitter::genBuiltinsDef(raw_ostream &OS,
                                 StringMap<ClassKind> &A64IntrinsicMap,
                                 bool isA64GenBuiltinDef) {
  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
  StringMap<OpKind> EmittedMap;

  // Generate BuiltinsARM.def and BuiltinsAArch64.def
  if (isA64GenBuiltinDef)
    OS << "#ifdef GET_NEON_AARCH64_BUILTINS\n";
  else
    OS << "#ifdef GET_NEON_BUILTINS\n";

  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];
    // Only builtin-based intrinsics (OpNone) need a BUILTIN() declaration.
    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
    if (k != OpNone)
      continue;

    std::string Proto = R->getValueAsString("Prototype");
    std::string name = R->getValueAsString("Name");
    // name@proto uniquely identifies an intrinsic signature across targets.
    std::string Rename = name + "@" + Proto;

    // Functions with 'a' (the splat code) in the type prototype should not get
    // their own builtin as they use the non-splat variant.
    if (Proto.find('a') != std::string::npos)
      continue;

    std::string Types = R->getValueAsString("Types");
    SmallVector<StringRef, 16> TypeVec;
    ParseTypes(R, Types, TypeVec);

    if (R->getSuperClasses().size() < 2)
      PrintFatalError(R->getLoc(), "Builtin has no class kind");

    ClassKind ck = ClassMap[R->getSuperClasses()[1]];

    // Do not include AArch64 BUILTIN() macros if not generating
    // code for AArch64
    bool isA64 = R->getValueAsBit("isA64");
    if (!isA64GenBuiltinDef && isA64)
      continue;

    // Include ARM  BUILTIN() macros  in AArch64 but only if ARM intrinsics
    // are not redefined in AArch64 to handle new types, e.g. "vabd" is a SIntr
    // redefined in AArch64 to handle an additional 2 x f64 type.
    if (isA64GenBuiltinDef && !isA64 && A64IntrinsicMap.count(Rename)) {
      ClassKind &A64CK = A64IntrinsicMap[Rename];
      if (A64CK == ck && ck != ClassNone)
        continue;
    }

    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
      // Generate the declaration for this builtin, ensuring
      // that each unique BUILTIN() macro appears only once in the output
      // stream.
      std::string bd = GenBuiltinDef(name, Proto, TypeVec[ti], ck);
      if (EmittedMap.count(bd))
        continue;

      EmittedMap[bd] = OpNone;
      OS << bd << "\n";
    }
  }
  OS << "#endif\n\n";
}

/// runHeader - Emit a file with sections defining:
/// 1. the NEON section of BuiltinsARM.def and  BuiltinsAArch64.def.
/// 2. the SemaChecking code for the type overload checking.
/// 3. the SemaChecking code for validation of intrinsic immediate arguments.
void NeonEmitter::runHeader(raw_ostream &OS) {
  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");

  // build a map of AArch64 intrinsics to be used in uniqueness checks.
  // Maps "name@proto" -> class kind for every AArch64 record, so the ARM
  // passes can detect intrinsics that AArch64 redefines.
  StringMap<ClassKind> A64IntrinsicMap;
  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];

    bool isA64 = R->getValueAsBit("isA64");
    if (!isA64)
      continue;

    ClassKind CK = ClassNone;
    if (R->getSuperClasses().size() >= 2)
      CK = ClassMap[R->getSuperClasses()[1]];

    std::string Name = R->getValueAsString("Name");
    std::string Proto = R->getValueAsString("Prototype");
    std::string Rename = Name + "@" + Proto;
    // First record for a given signature wins.
    if (A64IntrinsicMap.count(Rename))
      continue;
    A64IntrinsicMap[Rename] = CK;
  }

  // Generate BuiltinsARM.def for ARM
  genBuiltinsDef(OS, A64IntrinsicMap, false);

  // Generate BuiltinsAArch64.def for AArch64
  genBuiltinsDef(OS, A64IntrinsicMap, true);

  // Generate ARM overloaded type checking code for SemaChecking.cpp
  genOverloadTypeCheckCode(OS, A64IntrinsicMap, false);

  // Generate AArch64 overloaded type checking code for SemaChecking.cpp
  genOverloadTypeCheckCode(OS, A64IntrinsicMap, true);

  // Generate ARM range checking code for shift/lane immediates.
  genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, false);

  // Generate the AArch64 range checking code for shift/lane immediates.
  genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, true);
}

/// GenTest - Write out a test for the intrinsic specified by the name and
/// type strings, including the embedded patterns for FileCheck to match.
static std::string GenTest(const std::string &name,
                           const std::string &proto,
                           StringRef outTypeStr, StringRef inTypeStr,
                           bool isShift, bool isHiddenLOp,
                           ClassKind ck, const std::string &InstName,
                           bool isA64,
                           std::string & testFuncProto) {
  assert(!proto.empty() && "");
  std::string s;

  // Function name with type suffix
  std::string mangledName = MangleName(name, outTypeStr, ClassS);
  if (outTypeStr != inTypeStr) {
    // If the input type is different (e.g., for vreinterpret), append a suffix
    // for the input type.  String off a "Q" (quad) prefix so that MangleName
    // does not insert another "q" in the name.
    unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
    StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
    mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
  }

  // todo: GenerateChecksForIntrinsic does not generate CHECK
  // for aarch64 instructions yet
  std::vector<std::string> FileCheckPatterns;
  if (!isA64) {
    GenerateChecksForIntrinsic(name, proto, outTypeStr, inTypeStr, ck, InstName,
                               isHiddenLOp, FileCheckPatterns);
    s+= "// CHECK_ARM: test_" + mangledName + "\n";
  }
  s += "// CHECK_AARCH64: test_" + mangledName + "\n";

  // Emit the FileCheck patterns.
  // If for any reason we do not want to emit a check, mangledInst
  // will be the empty string.
  if (FileCheckPatterns.size()) {
    for (std::vector<std::string>::const_iterator i = FileCheckPatterns.begin(),
                                                  e = FileCheckPatterns.end();
         i != e;
         ++i) {
      s += "// CHECK_ARM: " + *i + "\n";
    }
  }

  // Emit the start of the test function.  The prototype (also returned via
  // testFuncProto) doubles as the uniqueness key in the caller's map.

  testFuncProto = TypeString(proto[0], outTypeStr) + " test_" + mangledName + "(";
  char arg = 'a';
  std::string comma;
  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
    // Do not create arguments for values that must be immediate constants.
    if (proto[i] == 'i')
      continue;
    testFuncProto += comma + TypeString(proto[i], inTypeStr) + " ";
    testFuncProto.push_back(arg);
    comma = ", ";
  }
  testFuncProto += ")";

  s+= testFuncProto;
  s+= " {\n  ";

  if (proto[0] != 'v')
    s += "return ";
  s += mangledName + "(";
  arg = 'a';
  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
    if (proto[i] == 'i') {
      // For immediate operands, test the maximum value.
      if (isShift)
        s += "1"; // FIXME
      else
        // The immediate generally refers to a lane in the preceding argument.
        s += utostr(RangeFromType(proto[i-1], inTypeStr));
    } else {
      s.push_back(arg);
    }
    if ((i + 1) < e)
      s += ", ";
  }
  s += ");\n}\n\n";
  return s;
}

/// Write out all intrinsic tests for the specified target, checking
/// for intrinsic test uniqueness.
void NeonEmitter::genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap,
                                bool isA64GenTest) {
  if (isA64GenTest)
    OS << "#ifdef __aarch64__\n";

  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];
    std::string name = R->getValueAsString("Name");
    std::string Proto = R->getValueAsString("Prototype");
    std::string Types = R->getValueAsString("Types");
    bool isShift = R->getValueAsBit("isShift");
    std::string InstName = R->getValueAsString("InstName");
    bool isHiddenLOp = R->getValueAsBit("isHiddenLInst");
    bool isA64 = R->getValueAsBit("isA64");

    // do not include AArch64 intrinsic test if not generating
    // code for AArch64
    if (!isA64GenTest && isA64)
      continue;

    SmallVector<StringRef, 16> TypeVec;
    ParseTypes(R, Types, TypeVec);

    ClassKind ck = ClassMap[R->getSuperClasses()[1]];
    OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];
    // Unavailable intrinsics have no callable body to test.
    if 
(kind == OpUnavailable) 3253 continue; 3254 for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) { 3255 if (kind == OpReinterpret) { 3256 bool outQuad = false; 3257 bool dummy = false; 3258 (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy); 3259 for (unsigned srcti = 0, srcte = TypeVec.size(); 3260 srcti != srcte; ++srcti) { 3261 bool inQuad = false; 3262 (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy); 3263 if (srcti == ti || inQuad != outQuad) 3264 continue; 3265 std::string testFuncProto; 3266 std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[srcti], 3267 isShift, isHiddenLOp, ck, InstName, isA64, 3268 testFuncProto); 3269 if (EmittedMap.count(testFuncProto)) 3270 continue; 3271 EmittedMap[testFuncProto] = kind; 3272 OS << s << "\n"; 3273 } 3274 } else { 3275 std::string testFuncProto; 3276 std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[ti], isShift, 3277 isHiddenLOp, ck, InstName, isA64, testFuncProto); 3278 if (EmittedMap.count(testFuncProto)) 3279 continue; 3280 EmittedMap[testFuncProto] = kind; 3281 OS << s << "\n"; 3282 } 3283 } 3284 } 3285 3286 if (isA64GenTest) 3287 OS << "#endif\n"; 3288 } 3289 /// runTests - Write out a complete set of tests for all of the Neon 3290 /// intrinsics. 3291 void NeonEmitter::runTests(raw_ostream &OS) { 3292 OS << "// RUN: %clang_cc1 -triple thumbv7s-apple-darwin -target-abi " 3293 "apcs-gnu\\\n" 3294 "// RUN: -target-cpu swift -ffreestanding -Os -S -o - %s\\\n" 3295 "// RUN: | FileCheck %s -check-prefix=CHECK_ARM\n" 3296 "\n" 3297 "// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \\\n" 3298 "// RUN -target-feature +neon -ffreestanding -S -o - %s \\\n" 3299 "// RUN: | FileCheck %s -check-prefix=CHECK_AARCH64\n" 3300 "\n" 3301 "// REQUIRES: long_tests\n" 3302 "\n" 3303 "#include <arm_neon.h>\n" 3304 "\n"; 3305 3306 // ARM tests must be emitted before AArch64 tests to ensure 3307 // tests for intrinsics that are common to ARM and AArch64 3308 // appear only once in the output stream. 
3309 // The check for uniqueness is done in genTargetTest. 3310 StringMap<OpKind> EmittedMap; 3311 3312 genTargetTest(OS, EmittedMap, false); 3313 3314 genTargetTest(OS, EmittedMap, true); 3315 } 3316 3317 namespace clang { 3318 void EmitNeon(RecordKeeper &Records, raw_ostream &OS) { 3319 NeonEmitter(Records).run(OS); 3320 } 3321 void EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) { 3322 NeonEmitter(Records).runHeader(OS); 3323 } 3324 void EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) { 3325 NeonEmitter(Records).runTests(OS); 3326 } 3327 } // End namespace clang 3328