//===-- NeonEmitter.cpp - Generate arm_neon.h for use with clang ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This tablegen backend is responsible for emitting arm_neon.h, which includes
// a declaration and definition of each function specified by the ARM NEON
// compiler interface.  See ARM document DUI0348B.
//
// Each NEON instruction is implemented in terms of 1 or more functions which
// are suffixed with the element type of the input vectors.  Functions may be
// implemented in terms of generic vector operations such as +, *, -, etc. or
// by calling a __builtin_-prefixed function which will be handled by clang's
// CodeGen library.
//
// Additional validation code can be generated by this file when runHeader() is
// called, rather than the normal run() entry point.
//
// See also the documentation in include/clang/Basic/arm_neon.td.
23 // 24 //===----------------------------------------------------------------------===// 25 26 #include "TableGenBackends.h" 27 #include "llvm/ADT/ArrayRef.h" 28 #include "llvm/ADT/DenseMap.h" 29 #include "llvm/ADT/STLExtras.h" 30 #include "llvm/ADT/SmallVector.h" 31 #include "llvm/ADT/StringExtras.h" 32 #include "llvm/ADT/StringRef.h" 33 #include "llvm/Support/Casting.h" 34 #include "llvm/Support/ErrorHandling.h" 35 #include "llvm/Support/raw_ostream.h" 36 #include "llvm/TableGen/AArch64ImmCheck.h" 37 #include "llvm/TableGen/Error.h" 38 #include "llvm/TableGen/Record.h" 39 #include "llvm/TableGen/SetTheory.h" 40 #include <algorithm> 41 #include <cassert> 42 #include <cctype> 43 #include <cstddef> 44 #include <cstdint> 45 #include <deque> 46 #include <map> 47 #include <optional> 48 #include <set> 49 #include <sstream> 50 #include <string> 51 #include <unordered_map> 52 #include <utility> 53 #include <vector> 54 55 using namespace llvm; 56 57 namespace { 58 59 // While globals are generally bad, this one allows us to perform assertions 60 // liberally and somehow still trace them back to the def they indirectly 61 // came from. 62 static const Record *CurrentRecord = nullptr; 63 static void assert_with_loc(bool Assertion, const std::string &Str) { 64 if (!Assertion) { 65 if (CurrentRecord) 66 PrintFatalError(CurrentRecord->getLoc(), Str); 67 else 68 PrintFatalError(Str); 69 } 70 } 71 72 enum ClassKind { 73 ClassNone, 74 ClassI, // generic integer instruction, e.g., "i8" suffix 75 ClassS, // signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix 76 ClassW, // width-specific instruction, e.g., "8" suffix 77 ClassV, // void-suffix instruction, no suffix 78 ClassB, // bitcast arguments with enum argument to specify type 79 ClassL, // Logical instructions which are op instructions 80 // but we need to not emit any suffix for in our 81 // tests. 82 ClassNoTest // Instructions which we do not test since they are 83 // not TRUE instructions. 
84 }; 85 86 /// NeonTypeFlags - Flags to identify the types for overloaded Neon 87 /// builtins. These must be kept in sync with the flags in 88 /// include/clang/Basic/TargetBuiltins.h. 89 namespace NeonTypeFlags { 90 91 enum { EltTypeMask = 0xf, UnsignedFlag = 0x10, QuadFlag = 0x20 }; 92 93 enum EltType { 94 Int8, 95 Int16, 96 Int32, 97 Int64, 98 Poly8, 99 Poly16, 100 Poly64, 101 Poly128, 102 Float16, 103 Float32, 104 Float64, 105 BFloat16, 106 MFloat8 107 }; 108 109 } // end namespace NeonTypeFlags 110 111 class NeonEmitter; 112 113 //===----------------------------------------------------------------------===// 114 // TypeSpec 115 //===----------------------------------------------------------------------===// 116 117 /// A TypeSpec is just a simple wrapper around a string, but gets its own type 118 /// for strong typing purposes. 119 /// 120 /// A TypeSpec can be used to create a type. 121 class TypeSpec : public std::string { 122 public: 123 static std::vector<TypeSpec> fromTypeSpecs(StringRef Str) { 124 std::vector<TypeSpec> Ret; 125 TypeSpec Acc; 126 for (char I : Str.str()) { 127 if (islower(I)) { 128 Acc.push_back(I); 129 Ret.push_back(TypeSpec(Acc)); 130 Acc.clear(); 131 } else { 132 Acc.push_back(I); 133 } 134 } 135 return Ret; 136 } 137 }; 138 139 //===----------------------------------------------------------------------===// 140 // Type 141 //===----------------------------------------------------------------------===// 142 143 /// A Type. Not much more to say here. 144 class Type { 145 private: 146 TypeSpec TS; 147 148 enum TypeKind { Void, Float, SInt, UInt, Poly, BFloat16, MFloat8, FPM }; 149 TypeKind Kind; 150 bool Immediate, Constant, Pointer; 151 // ScalarForMangling and NoManglingQ are really not suited to live here as 152 // they are not related to the type. But they live in the TypeSpec (not the 153 // prototype), so this is really the only place to store them. 
154 bool ScalarForMangling, NoManglingQ; 155 unsigned Bitwidth, ElementBitwidth, NumVectors; 156 157 public: 158 Type() 159 : Kind(Void), Immediate(false), Constant(false), 160 Pointer(false), ScalarForMangling(false), NoManglingQ(false), 161 Bitwidth(0), ElementBitwidth(0), NumVectors(0) {} 162 163 Type(TypeSpec TS, StringRef CharMods) 164 : TS(std::move(TS)), Kind(Void), Immediate(false), 165 Constant(false), Pointer(false), ScalarForMangling(false), 166 NoManglingQ(false), Bitwidth(0), ElementBitwidth(0), NumVectors(0) { 167 applyModifiers(CharMods); 168 } 169 170 /// Returns a type representing "void". 171 static Type getVoid() { return Type(); } 172 173 bool operator==(const Type &Other) const { return str() == Other.str(); } 174 bool operator!=(const Type &Other) const { return !operator==(Other); } 175 176 // 177 // Query functions 178 // 179 bool isScalarForMangling() const { return ScalarForMangling; } 180 bool noManglingQ() const { return NoManglingQ; } 181 182 bool isPointer() const { return Pointer; } 183 bool isValue() const { return !isVoid() && !isPointer(); } 184 bool isScalar() const { return isValue() && NumVectors == 0; } 185 bool isVector() const { return isValue() && NumVectors > 0; } 186 bool isConstPointer() const { return Constant; } 187 bool isFloating() const { return Kind == Float; } 188 bool isInteger() const { return Kind == SInt || Kind == UInt; } 189 bool isPoly() const { return Kind == Poly; } 190 bool isSigned() const { return Kind == SInt; } 191 bool isImmediate() const { return Immediate; } 192 bool isFloat() const { return isFloating() && ElementBitwidth == 32; } 193 bool isDouble() const { return isFloating() && ElementBitwidth == 64; } 194 bool isHalf() const { return isFloating() && ElementBitwidth == 16; } 195 bool isChar() const { return ElementBitwidth == 8; } 196 bool isShort() const { return isInteger() && ElementBitwidth == 16; } 197 bool isInt() const { return isInteger() && ElementBitwidth == 32; } 198 bool isLong() 
const { return isInteger() && ElementBitwidth == 64; } 199 bool isVoid() const { return Kind == Void; } 200 bool isBFloat16() const { return Kind == BFloat16; } 201 bool isMFloat8() const { return Kind == MFloat8; } 202 bool isFPM() const { return Kind == FPM; } 203 unsigned getNumElements() const { return Bitwidth / ElementBitwidth; } 204 unsigned getSizeInBits() const { return Bitwidth; } 205 unsigned getElementSizeInBits() const { return ElementBitwidth; } 206 unsigned getNumVectors() const { return NumVectors; } 207 208 // 209 // Mutator functions 210 // 211 void makeUnsigned() { 212 assert(!isVoid() && "not a potentially signed type"); 213 Kind = UInt; 214 } 215 void makeSigned() { 216 assert(!isVoid() && "not a potentially signed type"); 217 Kind = SInt; 218 } 219 220 void makeInteger(unsigned ElemWidth, bool Sign) { 221 assert(!isVoid() && "converting void to int probably not useful"); 222 Kind = Sign ? SInt : UInt; 223 Immediate = false; 224 ElementBitwidth = ElemWidth; 225 } 226 227 void makeImmediate(unsigned ElemWidth) { 228 Kind = SInt; 229 Immediate = true; 230 ElementBitwidth = ElemWidth; 231 } 232 233 void makeScalar() { 234 Bitwidth = ElementBitwidth; 235 NumVectors = 0; 236 } 237 238 void makeOneVector() { 239 assert(isVector()); 240 NumVectors = 1; 241 } 242 243 void make32BitElement() { 244 assert_with_loc(Bitwidth > 32, "Not enough bits to make it 32!"); 245 ElementBitwidth = 32; 246 } 247 248 void doubleLanes() { 249 assert_with_loc(Bitwidth != 128, "Can't get bigger than 128!"); 250 Bitwidth = 128; 251 } 252 253 void halveLanes() { 254 assert_with_loc(Bitwidth != 64, "Can't get smaller than 64!"); 255 Bitwidth = 64; 256 } 257 258 /// Return the C string representation of a type, which is the typename 259 /// defined in stdint.h or arm_neon.h. 260 std::string str() const; 261 262 /// Return the string representation of a type, which is an encoded 263 /// string for passing to the BUILTIN() macro in Builtins.def. 
264 std::string builtin_str() const; 265 266 /// Return the value in NeonTypeFlags for this type. 267 unsigned getNeonEnum() const; 268 269 /// Parse a type from a stdint.h or arm_neon.h typedef name, 270 /// for example uint32x2_t or int64_t. 271 static Type fromTypedefName(StringRef Name); 272 273 private: 274 /// Creates the type based on the typespec string in TS. 275 /// Sets "Quad" to true if the "Q" or "H" modifiers were 276 /// seen. This is needed by applyModifier as some modifiers 277 /// only take effect if the type size was changed by "Q" or "H". 278 void applyTypespec(bool &Quad); 279 /// Applies prototype modifiers to the type. 280 void applyModifiers(StringRef Mods); 281 }; 282 283 //===----------------------------------------------------------------------===// 284 // Variable 285 //===----------------------------------------------------------------------===// 286 287 /// A variable is a simple class that just has a type and a name. 288 class Variable { 289 Type T; 290 std::string N; 291 292 public: 293 Variable() : T(Type::getVoid()) {} 294 Variable(Type T, std::string N) : T(std::move(T)), N(std::move(N)) {} 295 296 Type getType() const { return T; } 297 std::string getName() const { return "__" + N; } 298 }; 299 300 //===----------------------------------------------------------------------===// 301 // Intrinsic 302 //===----------------------------------------------------------------------===// 303 304 /// The main grunt class. This represents an instantiation of an intrinsic with 305 /// a particular typespec and prototype. 306 class Intrinsic { 307 /// The Record this intrinsic was created from. 308 const Record *R; 309 /// The unmangled name. 310 std::string Name; 311 /// The input and output typespecs. InTS == OutTS except when 312 /// CartesianProductWith is non-empty - this is the case for vreinterpret. 313 TypeSpec OutTS, InTS; 314 /// The base class kind. Most intrinsics use ClassS, which has full type 315 /// info for integers (s32/u32). 
Some use ClassI, which doesn't care about 316 /// signedness (i32), while some (ClassB) have no type at all, only a width 317 /// (32). 318 ClassKind CK; 319 /// The list of DAGs for the body. May be empty, in which case we should 320 /// emit a builtin call. 321 const ListInit *Body; 322 /// The architectural ifdef guard. 323 std::string ArchGuard; 324 /// The architectural target() guard. 325 std::string TargetGuard; 326 /// Set if the Unavailable bit is 1. This means we don't generate a body, 327 /// just an "unavailable" attribute on a declaration. 328 bool IsUnavailable; 329 /// Is this intrinsic safe for big-endian? or does it need its arguments 330 /// reversing? 331 bool BigEndianSafe; 332 333 /// The types of return value [0] and parameters [1..]. 334 std::vector<Type> Types; 335 336 SmallVector<ImmCheck, 2> ImmChecks; 337 /// The index of the key type passed to CGBuiltin.cpp for polymorphic calls. 338 int PolymorphicKeyType; 339 /// The local variables defined. 340 std::map<std::string, Variable, std::less<>> Variables; 341 /// NeededEarly - set if any other intrinsic depends on this intrinsic. 342 bool NeededEarly; 343 /// UseMacro - set if we should implement using a macro or unset for a 344 /// function. 345 bool UseMacro; 346 /// The set of intrinsics that this intrinsic uses/requires. 347 std::set<Intrinsic *> Dependencies; 348 /// The "base type", which is Type('d', OutTS). InBaseType is only 349 /// different if CartesianProductWith is non-empty (for vreinterpret). 350 Type BaseType, InBaseType; 351 /// The return variable. 352 Variable RetVar; 353 /// A postfix to apply to every variable. Defaults to "". 
354 std::string VariablePostfix; 355 356 NeonEmitter &Emitter; 357 std::stringstream OS; 358 359 bool isBigEndianSafe() const { 360 if (BigEndianSafe) 361 return true; 362 363 for (const auto &T : Types){ 364 if (T.isVector() && T.getNumElements() > 1) 365 return false; 366 } 367 return true; 368 } 369 370 public: 371 Intrinsic(const Record *R, StringRef Name, StringRef Proto, TypeSpec OutTS, 372 TypeSpec InTS, ClassKind CK, const ListInit *Body, 373 NeonEmitter &Emitter, StringRef ArchGuard, StringRef TargetGuard, 374 bool IsUnavailable, bool BigEndianSafe) 375 : R(R), Name(Name.str()), OutTS(OutTS), InTS(InTS), CK(CK), Body(Body), 376 ArchGuard(ArchGuard.str()), TargetGuard(TargetGuard.str()), 377 IsUnavailable(IsUnavailable), BigEndianSafe(BigEndianSafe), 378 PolymorphicKeyType(0), NeededEarly(false), UseMacro(false), 379 BaseType(OutTS, "."), InBaseType(InTS, "."), Emitter(Emitter) { 380 // Modify the TypeSpec per-argument to get a concrete Type, and create 381 // known variables for each. 382 // Types[0] is the return value. 383 unsigned Pos = 0; 384 Types.emplace_back(OutTS, getNextModifiers(Proto, Pos)); 385 StringRef Mods = getNextModifiers(Proto, Pos); 386 while (!Mods.empty()) { 387 Types.emplace_back(InTS, Mods); 388 if (Mods.contains('!')) 389 PolymorphicKeyType = Types.size() - 1; 390 391 Mods = getNextModifiers(Proto, Pos); 392 } 393 394 for (const auto &Type : Types) { 395 // If this builtin takes an immediate argument, we need to #define it rather 396 // than use a standard declaration, so that SemaChecking can range check 397 // the immediate passed by the user. 398 399 // Pointer arguments need to use macros to avoid hiding aligned attributes 400 // from the pointer type. 401 402 // It is not permitted to pass or return an __fp16 by value, so intrinsics 403 // taking a scalar float16_t must be implemented as macros. 
404 if (Type.isImmediate() || Type.isPointer() || 405 (Type.isScalar() && Type.isHalf())) 406 UseMacro = true; 407 } 408 409 int ArgIdx, Kind, TypeArgIdx; 410 for (const Record *I : R->getValueAsListOfDefs("ImmChecks")) { 411 unsigned EltSizeInBits = 0, VecSizeInBits = 0; 412 413 ArgIdx = I->getValueAsInt("ImmArgIdx"); 414 TypeArgIdx = I->getValueAsInt("TypeContextArgIdx"); 415 Kind = I->getValueAsDef("Kind")->getValueAsInt("Value"); 416 417 assert((ArgIdx >= 0 && Kind >= 0) && 418 "ImmArgIdx and Kind must be nonnegative"); 419 420 if (TypeArgIdx >= 0) { 421 Type ContextType = getParamType(TypeArgIdx); 422 423 // Element size cannot be set for intrinscs that map to polymorphic 424 // builtins. 425 if (CK != ClassB) 426 EltSizeInBits = ContextType.getElementSizeInBits(); 427 428 VecSizeInBits = ContextType.getSizeInBits(); 429 } 430 431 ImmChecks.emplace_back(ArgIdx, Kind, EltSizeInBits, VecSizeInBits); 432 } 433 sort(ImmChecks.begin(), ImmChecks.end(), 434 [](const ImmCheck &a, const ImmCheck &b) { 435 return a.getImmArgIdx() < b.getImmArgIdx(); 436 }); // Sort for comparison with other intrinsics which map to the 437 // same builtin 438 } 439 440 /// Get the Record that this intrinsic is based off. 441 const Record *getRecord() const { return R; } 442 /// Get the set of Intrinsics that this intrinsic calls. 443 /// this is the set of immediate dependencies, NOT the 444 /// transitive closure. 445 const std::set<Intrinsic *> &getDependencies() const { return Dependencies; } 446 /// Get the architectural guard string (#ifdef). 447 std::string getArchGuard() const { return ArchGuard; } 448 std::string getTargetGuard() const { return TargetGuard; } 449 ArrayRef<ImmCheck> getImmChecks() const { return ImmChecks; } 450 /// Get the non-mangled name. 451 std::string getName() const { return Name; } 452 453 /// Return true if the intrinsic takes an immediate operand. 
454 bool hasImmediate() const { 455 return any_of(Types, [](const Type &T) { return T.isImmediate(); }); 456 } 457 458 // Return if the supplied argument is an immediate 459 bool isArgImmediate(unsigned idx) const { 460 return Types[idx + 1].isImmediate(); 461 } 462 463 unsigned getNumParams() const { return Types.size() - 1; } 464 Type getReturnType() const { return Types[0]; } 465 Type getParamType(unsigned I) const { return Types[I + 1]; } 466 Type getBaseType() const { return BaseType; } 467 Type getPolymorphicKeyType() const { return Types[PolymorphicKeyType]; } 468 469 /// Return true if the prototype has a scalar argument. 470 bool protoHasScalar() const; 471 472 /// Return the index that parameter PIndex will sit at 473 /// in a generated function call. This is often just PIndex, 474 /// but may not be as things such as multiple-vector operands 475 /// and sret parameters need to be taken into account. 476 unsigned getGeneratedParamIdx(unsigned PIndex) { 477 unsigned Idx = 0; 478 if (getReturnType().getNumVectors() > 1) 479 // Multiple vectors are passed as sret. 480 ++Idx; 481 482 for (unsigned I = 0; I < PIndex; ++I) 483 Idx += std::max(1U, getParamType(I).getNumVectors()); 484 485 return Idx; 486 } 487 488 bool hasBody() const { return Body && !Body->getValues().empty(); } 489 490 void setNeededEarly() { NeededEarly = true; } 491 492 bool operator<(const Intrinsic &Other) const { 493 // Sort lexicographically on a three-tuple (ArchGuard, TargetGuard, Name) 494 if (ArchGuard != Other.ArchGuard) 495 return ArchGuard < Other.ArchGuard; 496 if (TargetGuard != Other.TargetGuard) 497 return TargetGuard < Other.TargetGuard; 498 return Name < Other.Name; 499 } 500 501 ClassKind getClassKind(bool UseClassBIfScalar = false) { 502 if (UseClassBIfScalar && !protoHasScalar()) 503 return ClassB; 504 return CK; 505 } 506 507 /// Return the name, mangled with type information. 
508 /// If ForceClassS is true, use ClassS (u32/s32) instead 509 /// of the intrinsic's own type class. 510 std::string getMangledName(bool ForceClassS = false) const; 511 /// Return the type code for a builtin function call. 512 std::string getInstTypeCode(Type T, ClassKind CK) const; 513 /// Return the type string for a BUILTIN() macro in Builtins.def. 514 std::string getBuiltinTypeStr(); 515 516 /// Generate the intrinsic, returning code. 517 std::string generate(); 518 /// Perform type checking and populate the dependency graph, but 519 /// don't generate code yet. 520 void indexBody(); 521 522 private: 523 StringRef getNextModifiers(StringRef Proto, unsigned &Pos) const; 524 525 std::string mangleName(std::string Name, ClassKind CK) const; 526 527 void initVariables(); 528 std::string replaceParamsIn(std::string S); 529 530 void emitBodyAsBuiltinCall(); 531 532 void generateImpl(bool ReverseArguments, 533 StringRef NamePrefix, StringRef CallPrefix); 534 void emitReturn(); 535 void emitBody(StringRef CallPrefix); 536 void emitShadowedArgs(); 537 void emitArgumentReversal(); 538 void emitReturnVarDecl(); 539 void emitReturnReversal(); 540 void emitReverseVariable(Variable &Dest, Variable &Src); 541 void emitNewLine(); 542 void emitClosingBrace(); 543 void emitOpeningBrace(); 544 void emitPrototype(StringRef NamePrefix); 545 546 class DagEmitter { 547 Intrinsic &Intr; 548 StringRef CallPrefix; 549 550 public: 551 DagEmitter(Intrinsic &Intr, StringRef CallPrefix) : 552 Intr(Intr), CallPrefix(CallPrefix) { 553 } 554 std::pair<Type, std::string> emitDagArg(const Init *Arg, 555 std::string ArgName); 556 std::pair<Type, std::string> emitDagSaveTemp(const DagInit *DI); 557 std::pair<Type, std::string> emitDagSplat(const DagInit *DI); 558 std::pair<Type, std::string> emitDagDup(const DagInit *DI); 559 std::pair<Type, std::string> emitDagDupTyped(const DagInit *DI); 560 std::pair<Type, std::string> emitDagShuffle(const DagInit *DI); 561 std::pair<Type, std::string> 
emitDagCast(const DagInit *DI, bool IsBitCast); 562 std::pair<Type, std::string> emitDagCall(const DagInit *DI, 563 bool MatchMangledName); 564 std::pair<Type, std::string> emitDagNameReplace(const DagInit *DI); 565 std::pair<Type, std::string> emitDagLiteral(const DagInit *DI); 566 std::pair<Type, std::string> emitDagOp(const DagInit *DI); 567 std::pair<Type, std::string> emitDag(const DagInit *DI); 568 }; 569 }; 570 571 //===----------------------------------------------------------------------===// 572 // NeonEmitter 573 //===----------------------------------------------------------------------===// 574 575 class NeonEmitter { 576 const RecordKeeper &Records; 577 DenseMap<const Record *, ClassKind> ClassMap; 578 std::map<std::string, std::deque<Intrinsic>, std::less<>> IntrinsicMap; 579 unsigned UniqueNumber; 580 581 void createIntrinsic(const Record *R, SmallVectorImpl<Intrinsic *> &Out); 582 void genBuiltinsDef(raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs); 583 void genStreamingSVECompatibleList(raw_ostream &OS, 584 SmallVectorImpl<Intrinsic *> &Defs); 585 void genOverloadTypeCheckCode(raw_ostream &OS, 586 SmallVectorImpl<Intrinsic *> &Defs); 587 bool areRangeChecksCompatible(const ArrayRef<ImmCheck> ChecksA, 588 const ArrayRef<ImmCheck> ChecksB); 589 void genIntrinsicRangeCheckCode(raw_ostream &OS, 590 SmallVectorImpl<Intrinsic *> &Defs); 591 592 public: 593 /// Called by Intrinsic - this attempts to get an intrinsic that takes 594 /// the given types as arguments. 595 Intrinsic &getIntrinsic(StringRef Name, ArrayRef<Type> Types, 596 std::optional<std::string> MangledName); 597 598 /// Called by Intrinsic - returns a globally-unique number. 
599 unsigned getUniqueNumber() { return UniqueNumber++; } 600 601 NeonEmitter(const RecordKeeper &R) : Records(R), UniqueNumber(0) { 602 const Record *SI = R.getClass("SInst"); 603 const Record *II = R.getClass("IInst"); 604 const Record *WI = R.getClass("WInst"); 605 const Record *VI = R.getClass("VInst"); 606 const Record *SOpI = R.getClass("SOpInst"); 607 const Record *IOpI = R.getClass("IOpInst"); 608 const Record *WOpI = R.getClass("WOpInst"); 609 const Record *LOpI = R.getClass("LOpInst"); 610 const Record *NoTestOpI = R.getClass("NoTestOpInst"); 611 612 ClassMap[SI] = ClassS; 613 ClassMap[II] = ClassI; 614 ClassMap[WI] = ClassW; 615 ClassMap[VI] = ClassV; 616 ClassMap[SOpI] = ClassS; 617 ClassMap[IOpI] = ClassI; 618 ClassMap[WOpI] = ClassW; 619 ClassMap[LOpI] = ClassL; 620 ClassMap[NoTestOpI] = ClassNoTest; 621 } 622 623 // Emit arm_neon.h.inc 624 void run(raw_ostream &o); 625 626 // Emit arm_fp16.h.inc 627 void runFP16(raw_ostream &o); 628 629 // Emit arm_bf16.h.inc 630 void runBF16(raw_ostream &o); 631 632 void runVectorTypes(raw_ostream &o); 633 634 // Emit all the __builtin prototypes used in arm_neon.h, arm_fp16.h and 635 // arm_bf16.h 636 void runHeader(raw_ostream &o); 637 }; 638 639 } // end anonymous namespace 640 641 //===----------------------------------------------------------------------===// 642 // Type implementation 643 //===----------------------------------------------------------------------===// 644 645 std::string Type::str() const { 646 if (isVoid()) 647 return "void"; 648 if (isFPM()) 649 return "fpm_t"; 650 651 std::string S; 652 653 if (isInteger() && !isSigned()) 654 S += "u"; 655 656 if (isPoly()) 657 S += "poly"; 658 else if (isFloating()) 659 S += "float"; 660 else if (isBFloat16()) 661 S += "bfloat"; 662 else if (isMFloat8()) 663 S += "mfloat"; 664 else 665 S += "int"; 666 667 S += utostr(ElementBitwidth); 668 if (isVector()) 669 S += "x" + utostr(getNumElements()); 670 if (NumVectors > 1) 671 S += "x" + utostr(NumVectors); 672 
S += "_t"; 673 674 if (Constant) 675 S += " const"; 676 if (Pointer) 677 S += " *"; 678 679 return S; 680 } 681 682 std::string Type::builtin_str() const { 683 std::string S; 684 if (isVoid()) 685 return "v"; 686 687 if (isPointer()) { 688 // All pointers are void pointers. 689 S = "v"; 690 if (isConstPointer()) 691 S += "C"; 692 S += "*"; 693 return S; 694 } else if (isInteger()) 695 switch (ElementBitwidth) { 696 case 8: S += "c"; break; 697 case 16: S += "s"; break; 698 case 32: S += "i"; break; 699 case 64: S += "Wi"; break; 700 case 128: S += "LLLi"; break; 701 default: llvm_unreachable("Unhandled case!"); 702 } 703 else if (isBFloat16()) { 704 assert(ElementBitwidth == 16 && "BFloat16 can only be 16 bits"); 705 S += "y"; 706 } else if (isMFloat8()) { 707 assert(ElementBitwidth == 8 && "MFloat8 can only be 8 bits"); 708 S += "m"; 709 } else if (isFPM()) { 710 S += "UWi"; 711 } else 712 switch (ElementBitwidth) { 713 case 16: S += "h"; break; 714 case 32: S += "f"; break; 715 case 64: S += "d"; break; 716 default: llvm_unreachable("Unhandled case!"); 717 } 718 719 // FIXME: NECESSARY??????????????????????????????????????????????????????????????????????? 720 if (isChar() && !isPointer() && isSigned()) 721 // Make chars explicitly signed. 722 S = "S" + S; 723 else if (isInteger() && !isSigned()) 724 S = "U" + S; 725 726 // Constant indices are "int", but have the "constant expression" modifier. 
727 if (isImmediate()) { 728 assert(isInteger() && isSigned()); 729 S = "I" + S; 730 } 731 732 if (isScalar()) 733 return S; 734 735 std::string Ret; 736 for (unsigned I = 0; I < NumVectors; ++I) 737 Ret += "V" + utostr(getNumElements()) + S; 738 739 return Ret; 740 } 741 742 unsigned Type::getNeonEnum() const { 743 unsigned Addend; 744 switch (ElementBitwidth) { 745 case 8: Addend = 0; break; 746 case 16: Addend = 1; break; 747 case 32: Addend = 2; break; 748 case 64: Addend = 3; break; 749 case 128: Addend = 4; break; 750 default: llvm_unreachable("Unhandled element bitwidth!"); 751 } 752 753 unsigned Base = (unsigned)NeonTypeFlags::Int8 + Addend; 754 if (isPoly()) { 755 // Adjustment needed because Poly32 doesn't exist. 756 if (Addend >= 2) 757 --Addend; 758 Base = (unsigned)NeonTypeFlags::Poly8 + Addend; 759 } 760 if (isFloating()) { 761 assert(Addend != 0 && "Float8 doesn't exist!"); 762 Base = (unsigned)NeonTypeFlags::Float16 + (Addend - 1); 763 } 764 765 if (isBFloat16()) { 766 assert(Addend == 1 && "BFloat16 is only 16 bit"); 767 Base = (unsigned)NeonTypeFlags::BFloat16; 768 } 769 770 if (isMFloat8()) { 771 Base = (unsigned)NeonTypeFlags::MFloat8; 772 } 773 774 if (Bitwidth == 128) 775 Base |= (unsigned)NeonTypeFlags::QuadFlag; 776 if (isInteger() && !isSigned()) 777 Base |= (unsigned)NeonTypeFlags::UnsignedFlag; 778 779 return Base; 780 } 781 782 Type Type::fromTypedefName(StringRef Name) { 783 Type T; 784 T.Kind = SInt; 785 786 if (Name.consume_front("u")) 787 T.Kind = UInt; 788 789 if (Name.consume_front("float")) { 790 T.Kind = Float; 791 } else if (Name.consume_front("poly")) { 792 T.Kind = Poly; 793 } else if (Name.consume_front("bfloat")) { 794 T.Kind = BFloat16; 795 } else if (Name.consume_front("mfloat")) { 796 T.Kind = MFloat8; 797 } else { 798 assert(Name.starts_with("int")); 799 Name = Name.drop_front(3); 800 } 801 802 unsigned I = 0; 803 for (I = 0; I < Name.size(); ++I) { 804 if (!isdigit(Name[I])) 805 break; 806 } 807 Name.substr(0, 
I).getAsInteger(10, T.ElementBitwidth); 808 Name = Name.drop_front(I); 809 810 T.Bitwidth = T.ElementBitwidth; 811 T.NumVectors = 1; 812 813 if (Name.consume_front("x")) { 814 unsigned I = 0; 815 for (I = 0; I < Name.size(); ++I) { 816 if (!isdigit(Name[I])) 817 break; 818 } 819 unsigned NumLanes; 820 Name.substr(0, I).getAsInteger(10, NumLanes); 821 Name = Name.drop_front(I); 822 T.Bitwidth = T.ElementBitwidth * NumLanes; 823 } else { 824 // Was scalar. 825 T.NumVectors = 0; 826 } 827 if (Name.consume_front("x")) { 828 unsigned I = 0; 829 for (I = 0; I < Name.size(); ++I) { 830 if (!isdigit(Name[I])) 831 break; 832 } 833 Name.substr(0, I).getAsInteger(10, T.NumVectors); 834 Name = Name.drop_front(I); 835 } 836 837 assert(Name.starts_with("_t") && "Malformed typedef!"); 838 return T; 839 } 840 841 void Type::applyTypespec(bool &Quad) { 842 std::string S = TS; 843 ScalarForMangling = false; 844 Kind = SInt; 845 ElementBitwidth = ~0U; 846 NumVectors = 1; 847 848 for (char I : S) { 849 switch (I) { 850 case 'S': 851 ScalarForMangling = true; 852 break; 853 case 'H': 854 NoManglingQ = true; 855 Quad = true; 856 break; 857 case 'Q': 858 Quad = true; 859 break; 860 case 'P': 861 Kind = Poly; 862 break; 863 case 'U': 864 Kind = UInt; 865 break; 866 case 'c': 867 ElementBitwidth = 8; 868 break; 869 case 'h': 870 Kind = Float; 871 [[fallthrough]]; 872 case 's': 873 ElementBitwidth = 16; 874 break; 875 case 'f': 876 Kind = Float; 877 [[fallthrough]]; 878 case 'i': 879 ElementBitwidth = 32; 880 break; 881 case 'd': 882 Kind = Float; 883 [[fallthrough]]; 884 case 'l': 885 ElementBitwidth = 64; 886 break; 887 case 'k': 888 ElementBitwidth = 128; 889 // Poly doesn't have a 128x1 type. 
890 if (isPoly()) 891 NumVectors = 0; 892 break; 893 case 'b': 894 Kind = BFloat16; 895 ElementBitwidth = 16; 896 break; 897 case 'm': 898 Kind = MFloat8; 899 ElementBitwidth = 8; 900 break; 901 default: 902 llvm_unreachable("Unhandled type code!"); 903 } 904 } 905 assert(ElementBitwidth != ~0U && "Bad element bitwidth!"); 906 907 Bitwidth = Quad ? 128 : 64; 908 } 909 910 void Type::applyModifiers(StringRef Mods) { 911 bool AppliedQuad = false; 912 applyTypespec(AppliedQuad); 913 914 for (char Mod : Mods) { 915 switch (Mod) { 916 case '.': 917 break; 918 case 'v': 919 Kind = Void; 920 break; 921 case 'S': 922 Kind = SInt; 923 break; 924 case 'U': 925 Kind = UInt; 926 break; 927 case 'B': 928 Kind = BFloat16; 929 ElementBitwidth = 16; 930 break; 931 case 'F': 932 Kind = Float; 933 break; 934 case 'P': 935 Kind = Poly; 936 break; 937 case 'V': 938 Kind = FPM; 939 Bitwidth = ElementBitwidth = 64; 940 NumVectors = 0; 941 Immediate = Constant = Pointer = false; 942 ScalarForMangling = NoManglingQ = true; 943 break; 944 case '>': 945 assert(ElementBitwidth < 128); 946 ElementBitwidth *= 2; 947 break; 948 case '<': 949 assert(ElementBitwidth > 8); 950 ElementBitwidth /= 2; 951 break; 952 case '1': 953 NumVectors = 0; 954 break; 955 case '2': 956 NumVectors = 2; 957 break; 958 case '3': 959 NumVectors = 3; 960 break; 961 case '4': 962 NumVectors = 4; 963 break; 964 case '*': 965 Pointer = true; 966 break; 967 case 'c': 968 Constant = true; 969 break; 970 case 'Q': 971 Bitwidth = 128; 972 break; 973 case 'q': 974 Bitwidth = 64; 975 break; 976 case 'I': 977 Kind = SInt; 978 ElementBitwidth = Bitwidth = 32; 979 NumVectors = 0; 980 Immediate = true; 981 break; 982 case 'p': 983 if (isPoly()) 984 Kind = UInt; 985 break; 986 case '!': 987 // Key type, handled elsewhere. 
988 break; 989 default: 990 llvm_unreachable("Unhandled character!"); 991 } 992 } 993 } 994 995 //===----------------------------------------------------------------------===// 996 // Intrinsic implementation 997 //===----------------------------------------------------------------------===// 998 999 StringRef Intrinsic::getNextModifiers(StringRef Proto, unsigned &Pos) const { 1000 if (Proto.size() == Pos) 1001 return StringRef(); 1002 else if (Proto[Pos] != '(') 1003 return Proto.substr(Pos++, 1); 1004 1005 size_t Start = Pos + 1; 1006 size_t End = Proto.find(')', Start); 1007 assert_with_loc(End != StringRef::npos, "unmatched modifier group paren"); 1008 Pos = End + 1; 1009 return Proto.slice(Start, End); 1010 } 1011 1012 std::string Intrinsic::getInstTypeCode(Type T, ClassKind CK) const { 1013 char typeCode = '\0'; 1014 bool printNumber = true; 1015 1016 if (CK == ClassB && TargetGuard == "neon") 1017 return ""; 1018 1019 if (this->CK == ClassV) 1020 return ""; 1021 1022 if (T.isBFloat16()) 1023 return "bf16"; 1024 1025 if (T.isMFloat8()) 1026 return "mf8"; 1027 1028 if (T.isPoly()) 1029 typeCode = 'p'; 1030 else if (T.isInteger()) 1031 typeCode = T.isSigned() ? 
's' : 'u'; 1032 else 1033 typeCode = 'f'; 1034 1035 if (CK == ClassI) { 1036 switch (typeCode) { 1037 default: 1038 break; 1039 case 's': 1040 case 'u': 1041 case 'p': 1042 typeCode = 'i'; 1043 break; 1044 } 1045 } 1046 if (CK == ClassB && TargetGuard == "neon") { 1047 typeCode = '\0'; 1048 } 1049 1050 std::string S; 1051 if (typeCode != '\0') 1052 S.push_back(typeCode); 1053 if (printNumber) 1054 S += utostr(T.getElementSizeInBits()); 1055 1056 return S; 1057 } 1058 1059 std::string Intrinsic::getBuiltinTypeStr() { 1060 ClassKind LocalCK = getClassKind(true); 1061 std::string S; 1062 1063 Type RetT = getReturnType(); 1064 if ((LocalCK == ClassI || LocalCK == ClassW) && RetT.isScalar() && 1065 !RetT.isFloating() && !RetT.isBFloat16() && !RetT.isMFloat8()) 1066 RetT.makeInteger(RetT.getElementSizeInBits(), false); 1067 1068 // Since the return value must be one type, return a vector type of the 1069 // appropriate width which we will bitcast. An exception is made for 1070 // returning structs of 2, 3, or 4 vectors which are returned in a sret-like 1071 // fashion, storing them to a pointer arg. 1072 if (RetT.getNumVectors() > 1) { 1073 S += "vv*"; // void result with void* first argument 1074 } else { 1075 if (RetT.isPoly()) 1076 RetT.makeInteger(RetT.getElementSizeInBits(), false); 1077 if (!RetT.isScalar() && RetT.isInteger() && !RetT.isSigned()) 1078 RetT.makeSigned(); 1079 1080 if (LocalCK == ClassB && RetT.isValue() && !RetT.isScalar()) 1081 // Cast to vector of 8-bit elements. 1082 RetT.makeInteger(8, true); 1083 1084 S += RetT.builtin_str(); 1085 } 1086 1087 for (unsigned I = 0; I < getNumParams(); ++I) { 1088 Type T = getParamType(I); 1089 if (T.isPoly()) 1090 T.makeInteger(T.getElementSizeInBits(), false); 1091 1092 if (LocalCK == ClassB && !T.isScalar()) 1093 T.makeInteger(8, true); 1094 // Halves always get converted to 8-bit elements. 
1095 if (T.isHalf() && T.isVector() && !T.isScalarForMangling()) 1096 T.makeInteger(8, true); 1097 1098 if (LocalCK == ClassI && T.isInteger()) 1099 T.makeSigned(); 1100 1101 if (isArgImmediate(I)) 1102 T.makeImmediate(32); 1103 1104 S += T.builtin_str(); 1105 } 1106 1107 // Extra constant integer to hold type class enum for this function, e.g. s8 1108 if (LocalCK == ClassB) 1109 S += "i"; 1110 1111 return S; 1112 } 1113 1114 std::string Intrinsic::getMangledName(bool ForceClassS) const { 1115 // Check if the prototype has a scalar operand with the type of the vector 1116 // elements. If not, bitcasting the args will take care of arg checking. 1117 // The actual signedness etc. will be taken care of with special enums. 1118 ClassKind LocalCK = CK; 1119 if (!protoHasScalar()) 1120 LocalCK = ClassB; 1121 1122 return mangleName(Name, ForceClassS ? ClassS : LocalCK); 1123 } 1124 1125 std::string Intrinsic::mangleName(std::string Name, ClassKind LocalCK) const { 1126 std::string typeCode = getInstTypeCode(BaseType, LocalCK); 1127 std::string S = Name; 1128 1129 if (Name == "vcvt_f16_f32" || Name == "vcvt_f32_f16" || 1130 Name == "vcvt_f32_f64" || Name == "vcvt_f64_f32" || 1131 Name == "vcvt_f32_bf16") 1132 return Name; 1133 1134 if (!typeCode.empty()) { 1135 // If the name ends with _xN (N = 2,3,4), insert the typeCode before _xN. 1136 if (Name.size() >= 3 && isdigit(Name.back()) && 1137 Name[Name.length() - 2] == 'x' && Name[Name.length() - 3] == '_') 1138 S.insert(S.length() - 3, "_" + typeCode); 1139 else 1140 S += "_" + typeCode; 1141 } 1142 1143 if (BaseType != InBaseType) { 1144 // A reinterpret - out the input base type at the end. 1145 S += "_" + getInstTypeCode(InBaseType, LocalCK); 1146 } 1147 1148 if (LocalCK == ClassB && TargetGuard == "neon") 1149 S += "_v"; 1150 1151 // Insert a 'q' before the first '_' character so that it ends up before 1152 // _lane or _n on vector-scalar operations. 
1153 if (BaseType.getSizeInBits() == 128 && !BaseType.noManglingQ()) { 1154 size_t Pos = S.find('_'); 1155 S.insert(Pos, "q"); 1156 } 1157 1158 char Suffix = '\0'; 1159 if (BaseType.isScalarForMangling()) { 1160 switch (BaseType.getElementSizeInBits()) { 1161 case 8: Suffix = 'b'; break; 1162 case 16: Suffix = 'h'; break; 1163 case 32: Suffix = 's'; break; 1164 case 64: Suffix = 'd'; break; 1165 default: llvm_unreachable("Bad suffix!"); 1166 } 1167 } 1168 if (Suffix != '\0') { 1169 size_t Pos = S.find('_'); 1170 S.insert(Pos, &Suffix, 1); 1171 } 1172 1173 return S; 1174 } 1175 1176 std::string Intrinsic::replaceParamsIn(std::string S) { 1177 while (S.find('$') != std::string::npos) { 1178 size_t Pos = S.find('$'); 1179 size_t End = Pos + 1; 1180 while (isalpha(S[End])) 1181 ++End; 1182 1183 std::string VarName = S.substr(Pos + 1, End - Pos - 1); 1184 assert_with_loc(Variables.find(VarName) != Variables.end(), 1185 "Variable not defined!"); 1186 S.replace(Pos, End - Pos, Variables.find(VarName)->second.getName()); 1187 } 1188 1189 return S; 1190 } 1191 1192 void Intrinsic::initVariables() { 1193 Variables.clear(); 1194 1195 // Modify the TypeSpec per-argument to get a concrete Type, and create 1196 // known variables for each. 
1197 for (unsigned I = 1; I < Types.size(); ++I) { 1198 char NameC = '0' + (I - 1); 1199 std::string Name = "p"; 1200 Name.push_back(NameC); 1201 1202 Variables[Name] = Variable(Types[I], Name + VariablePostfix); 1203 } 1204 RetVar = Variable(Types[0], "ret" + VariablePostfix); 1205 } 1206 1207 void Intrinsic::emitPrototype(StringRef NamePrefix) { 1208 if (UseMacro) { 1209 OS << "#define "; 1210 } else { 1211 OS << "__ai "; 1212 if (TargetGuard != "") 1213 OS << "__attribute__((target(\"" << TargetGuard << "\"))) "; 1214 OS << Types[0].str() << " "; 1215 } 1216 1217 OS << NamePrefix.str() << mangleName(Name, ClassS) << "("; 1218 1219 for (unsigned I = 0; I < getNumParams(); ++I) { 1220 if (I != 0) 1221 OS << ", "; 1222 1223 char NameC = '0' + I; 1224 std::string Name = "p"; 1225 Name.push_back(NameC); 1226 assert(Variables.find(Name) != Variables.end()); 1227 Variable &V = Variables[Name]; 1228 1229 if (!UseMacro) 1230 OS << V.getType().str() << " "; 1231 OS << V.getName(); 1232 } 1233 1234 OS << ")"; 1235 } 1236 1237 void Intrinsic::emitOpeningBrace() { 1238 if (UseMacro) 1239 OS << " __extension__ ({"; 1240 else 1241 OS << " {"; 1242 emitNewLine(); 1243 } 1244 1245 void Intrinsic::emitClosingBrace() { 1246 if (UseMacro) 1247 OS << "})"; 1248 else 1249 OS << "}"; 1250 } 1251 1252 void Intrinsic::emitNewLine() { 1253 if (UseMacro) 1254 OS << " \\\n"; 1255 else 1256 OS << "\n"; 1257 } 1258 1259 void Intrinsic::emitReverseVariable(Variable &Dest, Variable &Src) { 1260 if (Dest.getType().getNumVectors() > 1) { 1261 emitNewLine(); 1262 1263 for (unsigned K = 0; K < Dest.getType().getNumVectors(); ++K) { 1264 OS << " " << Dest.getName() << ".val[" << K << "] = " 1265 << "__builtin_shufflevector(" 1266 << Src.getName() << ".val[" << K << "], " 1267 << Src.getName() << ".val[" << K << "]"; 1268 for (int J = Dest.getType().getNumElements() - 1; J >= 0; --J) 1269 OS << ", " << J; 1270 OS << ");"; 1271 emitNewLine(); 1272 } 1273 } else { 1274 OS << " " << Dest.getName() 1275 
<< " = __builtin_shufflevector(" << Src.getName() << ", " << Src.getName(); 1276 for (int J = Dest.getType().getNumElements() - 1; J >= 0; --J) 1277 OS << ", " << J; 1278 OS << ");"; 1279 emitNewLine(); 1280 } 1281 } 1282 1283 void Intrinsic::emitArgumentReversal() { 1284 if (isBigEndianSafe()) 1285 return; 1286 1287 // Reverse all vector arguments. 1288 for (unsigned I = 0; I < getNumParams(); ++I) { 1289 std::string Name = "p" + utostr(I); 1290 std::string NewName = "rev" + utostr(I); 1291 1292 Variable &V = Variables[Name]; 1293 Variable NewV(V.getType(), NewName + VariablePostfix); 1294 1295 if (!NewV.getType().isVector() || NewV.getType().getNumElements() == 1) 1296 continue; 1297 1298 OS << " " << NewV.getType().str() << " " << NewV.getName() << ";"; 1299 emitReverseVariable(NewV, V); 1300 V = NewV; 1301 } 1302 } 1303 1304 void Intrinsic::emitReturnVarDecl() { 1305 assert(RetVar.getType() == Types[0]); 1306 // Create a return variable, if we're not void. 1307 if (!RetVar.getType().isVoid()) { 1308 OS << " " << RetVar.getType().str() << " " << RetVar.getName() << ";"; 1309 emitNewLine(); 1310 } 1311 } 1312 1313 void Intrinsic::emitReturnReversal() { 1314 if (isBigEndianSafe()) 1315 return; 1316 if (!getReturnType().isVector() || getReturnType().isVoid() || 1317 getReturnType().getNumElements() == 1) 1318 return; 1319 emitReverseVariable(RetVar, RetVar); 1320 } 1321 1322 void Intrinsic::emitShadowedArgs() { 1323 // Macro arguments are not type-checked like inline function arguments, 1324 // so assign them to local temporaries to get the right type checking. 1325 if (!UseMacro) 1326 return; 1327 1328 for (unsigned I = 0; I < getNumParams(); ++I) { 1329 // Do not create a temporary for an immediate argument. 1330 // That would defeat the whole point of using a macro! 1331 if (getParamType(I).isImmediate()) 1332 continue; 1333 // Do not create a temporary for pointer arguments. The input 1334 // pointer may have an alignment hint. 
1335 if (getParamType(I).isPointer()) 1336 continue; 1337 1338 std::string Name = "p" + utostr(I); 1339 1340 assert(Variables.find(Name) != Variables.end()); 1341 Variable &V = Variables[Name]; 1342 1343 std::string NewName = "s" + utostr(I); 1344 Variable V2(V.getType(), NewName + VariablePostfix); 1345 1346 OS << " " << V2.getType().str() << " " << V2.getName() << " = " 1347 << V.getName() << ";"; 1348 emitNewLine(); 1349 1350 V = V2; 1351 } 1352 } 1353 1354 bool Intrinsic::protoHasScalar() const { 1355 return any_of(Types, 1356 [](const Type &T) { return T.isScalar() && !T.isImmediate(); }); 1357 } 1358 1359 void Intrinsic::emitBodyAsBuiltinCall() { 1360 std::string S; 1361 1362 // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit 1363 // sret-like argument. 1364 bool SRet = getReturnType().getNumVectors() >= 2; 1365 1366 StringRef N = Name; 1367 ClassKind LocalCK = CK; 1368 if (!protoHasScalar()) 1369 LocalCK = ClassB; 1370 1371 if (!getReturnType().isVoid() && !SRet) 1372 S += "(" + RetVar.getType().str() + ") "; 1373 1374 S += "__builtin_neon_" + mangleName(std::string(N), LocalCK) + "("; 1375 1376 if (SRet) 1377 S += "&" + RetVar.getName() + ", "; 1378 1379 for (unsigned I = 0; I < getNumParams(); ++I) { 1380 Variable &V = Variables["p" + utostr(I)]; 1381 Type T = V.getType(); 1382 1383 // Handle multiple-vector values specially, emitting each subvector as an 1384 // argument to the builtin. 1385 if (T.getNumVectors() > 1) { 1386 // Check if an explicit cast is needed. 1387 std::string Cast; 1388 if (LocalCK == ClassB) { 1389 Type T2 = T; 1390 T2.makeOneVector(); 1391 T2.makeInteger(8, /*Sign=*/true); 1392 Cast = "(" + T2.str() + ")"; 1393 } 1394 1395 for (unsigned J = 0; J < T.getNumVectors(); ++J) 1396 S += Cast + V.getName() + ".val[" + utostr(J) + "], "; 1397 continue; 1398 } 1399 1400 std::string Arg = V.getName(); 1401 Type CastToType = T; 1402 1403 // Check if an explicit cast is needed. 
1404 if (CastToType.isVector() && 1405 (LocalCK == ClassB || (T.isHalf() && !T.isScalarForMangling()))) { 1406 CastToType.makeInteger(8, true); 1407 Arg = "(" + CastToType.str() + ")" + Arg; 1408 } else if (CastToType.isVector() && LocalCK == ClassI) { 1409 if (CastToType.isInteger()) 1410 CastToType.makeSigned(); 1411 Arg = "(" + CastToType.str() + ")" + Arg; 1412 } 1413 1414 S += Arg + ", "; 1415 } 1416 1417 // Extra constant integer to hold type class enum for this function, e.g. s8 1418 if (getClassKind(true) == ClassB) { 1419 S += utostr(getPolymorphicKeyType().getNeonEnum()); 1420 } else { 1421 // Remove extraneous ", ". 1422 S.pop_back(); 1423 S.pop_back(); 1424 } 1425 S += ");"; 1426 1427 std::string RetExpr; 1428 if (!SRet && !RetVar.getType().isVoid()) 1429 RetExpr = RetVar.getName() + " = "; 1430 1431 OS << " " << RetExpr << S; 1432 emitNewLine(); 1433 } 1434 1435 void Intrinsic::emitBody(StringRef CallPrefix) { 1436 std::vector<std::string> Lines; 1437 1438 if (!Body || Body->getValues().empty()) { 1439 // Nothing specific to output - must output a builtin. 1440 emitBodyAsBuiltinCall(); 1441 return; 1442 } 1443 1444 // We have a list of "things to output". The last should be returned. 
1445 for (auto *I : Body->getValues()) { 1446 if (const auto *SI = dyn_cast<StringInit>(I)) { 1447 Lines.push_back(replaceParamsIn(SI->getAsString())); 1448 } else if (const auto *DI = dyn_cast<DagInit>(I)) { 1449 DagEmitter DE(*this, CallPrefix); 1450 Lines.push_back(DE.emitDag(DI).second + ";"); 1451 } 1452 } 1453 1454 assert(!Lines.empty() && "Empty def?"); 1455 if (!RetVar.getType().isVoid()) 1456 Lines.back().insert(0, RetVar.getName() + " = "); 1457 1458 for (auto &L : Lines) { 1459 OS << " " << L; 1460 emitNewLine(); 1461 } 1462 } 1463 1464 void Intrinsic::emitReturn() { 1465 if (RetVar.getType().isVoid()) 1466 return; 1467 if (UseMacro) 1468 OS << " " << RetVar.getName() << ";"; 1469 else 1470 OS << " return " << RetVar.getName() << ";"; 1471 emitNewLine(); 1472 } 1473 1474 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDag(const DagInit *DI) { 1475 // At this point we should only be seeing a def. 1476 const DefInit *DefI = cast<DefInit>(DI->getOperator()); 1477 std::string Op = DefI->getAsString(); 1478 1479 if (Op == "cast" || Op == "bitcast") 1480 return emitDagCast(DI, Op == "bitcast"); 1481 if (Op == "shuffle") 1482 return emitDagShuffle(DI); 1483 if (Op == "dup") 1484 return emitDagDup(DI); 1485 if (Op == "dup_typed") 1486 return emitDagDupTyped(DI); 1487 if (Op == "splat") 1488 return emitDagSplat(DI); 1489 if (Op == "save_temp") 1490 return emitDagSaveTemp(DI); 1491 if (Op == "op") 1492 return emitDagOp(DI); 1493 if (Op == "call" || Op == "call_mangled") 1494 return emitDagCall(DI, Op == "call_mangled"); 1495 if (Op == "name_replace") 1496 return emitDagNameReplace(DI); 1497 if (Op == "literal") 1498 return emitDagLiteral(DI); 1499 assert_with_loc(false, "Unknown operation!"); 1500 return std::make_pair(Type::getVoid(), ""); 1501 } 1502 1503 std::pair<Type, std::string> 1504 Intrinsic::DagEmitter::emitDagOp(const DagInit *DI) { 1505 std::string Op = cast<StringInit>(DI->getArg(0))->getAsUnquotedString(); 1506 if (DI->getNumArgs() == 2) { 
1507 // Unary op. 1508 std::pair<Type, std::string> R = 1509 emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1))); 1510 return std::make_pair(R.first, Op + R.second); 1511 } else { 1512 assert(DI->getNumArgs() == 3 && "Can only handle unary and binary ops!"); 1513 std::pair<Type, std::string> R1 = 1514 emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1))); 1515 std::pair<Type, std::string> R2 = 1516 emitDagArg(DI->getArg(2), std::string(DI->getArgNameStr(2))); 1517 assert_with_loc(R1.first == R2.first, "Argument type mismatch!"); 1518 return std::make_pair(R1.first, R1.second + " " + Op + " " + R2.second); 1519 } 1520 } 1521 1522 std::pair<Type, std::string> 1523 Intrinsic::DagEmitter::emitDagCall(const DagInit *DI, bool MatchMangledName) { 1524 std::vector<Type> Types; 1525 std::vector<std::string> Values; 1526 for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) { 1527 std::pair<Type, std::string> R = 1528 emitDagArg(DI->getArg(I + 1), std::string(DI->getArgNameStr(I + 1))); 1529 Types.push_back(R.first); 1530 Values.push_back(R.second); 1531 } 1532 1533 // Look up the called intrinsic. 1534 std::string N; 1535 if (const auto *SI = dyn_cast<StringInit>(DI->getArg(0))) 1536 N = SI->getAsUnquotedString(); 1537 else 1538 N = emitDagArg(DI->getArg(0), "").second; 1539 std::optional<std::string> MangledName; 1540 if (MatchMangledName) { 1541 if (Intr.getRecord()->getValueAsString("Name").contains("laneq")) 1542 N += "q"; 1543 MangledName = Intr.mangleName(N, ClassS); 1544 } 1545 Intrinsic &Callee = Intr.Emitter.getIntrinsic(N, Types, MangledName); 1546 1547 // Make sure the callee is known as an early def. 1548 Callee.setNeededEarly(); 1549 Intr.Dependencies.insert(&Callee); 1550 1551 // Now create the call itself. 
1552 std::string S; 1553 if (!Callee.isBigEndianSafe()) 1554 S += CallPrefix.str(); 1555 S += Callee.getMangledName(true) + "("; 1556 for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) { 1557 if (I != 0) 1558 S += ", "; 1559 S += Values[I]; 1560 } 1561 S += ")"; 1562 1563 return std::make_pair(Callee.getReturnType(), S); 1564 } 1565 1566 std::pair<Type, std::string> 1567 Intrinsic::DagEmitter::emitDagCast(const DagInit *DI, bool IsBitCast) { 1568 // (cast MOD* VAL) -> cast VAL to type given by MOD. 1569 std::pair<Type, std::string> R = 1570 emitDagArg(DI->getArg(DI->getNumArgs() - 1), 1571 std::string(DI->getArgNameStr(DI->getNumArgs() - 1))); 1572 Type castToType = R.first; 1573 for (unsigned ArgIdx = 0; ArgIdx < DI->getNumArgs() - 1; ++ArgIdx) { 1574 1575 // MOD can take several forms: 1576 // 1. $X - take the type of parameter / variable X. 1577 // 2. The value "R" - take the type of the return type. 1578 // 3. a type string 1579 // 4. The value "U" or "S" to switch the signedness. 1580 // 5. The value "H" or "D" to half or double the bitwidth. 1581 // 6. The value "8" to convert to 8-bit (signed) integer lanes. 
1582 if (!DI->getArgNameStr(ArgIdx).empty()) { 1583 assert_with_loc(Intr.Variables.find(DI->getArgNameStr(ArgIdx)) != 1584 Intr.Variables.end(), 1585 "Variable not found"); 1586 castToType = 1587 Intr.Variables[std::string(DI->getArgNameStr(ArgIdx))].getType(); 1588 } else { 1589 const auto *SI = dyn_cast<StringInit>(DI->getArg(ArgIdx)); 1590 assert_with_loc(SI, "Expected string type or $Name for cast type"); 1591 1592 if (SI->getAsUnquotedString() == "R") { 1593 castToType = Intr.getReturnType(); 1594 } else if (SI->getAsUnquotedString() == "U") { 1595 castToType.makeUnsigned(); 1596 } else if (SI->getAsUnquotedString() == "S") { 1597 castToType.makeSigned(); 1598 } else if (SI->getAsUnquotedString() == "H") { 1599 castToType.halveLanes(); 1600 } else if (SI->getAsUnquotedString() == "D") { 1601 castToType.doubleLanes(); 1602 } else if (SI->getAsUnquotedString() == "8") { 1603 castToType.makeInteger(8, true); 1604 } else if (SI->getAsUnquotedString() == "32") { 1605 castToType.make32BitElement(); 1606 } else { 1607 castToType = Type::fromTypedefName(SI->getAsUnquotedString()); 1608 assert_with_loc(!castToType.isVoid(), "Unknown typedef"); 1609 } 1610 } 1611 } 1612 1613 std::string S; 1614 if (IsBitCast) 1615 S = "__builtin_bit_cast(" + castToType.str() + ", " + R.second + ")"; 1616 else 1617 S = "(" + castToType.str() + ")(" + R.second + ")"; 1618 1619 return std::make_pair(castToType, S); 1620 } 1621 1622 std::pair<Type, std::string> 1623 Intrinsic::DagEmitter::emitDagShuffle(const DagInit *DI) { 1624 // See the documentation in arm_neon.td for a description of these operators. 
1625 class LowHalf : public SetTheory::Operator { 1626 public: 1627 void apply(SetTheory &ST, const DagInit *Expr, SetTheory::RecSet &Elts, 1628 ArrayRef<SMLoc> Loc) override { 1629 SetTheory::RecSet Elts2; 1630 ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts2, Loc); 1631 Elts.insert(Elts2.begin(), Elts2.begin() + (Elts2.size() / 2)); 1632 } 1633 }; 1634 1635 class HighHalf : public SetTheory::Operator { 1636 public: 1637 void apply(SetTheory &ST, const DagInit *Expr, SetTheory::RecSet &Elts, 1638 ArrayRef<SMLoc> Loc) override { 1639 SetTheory::RecSet Elts2; 1640 ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts2, Loc); 1641 Elts.insert(Elts2.begin() + (Elts2.size() / 2), Elts2.end()); 1642 } 1643 }; 1644 1645 class Rev : public SetTheory::Operator { 1646 unsigned ElementSize; 1647 1648 public: 1649 Rev(unsigned ElementSize) : ElementSize(ElementSize) {} 1650 1651 void apply(SetTheory &ST, const DagInit *Expr, SetTheory::RecSet &Elts, 1652 ArrayRef<SMLoc> Loc) override { 1653 SetTheory::RecSet Elts2; 1654 ST.evaluate(Expr->arg_begin() + 1, Expr->arg_end(), Elts2, Loc); 1655 1656 int64_t VectorSize = cast<IntInit>(Expr->getArg(0))->getValue(); 1657 VectorSize /= ElementSize; 1658 1659 std::vector<const Record *> Revved; 1660 for (unsigned VI = 0; VI < Elts2.size(); VI += VectorSize) { 1661 for (int LI = VectorSize - 1; LI >= 0; --LI) { 1662 Revved.push_back(Elts2[VI + LI]); 1663 } 1664 } 1665 1666 Elts.insert(Revved.begin(), Revved.end()); 1667 } 1668 }; 1669 1670 class MaskExpander : public SetTheory::Expander { 1671 unsigned N; 1672 1673 public: 1674 MaskExpander(unsigned N) : N(N) {} 1675 1676 void expand(SetTheory &ST, const Record *R, 1677 SetTheory::RecSet &Elts) override { 1678 unsigned Addend = 0; 1679 if (R->getName() == "mask0") 1680 Addend = 0; 1681 else if (R->getName() == "mask1") 1682 Addend = N; 1683 else 1684 return; 1685 for (unsigned I = 0; I < N; ++I) 1686 Elts.insert(R->getRecords().getDef("sv" + utostr(I + Addend))); 1687 } 1688 }; 1689 
1690 // (shuffle arg1, arg2, sequence) 1691 std::pair<Type, std::string> Arg1 = 1692 emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0))); 1693 std::pair<Type, std::string> Arg2 = 1694 emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1))); 1695 assert_with_loc(Arg1.first == Arg2.first, 1696 "Different types in arguments to shuffle!"); 1697 1698 SetTheory ST; 1699 SetTheory::RecSet Elts; 1700 ST.addOperator("lowhalf", std::make_unique<LowHalf>()); 1701 ST.addOperator("highhalf", std::make_unique<HighHalf>()); 1702 ST.addOperator("rev", 1703 std::make_unique<Rev>(Arg1.first.getElementSizeInBits())); 1704 ST.addExpander("MaskExpand", 1705 std::make_unique<MaskExpander>(Arg1.first.getNumElements())); 1706 ST.evaluate(DI->getArg(2), Elts, {}); 1707 1708 std::string S = "__builtin_shufflevector(" + Arg1.second + ", " + Arg2.second; 1709 for (auto &E : Elts) { 1710 StringRef Name = E->getName(); 1711 assert_with_loc(Name.starts_with("sv"), 1712 "Incorrect element kind in shuffle mask!"); 1713 S += ", " + Name.drop_front(2).str(); 1714 } 1715 S += ")"; 1716 1717 // Recalculate the return type - the shuffle may have halved or doubled it. 
1718 Type T(Arg1.first); 1719 if (Elts.size() > T.getNumElements()) { 1720 assert_with_loc( 1721 Elts.size() == T.getNumElements() * 2, 1722 "Can only double or half the number of elements in a shuffle!"); 1723 T.doubleLanes(); 1724 } else if (Elts.size() < T.getNumElements()) { 1725 assert_with_loc( 1726 Elts.size() == T.getNumElements() / 2, 1727 "Can only double or half the number of elements in a shuffle!"); 1728 T.halveLanes(); 1729 } 1730 1731 return std::make_pair(T, S); 1732 } 1733 1734 std::pair<Type, std::string> 1735 Intrinsic::DagEmitter::emitDagDup(const DagInit *DI) { 1736 assert_with_loc(DI->getNumArgs() == 1, "dup() expects one argument"); 1737 std::pair<Type, std::string> A = 1738 emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0))); 1739 assert_with_loc(A.first.isScalar(), "dup() expects a scalar argument"); 1740 1741 Type T = Intr.getBaseType(); 1742 assert_with_loc(T.isVector(), "dup() used but default type is scalar!"); 1743 std::string S = "(" + T.str() + ") {"; 1744 for (unsigned I = 0; I < T.getNumElements(); ++I) { 1745 if (I != 0) 1746 S += ", "; 1747 S += A.second; 1748 } 1749 S += "}"; 1750 1751 return std::make_pair(T, S); 1752 } 1753 1754 std::pair<Type, std::string> 1755 Intrinsic::DagEmitter::emitDagDupTyped(const DagInit *DI) { 1756 assert_with_loc(DI->getNumArgs() == 2, "dup_typed() expects two arguments"); 1757 std::pair<Type, std::string> B = 1758 emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1))); 1759 assert_with_loc(B.first.isScalar(), 1760 "dup_typed() requires a scalar as the second argument"); 1761 Type T; 1762 // If the type argument is a constant string, construct the type directly. 
1763 if (const auto *SI = dyn_cast<StringInit>(DI->getArg(0))) { 1764 T = Type::fromTypedefName(SI->getAsUnquotedString()); 1765 assert_with_loc(!T.isVoid(), "Unknown typedef"); 1766 } else 1767 T = emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0))).first; 1768 1769 assert_with_loc(T.isVector(), "dup_typed() used but target type is scalar!"); 1770 std::string S = "(" + T.str() + ") {"; 1771 for (unsigned I = 0; I < T.getNumElements(); ++I) { 1772 if (I != 0) 1773 S += ", "; 1774 S += B.second; 1775 } 1776 S += "}"; 1777 1778 return std::make_pair(T, S); 1779 } 1780 1781 std::pair<Type, std::string> 1782 Intrinsic::DagEmitter::emitDagSplat(const DagInit *DI) { 1783 assert_with_loc(DI->getNumArgs() == 2, "splat() expects two arguments"); 1784 std::pair<Type, std::string> A = 1785 emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0))); 1786 std::pair<Type, std::string> B = 1787 emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1))); 1788 1789 assert_with_loc(B.first.isScalar(), 1790 "splat() requires a scalar int as the second argument"); 1791 1792 std::string S = "__builtin_shufflevector(" + A.second + ", " + A.second; 1793 for (unsigned I = 0; I < Intr.getBaseType().getNumElements(); ++I) { 1794 S += ", " + B.second; 1795 } 1796 S += ")"; 1797 1798 return std::make_pair(Intr.getBaseType(), S); 1799 } 1800 1801 std::pair<Type, std::string> 1802 Intrinsic::DagEmitter::emitDagSaveTemp(const DagInit *DI) { 1803 assert_with_loc(DI->getNumArgs() == 2, "save_temp() expects two arguments"); 1804 std::pair<Type, std::string> A = 1805 emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1))); 1806 1807 assert_with_loc(!A.first.isVoid(), 1808 "Argument to save_temp() must have non-void type!"); 1809 1810 std::string N = std::string(DI->getArgNameStr(0)); 1811 assert_with_loc(!N.empty(), 1812 "save_temp() expects a name as the first argument"); 1813 1814 assert_with_loc(Intr.Variables.find(N) == Intr.Variables.end(), 1815 "Variable already defined!"); 
1816 Intr.Variables[N] = Variable(A.first, N + Intr.VariablePostfix); 1817 1818 std::string S = 1819 A.first.str() + " " + Intr.Variables[N].getName() + " = " + A.second; 1820 1821 return std::make_pair(Type::getVoid(), S); 1822 } 1823 1824 std::pair<Type, std::string> 1825 Intrinsic::DagEmitter::emitDagNameReplace(const DagInit *DI) { 1826 std::string S = Intr.Name; 1827 1828 assert_with_loc(DI->getNumArgs() == 2, "name_replace requires 2 arguments!"); 1829 std::string ToReplace = cast<StringInit>(DI->getArg(0))->getAsUnquotedString(); 1830 std::string ReplaceWith = cast<StringInit>(DI->getArg(1))->getAsUnquotedString(); 1831 1832 size_t Idx = S.find(ToReplace); 1833 1834 assert_with_loc(Idx != std::string::npos, "name should contain '" + ToReplace + "'!"); 1835 S.replace(Idx, ToReplace.size(), ReplaceWith); 1836 1837 return std::make_pair(Type::getVoid(), S); 1838 } 1839 1840 std::pair<Type, std::string> 1841 Intrinsic::DagEmitter::emitDagLiteral(const DagInit *DI) { 1842 std::string Ty = cast<StringInit>(DI->getArg(0))->getAsUnquotedString(); 1843 std::string Value = cast<StringInit>(DI->getArg(1))->getAsUnquotedString(); 1844 return std::make_pair(Type::fromTypedefName(Ty), Value); 1845 } 1846 1847 std::pair<Type, std::string> 1848 Intrinsic::DagEmitter::emitDagArg(const Init *Arg, std::string ArgName) { 1849 if (!ArgName.empty()) { 1850 assert_with_loc(!Arg->isComplete(), 1851 "Arguments must either be DAGs or names, not both!"); 1852 assert_with_loc(Intr.Variables.find(ArgName) != Intr.Variables.end(), 1853 "Variable not defined!"); 1854 Variable &V = Intr.Variables[ArgName]; 1855 return std::make_pair(V.getType(), V.getName()); 1856 } 1857 1858 assert(Arg && "Neither ArgName nor Arg?!"); 1859 const auto *DI = dyn_cast<DagInit>(Arg); 1860 assert_with_loc(DI, "Arguments must either be DAGs or names!"); 1861 1862 return emitDag(DI); 1863 } 1864 1865 std::string Intrinsic::generate() { 1866 // Avoid duplicated code for big and little endian 1867 if 
(isBigEndianSafe()) { 1868 generateImpl(false, "", ""); 1869 return OS.str(); 1870 } 1871 // Little endian intrinsics are simple and don't require any argument 1872 // swapping. 1873 OS << "#ifdef __LITTLE_ENDIAN__\n"; 1874 1875 generateImpl(false, "", ""); 1876 1877 OS << "#else\n"; 1878 1879 // Big endian intrinsics are more complex. The user intended these 1880 // intrinsics to operate on a vector "as-if" loaded by (V)LDR, 1881 // but we load as-if (V)LD1. So we should swap all arguments and 1882 // swap the return value too. 1883 // 1884 // If we call sub-intrinsics, we should call a version that does 1885 // not re-swap the arguments! 1886 generateImpl(true, "", "__noswap_"); 1887 1888 // If we're needed early, create a non-swapping variant for 1889 // big-endian. 1890 if (NeededEarly) { 1891 generateImpl(false, "__noswap_", "__noswap_"); 1892 } 1893 OS << "#endif\n\n"; 1894 1895 return OS.str(); 1896 } 1897 1898 void Intrinsic::generateImpl(bool ReverseArguments, 1899 StringRef NamePrefix, StringRef CallPrefix) { 1900 CurrentRecord = R; 1901 1902 // If we call a macro, our local variables may be corrupted due to 1903 // lack of proper lexical scoping. So, add a globally unique postfix 1904 // to every variable. 1905 // 1906 // indexBody() should have set up the Dependencies set by now. 1907 for (auto *I : Dependencies) 1908 if (I->UseMacro) { 1909 VariablePostfix = "_" + utostr(Emitter.getUniqueNumber()); 1910 break; 1911 } 1912 1913 initVariables(); 1914 1915 emitPrototype(NamePrefix); 1916 1917 if (IsUnavailable) { 1918 OS << " __attribute__((unavailable));"; 1919 } else { 1920 emitOpeningBrace(); 1921 // Emit return variable declaration first as to not trigger 1922 // -Wdeclaration-after-statement. 
1923 emitReturnVarDecl(); 1924 emitShadowedArgs(); 1925 if (ReverseArguments) 1926 emitArgumentReversal(); 1927 emitBody(CallPrefix); 1928 if (ReverseArguments) 1929 emitReturnReversal(); 1930 emitReturn(); 1931 emitClosingBrace(); 1932 } 1933 OS << "\n"; 1934 1935 CurrentRecord = nullptr; 1936 } 1937 1938 void Intrinsic::indexBody() { 1939 CurrentRecord = R; 1940 1941 initVariables(); 1942 // Emit return variable declaration first as to not trigger 1943 // -Wdeclaration-after-statement. 1944 emitReturnVarDecl(); 1945 emitBody(""); 1946 OS.str(""); 1947 1948 CurrentRecord = nullptr; 1949 } 1950 1951 //===----------------------------------------------------------------------===// 1952 // NeonEmitter implementation 1953 //===----------------------------------------------------------------------===// 1954 1955 Intrinsic &NeonEmitter::getIntrinsic(StringRef Name, ArrayRef<Type> Types, 1956 std::optional<std::string> MangledName) { 1957 // First, look up the name in the intrinsic map. 1958 assert_with_loc(IntrinsicMap.find(Name) != IntrinsicMap.end(), 1959 ("Intrinsic '" + Name + "' not found!").str()); 1960 auto &V = IntrinsicMap.find(Name)->second; 1961 std::vector<Intrinsic *> GoodVec; 1962 1963 // Create a string to print if we end up failing. 1964 std::string ErrMsg = "looking up intrinsic '" + Name.str() + "("; 1965 for (unsigned I = 0; I < Types.size(); ++I) { 1966 if (I != 0) 1967 ErrMsg += ", "; 1968 ErrMsg += Types[I].str(); 1969 } 1970 ErrMsg += ")'\n"; 1971 ErrMsg += "Available overloads:\n"; 1972 1973 // Now, look through each intrinsic implementation and see if the types are 1974 // compatible. 
1975 for (auto &I : V) { 1976 ErrMsg += " - " + I.getReturnType().str() + " " + I.getMangledName(); 1977 ErrMsg += "("; 1978 for (unsigned A = 0; A < I.getNumParams(); ++A) { 1979 if (A != 0) 1980 ErrMsg += ", "; 1981 ErrMsg += I.getParamType(A).str(); 1982 } 1983 ErrMsg += ")\n"; 1984 1985 if (MangledName && MangledName != I.getMangledName(true)) 1986 continue; 1987 1988 if (I.getNumParams() != Types.size()) 1989 continue; 1990 1991 unsigned ArgNum = 0; 1992 bool MatchingArgumentTypes = all_of(Types, [&](const auto &Type) { 1993 return Type == I.getParamType(ArgNum++); 1994 }); 1995 1996 if (MatchingArgumentTypes) 1997 GoodVec.push_back(&I); 1998 } 1999 2000 assert_with_loc(!GoodVec.empty(), 2001 "No compatible intrinsic found - " + ErrMsg); 2002 assert_with_loc(GoodVec.size() == 1, "Multiple overloads found - " + ErrMsg); 2003 2004 return *GoodVec.front(); 2005 } 2006 2007 void NeonEmitter::createIntrinsic(const Record *R, 2008 SmallVectorImpl<Intrinsic *> &Out) { 2009 std::string Name = std::string(R->getValueAsString("Name")); 2010 std::string Proto = std::string(R->getValueAsString("Prototype")); 2011 std::string Types = std::string(R->getValueAsString("Types")); 2012 const Record *OperationRec = R->getValueAsDef("Operation"); 2013 bool BigEndianSafe = R->getValueAsBit("BigEndianSafe"); 2014 std::string ArchGuard = std::string(R->getValueAsString("ArchGuard")); 2015 std::string TargetGuard = std::string(R->getValueAsString("TargetGuard")); 2016 bool IsUnavailable = OperationRec->getValueAsBit("Unavailable"); 2017 std::string CartesianProductWith = std::string(R->getValueAsString("CartesianProductWith")); 2018 2019 // Set the global current record. This allows assert_with_loc to produce 2020 // decent location information even when highly nested. 
2021 CurrentRecord = R; 2022 2023 const ListInit *Body = OperationRec->getValueAsListInit("Ops"); 2024 2025 std::vector<TypeSpec> TypeSpecs = TypeSpec::fromTypeSpecs(Types); 2026 2027 ClassKind CK = ClassNone; 2028 if (R->getSuperClasses().size() >= 2) 2029 CK = ClassMap[R->getSuperClasses()[1].first]; 2030 2031 std::vector<std::pair<TypeSpec, TypeSpec>> NewTypeSpecs; 2032 if (!CartesianProductWith.empty()) { 2033 std::vector<TypeSpec> ProductTypeSpecs = TypeSpec::fromTypeSpecs(CartesianProductWith); 2034 for (auto TS : TypeSpecs) { 2035 Type DefaultT(TS, "."); 2036 for (auto SrcTS : ProductTypeSpecs) { 2037 Type DefaultSrcT(SrcTS, "."); 2038 if (TS == SrcTS || 2039 DefaultSrcT.getSizeInBits() != DefaultT.getSizeInBits()) 2040 continue; 2041 NewTypeSpecs.push_back(std::make_pair(TS, SrcTS)); 2042 } 2043 } 2044 } else { 2045 for (auto TS : TypeSpecs) { 2046 NewTypeSpecs.push_back(std::make_pair(TS, TS)); 2047 } 2048 } 2049 2050 sort(NewTypeSpecs); 2051 NewTypeSpecs.erase(std::unique(NewTypeSpecs.begin(), NewTypeSpecs.end()), 2052 NewTypeSpecs.end()); 2053 auto &Entry = IntrinsicMap[Name]; 2054 2055 for (auto &I : NewTypeSpecs) { 2056 Entry.emplace_back(R, Name, Proto, I.first, I.second, CK, Body, *this, 2057 ArchGuard, TargetGuard, IsUnavailable, BigEndianSafe); 2058 Out.push_back(&Entry.back()); 2059 } 2060 2061 CurrentRecord = nullptr; 2062 } 2063 2064 /// genBuiltinsDef: Generate the BuiltinsARM.def and BuiltinsAArch64.def 2065 /// declaration of builtins, checking for unique builtin declarations. 2066 void NeonEmitter::genBuiltinsDef(raw_ostream &OS, 2067 SmallVectorImpl<Intrinsic *> &Defs) { 2068 OS << "#ifdef GET_NEON_BUILTINS\n"; 2069 2070 // We only want to emit a builtin once, and we want to emit them in 2071 // alphabetical order, so use a std::set. 
2072 std::set<std::pair<std::string, std::string>> Builtins; 2073 2074 for (auto *Def : Defs) { 2075 if (Def->hasBody()) 2076 continue; 2077 2078 std::string S = "__builtin_neon_" + Def->getMangledName() + ", \""; 2079 S += Def->getBuiltinTypeStr(); 2080 S += "\", \"n\""; 2081 2082 Builtins.emplace(S, Def->getTargetGuard()); 2083 } 2084 2085 for (auto &S : Builtins) { 2086 if (S.second == "") 2087 OS << "BUILTIN("; 2088 else 2089 OS << "TARGET_BUILTIN("; 2090 OS << S.first; 2091 if (S.second == "") 2092 OS << ")\n"; 2093 else 2094 OS << ", \"" << S.second << "\")\n"; 2095 } 2096 2097 OS << "#endif\n\n"; 2098 } 2099 2100 void NeonEmitter::genStreamingSVECompatibleList( 2101 raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs) { 2102 OS << "#ifdef GET_NEON_STREAMING_COMPAT_FLAG\n"; 2103 2104 std::set<std::string> Emitted; 2105 for (auto *Def : Defs) { 2106 // If the def has a body (that is, it has Operation DAGs), it won't call 2107 // __builtin_neon_* so we don't need to generate a definition for it. 2108 if (Def->hasBody()) 2109 continue; 2110 2111 std::string Name = Def->getMangledName(); 2112 if (Emitted.find(Name) != Emitted.end()) 2113 continue; 2114 2115 // FIXME: We should make exceptions here for some NEON builtins that are 2116 // permitted in streaming mode. 2117 OS << "case NEON::BI__builtin_neon_" << Name 2118 << ": BuiltinType = ArmNonStreaming; break;\n"; 2119 Emitted.insert(Name); 2120 } 2121 OS << "#endif\n\n"; 2122 } 2123 2124 /// Generate the ARM and AArch64 overloaded type checking code for 2125 /// SemaChecking.cpp, checking for unique builtin declarations. 2126 void NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS, 2127 SmallVectorImpl<Intrinsic *> &Defs) { 2128 OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n"; 2129 2130 // We record each overload check line before emitting because subsequent Inst 2131 // definitions may extend the number of permitted types (i.e. augment the 2132 // Mask). Use std::map to avoid sorting the table by hash number. 
2133 struct OverloadInfo { 2134 uint64_t Mask = 0ULL; 2135 int PtrArgNum = 0; 2136 bool HasConstPtr = false; 2137 OverloadInfo() = default; 2138 }; 2139 std::map<std::string, OverloadInfo> OverloadMap; 2140 2141 for (auto *Def : Defs) { 2142 // If the def has a body (that is, it has Operation DAGs), it won't call 2143 // __builtin_neon_* so we don't need to generate a definition for it. 2144 if (Def->hasBody()) 2145 continue; 2146 // Functions which have a scalar argument cannot be overloaded, no need to 2147 // check them if we are emitting the type checking code. 2148 if (Def->protoHasScalar()) 2149 continue; 2150 2151 uint64_t Mask = 0ULL; 2152 Mask |= 1ULL << Def->getPolymorphicKeyType().getNeonEnum(); 2153 2154 // Check if the function has a pointer or const pointer argument. 2155 int PtrArgNum = -1; 2156 bool HasConstPtr = false; 2157 for (unsigned I = 0; I < Def->getNumParams(); ++I) { 2158 const auto &Type = Def->getParamType(I); 2159 if (Type.isPointer()) { 2160 PtrArgNum = I; 2161 HasConstPtr = Type.isConstPointer(); 2162 } 2163 } 2164 2165 // For sret builtins, adjust the pointer argument index. 2166 if (PtrArgNum >= 0 && Def->getReturnType().getNumVectors() > 1) 2167 PtrArgNum += 1; 2168 2169 std::string Name = Def->getName(); 2170 // Omit type checking for the pointer arguments of vld1_lane, vld1_dup, 2171 // vst1_lane, vldap1_lane, and vstl1_lane intrinsics. Using a pointer to 2172 // the vector element type with one of those operations causes codegen to 2173 // select an aligned load/store instruction. If you want an unaligned 2174 // operation, the pointer argument needs to have less alignment than element 2175 // type, so just accept any pointer type. 
2176 if (Name == "vld1_lane" || Name == "vld1_dup" || Name == "vst1_lane" || 2177 Name == "vldap1_lane" || Name == "vstl1_lane") { 2178 PtrArgNum = -1; 2179 HasConstPtr = false; 2180 } 2181 2182 if (Mask) { 2183 OverloadInfo &OI = OverloadMap[Def->getMangledName()]; 2184 OI.Mask |= Mask; 2185 OI.PtrArgNum |= PtrArgNum; 2186 OI.HasConstPtr = HasConstPtr; 2187 } 2188 } 2189 2190 for (auto &I : OverloadMap) { 2191 OverloadInfo &OI = I.second; 2192 2193 OS << "case NEON::BI__builtin_neon_" << I.first << ": "; 2194 OS << "mask = 0x" << Twine::utohexstr(OI.Mask) << "ULL"; 2195 if (OI.PtrArgNum >= 0) 2196 OS << "; PtrArgNum = " << OI.PtrArgNum; 2197 if (OI.HasConstPtr) 2198 OS << "; HasConstPtr = true"; 2199 OS << "; break;\n"; 2200 } 2201 OS << "#endif\n\n"; 2202 } 2203 2204 inline bool 2205 NeonEmitter::areRangeChecksCompatible(const ArrayRef<ImmCheck> ChecksA, 2206 const ArrayRef<ImmCheck> ChecksB) { 2207 // If multiple intrinsics map to the same builtin, we must ensure that the 2208 // intended range checks performed in SemaArm.cpp do not contradict each 2209 // other, as these are emitted once per-buitlin. 2210 // 2211 // The arguments to be checked and type of each check to be performed must be 2212 // the same. The element types may differ as they will be resolved 2213 // per-intrinsic as overloaded types by SemaArm.cpp, though the vector sizes 2214 // are not and so must be the same. 
2215 bool compat = 2216 std::equal(ChecksA.begin(), ChecksA.end(), ChecksB.begin(), ChecksB.end(), 2217 [](const auto &A, const auto &B) { 2218 return A.getImmArgIdx() == B.getImmArgIdx() && 2219 A.getKind() == B.getKind() && 2220 A.getVecSizeInBits() == B.getVecSizeInBits(); 2221 }); 2222 2223 return compat; 2224 } 2225 2226 void NeonEmitter::genIntrinsicRangeCheckCode( 2227 raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs) { 2228 std::unordered_map<std::string, ArrayRef<ImmCheck>> Emitted; 2229 2230 OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n"; 2231 for (auto &Def : Defs) { 2232 // If the Def has a body (operation DAGs), it is not a __builtin_neon_ 2233 if (Def->hasBody() || !Def->hasImmediate()) 2234 continue; 2235 2236 // Sorted by immediate argument index 2237 ArrayRef<ImmCheck> Checks = Def->getImmChecks(); 2238 2239 const auto it = Emitted.find(Def->getMangledName()); 2240 if (it != Emitted.end()) { 2241 assert(areRangeChecksCompatible(Checks, it->second) && 2242 "Neon intrinsics with incompatible immediate range checks cannot " 2243 "share a builtin."); 2244 continue; // Ensure this is emitted only once 2245 } 2246 2247 // Emit builtin's range checks 2248 OS << "case NEON::BI__builtin_neon_" << Def->getMangledName() << ":\n"; 2249 for (const auto &Check : Checks) { 2250 OS << " ImmChecks.emplace_back(" << Check.getImmArgIdx() << ", " 2251 << Check.getKind() << ", " << Check.getElementSizeInBits() << ", " 2252 << Check.getVecSizeInBits() << ");\n" 2253 << " break;\n"; 2254 } 2255 Emitted[Def->getMangledName()] = Checks; 2256 } 2257 2258 OS << "#endif\n\n"; 2259 } 2260 2261 /// runHeader - Emit a file with sections defining: 2262 /// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def. 2263 /// 2. the SemaChecking code for the type overload checking. 2264 /// 3. the SemaChecking code for validation of intrinsic immediate arguments. 
2265 void NeonEmitter::runHeader(raw_ostream &OS) { 2266 SmallVector<Intrinsic *, 128> Defs; 2267 for (const Record *R : Records.getAllDerivedDefinitions("Inst")) 2268 createIntrinsic(R, Defs); 2269 2270 // Generate shared BuiltinsXXX.def 2271 genBuiltinsDef(OS, Defs); 2272 2273 // Generate ARM overloaded type checking code for SemaChecking.cpp 2274 genOverloadTypeCheckCode(OS, Defs); 2275 2276 genStreamingSVECompatibleList(OS, Defs); 2277 2278 // Generate ARM range checking code for shift/lane immediates. 2279 genIntrinsicRangeCheckCode(OS, Defs); 2280 } 2281 2282 static void emitNeonTypeDefs(const std::string& types, raw_ostream &OS) { 2283 std::string TypedefTypes(types); 2284 std::vector<TypeSpec> TDTypeVec = TypeSpec::fromTypeSpecs(TypedefTypes); 2285 2286 // Emit vector typedefs. 2287 bool InIfdef = false; 2288 for (auto &TS : TDTypeVec) { 2289 bool IsA64 = false; 2290 Type T(TS, "."); 2291 if (T.isDouble() || T.isMFloat8()) 2292 IsA64 = true; 2293 2294 if (InIfdef && !IsA64) { 2295 OS << "#endif\n"; 2296 InIfdef = false; 2297 } 2298 if (!InIfdef && IsA64) { 2299 OS << "#if defined(__aarch64__) || defined(__arm64ec__)\n"; 2300 InIfdef = true; 2301 } 2302 2303 if (T.isPoly()) 2304 OS << "typedef __attribute__((neon_polyvector_type("; 2305 else 2306 OS << "typedef __attribute__((neon_vector_type("; 2307 2308 Type T2 = T; 2309 T2.makeScalar(); 2310 OS << T.getNumElements(); 2311 OS << "))) " << T2.str(); 2312 OS << " " << T.str() << ";\n"; 2313 } 2314 if (InIfdef) 2315 OS << "#endif\n"; 2316 OS << "\n"; 2317 2318 // Emit struct typedefs. 
2319 InIfdef = false; 2320 for (unsigned NumMembers = 2; NumMembers <= 4; ++NumMembers) { 2321 for (auto &TS : TDTypeVec) { 2322 bool IsA64 = false; 2323 Type T(TS, "."); 2324 if (T.isDouble() || T.isMFloat8()) 2325 IsA64 = true; 2326 2327 if (InIfdef && !IsA64) { 2328 OS << "#endif\n"; 2329 InIfdef = false; 2330 } 2331 if (!InIfdef && IsA64) { 2332 OS << "#if defined(__aarch64__) || defined(__arm64ec__)\n"; 2333 InIfdef = true; 2334 } 2335 2336 const char Mods[] = { static_cast<char>('2' + (NumMembers - 2)), 0}; 2337 Type VT(TS, Mods); 2338 OS << "typedef struct " << VT.str() << " {\n"; 2339 OS << " " << T.str() << " val"; 2340 OS << "[" << NumMembers << "]"; 2341 OS << ";\n} "; 2342 OS << VT.str() << ";\n"; 2343 OS << "\n"; 2344 } 2345 } 2346 if (InIfdef) 2347 OS << "#endif\n"; 2348 } 2349 2350 /// run - Read the records in arm_neon.td and output arm_neon.h. arm_neon.h 2351 /// is comprised of type definitions and function declarations. 2352 void NeonEmitter::run(raw_ostream &OS) { 2353 OS << "/*===---- arm_neon.h - ARM Neon intrinsics " 2354 "------------------------------" 2355 "---===\n" 2356 " *\n" 2357 " * Permission is hereby granted, free of charge, to any person " 2358 "obtaining " 2359 "a copy\n" 2360 " * of this software and associated documentation files (the " 2361 "\"Software\")," 2362 " to deal\n" 2363 " * in the Software without restriction, including without limitation " 2364 "the " 2365 "rights\n" 2366 " * to use, copy, modify, merge, publish, distribute, sublicense, " 2367 "and/or sell\n" 2368 " * copies of the Software, and to permit persons to whom the Software " 2369 "is\n" 2370 " * furnished to do so, subject to the following conditions:\n" 2371 " *\n" 2372 " * The above copyright notice and this permission notice shall be " 2373 "included in\n" 2374 " * all copies or substantial portions of the Software.\n" 2375 " *\n" 2376 " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, " 2377 "EXPRESS OR\n" 2378 " * IMPLIED, 
INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF " 2379 "MERCHANTABILITY,\n" 2380 " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT " 2381 "SHALL THE\n" 2382 " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR " 2383 "OTHER\n" 2384 " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, " 2385 "ARISING FROM,\n" 2386 " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER " 2387 "DEALINGS IN\n" 2388 " * THE SOFTWARE.\n" 2389 " *\n" 2390 " *===-----------------------------------------------------------------" 2391 "---" 2392 "---===\n" 2393 " */\n\n"; 2394 2395 OS << "#ifndef __ARM_NEON_H\n"; 2396 OS << "#define __ARM_NEON_H\n\n"; 2397 2398 OS << "#ifndef __ARM_FP\n"; 2399 OS << "#error \"NEON intrinsics not available with the soft-float ABI. " 2400 "Please use -mfloat-abi=softfp or -mfloat-abi=hard\"\n"; 2401 OS << "#else\n\n"; 2402 2403 OS << "#include <stdint.h>\n\n"; 2404 2405 OS << "#include <arm_bf16.h>\n"; 2406 2407 OS << "#include <arm_vector_types.h>\n"; 2408 2409 // For now, signedness of polynomial types depends on target 2410 OS << "#if defined(__aarch64__) || defined(__arm64ec__)\n"; 2411 OS << "typedef uint8_t poly8_t;\n"; 2412 OS << "typedef uint16_t poly16_t;\n"; 2413 OS << "typedef uint64_t poly64_t;\n"; 2414 OS << "typedef __uint128_t poly128_t;\n"; 2415 OS << "#else\n"; 2416 OS << "typedef int8_t poly8_t;\n"; 2417 OS << "typedef int16_t poly16_t;\n"; 2418 OS << "typedef int64_t poly64_t;\n"; 2419 OS << "#endif\n"; 2420 emitNeonTypeDefs("PcQPcPsQPsPlQPl", OS); 2421 2422 OS << "#define __ai static __inline__ __attribute__((__always_inline__, " 2423 "__nodebug__))\n\n"; 2424 2425 SmallVector<Intrinsic *, 128> Defs; 2426 for (const Record *R : Records.getAllDerivedDefinitions("Inst")) 2427 createIntrinsic(R, Defs); 2428 2429 for (auto *I : Defs) 2430 I->indexBody(); 2431 2432 stable_sort(Defs, deref<std::less<>>()); 2433 2434 // Only emit a def when its requirements have been met. 
2435 // FIXME: This loop could be made faster, but it's fast enough for now. 2436 bool MadeProgress = true; 2437 std::string InGuard; 2438 while (!Defs.empty() && MadeProgress) { 2439 MadeProgress = false; 2440 2441 for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin(); 2442 I != Defs.end(); /*No step*/) { 2443 bool DependenciesSatisfied = true; 2444 for (auto *II : (*I)->getDependencies()) { 2445 if (is_contained(Defs, II)) 2446 DependenciesSatisfied = false; 2447 } 2448 if (!DependenciesSatisfied) { 2449 // Try the next one. 2450 ++I; 2451 continue; 2452 } 2453 2454 // Emit #endif/#if pair if needed. 2455 if ((*I)->getArchGuard() != InGuard) { 2456 if (!InGuard.empty()) 2457 OS << "#endif\n"; 2458 InGuard = (*I)->getArchGuard(); 2459 if (!InGuard.empty()) 2460 OS << "#if " << InGuard << "\n"; 2461 } 2462 2463 // Actually generate the intrinsic code. 2464 OS << (*I)->generate(); 2465 2466 MadeProgress = true; 2467 I = Defs.erase(I); 2468 } 2469 } 2470 assert(Defs.empty() && "Some requirements were not satisfied!"); 2471 if (!InGuard.empty()) 2472 OS << "#endif\n"; 2473 2474 OS << "\n"; 2475 OS << "#undef __ai\n\n"; 2476 OS << "#endif /* if !defined(__ARM_NEON) */\n"; 2477 OS << "#endif /* ifndef __ARM_FP */\n"; 2478 } 2479 2480 /// run - Read the records in arm_fp16.td and output arm_fp16.h. arm_fp16.h 2481 /// is comprised of type definitions and function declarations. 
2482 void NeonEmitter::runFP16(raw_ostream &OS) { 2483 OS << "/*===---- arm_fp16.h - ARM FP16 intrinsics " 2484 "------------------------------" 2485 "---===\n" 2486 " *\n" 2487 " * Permission is hereby granted, free of charge, to any person " 2488 "obtaining a copy\n" 2489 " * of this software and associated documentation files (the " 2490 "\"Software\"), to deal\n" 2491 " * in the Software without restriction, including without limitation " 2492 "the rights\n" 2493 " * to use, copy, modify, merge, publish, distribute, sublicense, " 2494 "and/or sell\n" 2495 " * copies of the Software, and to permit persons to whom the Software " 2496 "is\n" 2497 " * furnished to do so, subject to the following conditions:\n" 2498 " *\n" 2499 " * The above copyright notice and this permission notice shall be " 2500 "included in\n" 2501 " * all copies or substantial portions of the Software.\n" 2502 " *\n" 2503 " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, " 2504 "EXPRESS OR\n" 2505 " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF " 2506 "MERCHANTABILITY,\n" 2507 " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT " 2508 "SHALL THE\n" 2509 " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR " 2510 "OTHER\n" 2511 " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, " 2512 "ARISING FROM,\n" 2513 " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER " 2514 "DEALINGS IN\n" 2515 " * THE SOFTWARE.\n" 2516 " *\n" 2517 " *===-----------------------------------------------------------------" 2518 "---" 2519 "---===\n" 2520 " */\n\n"; 2521 2522 OS << "#ifndef __ARM_FP16_H\n"; 2523 OS << "#define __ARM_FP16_H\n\n"; 2524 2525 OS << "#include <stdint.h>\n\n"; 2526 2527 OS << "typedef __fp16 float16_t;\n"; 2528 2529 OS << "#define __ai static __inline__ __attribute__((__always_inline__, " 2530 "__nodebug__))\n\n"; 2531 2532 SmallVector<Intrinsic *, 128> Defs; 2533 for (const Record *R : Records.getAllDerivedDefinitions("Inst")) 2534 createIntrinsic(R, Defs); 2535 2536 for (auto *I : Defs) 2537 I->indexBody(); 2538 2539 stable_sort(Defs, deref<std::less<>>()); 2540 2541 // Only emit a def when its requirements have been met. 2542 // FIXME: This loop could be made faster, but it's fast enough for now. 2543 bool MadeProgress = true; 2544 std::string InGuard; 2545 while (!Defs.empty() && MadeProgress) { 2546 MadeProgress = false; 2547 2548 for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin(); 2549 I != Defs.end(); /*No step*/) { 2550 bool DependenciesSatisfied = true; 2551 for (auto *II : (*I)->getDependencies()) { 2552 if (is_contained(Defs, II)) 2553 DependenciesSatisfied = false; 2554 } 2555 if (!DependenciesSatisfied) { 2556 // Try the next one. 2557 ++I; 2558 continue; 2559 } 2560 2561 // Emit #endif/#if pair if needed. 2562 if ((*I)->getArchGuard() != InGuard) { 2563 if (!InGuard.empty()) 2564 OS << "#endif\n"; 2565 InGuard = (*I)->getArchGuard(); 2566 if (!InGuard.empty()) 2567 OS << "#if " << InGuard << "\n"; 2568 } 2569 2570 // Actually generate the intrinsic code. 
2571 OS << (*I)->generate(); 2572 2573 MadeProgress = true; 2574 I = Defs.erase(I); 2575 } 2576 } 2577 assert(Defs.empty() && "Some requirements were not satisfied!"); 2578 if (!InGuard.empty()) 2579 OS << "#endif\n"; 2580 2581 OS << "\n"; 2582 OS << "#undef __ai\n\n"; 2583 OS << "#endif /* __ARM_FP16_H */\n"; 2584 } 2585 2586 void NeonEmitter::runVectorTypes(raw_ostream &OS) { 2587 OS << "/*===---- arm_vector_types - ARM vector type " 2588 "------===\n" 2589 " *\n" 2590 " *\n" 2591 " * Part of the LLVM Project, under the Apache License v2.0 with LLVM " 2592 "Exceptions.\n" 2593 " * See https://llvm.org/LICENSE.txt for license information.\n" 2594 " * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception\n" 2595 " *\n" 2596 " *===-----------------------------------------------------------------" 2597 "------===\n" 2598 " */\n\n"; 2599 OS << "#if !defined(__ARM_NEON_H) && !defined(__ARM_SVE_H)\n"; 2600 OS << "#error \"This file should not be used standalone. Please include" 2601 " arm_neon.h or arm_sve.h instead\"\n\n"; 2602 OS << "#endif\n"; 2603 OS << "#ifndef __ARM_NEON_TYPES_H\n"; 2604 OS << "#define __ARM_NEON_TYPES_H\n"; 2605 OS << "typedef float float32_t;\n"; 2606 OS << "typedef __fp16 float16_t;\n"; 2607 2608 OS << "#if defined(__aarch64__) || defined(__arm64ec__)\n"; 2609 OS << "typedef __mfp8 mfloat8_t;\n"; 2610 OS << "typedef double float64_t;\n"; 2611 OS << "#endif\n\n"; 2612 2613 OS << R"( 2614 typedef uint64_t fpm_t; 2615 2616 enum __ARM_FPM_FORMAT { __ARM_FPM_E5M2, __ARM_FPM_E4M3 }; 2617 2618 enum __ARM_FPM_OVERFLOW { __ARM_FPM_INFNAN, __ARM_FPM_SATURATE }; 2619 2620 static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__)) 2621 __arm_fpm_init(void) { 2622 return 0; 2623 } 2624 2625 static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__)) 2626 __arm_set_fpm_src1_format(fpm_t __fpm, enum __ARM_FPM_FORMAT __format) { 2627 return (__fpm & ~7ull) | (fpm_t)__format; 2628 } 2629 2630 static __inline__ fpm_t 
__attribute__((__always_inline__, __nodebug__)) 2631 __arm_set_fpm_src2_format(fpm_t __fpm, enum __ARM_FPM_FORMAT __format) { 2632 return (__fpm & ~0x38ull) | ((fpm_t)__format << 3u); 2633 } 2634 2635 static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__)) 2636 __arm_set_fpm_dst_format(fpm_t __fpm, enum __ARM_FPM_FORMAT __format) { 2637 return (__fpm & ~0x1c0ull) | ((fpm_t)__format << 6u); 2638 } 2639 2640 static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__)) 2641 __arm_set_fpm_overflow_mul(fpm_t __fpm, enum __ARM_FPM_OVERFLOW __behaviour) { 2642 return (__fpm & ~0x4000ull) | ((fpm_t)__behaviour << 14u); 2643 } 2644 2645 static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__)) 2646 __arm_set_fpm_overflow_cvt(fpm_t __fpm, enum __ARM_FPM_OVERFLOW __behaviour) { 2647 return (__fpm & ~0x8000ull) | ((fpm_t)__behaviour << 15u); 2648 } 2649 2650 static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__)) 2651 __arm_set_fpm_lscale(fpm_t __fpm, uint64_t __scale) { 2652 return (__fpm & ~0x7f0000ull) | (__scale << 16u); 2653 } 2654 2655 static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__)) 2656 __arm_set_fpm_nscale(fpm_t __fpm, int64_t __scale) { 2657 return (__fpm & ~0xff000000ull) | (((fpm_t)__scale & 0xffu) << 24u); 2658 } 2659 2660 static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__)) 2661 __arm_set_fpm_lscale2(fpm_t __fpm, uint64_t __scale) { 2662 return (uint32_t)__fpm | (__scale << 32u); 2663 } 2664 2665 )"; 2666 2667 emitNeonTypeDefs("cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlmQmhQhfQfdQd", OS); 2668 2669 emitNeonTypeDefs("bQb", OS); 2670 OS << "#endif // __ARM_NEON_TYPES_H\n"; 2671 } 2672 2673 void NeonEmitter::runBF16(raw_ostream &OS) { 2674 OS << "/*===---- arm_bf16.h - ARM BF16 intrinsics " 2675 "-----------------------------------===\n" 2676 " *\n" 2677 " *\n" 2678 " * Part of the LLVM Project, under the Apache License v2.0 with LLVM " 2679 "Exceptions.\n" 2680 " * See 
https://llvm.org/LICENSE.txt for license information.\n" 2681 " * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception\n" 2682 " *\n" 2683 " *===-----------------------------------------------------------------" 2684 "------===\n" 2685 " */\n\n"; 2686 2687 OS << "#ifndef __ARM_BF16_H\n"; 2688 OS << "#define __ARM_BF16_H\n\n"; 2689 2690 OS << "typedef __bf16 bfloat16_t;\n"; 2691 2692 OS << "#define __ai static __inline__ __attribute__((__always_inline__, " 2693 "__nodebug__))\n\n"; 2694 2695 SmallVector<Intrinsic *, 128> Defs; 2696 for (const Record *R : Records.getAllDerivedDefinitions("Inst")) 2697 createIntrinsic(R, Defs); 2698 2699 for (auto *I : Defs) 2700 I->indexBody(); 2701 2702 stable_sort(Defs, deref<std::less<>>()); 2703 2704 // Only emit a def when its requirements have been met. 2705 // FIXME: This loop could be made faster, but it's fast enough for now. 2706 bool MadeProgress = true; 2707 std::string InGuard; 2708 while (!Defs.empty() && MadeProgress) { 2709 MadeProgress = false; 2710 2711 for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin(); 2712 I != Defs.end(); /*No step*/) { 2713 bool DependenciesSatisfied = true; 2714 for (auto *II : (*I)->getDependencies()) { 2715 if (is_contained(Defs, II)) 2716 DependenciesSatisfied = false; 2717 } 2718 if (!DependenciesSatisfied) { 2719 // Try the next one. 2720 ++I; 2721 continue; 2722 } 2723 2724 // Emit #endif/#if pair if needed. 2725 if ((*I)->getArchGuard() != InGuard) { 2726 if (!InGuard.empty()) 2727 OS << "#endif\n"; 2728 InGuard = (*I)->getArchGuard(); 2729 if (!InGuard.empty()) 2730 OS << "#if " << InGuard << "\n"; 2731 } 2732 2733 // Actually generate the intrinsic code. 
2734 OS << (*I)->generate(); 2735 2736 MadeProgress = true; 2737 I = Defs.erase(I); 2738 } 2739 } 2740 assert(Defs.empty() && "Some requirements were not satisfied!"); 2741 if (!InGuard.empty()) 2742 OS << "#endif\n"; 2743 2744 OS << "\n"; 2745 OS << "#undef __ai\n\n"; 2746 2747 OS << "#endif\n"; 2748 } 2749 2750 void clang::EmitNeon(const RecordKeeper &Records, raw_ostream &OS) { 2751 NeonEmitter(Records).run(OS); 2752 } 2753 2754 void clang::EmitFP16(const RecordKeeper &Records, raw_ostream &OS) { 2755 NeonEmitter(Records).runFP16(OS); 2756 } 2757 2758 void clang::EmitBF16(const RecordKeeper &Records, raw_ostream &OS) { 2759 NeonEmitter(Records).runBF16(OS); 2760 } 2761 2762 void clang::EmitNeonSema(const RecordKeeper &Records, raw_ostream &OS) { 2763 NeonEmitter(Records).runHeader(OS); 2764 } 2765 2766 void clang::EmitVectorTypes(const RecordKeeper &Records, raw_ostream &OS) { 2767 NeonEmitter(Records).runVectorTypes(OS); 2768 } 2769 2770 void clang::EmitNeonTest(const RecordKeeper &Records, raw_ostream &OS) { 2771 llvm_unreachable("Neon test generation no longer implemented!"); 2772 } 2773