//===--- arm_neon_incl.td - ARM NEON compiler interface -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines data structures shared by arm_neon.td and arm_fp16.td.
// It contains base operation classes, operations, instructions, instruction
// modifiers, etc.
//
//===----------------------------------------------------------------------===//
//
// Each intrinsic is a subclass of the Inst class. An intrinsic can either
// generate a __builtin_* call or it can expand to a set of generic operations.
//
// The operations are subclasses of Operation providing a list of DAGs, the
// last of which is the return value. The available DAG nodes are documented
// below.
//
//===----------------------------------------------------------------------===//

include "arm_immcheck_incl.td"

// The base Operation class. All operations must subclass this.
//   ops - The list of DAGs that make up the operation (empty by default).
class Operation<list<dag> ops=[]> {
  // The DAGs of this operation; the last one is the return value.
  list<dag> Ops = ops;
  // Cleared by default; OP_UNAVAILABLE overrides it to 1.
  bit Unavailable = 0;
}
// An operation that only contains a single DAG.
class Op<dag op> : Operation<[op]>;
// A shorter version of Operation - takes a list of DAGs. The last of these will
// be the return value.
class LOp<list<dag> ops> : Operation<ops>;

// These defs and classes are used internally to implement the SetTheory
// expansion and should be ignored.
// The foreach defines one record per index: sv0, sv1, ..., sv63.
foreach Index = 0-63 in
  def sv#Index;
// Marker class; mask0 and mask1 derive from it.
class MaskExpand;

//===----------------------------------------------------------------------===//
// Available operations
//===----------------------------------------------------------------------===//

// DAG arguments can either be operations (documented below) or variables.
// Variables are prefixed with '$'. There are variables for each input argument,
// with the name $pN, where N starts at zero. So the zero'th argument will be
// $p0, the first $p1, etc.

// op - Binary or unary operator, depending on the number of arguments. The
//      operator itself is just treated as a raw string and is not checked.
// example: (op "+", $p0, $p1) -> "__p0 + __p1".
//          (op "-", $p0)      -> "-__p0"
def op;
// call - Invoke another intrinsic. The input types are type checked and
//        disambiguated. If there is no intrinsic defined that takes
//        the given types (or if there is a type ambiguity) an error is
//        generated at tblgen time. The name of the intrinsic is the raw
//        name as given to the Inst class (not mangled).
// example: (call "vget_high", $p0) -> "vgetq_high_s16(__p0)"
//            (assuming $p0 has type int16x8_t).
def call;
// call_mangled - Invoke another intrinsic matching the mangled name variation
//                of the caller's base type. If there is no intrinsic defined
//                that has the variation and takes the given types, an error
//                is generated at tblgen time.
// example: (call_mangled "vfma_lane", $p0, $p1) -> "vfma_lane(__p0, __p1)"
//            (assuming non-LaneQ caller)
//          (call_mangled "vfma_lane", $p0, $p1) -> "vfma_laneq(__p0, __p1)"
//            (assuming LaneQ caller)
def call_mangled;
// cast - Perform a cast to a different type. This gets emitted as a static
//        C-style cast. For a pure reinterpret cast (T x = *(T*)&y), use
//        "bitcast".
//
//        The syntax is (cast MOD* VAL). The last argument is the value to
//        cast, preceded by a sequence of type modifiers. The target type
//        starts off as the type of VAL, and is modified by MOD in sequence.
//        The available modifiers are:
//          - $X  - Take the type of parameter/variable X. For example:
//                  (cast $p0, $p1) would cast $p1 to the type of $p0.
//          - "R" - The type of the return type.
//          - A typedef string - A NEON or stdint.h type that is then parsed.
//                               for example: (cast "uint32x4_t", $p0).
//          - "U" - Make the type unsigned.
//          - "S" - Make the type signed.
//          - "H" - Halve the number of lanes in the type.
//          - "D" - Double the number of lanes in the type.
//          - "8" - Convert type to an equivalent vector of 8-bit signed
//                  integers.
//          - "32" - Convert type to an equivalent vector of 32-bit integers.
// example: (cast "R", "U", $p0) -> "(uint32x4_t)__p0" (assuming the return
//           value is of type "int32x4_t").
//          (cast $p0, "D", "8", $p1) -> "(int8x16_t)__p1" (assuming __p0
//           has type float64x1_t or any other vector type of 64 bits).
//          (cast "int32_t", $p2) -> "(int32_t)__p2"
def cast;
// bitcast - Same as "cast", except a reinterpret-cast is produced:
//             (bitcast "T", $p0) -> "*(T*)&__p0".
//           The VAL argument is saved to a temporary so it can be used
//           as an l-value.
def bitcast;
// dup - Take a scalar argument and create a vector by duplicating it into
//       all lanes. The type of the vector is the base type of the intrinsic.
// example: (dup $p1) -> "(uint32x2_t) {__p1, __p1}" (assuming the base type
//          is uint32x2_t).
def dup;
// dup_typed - Take a vector and a scalar argument, and create a new vector of
//             the same type by duplicating the scalar value into all lanes.
// example: (dup_typed $p1, $p2) -> "(float16x4_t) {__p2, __p2, __p2, __p2}"
//          (assuming __p1 is float16x4_t, and __p2 is a compatible scalar).
def dup_typed;
// save_temp - Create a temporary (local) variable. The variable takes a name
//             based on the zero'th parameter and can be referenced using
//             that name in subsequent DAGs in the same
//             operation. The scope of a temp is the operation. If a variable
//             with the given name already exists, an error will be given at
//             tblgen time.
// example: [(save_temp $var, (call "foo", $p0)),
//           (op "+", $var, $p1)] ->
//              "int32x2_t __var = foo(__p0); return __var + __p1;"
def save_temp;
// name_replace - Return the name of the current intrinsic with the first
//                argument replaced by the second argument. Raises an error if
//                the first argument does not exist in the intrinsic name.
// example: (call (name_replace "_high_", "_"), $p0) (to call the non-high
//            version of this intrinsic).
def name_replace;
// literal - Create a literal piece of code. The code is treated as a raw
//           string, and must be given a type. The type is a stdint.h or
//           NEON intrinsic type as given to (cast).
// example: (literal "int32_t", "0")
def literal;
// shuffle - Create a vector shuffle. The syntax is (shuffle ARG0, ARG1, MASK).
//           The MASK argument is a set of elements. The elements are generated
//           from the two special defs "mask0" and "mask1". "mask0" expands to
//           the lane indices in sequence for ARG0, and "mask1" expands to
//           the lane indices in sequence for ARG1. They can be used as-is, e.g.
//
//             (shuffle $p0, $p1, mask0) -> $p0
//             (shuffle $p0, $p1, mask1) -> $p1
//
//           or, more usefully, they can be manipulated using the SetTheory
//           operators plus some extra operators defined in the NEON emitter.
//           The operators are described below.
// example: (shuffle $p0, $p1, (add (highhalf mask0), (highhalf mask1))) ->
//            A concatenation of the high halves of the input vectors.
def shuffle;

// add, interleave, decimate: These set operators are vanilla SetTheory
// operators and take their normal definition.
def add;
def interleave;
def decimate;
// rotl - Rotate set left by a number of elements.
// example: (rotl mask0, 3) -> [3, 4, 5, 6, 0, 1, 2]
def rotl;
// rotr - Rotate set right by a number of elements.
// example: (rotr mask0, 3) -> [4, 5, 6, 0, 1, 2, 3]
def rotr;
// highhalf - Take only the high half of the input.
// example: (highhalf mask0) -> [4, 5, 6, 7] (assuming mask0 had 8 elements)
def highhalf;
// lowhalf - Take only the low half of the input.
// example: (lowhalf mask0) -> [0, 1, 2, 3] (assuming mask0 had 8 elements)
def lowhalf;
// rev - Perform a variable-width reversal of the elements. The zero'th argument
//       is a width in bits to reverse. The lanes this maps to are determined
//       based on the element width of the underlying type.
// example: (rev 32, mask0) -> [3, 2, 1, 0, 7, 6, 5, 4] (if 8-bit elements)
// example: (rev 32, mask0) -> [1, 0, 3, 2] (if 16-bit elements)
def rev;
// mask0 - The initial sequence of lanes for shuffle ARG0
def mask0 : MaskExpand;
// mask1 - The initial sequence of lanes for shuffle ARG1
def mask1 : MaskExpand;

// OP_NONE - An Operation with the default (empty) list of DAGs. The
//           builtin-backed instruction classes (SInst, IInst, WInst, VInst)
//           pass it as their operation.
def OP_NONE : Operation;
// OP_UNAVAILABLE - An Operation with an empty list of DAGs and the
//                  Unavailable bit set.
def OP_UNAVAILABLE : Operation {
  let Unavailable = 1;
}

//===----------------------------------------------------------------------===//
// Instruction definitions
//===----------------------------------------------------------------------===//

// Every intrinsic subclasses "Inst". An intrinsic has a name, a prototype and
// a sequence of typespecs.
//
// The name is the base name of the intrinsic, for example "vget_lane". This is
// then mangled by the tblgen backend to add type information ("vget_lane_s16").
//
// A typespec is a sequence of uppercase characters (modifiers) followed by one
// lowercase character. A typespec encodes a particular "base type" of the
// intrinsic.
//
// An example typespec is "Qs" - quad-size short - int16x8_t. The available
// typespec codes are given below.
//
// The string given to an Inst class is a sequence of typespecs. The intrinsic
// is instantiated for every typespec in the sequence. For example "sdQsQd".
//
// The prototype is a string that defines the return type of the intrinsic
// and the type of each argument. The return type and every argument gets a
// set of "modifiers" that can change in some way the "base type" of the
// intrinsic.
//
// Typespecs
// ---------
// c: char
// s: short
// i: int
// l: long
// k: 128-bit long
// f: float
// h: half-float
// d: double
// b: bfloat16
// m: mfloat8
//
// Typespec modifiers
// ------------------
// S: scalar, only used for function mangling.
// U: unsigned
// Q: 128b
// H: 128b without mangling 'q'
// P: polynomial
//
// Prototype modifiers
// -------------------
// prototype: return (arg, arg, ...)
//
// Each type modifier is either a single character, or a group surrounded by
// parentheses.
//
// .: default
// v: change to void category.
// S: change to signed integer category.
// U: change to unsigned integer category.
// F: change to floating category.
// B: change to BFloat16
// P: change to polynomial category.
// p: change polynomial to equivalent integer category. Otherwise nop.
// V: change to fpm_t
//
// >: double element width (vector size unchanged).
// <: half element width (vector size unchanged).
//
// 1: change to scalar.
// 2: change to struct of two vectors.
// 3: change to struct of three vectors.
// 4: change to struct of four vectors.
//
// *: make a pointer argument.
// c: make a constant argument (for pointers).
//
// Q: force 128-bit width.
// q: force 64-bit width.
//
// I: make 32-bit signed scalar immediate
// !: make this the key type passed to CGBuiltin.cpp in a polymorphic call.


// Every intrinsic subclasses Inst.
// Base record for an intrinsic.
//   n  - Base (unmangled) name of the intrinsic; stored in Name.
//   p  - Prototype string; stored in Prototype.
//   t  - Sequence of typespecs to instantiate for; stored in Types.
//   o  - The Operation implementing the intrinsic; stored in Operation.
//   ch - Optional list of ImmCheck records (empty by default); stored in
//        ImmChecks. ImmCheck is not defined in this file - presumably it comes
//        from the included arm_immcheck_incl.td.
class Inst <string n, string p, string t, Operation o, list<ImmCheck> ch = []>{
  string Name = n;
  string Prototype = p;
  string Types = t;
  // Guard strings; ArchGuard is empty and TargetGuard is "neon" unless a
  // derived record overrides them.
  string ArchGuard = "";
  string TargetGuard = "neon";

  Operation Operation = o;
  // The flags below all default to 0; derived records can override them.
  bit BigEndianSafe = 0;
  bit isShift = 0;
  bit isScalarShift = 0;
  bit isScalarNarrowShift = 0;
  list<ImmCheck> ImmChecks = ch;

  // Certain intrinsics have different names than their representative
  // instructions. This field allows us to handle this correctly when we
  // are generating tests.
  string InstName = "";

  // Certain intrinsics even though they are not a WOpInst or LOpInst,
  // generate a WOpInst/LOpInst instruction (see below for definition
  // of a WOpInst/LOpInst). For testing purposes we need to know
  // this. Ex: vset_lane which outputs vmov instructions.
  bit isHiddenWInst = 0;
  bit isHiddenLInst = 0;

  // Empty by default.
  string CartesianProductWith = "";
}

// The following instruction classes are implemented via builtins.
// These declarations are used to generate Builtins.def:
//
// SInst: Instruction with signed/unsigned suffix (e.g., "s8", "u8", "p8")
// IInst: Instruction with generic integer suffix (e.g., "i8")
// WInst: Instruction with only bit size suffix (e.g., "8")
//
// All of them forward (n, p, t, ch) to Inst and use OP_NONE as the operation.
// NOTE(review): VInst is declared identically but its suffix convention is not
// described here - confirm against the emitter.
class SInst<string n, string p, string t, list<ImmCheck> ch = []> : Inst<n, p, t, OP_NONE, ch> {}
class IInst<string n, string p, string t, list<ImmCheck> ch = []> : Inst<n, p, t, OP_NONE, ch> {}
class WInst<string n, string p, string t, list<ImmCheck> ch = []> : Inst<n, p, t, OP_NONE, ch> {}
class VInst<string n, string p, string t, list<ImmCheck> ch = []> : Inst<n, p, t, OP_NONE, ch> {}

// The following instruction classes are implemented via operators
// instead of builtins. As such these declarations are only used for
// the purpose of generating tests.
//
// SOpInst:       Instruction with signed/unsigned suffix (e.g., "s8",
//                "u8", "p8").
// IOpInst:       Instruction with generic integer suffix (e.g., "i8").
// WOpInst:       Instruction with bit size only suffix (e.g., "8").
// LOpInst:       Logical instruction with no bit size suffix.
// NoTestOpInst:  Intrinsic that has no corresponding instruction.
//
// Each class forwards (n, p, t, o) to Inst unchanged; no ImmCheck list is
// passed, so Inst's default (empty) list applies.
class SOpInst<string n, string p, string t, Operation o> : Inst<n, p, t, o> {}
class IOpInst<string n, string p, string t, Operation o> : Inst<n, p, t, o> {}
class WOpInst<string n, string p, string t, Operation o> : Inst<n, p, t, o> {}
class LOpInst<string n, string p, string t, Operation o> : Inst<n, p, t, o> {}
class NoTestOpInst<string n, string p, string t, Operation o> : Inst<n, p, t, o> {}