xref: /llvm-project/clang/include/clang/Basic/arm_neon_incl.td (revision 804b81d39f2d50743fd2090aed72dad29f5fb388)
1//===--- arm_neon_incl.td - ARM NEON compiler interface -------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9//  This file defines data structures shared by arm_neon.td and arm_fp16.td.
//  It contains base operation classes, operations, instructions, instruction
//  modifiers, etc.
12//
13//===----------------------------------------------------------------------===//
14//
15// Each intrinsic is a subclass of the Inst class. An intrinsic can either
16// generate a __builtin_* call or it can expand to a set of generic operations.
17//
18// The operations are subclasses of Operation providing a list of DAGs, the
19// last of which is the return value. The available DAG nodes are documented
20// below.
21//
22//===----------------------------------------------------------------------===//
23
24include "arm_immcheck_incl.td"
25
// The base Operation class. All operations must subclass this.
//   ops - the DAGs that make up the operation; defaults to the empty list.
class Operation<list<dag> ops = []> {
  // The DAGs of this operation, copied from the template argument.
  list<dag> Ops = ops;
  // Cleared by default; OP_UNAVAILABLE is the definition that sets it.
  bit Unavailable = 0;
}
// An operation that only contains a single DAG. The DAG is wrapped in a
// one-element list and handed to Operation, so it is also the return value.
class Op<dag op> : Operation<[op]>;
// A shorter version of Operation - takes a list of DAGs and forwards it
// unchanged. The last of these will be the return value.
class LOp<list<dag> ops> : Operation<ops>;
36
// These defs and classes are used internally to implement the SetTheory
// expansion and should be ignored.
//
// Defines the 64 records sv0 .. sv63. The range is written with "..."; the
// hyphenated spelling (0-63) is deprecated because "-63" lexes as a single
// negative integer token.
foreach Index = 0...63 in
  def sv#Index;
// Marker class; the special defs "mask0" and "mask1" derive from it.
class MaskExpand;
42
43//===----------------------------------------------------------------------===//
44// Available operations
45//===----------------------------------------------------------------------===//
46
47// DAG arguments can either be operations (documented below) or variables.
48// Variables are prefixed with '$'. There are variables for each input argument,
49// with the name $pN, where N starts at zero. So the zero'th argument will be
50// $p0, the first $p1 etc.
51
// op - Binary or unary operator, depending on the number of arguments. The
//      operator itself is just treated as a raw string and is not checked.
//      With two operands the operator is emitted between them; with a single
//      operand it is emitted as a prefix.
// example: (op "+", $p0, $p1) -> "__p0 + __p1".
//          (op "-", $p0)      -> "-__p0"
def op;
// call - Invoke another intrinsic. The input types are type checked and
//        disambiguated. If there is no intrinsic defined that takes
//        the given types (or if there is a type ambiguity) an error is
//        generated at tblgen time. The name of the intrinsic is the raw
//        name as given to the Inst class (its Name field, not mangled).
// example: (call "vget_high", $p0) -> "vgetq_high_s16(__p0)"
//            (assuming $p0 has type int16x8_t).
def call;
// call_mangled - Invoke another intrinsic matching the mangled name variation
//                of the caller's base type. If there is no intrinsic defined
//                that has the variation and takes the given types, an error
//                is generated at tblgen time. In the examples below the same
//                DAG yields a different callee depending on the caller.
// example: (call_mangled "vfma_lane", $p0, $p1) -> "vfma_lane(__p0, __p1)"
//            (assuming non-LaneQ caller)
//          (call_mangled "vfma_lane", $p0, $p1) -> "vfma_laneq(__p0, __p1)"
//            (assuming LaneQ caller)
def call_mangled;
// cast - Perform a cast to a different type. This gets emitted as a static
//        C-style cast. For a pure reinterpret cast (T x = *(T*)&y), use
//        "bitcast".
//
//        The syntax is (cast MOD* VAL). The last argument is the value to
//        cast, preceded by a sequence of type modifiers. The target type
//        starts off as the type of VAL, and is modified by MOD in sequence.
//        The available modifiers are:
//          - $X  - Take the type of parameter/variable X. For example:
//                  (cast $p0, $p1) would cast $p1 to the type of $p0.
//          - "R" - The type of the return value.
//          - A typedef string - A NEON or stdint.h type that is then parsed.
//                               for example: (cast "uint32x4_t", $p0).
//          - "U" - Make the type unsigned.
//          - "S" - Make the type signed.
//          - "H" - Halve the number of lanes in the type.
//          - "D" - Double the number of lanes in the type.
//          - "8" - Convert type to an equivalent vector of 8-bit signed
//                  integers.
//          - "32" - Convert type to an equivalent vector of 32-bit integers.
// example: (cast "R", "U", $p0) -> "(uint32x4_t)__p0" (assuming the return
//           value is of type "int32x4_t").
//          (cast $p0, "D", "8", $p1) -> "(int8x16_t)__p1" (assuming __p0
//           has type float64x1_t or any other vector type of 64 bits).
//          (cast "int32_t", $p2) -> "(int32_t)__p2"
def cast;
// bitcast - Same as "cast" (including the (MOD* VAL) argument syntax), except
//           a reinterpret-cast is produced:
//             (bitcast "T", $p0) -> "*(T*)&__p0".
//           The VAL argument is saved to a temporary so it can be used
//           as an l-value.
def bitcast;
// dup - Take a scalar argument and create a vector by duplicating it into
//       all lanes. The type of the vector is the base type of the intrinsic.
//       To take the vector type from an argument instead, use "dup_typed".
// example: (dup $p1) -> "(uint32x2_t) {__p1, __p1}" (assuming the base type
//          is uint32x2_t).
def dup;
// dup_typed - Take a vector and a scalar argument, and create a new vector of
//             the same type by duplicating the scalar value into all lanes.
//             As the example shows, the vector argument only supplies the
//             result type; every lane holds the scalar.
// example: (dup_typed $p1, $p2) -> "(float16x4_t) {__p2, __p2, __p2, __p2}"
//          (assuming __p1 is float16x4_t, and __p2 is a compatible scalar).
def dup_typed;
// save_temp - Create a temporary (local) variable. The variable takes a name
//             based on the zero'th parameter and can be referenced using
//             that name in subsequent DAGs in the same operation. The scope
//             of a temp is the operation. If a variable with the given name
//             already exists, an error will be given at tblgen time.
// example: [(save_temp $var, (call "foo", $p0)),
//           (op "+", $var, $p1)] ->
//              "int32x2_t __var = foo(__p0); return __var + __p1;"
def save_temp;
// name_replace - Return the name of the current intrinsic with the first
//                argument replaced by the second argument. Raises an error if
//                the first argument does not exist in the intrinsic name.
//                As in the example, the result can be used as the name
//                operand of "call".
// example: (call (name_replace "_high_", "_"), $p0) (to call the non-high
//            version of this intrinsic).
def name_replace;
// literal - Create a literal piece of code. The code is treated as a raw
//           string, and must be given a type. The type is a stdint.h or
//           NEON intrinsic type as given to (cast). The type is the first
//           argument and the code string the second.
// example: (literal "int32_t", "0")
def literal;
// shuffle - Create a vector shuffle. The syntax is (shuffle ARG0, ARG1, MASK).
//           The MASK argument is a set of elements. The elements are generated
//           from the two special defs "mask0" and "mask1". "mask0" expands to
//           the lane indices in sequence for ARG0, and "mask1" expands to
//           the lane indices in sequence for ARG1. They can be used as-is, e.g.
//
//             (shuffle $p0, $p1, mask0) -> $p0
//             (shuffle $p0, $p1, mask1) -> $p1
//
//           or, more usefully, they can be manipulated using the SetTheory
//           operators plus some extra operators defined in the NEON emitter.
//           The operators are described next to their defs below.
// example: (shuffle $p0, $p1, (add (highhalf mask0), (highhalf mask1))) ->
//            A concatenation of the high halves of the input vectors.
def shuffle;
151
// add, interleave, decimate: These set operators are vanilla SetTheory
// operators and keep their normal SetTheory definition.
def add;
def interleave;
def decimate;
// rotl - Rotate set left by a number of elements.
// example: (rotl mask0, 3) -> [3, 4, 5, 6, 0, 1, 2]
//          (the example corresponds to a mask0 of 7 elements)
def rotl;
// rotr - Rotate set right by a number of elements.
// example: (rotr mask0, 3) -> [4, 5, 6, 0, 1, 2, 3]
def rotr;
// highhalf - Take only the high half of the input, i.e. its trailing elements.
// example: (highhalf mask0) -> [4, 5, 6, 7] (assuming mask0 had 8 elements)
def highhalf;
// lowhalf - Take only the low half of the input.
// example: (lowhalf mask0) -> [0, 1, 2, 3] (assuming mask0 had 8 elements)
def lowhalf;
// rev - Perform a variable-width reversal of the elements. The zero'th argument
//       is a width in bits to reverse. The lanes this maps to are determined
//       based on the element width of the underlying type.
// example: (rev 32, mask0) -> [3, 2, 1, 0, 7, 6, 5, 4] (if 8-bit elements)
// example: (rev 32, mask0) -> [1, 0, 3, 2]             (if 16-bit elements)
def rev;
// mask0 - The initial sequence of lanes for shuffle ARG0. Tagged with
//         MaskExpand.
def mask0 : MaskExpand;
// mask1 - The initial sequence of lanes for shuffle ARG1
def mask1 : MaskExpand;
179
// An Operation with no DAGs (the default empty list).
def OP_NONE : Operation;
// Like OP_NONE, but with the Unavailable bit set.
def OP_UNAVAILABLE : Operation {
  let Unavailable = 1;
}
184
185//===----------------------------------------------------------------------===//
186// Instruction definitions
187//===----------------------------------------------------------------------===//
188
189// Every intrinsic subclasses "Inst". An intrinsic has a name, a prototype and
190// a sequence of typespecs.
191//
192// The name is the base name of the intrinsic, for example "vget_lane". This is
193// then mangled by the tblgen backend to add type information ("vget_lane_s16").
194//
195// A typespec is a sequence of uppercase characters (modifiers) followed by one
196// lowercase character. A typespec encodes a particular "base type" of the
197// intrinsic.
198//
// An example typespec is "Qs" - quad-size short - int16x8_t. The available
// typespec codes are given below.
201//
202// The string given to an Inst class is a sequence of typespecs. The intrinsic
203// is instantiated for every typespec in the sequence. For example "sdQsQd".
204//
205// The prototype is a string that defines the return type of the intrinsic
206// and the type of each argument. The return type and every argument gets a
207// set of "modifiers" that can change in some way the "base type" of the
208// intrinsic.
209//
210// Typespecs
211// ---------
212// c: char
213// s: short
214// i: int
215// l: long
216// k: 128-bit long
217// f: float
218// h: half-float
219// d: double
220// b: bfloat16
221// m: mfloat8
222//
223// Typespec modifiers
224// ------------------
225// S: scalar, only used for function mangling.
226// U: unsigned
227// Q: 128b
228// H: 128b without mangling 'q'
229// P: polynomial
230//
231// Prototype modifiers
232// -------------------
233// prototype: return (arg, arg, ...)
234//
235// Each type modifier is either a single character, or a group surrounded by
236// parentheses.
237//
238// .: default
239// v: change to void category.
240// S: change to signed integer category.
241// U: change to unsigned integer category.
242// F: change to floating category.
243// B: change to BFloat16
244// P: change to polynomial category.
245// p: change polynomial to equivalent integer category. Otherwise nop.
246// V: change to fpm_t
247//
248// >: double element width (vector size unchanged).
249// <: half element width (vector size unchanged).
250//
251// 1: change to scalar.
252// 2: change to struct of two vectors.
253// 3: change to struct of three vectors.
254// 4: change to struct of four vectors.
255//
256// *: make a pointer argument.
257// c: make a constant argument (for pointers).
258//
259// Q: force 128-bit width.
260// q: force 64-bit width.
261//
262// I: make 32-bit signed scalar immediate
263// !: make this the key type passed to CGBuiltin.cpp in a polymorphic call.
264
265
// Every intrinsic subclasses Inst.
//
// Template arguments (each is copied into the field noted):
//   n  - base name of the intrinsic           -> Name
//   p  - prototype string                     -> Prototype
//   t  - sequence of typespecs                -> Types
//   o  - the intrinsic's Operation            -> Operation
//   ch - immediate checks, empty by default   -> ImmChecks
class Inst<string n, string p, string t, Operation o,
           list<ImmCheck> ch = []> {
  string Name = n;
  string Prototype = p;
  string Types = t;
  // Guard strings: ArchGuard defaults to empty, TargetGuard to "neon".
  string ArchGuard = "";
  string TargetGuard = "neon";

  Operation Operation = o;
  // Flags, all cleared by default.
  // NOTE(review): their consumers are not visible in this file.
  bit BigEndianSafe = 0;
  bit isShift = 0;
  bit isScalarShift = 0;
  bit isScalarNarrowShift = 0;
  list<ImmCheck> ImmChecks = ch;

  // Certain intrinsics have different names than their representative
  // instructions. This field allows us to handle this correctly when we
  // are generating tests.
  string InstName = "";

  // Certain intrinsics even though they are not a WOpInst or LOpInst,
  // generate a WOpInst/LOpInst instruction (see below for definition
  // of a WOpInst/LOpInst). For testing purposes we need to know
  // this. Ex: vset_lane which outputs vmov instructions.
  bit isHiddenWInst = 0;
  bit isHiddenLInst = 0;

  // Empty by default. NOTE(review): meaning is not documented in this file.
  string CartesianProductWith = "";
}
295
// The following instruction classes are implemented via builtins; each one
// passes OP_NONE as its operation and forwards the optional ImmCheck list.
// These declarations are used to generate Builtins.def:
//
// SInst: Instruction with signed/unsigned suffix (e.g., "s8", "u8", "p8")
// IInst: Instruction with generic integer suffix (e.g., "i8")
// WInst: Instruction with only bit size suffix (e.g., "8")
// VInst: NOTE(review): not described by the original comment - presumably an
//        instruction without a type suffix; confirm against the NEON emitter.
class SInst<string n, string p, string t, list<ImmCheck> ch = []>
    : Inst<n, p, t, OP_NONE, ch>;
class IInst<string n, string p, string t, list<ImmCheck> ch = []>
    : Inst<n, p, t, OP_NONE, ch>;
class WInst<string n, string p, string t, list<ImmCheck> ch = []>
    : Inst<n, p, t, OP_NONE, ch>;
class VInst<string n, string p, string t, list<ImmCheck> ch = []>
    : Inst<n, p, t, OP_NONE, ch>;
306
// The following instruction classes are implemented via operators
// instead of builtins. As such these declarations are only used for
// the purpose of generating tests. Each class forwards its Operation to
// Inst and leaves the ImmCheck list at its empty default.
//
// SOpInst:       Instruction with signed/unsigned suffix (e.g., "s8",
//                "u8", "p8").
// IOpInst:       Instruction with generic integer suffix (e.g., "i8").
// WOpInst:       Instruction with bit size only suffix (e.g., "8").
// LOpInst:       Logical instruction with no bit size suffix.
// NoTestOpInst:  Intrinsic that has no corresponding instruction.
class SOpInst<string n, string p, string t, Operation o>
    : Inst<n, p, t, o>;
class IOpInst<string n, string p, string t, Operation o>
    : Inst<n, p, t, o>;
class WOpInst<string n, string p, string t, Operation o>
    : Inst<n, p, t, o>;
class LOpInst<string n, string p, string t, Operation o>
    : Inst<n, p, t, o>;
class NoTestOpInst<string n, string p, string t, Operation o>
    : Inst<n, p, t, o>;
322