1 /* Builtins' description for AArch64 SIMD architecture.
2 Copyright (C) 2011-2022 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #define IN_TARGET_CODE 1
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "function.h"
28 #include "basic-block.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "gimple.h"
32 #include "ssa.h"
33 #include "memmodel.h"
34 #include "tm_p.h"
35 #include "expmed.h"
36 #include "optabs.h"
37 #include "recog.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "explow.h"
42 #include "expr.h"
43 #include "langhooks.h"
44 #include "gimple-iterator.h"
45 #include "case-cfn-macros.h"
46 #include "emit-rtl.h"
47 #include "stringpool.h"
48 #include "attribs.h"
49 #include "gimple-fold.h"
50
51 #define v8qi_UP E_V8QImode
52 #define v8di_UP E_V8DImode
53 #define v4hi_UP E_V4HImode
54 #define v4hf_UP E_V4HFmode
55 #define v2si_UP E_V2SImode
56 #define v2sf_UP E_V2SFmode
57 #define v1df_UP E_V1DFmode
58 #define di_UP E_DImode
59 #define df_UP E_DFmode
60 #define v16qi_UP E_V16QImode
61 #define v8hi_UP E_V8HImode
62 #define v8hf_UP E_V8HFmode
63 #define v4si_UP E_V4SImode
64 #define v4sf_UP E_V4SFmode
65 #define v2di_UP E_V2DImode
66 #define v2df_UP E_V2DFmode
67 #define ti_UP E_TImode
68 #define oi_UP E_OImode
69 #define ci_UP E_CImode
70 #define xi_UP E_XImode
71 #define si_UP E_SImode
72 #define sf_UP E_SFmode
73 #define hi_UP E_HImode
74 #define hf_UP E_HFmode
75 #define qi_UP E_QImode
76 #define bf_UP E_BFmode
77 #define v4bf_UP E_V4BFmode
78 #define v8bf_UP E_V8BFmode
79 #define v2x8qi_UP E_V2x8QImode
80 #define v2x4hi_UP E_V2x4HImode
81 #define v2x4hf_UP E_V2x4HFmode
82 #define v2x4bf_UP E_V2x4BFmode
83 #define v2x2si_UP E_V2x2SImode
84 #define v2x2sf_UP E_V2x2SFmode
85 #define v2x1di_UP E_V2x1DImode
86 #define v2x1df_UP E_V2x1DFmode
87 #define v2x16qi_UP E_V2x16QImode
88 #define v2x8hi_UP E_V2x8HImode
89 #define v2x8hf_UP E_V2x8HFmode
90 #define v2x8bf_UP E_V2x8BFmode
91 #define v2x4si_UP E_V2x4SImode
92 #define v2x4sf_UP E_V2x4SFmode
93 #define v2x2di_UP E_V2x2DImode
94 #define v2x2df_UP E_V2x2DFmode
95 #define v3x8qi_UP E_V3x8QImode
96 #define v3x4hi_UP E_V3x4HImode
97 #define v3x4hf_UP E_V3x4HFmode
98 #define v3x4bf_UP E_V3x4BFmode
99 #define v3x2si_UP E_V3x2SImode
100 #define v3x2sf_UP E_V3x2SFmode
101 #define v3x1di_UP E_V3x1DImode
102 #define v3x1df_UP E_V3x1DFmode
103 #define v3x16qi_UP E_V3x16QImode
104 #define v3x8hi_UP E_V3x8HImode
105 #define v3x8hf_UP E_V3x8HFmode
106 #define v3x8bf_UP E_V3x8BFmode
107 #define v3x4si_UP E_V3x4SImode
108 #define v3x4sf_UP E_V3x4SFmode
109 #define v3x2di_UP E_V3x2DImode
110 #define v3x2df_UP E_V3x2DFmode
111 #define v4x8qi_UP E_V4x8QImode
112 #define v4x4hi_UP E_V4x4HImode
113 #define v4x4hf_UP E_V4x4HFmode
114 #define v4x4bf_UP E_V4x4BFmode
115 #define v4x2si_UP E_V4x2SImode
116 #define v4x2sf_UP E_V4x2SFmode
117 #define v4x1di_UP E_V4x1DImode
118 #define v4x1df_UP E_V4x1DFmode
119 #define v4x16qi_UP E_V4x16QImode
120 #define v4x8hi_UP E_V4x8HImode
121 #define v4x8hf_UP E_V4x8HFmode
122 #define v4x8bf_UP E_V4x8BFmode
123 #define v4x4si_UP E_V4x4SImode
124 #define v4x4sf_UP E_V4x4SFmode
125 #define v4x2di_UP E_V4x2DImode
126 #define v4x2df_UP E_V4x2DFmode
127 #define UP(X) X##_UP
128
129 #define SIMD_MAX_BUILTIN_ARGS 5
130
131 enum aarch64_type_qualifiers
132 {
133 /* T foo. */
134 qualifier_none = 0x0,
135 /* unsigned T foo. */
136 qualifier_unsigned = 0x1, /* 1 << 0 */
137 /* const T foo. */
138 qualifier_const = 0x2, /* 1 << 1 */
139 /* T *foo. */
140 qualifier_pointer = 0x4, /* 1 << 2 */
141 /* Used when expanding arguments if an operand could
142 be an immediate. */
143 qualifier_immediate = 0x8, /* 1 << 3 */
144 qualifier_maybe_immediate = 0x10, /* 1 << 4 */
145 /* void foo (...). */
146 qualifier_void = 0x20, /* 1 << 5 */
147 /* Some patterns may have internal operands; this qualifier is an
148 instruction to the initialisation code to skip this operand. */
149 qualifier_internal = 0x40, /* 1 << 6 */
150 /* Some builtins should use the T_*mode* encoded in a simd_builtin_datum
151 rather than using the type of the operand. */
152 qualifier_map_mode = 0x80, /* 1 << 7 */
153 /* qualifier_pointer | qualifier_map_mode */
154 qualifier_pointer_map_mode = 0x84,
155 /* qualifier_const | qualifier_pointer | qualifier_map_mode */
156 qualifier_const_pointer_map_mode = 0x86,
157 /* Polynomial types. */
158 qualifier_poly = 0x100,
159 /* Lane indices - must be in range, and flipped for bigendian. */
160 qualifier_lane_index = 0x200,
161 /* Lane indices for single lane structure loads and stores. */
162 qualifier_struct_load_store_lane_index = 0x400,
163 /* Lane indices selected in pairs - must be in range, and flipped for
164 bigendian. */
165 qualifier_lane_pair_index = 0x800,
166 /* Lane indices selected in quadtuplets - must be in range, and flipped for
167 bigendian. */
168 qualifier_lane_quadtup_index = 0x1000,
169 };
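/* Each builtin's prototype is described by one of the qualifier arrays
   defined below, read with entry 0 as the return type and the remaining
   entries as the argument types.  As an illustrative sketch (the actual
   builtin names come from aarch64-simd-builtins.def), a TYPES_BINOP
   builtin whose pattern operates on V4SI would be registered roughly as

     int32x4_t __builtin_aarch64_<name>v4si (int32x4_t, int32x4_t);

   while qualifier_unsigned in each position would give uint32x4_t
   instead.  */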
170
171 /* Flags that describe what a function might do. */
172 const unsigned int FLAG_NONE = 0U;
173 const unsigned int FLAG_READ_FPCR = 1U << 0;
174 const unsigned int FLAG_RAISE_FP_EXCEPTIONS = 1U << 1;
175 const unsigned int FLAG_READ_MEMORY = 1U << 2;
176 const unsigned int FLAG_PREFETCH_MEMORY = 1U << 3;
177 const unsigned int FLAG_WRITE_MEMORY = 1U << 4;
178
179 /* Not all FP intrinsics raise FP exceptions or read the FPCR register;
180 use this flag to suppress the default assumption that they do. */
181 const unsigned int FLAG_AUTO_FP = 1U << 5;
182
183 const unsigned int FLAG_FP = FLAG_READ_FPCR | FLAG_RAISE_FP_EXCEPTIONS;
184 const unsigned int FLAG_ALL = FLAG_READ_FPCR | FLAG_RAISE_FP_EXCEPTIONS
185 | FLAG_READ_MEMORY | FLAG_PREFETCH_MEMORY | FLAG_WRITE_MEMORY;
186 const unsigned int FLAG_STORE = FLAG_WRITE_MEMORY | FLAG_AUTO_FP;
187 const unsigned int FLAG_LOAD = FLAG_READ_MEMORY | FLAG_AUTO_FP;
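/* To illustrate how these flags interact (see aarch64_call_properties
   below): a builtin declared with FLAG_NONE but returning a float mode is
   still assumed to read the FPCR and possibly raise FP exceptions, because
   FLAG_FP is added automatically for FLOAT_MODE_P results.  FLAG_AUTO_FP,
   as used in FLAG_LOAD and FLAG_STORE, suppresses that automatic addition,
   so a vector load or store is modelled purely as a memory access.  */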
188
189 typedef struct
190 {
191 const char *name;
192 machine_mode mode;
193 const enum insn_code code;
194 unsigned int fcode;
195 enum aarch64_type_qualifiers *qualifiers;
196 unsigned int flags;
197 } aarch64_simd_builtin_datum;
198
199 static enum aarch64_type_qualifiers
200 aarch64_types_unop_qualifiers[SIMD_MAX_BUILTIN_ARGS]
201 = { qualifier_none, qualifier_none };
202 #define TYPES_UNOP (aarch64_types_unop_qualifiers)
203 static enum aarch64_type_qualifiers
204 aarch64_types_unopu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
205 = { qualifier_unsigned, qualifier_unsigned };
206 #define TYPES_UNOPU (aarch64_types_unopu_qualifiers)
207 static enum aarch64_type_qualifiers
208 aarch64_types_unopus_qualifiers[SIMD_MAX_BUILTIN_ARGS]
209 = { qualifier_unsigned, qualifier_none };
210 #define TYPES_UNOPUS (aarch64_types_unopus_qualifiers)
211 static enum aarch64_type_qualifiers
212 aarch64_types_binop_qualifiers[SIMD_MAX_BUILTIN_ARGS]
213 = { qualifier_none, qualifier_none, qualifier_maybe_immediate };
214 #define TYPES_BINOP (aarch64_types_binop_qualifiers)
215 static enum aarch64_type_qualifiers
216 aarch64_types_binopu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
217 = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned };
218 #define TYPES_BINOPU (aarch64_types_binopu_qualifiers)
219 static enum aarch64_type_qualifiers
220 aarch64_types_binop_uus_qualifiers[SIMD_MAX_BUILTIN_ARGS]
221 = { qualifier_unsigned, qualifier_unsigned, qualifier_none };
222 #define TYPES_BINOP_UUS (aarch64_types_binop_uus_qualifiers)
223 static enum aarch64_type_qualifiers
224 aarch64_types_binop_ssu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
225 = { qualifier_none, qualifier_none, qualifier_unsigned };
226 #define TYPES_BINOP_SSU (aarch64_types_binop_ssu_qualifiers)
227 static enum aarch64_type_qualifiers
228 aarch64_types_binop_uss_qualifiers[SIMD_MAX_BUILTIN_ARGS]
229 = { qualifier_unsigned, qualifier_none, qualifier_none };
230 #define TYPES_BINOP_USS (aarch64_types_binop_uss_qualifiers)
231 static enum aarch64_type_qualifiers
232 aarch64_types_binopp_qualifiers[SIMD_MAX_BUILTIN_ARGS]
233 = { qualifier_poly, qualifier_poly, qualifier_poly };
234 #define TYPES_BINOPP (aarch64_types_binopp_qualifiers)
235 static enum aarch64_type_qualifiers
236 aarch64_types_binop_ppu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
237 = { qualifier_poly, qualifier_poly, qualifier_unsigned };
238 #define TYPES_BINOP_PPU (aarch64_types_binop_ppu_qualifiers)
239
240 static enum aarch64_type_qualifiers
241 aarch64_types_ternop_qualifiers[SIMD_MAX_BUILTIN_ARGS]
242 = { qualifier_none, qualifier_none, qualifier_none, qualifier_none };
243 #define TYPES_TERNOP (aarch64_types_ternop_qualifiers)
244 static enum aarch64_type_qualifiers
245 aarch64_types_ternop_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
246 = { qualifier_none, qualifier_none, qualifier_none, qualifier_lane_index };
247 #define TYPES_TERNOP_LANE (aarch64_types_ternop_lane_qualifiers)
248 static enum aarch64_type_qualifiers
249 aarch64_types_ternopu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
250 = { qualifier_unsigned, qualifier_unsigned,
251 qualifier_unsigned, qualifier_unsigned };
252 #define TYPES_TERNOPU (aarch64_types_ternopu_qualifiers)
253 static enum aarch64_type_qualifiers
254 aarch64_types_ternopu_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
255 = { qualifier_unsigned, qualifier_unsigned,
256 qualifier_unsigned, qualifier_lane_index };
257 #define TYPES_TERNOPU_LANE (aarch64_types_ternopu_lane_qualifiers)
258 static enum aarch64_type_qualifiers
259 aarch64_types_ternopu_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
260 = { qualifier_unsigned, qualifier_unsigned,
261 qualifier_unsigned, qualifier_immediate };
262 #define TYPES_TERNOPUI (aarch64_types_ternopu_imm_qualifiers)
263 static enum aarch64_type_qualifiers
264 aarch64_types_ternop_sssu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
265 = { qualifier_none, qualifier_none, qualifier_none, qualifier_unsigned };
266 #define TYPES_TERNOP_SSSU (aarch64_types_ternop_sssu_qualifiers)
267 static enum aarch64_type_qualifiers
268 aarch64_types_ternop_ssus_qualifiers[SIMD_MAX_BUILTIN_ARGS]
269 = { qualifier_none, qualifier_none, qualifier_unsigned, qualifier_none };
270 #define TYPES_TERNOP_SSUS (aarch64_types_ternop_ssus_qualifiers)
271 static enum aarch64_type_qualifiers
272 aarch64_types_ternop_suss_qualifiers[SIMD_MAX_BUILTIN_ARGS]
273 = { qualifier_none, qualifier_unsigned, qualifier_none, qualifier_none };
274 #define TYPES_TERNOP_SUSS (aarch64_types_ternop_suss_qualifiers)
275 static enum aarch64_type_qualifiers
276 aarch64_types_binop_pppu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
277 = { qualifier_poly, qualifier_poly, qualifier_poly, qualifier_unsigned };
278 #define TYPES_TERNOP_PPPU (aarch64_types_binop_pppu_qualifiers)
279
280 static enum aarch64_type_qualifiers
281 aarch64_types_quadop_lane_pair_qualifiers[SIMD_MAX_BUILTIN_ARGS]
282 = { qualifier_none, qualifier_none, qualifier_none,
283 qualifier_none, qualifier_lane_pair_index };
284 #define TYPES_QUADOP_LANE_PAIR (aarch64_types_quadop_lane_pair_qualifiers)
285 static enum aarch64_type_qualifiers
286 aarch64_types_quadop_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
287 = { qualifier_none, qualifier_none, qualifier_none,
288 qualifier_none, qualifier_lane_index };
289 #define TYPES_QUADOP_LANE (aarch64_types_quadop_lane_qualifiers)
290 static enum aarch64_type_qualifiers
291 aarch64_types_quadopu_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
292 = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned,
293 qualifier_unsigned, qualifier_lane_index };
294 #define TYPES_QUADOPU_LANE (aarch64_types_quadopu_lane_qualifiers)
295
296 static enum aarch64_type_qualifiers
297 aarch64_types_quadopssus_lane_quadtup_qualifiers[SIMD_MAX_BUILTIN_ARGS]
298 = { qualifier_none, qualifier_none, qualifier_unsigned,
299 qualifier_none, qualifier_lane_quadtup_index };
300 #define TYPES_QUADOPSSUS_LANE_QUADTUP \
301 (aarch64_types_quadopssus_lane_quadtup_qualifiers)
302 static enum aarch64_type_qualifiers
303 aarch64_types_quadopsssu_lane_quadtup_qualifiers[SIMD_MAX_BUILTIN_ARGS]
304 = { qualifier_none, qualifier_none, qualifier_none,
305 qualifier_unsigned, qualifier_lane_quadtup_index };
306 #define TYPES_QUADOPSSSU_LANE_QUADTUP \
307 (aarch64_types_quadopsssu_lane_quadtup_qualifiers)
308
309 static enum aarch64_type_qualifiers
310 aarch64_types_quadopu_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
311 = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned,
312 qualifier_unsigned, qualifier_immediate };
313 #define TYPES_QUADOPUI (aarch64_types_quadopu_imm_qualifiers)
314
315 static enum aarch64_type_qualifiers
316 aarch64_types_binop_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
317 = { qualifier_none, qualifier_none, qualifier_immediate };
318 #define TYPES_GETREG (aarch64_types_binop_imm_qualifiers)
319 #define TYPES_SHIFTIMM (aarch64_types_binop_imm_qualifiers)
320 static enum aarch64_type_qualifiers
321 aarch64_types_shift_to_unsigned_qualifiers[SIMD_MAX_BUILTIN_ARGS]
322 = { qualifier_unsigned, qualifier_none, qualifier_immediate };
323 #define TYPES_SHIFTIMM_USS (aarch64_types_shift_to_unsigned_qualifiers)
324 static enum aarch64_type_qualifiers
325 aarch64_types_fcvt_from_unsigned_qualifiers[SIMD_MAX_BUILTIN_ARGS]
326 = { qualifier_none, qualifier_unsigned, qualifier_immediate };
327 #define TYPES_FCVTIMM_SUS (aarch64_types_fcvt_from_unsigned_qualifiers)
328 static enum aarch64_type_qualifiers
329 aarch64_types_unsigned_shift_qualifiers[SIMD_MAX_BUILTIN_ARGS]
330 = { qualifier_unsigned, qualifier_unsigned, qualifier_immediate };
331 #define TYPES_USHIFTIMM (aarch64_types_unsigned_shift_qualifiers)
332 #define TYPES_USHIFT2IMM (aarch64_types_ternopu_imm_qualifiers)
333 static enum aarch64_type_qualifiers
334 aarch64_types_shift2_to_unsigned_qualifiers[SIMD_MAX_BUILTIN_ARGS]
335 = { qualifier_unsigned, qualifier_unsigned, qualifier_none, qualifier_immediate };
336 #define TYPES_SHIFT2IMM_UUSS (aarch64_types_shift2_to_unsigned_qualifiers)
337
338 static enum aarch64_type_qualifiers
339 aarch64_types_ternop_s_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
340 = { qualifier_none, qualifier_none, qualifier_none, qualifier_immediate};
341 #define TYPES_SETREG (aarch64_types_ternop_s_imm_qualifiers)
342 #define TYPES_SHIFTINSERT (aarch64_types_ternop_s_imm_qualifiers)
343 #define TYPES_SHIFTACC (aarch64_types_ternop_s_imm_qualifiers)
344 #define TYPES_SHIFT2IMM (aarch64_types_ternop_s_imm_qualifiers)
345
346 static enum aarch64_type_qualifiers
347 aarch64_types_ternop_p_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
348 = { qualifier_poly, qualifier_poly, qualifier_poly, qualifier_immediate};
349 #define TYPES_SHIFTINSERTP (aarch64_types_ternop_p_imm_qualifiers)
350
351 static enum aarch64_type_qualifiers
352 aarch64_types_unsigned_shiftacc_qualifiers[SIMD_MAX_BUILTIN_ARGS]
353 = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned,
354 qualifier_immediate };
355 #define TYPES_USHIFTACC (aarch64_types_unsigned_shiftacc_qualifiers)
356
357 static enum aarch64_type_qualifiers
358 aarch64_types_load1_qualifiers[SIMD_MAX_BUILTIN_ARGS]
359 = { qualifier_none, qualifier_const_pointer_map_mode };
360 #define TYPES_LOAD1 (aarch64_types_load1_qualifiers)
361 #define TYPES_LOADSTRUCT (aarch64_types_load1_qualifiers)
362 static enum aarch64_type_qualifiers
363 aarch64_types_load1_u_qualifiers[SIMD_MAX_BUILTIN_ARGS]
364 = { qualifier_unsigned, qualifier_const_pointer_map_mode };
365 #define TYPES_LOAD1_U (aarch64_types_load1_u_qualifiers)
366 #define TYPES_LOADSTRUCT_U (aarch64_types_load1_u_qualifiers)
367 static enum aarch64_type_qualifiers
368 aarch64_types_load1_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
369 = { qualifier_poly, qualifier_const_pointer_map_mode };
370 #define TYPES_LOAD1_P (aarch64_types_load1_p_qualifiers)
371 #define TYPES_LOADSTRUCT_P (aarch64_types_load1_p_qualifiers)
372
373 static enum aarch64_type_qualifiers
374 aarch64_types_loadstruct_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
375 = { qualifier_none, qualifier_const_pointer_map_mode,
376 qualifier_none, qualifier_struct_load_store_lane_index };
377 #define TYPES_LOADSTRUCT_LANE (aarch64_types_loadstruct_lane_qualifiers)
378 static enum aarch64_type_qualifiers
379 aarch64_types_loadstruct_lane_u_qualifiers[SIMD_MAX_BUILTIN_ARGS]
380 = { qualifier_unsigned, qualifier_const_pointer_map_mode,
381 qualifier_unsigned, qualifier_struct_load_store_lane_index };
382 #define TYPES_LOADSTRUCT_LANE_U (aarch64_types_loadstruct_lane_u_qualifiers)
383 static enum aarch64_type_qualifiers
384 aarch64_types_loadstruct_lane_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
385 = { qualifier_poly, qualifier_const_pointer_map_mode,
386 qualifier_poly, qualifier_struct_load_store_lane_index };
387 #define TYPES_LOADSTRUCT_LANE_P (aarch64_types_loadstruct_lane_p_qualifiers)
388
389 static enum aarch64_type_qualifiers
390 aarch64_types_bsl_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
391 = { qualifier_poly, qualifier_unsigned,
392 qualifier_poly, qualifier_poly };
393 #define TYPES_BSL_P (aarch64_types_bsl_p_qualifiers)
394 static enum aarch64_type_qualifiers
395 aarch64_types_bsl_s_qualifiers[SIMD_MAX_BUILTIN_ARGS]
396 = { qualifier_none, qualifier_unsigned,
397 qualifier_none, qualifier_none };
398 #define TYPES_BSL_S (aarch64_types_bsl_s_qualifiers)
399 static enum aarch64_type_qualifiers
400 aarch64_types_bsl_u_qualifiers[SIMD_MAX_BUILTIN_ARGS]
401 = { qualifier_unsigned, qualifier_unsigned,
402 qualifier_unsigned, qualifier_unsigned };
403 #define TYPES_BSL_U (aarch64_types_bsl_u_qualifiers)
404
405 /* The first argument (return type) of a store should be void type,
406 which we represent with qualifier_void. The first operand will be
407 a DImode pointer to the location to store to, so we must use
408 qualifier_map_mode | qualifier_pointer to build a pointer to the
409 element type of the vector. */
410 static enum aarch64_type_qualifiers
411 aarch64_types_store1_qualifiers[SIMD_MAX_BUILTIN_ARGS]
412 = { qualifier_void, qualifier_pointer_map_mode, qualifier_none };
413 #define TYPES_STORE1 (aarch64_types_store1_qualifiers)
414 #define TYPES_STORESTRUCT (aarch64_types_store1_qualifiers)
415 static enum aarch64_type_qualifiers
416 aarch64_types_store1_u_qualifiers[SIMD_MAX_BUILTIN_ARGS]
417 = { qualifier_void, qualifier_pointer_map_mode, qualifier_unsigned };
418 #define TYPES_STORE1_U (aarch64_types_store1_u_qualifiers)
419 #define TYPES_STORESTRUCT_U (aarch64_types_store1_u_qualifiers)
420 static enum aarch64_type_qualifiers
421 aarch64_types_store1_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
422 = { qualifier_void, qualifier_pointer_map_mode, qualifier_poly };
423 #define TYPES_STORE1_P (aarch64_types_store1_p_qualifiers)
424 #define TYPES_STORESTRUCT_P (aarch64_types_store1_p_qualifiers)
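/* For example, with TYPES_STORE1 above, a single-vector store builtin whose
   pattern operates on V4SI ends up with a signature roughly of the form
   (an illustrative sketch, not an exact prototype):

     void __builtin_aarch64_<name>v4si (int32_t *, int32x4_t);

   the pointer's element type being derived from the mode recorded in the
   simd_builtin_datum, as described above.  */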
425
426 static enum aarch64_type_qualifiers
427 aarch64_types_storestruct_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
428 = { qualifier_void, qualifier_pointer_map_mode,
429 qualifier_none, qualifier_struct_load_store_lane_index };
430 #define TYPES_STORESTRUCT_LANE (aarch64_types_storestruct_lane_qualifiers)
431 static enum aarch64_type_qualifiers
432 aarch64_types_storestruct_lane_u_qualifiers[SIMD_MAX_BUILTIN_ARGS]
433 = { qualifier_void, qualifier_pointer_map_mode,
434 qualifier_unsigned, qualifier_struct_load_store_lane_index };
435 #define TYPES_STORESTRUCT_LANE_U (aarch64_types_storestruct_lane_u_qualifiers)
436 static enum aarch64_type_qualifiers
437 aarch64_types_storestruct_lane_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
438 = { qualifier_void, qualifier_pointer_map_mode,
439 qualifier_poly, qualifier_struct_load_store_lane_index };
440 #define TYPES_STORESTRUCT_LANE_P (aarch64_types_storestruct_lane_p_qualifiers)
441
442 #define CF0(N, X) CODE_FOR_aarch64_##N##X
443 #define CF1(N, X) CODE_FOR_##N##X##1
444 #define CF2(N, X) CODE_FOR_##N##X##2
445 #define CF3(N, X) CODE_FOR_##N##X##3
446 #define CF4(N, X) CODE_FOR_##N##X##4
447 #define CF10(N, X) CODE_FOR_##N##X
448
449 #define VAR1(T, N, MAP, FLAG, A) \
450 {#N #A, UP (A), CF##MAP (N, A), 0, TYPES_##T, FLAG_##FLAG},
451 #define VAR2(T, N, MAP, FLAG, A, B) \
452 VAR1 (T, N, MAP, FLAG, A) \
453 VAR1 (T, N, MAP, FLAG, B)
454 #define VAR3(T, N, MAP, FLAG, A, B, C) \
455 VAR2 (T, N, MAP, FLAG, A, B) \
456 VAR1 (T, N, MAP, FLAG, C)
457 #define VAR4(T, N, MAP, FLAG, A, B, C, D) \
458 VAR3 (T, N, MAP, FLAG, A, B, C) \
459 VAR1 (T, N, MAP, FLAG, D)
460 #define VAR5(T, N, MAP, FLAG, A, B, C, D, E) \
461 VAR4 (T, N, MAP, FLAG, A, B, C, D) \
462 VAR1 (T, N, MAP, FLAG, E)
463 #define VAR6(T, N, MAP, FLAG, A, B, C, D, E, F) \
464 VAR5 (T, N, MAP, FLAG, A, B, C, D, E) \
465 VAR1 (T, N, MAP, FLAG, F)
466 #define VAR7(T, N, MAP, FLAG, A, B, C, D, E, F, G) \
467 VAR6 (T, N, MAP, FLAG, A, B, C, D, E, F) \
468 VAR1 (T, N, MAP, FLAG, G)
469 #define VAR8(T, N, MAP, FLAG, A, B, C, D, E, F, G, H) \
470 VAR7 (T, N, MAP, FLAG, A, B, C, D, E, F, G) \
471 VAR1 (T, N, MAP, FLAG, H)
472 #define VAR9(T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I) \
473 VAR8 (T, N, MAP, FLAG, A, B, C, D, E, F, G, H) \
474 VAR1 (T, N, MAP, FLAG, I)
475 #define VAR10(T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I, J) \
476 VAR9 (T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I) \
477 VAR1 (T, N, MAP, FLAG, J)
478 #define VAR11(T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K) \
479 VAR10 (T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I, J) \
480 VAR1 (T, N, MAP, FLAG, K)
481 #define VAR12(T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L) \
482 VAR11 (T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K) \
483 VAR1 (T, N, MAP, FLAG, L)
484 #define VAR13(T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L, M) \
485 VAR12 (T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L) \
486 VAR1 (T, N, MAP, FLAG, M)
487 #define VAR14(T, X, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L, M, N) \
488 VAR13 (T, X, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L, M) \
489 VAR1 (T, X, MAP, FLAG, N)
490 #define VAR15(T, X, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L, M, N, O) \
491 VAR14 (T, X, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L, M, N) \
492 VAR1 (T, X, MAP, FLAG, O)
493 #define VAR16(T, X, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \
494 VAR15 (T, X, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L, M, N, O) \
495 VAR1 (T, X, MAP, FLAG, P)
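/* As a worked example of the machinery above (the invocation shown is
   hypothetical; the real uses live in aarch64-simd-builtins.def via the
   iterator macros from aarch64-builtin-iterators.h):

     VAR1 (BINOP, addp, 0, NONE, v4si)

   expands to the initializer

     {"addpv4si", E_V4SImode, CODE_FOR_aarch64_addpv4si, 0,
      TYPES_BINOP, FLAG_NONE},

   i.e. MAP selects the CF* macro used to form the insn code (CF0 prefixes
   "aarch64_"), and the fcode field is filled in later by
   aarch64_init_simd_builtin_functions.  */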
496
497 #include "aarch64-builtin-iterators.h"
498
499 static aarch64_simd_builtin_datum aarch64_simd_builtin_data[] = {
500 #include "aarch64-simd-builtins.def"
501 };
502
503 /* There are only 8 CRC32 builtins. Probably not worth their own .def file. */
504 #define AARCH64_CRC32_BUILTINS \
505 CRC32_BUILTIN (crc32b, QI) \
506 CRC32_BUILTIN (crc32h, HI) \
507 CRC32_BUILTIN (crc32w, SI) \
508 CRC32_BUILTIN (crc32x, DI) \
509 CRC32_BUILTIN (crc32cb, QI) \
510 CRC32_BUILTIN (crc32ch, HI) \
511 CRC32_BUILTIN (crc32cw, SI) \
512 CRC32_BUILTIN (crc32cx, DI)
513
514 /* The next 8 FCMLA intrinsics require some special handling compared to the
515 normal SIMD intrinsics. */
516 #define AARCH64_SIMD_FCMLA_LANEQ_BUILTINS \
517 FCMLA_LANEQ_BUILTIN (0, v2sf, fcmla, V2SF, false) \
518 FCMLA_LANEQ_BUILTIN (90, v2sf, fcmla, V2SF, false) \
519 FCMLA_LANEQ_BUILTIN (180, v2sf, fcmla, V2SF, false) \
520 FCMLA_LANEQ_BUILTIN (270, v2sf, fcmla, V2SF, false) \
521 FCMLA_LANEQ_BUILTIN (0, v4hf, fcmla_laneq, V4HF, true) \
522 FCMLA_LANEQ_BUILTIN (90, v4hf, fcmla_laneq, V4HF, true) \
523 FCMLA_LANEQ_BUILTIN (180, v4hf, fcmla_laneq, V4HF, true) \
524 FCMLA_LANEQ_BUILTIN (270, v4hf, fcmla_laneq, V4HF, true)
525
526 typedef struct
527 {
528 const char *name;
529 machine_mode mode;
530 const enum insn_code icode;
531 unsigned int fcode;
532 } aarch64_crc_builtin_datum;
533
534 /* Hold information about how to expand the FCMLA_LANEQ builtins. */
535 typedef struct
536 {
537 const char *name;
538 machine_mode mode;
539 const enum insn_code icode;
540 unsigned int fcode;
541 bool lane;
542 } aarch64_fcmla_laneq_builtin_datum;
543
544 #define CRC32_BUILTIN(N, M) \
545 AARCH64_BUILTIN_##N,
546
547 #define FCMLA_LANEQ_BUILTIN(I, N, X, M, T) \
548 AARCH64_SIMD_BUILTIN_FCMLA_LANEQ##I##_##M,
549
550 #undef VAR1
551 #define VAR1(T, N, MAP, FLAG, A) \
552 AARCH64_SIMD_BUILTIN_##T##_##N##A,
553
554 enum aarch64_builtins
555 {
556 AARCH64_BUILTIN_MIN,
557
558 AARCH64_BUILTIN_GET_FPCR,
559 AARCH64_BUILTIN_SET_FPCR,
560 AARCH64_BUILTIN_GET_FPSR,
561 AARCH64_BUILTIN_SET_FPSR,
562
563 AARCH64_BUILTIN_GET_FPCR64,
564 AARCH64_BUILTIN_SET_FPCR64,
565 AARCH64_BUILTIN_GET_FPSR64,
566 AARCH64_BUILTIN_SET_FPSR64,
567
568 AARCH64_BUILTIN_RSQRT_DF,
569 AARCH64_BUILTIN_RSQRT_SF,
570 AARCH64_BUILTIN_RSQRT_V2DF,
571 AARCH64_BUILTIN_RSQRT_V2SF,
572 AARCH64_BUILTIN_RSQRT_V4SF,
573 AARCH64_SIMD_BUILTIN_BASE,
574 AARCH64_SIMD_BUILTIN_LANE_CHECK,
575 #include "aarch64-simd-builtins.def"
576 /* The first enum element which is based on an insn_data pattern. */
577 AARCH64_SIMD_PATTERN_START = AARCH64_SIMD_BUILTIN_LANE_CHECK + 1,
578 AARCH64_SIMD_BUILTIN_MAX = AARCH64_SIMD_PATTERN_START
579 + ARRAY_SIZE (aarch64_simd_builtin_data) - 1,
580 AARCH64_CRC32_BUILTIN_BASE,
581 AARCH64_CRC32_BUILTINS
582 AARCH64_CRC32_BUILTIN_MAX,
583 /* ARMv8.3-A Pointer Authentication Builtins. */
584 AARCH64_PAUTH_BUILTIN_AUTIA1716,
585 AARCH64_PAUTH_BUILTIN_PACIA1716,
586 AARCH64_PAUTH_BUILTIN_AUTIB1716,
587 AARCH64_PAUTH_BUILTIN_PACIB1716,
588 AARCH64_PAUTH_BUILTIN_XPACLRI,
589 /* Special cased Armv8.3-A Complex FMA by Lane quad Builtins. */
590 AARCH64_SIMD_FCMLA_LANEQ_BUILTIN_BASE,
591 AARCH64_SIMD_FCMLA_LANEQ_BUILTINS
592 /* Builtin for the Armv8.3-A JavaScript conversion instruction. */
593 AARCH64_JSCVT,
594 /* TME builtins. */
595 AARCH64_TME_BUILTIN_TSTART,
596 AARCH64_TME_BUILTIN_TCOMMIT,
597 AARCH64_TME_BUILTIN_TTEST,
598 AARCH64_TME_BUILTIN_TCANCEL,
599 /* Armv8.5-a RNG instruction builtins. */
600 AARCH64_BUILTIN_RNG_RNDR,
601 AARCH64_BUILTIN_RNG_RNDRRS,
602 /* MEMTAG builtins. */
603 AARCH64_MEMTAG_BUILTIN_START,
604 AARCH64_MEMTAG_BUILTIN_IRG,
605 AARCH64_MEMTAG_BUILTIN_GMI,
606 AARCH64_MEMTAG_BUILTIN_SUBP,
607 AARCH64_MEMTAG_BUILTIN_INC_TAG,
608 AARCH64_MEMTAG_BUILTIN_SET_TAG,
609 AARCH64_MEMTAG_BUILTIN_GET_TAG,
610 AARCH64_MEMTAG_BUILTIN_END,
611 /* LS64 builtins. */
612 AARCH64_LS64_BUILTIN_LD64B,
613 AARCH64_LS64_BUILTIN_ST64B,
614 AARCH64_LS64_BUILTIN_ST64BV,
615 AARCH64_LS64_BUILTIN_ST64BV0,
616 AARCH64_REV16,
617 AARCH64_REV16L,
618 AARCH64_REV16LL,
619 AARCH64_RBIT,
620 AARCH64_RBITL,
621 AARCH64_RBITLL,
622 AARCH64_BUILTIN_MAX
623 };
624
625 #undef CRC32_BUILTIN
626 #define CRC32_BUILTIN(N, M) \
627 {"__builtin_aarch64_"#N, E_##M##mode, CODE_FOR_aarch64_##N, AARCH64_BUILTIN_##N},
628
629 static aarch64_crc_builtin_datum aarch64_crc_builtin_data[] = {
630 AARCH64_CRC32_BUILTINS
631 };
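/* With the definition above, the first entry of this table expands to

     {"__builtin_aarch64_crc32b", E_QImode, CODE_FOR_aarch64_crc32b,
      AARCH64_BUILTIN_crc32b},

   and similarly for the remaining seven CRC32 builtins.  */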
632
633
634 #undef FCMLA_LANEQ_BUILTIN
635 #define FCMLA_LANEQ_BUILTIN(I, N, X, M, T) \
636 {"__builtin_aarch64_fcmla_laneq"#I#N, E_##M##mode, CODE_FOR_aarch64_##X##I##N, \
637 AARCH64_SIMD_BUILTIN_FCMLA_LANEQ##I##_##M, T},
638
639 /* This table describes the mapping from each builtin to the instruction to
640 generate in the backend and how to invoke that instruction. */
641 static aarch64_fcmla_laneq_builtin_datum aarch64_fcmla_lane_builtin_data[] = {
642 AARCH64_SIMD_FCMLA_LANEQ_BUILTINS
643 };
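/* For illustration, the first FCMLA_LANEQ_BUILTIN invocation above expands
   here to

     {"__builtin_aarch64_fcmla_laneq0v2sf", E_V2SFmode,
      CODE_FOR_aarch64_fcmla0v2sf, AARCH64_SIMD_BUILTIN_FCMLA_LANEQ0_V2SF,
      false},

   the final field being the `lane' flag of
   aarch64_fcmla_laneq_builtin_datum.  */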
644
645 #undef CRC32_BUILTIN
646
647 static GTY(()) tree aarch64_builtin_decls[AARCH64_BUILTIN_MAX];
648
649 #define NUM_DREG_TYPES 6
650 #define NUM_QREG_TYPES 6
651
652 /* Internal scalar builtin types. These types are used to support
653 Neon intrinsic builtins. They are _not_ user-visible types. Therefore
654 the mangling for these types is implementation-defined. */
655 const char *aarch64_scalar_builtin_types[] = {
656 "__builtin_aarch64_simd_qi",
657 "__builtin_aarch64_simd_hi",
658 "__builtin_aarch64_simd_si",
659 "__builtin_aarch64_simd_hf",
660 "__builtin_aarch64_simd_sf",
661 "__builtin_aarch64_simd_di",
662 "__builtin_aarch64_simd_df",
663 "__builtin_aarch64_simd_poly8",
664 "__builtin_aarch64_simd_poly16",
665 "__builtin_aarch64_simd_poly64",
666 "__builtin_aarch64_simd_poly128",
667 "__builtin_aarch64_simd_ti",
668 "__builtin_aarch64_simd_uqi",
669 "__builtin_aarch64_simd_uhi",
670 "__builtin_aarch64_simd_usi",
671 "__builtin_aarch64_simd_udi",
672 "__builtin_aarch64_simd_ei",
673 "__builtin_aarch64_simd_oi",
674 "__builtin_aarch64_simd_ci",
675 "__builtin_aarch64_simd_xi",
676 "__builtin_aarch64_simd_bf",
677 NULL
678 };
679
680 #define ENTRY(E, M, Q, G) E,
681 enum aarch64_simd_type
682 {
683 #include "aarch64-simd-builtin-types.def"
684 ARM_NEON_H_TYPES_LAST
685 };
686 #undef ENTRY
687
688 struct GTY(()) aarch64_simd_type_info
689 {
690 enum aarch64_simd_type type;
691
692 /* Internal type name. */
693 const char *name;
694
695 /* Internal type name (mangled). The mangled names conform to the
696 AAPCS64 (see "Procedure Call Standard for the ARM 64-bit Architecture",
697 Appendix A). To qualify for emission with the mangled names defined in
698 that document, a vector type must not only be of the correct mode but also
699 be of the correct internal AdvSIMD vector type (e.g. __Int8x8_t); these
700 types are registered by aarch64_init_simd_builtin_types (). In other
701 words, vector types defined in other ways e.g. via vector_size attribute
702 will get default mangled names. */
703 const char *mangle;
704
705 /* Internal type. */
706 tree itype;
707
708 /* Element type. */
709 tree eltype;
710
711 /* Machine mode the internal type maps to. */
712 enum machine_mode mode;
713
714 /* Qualifiers. */
715 enum aarch64_type_qualifiers q;
716 };
717
718 #define ENTRY(E, M, Q, G) \
719 {E, "__" #E, #G "__" #E, NULL_TREE, NULL_TREE, E_##M##mode, qualifier_##Q},
720 static GTY(()) struct aarch64_simd_type_info aarch64_simd_types [] = {
721 #include "aarch64-simd-builtin-types.def"
722 };
723 #undef ENTRY
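/* As an illustration of the ENTRY expansion (the entry shown is
   representative; see aarch64-simd-builtin-types.def for the real list),
   a .def line such as

     ENTRY (Int8x8_t, V8QI, none, 10)

   becomes

     {Int8x8_t, "__Int8x8_t", "10__Int8x8_t", NULL_TREE, NULL_TREE,
      E_V8QImode, qualifier_none},

   where "10__Int8x8_t" is the AAPCS64 mangled name that is later attached
   to the type via the "Advanced SIMD type" attribute.  */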
724
725 static machine_mode aarch64_simd_tuple_modes[ARM_NEON_H_TYPES_LAST][3];
726 static GTY(()) tree aarch64_simd_tuple_types[ARM_NEON_H_TYPES_LAST][3];
727
728 static GTY(()) tree aarch64_simd_intOI_type_node = NULL_TREE;
729 static GTY(()) tree aarch64_simd_intCI_type_node = NULL_TREE;
730 static GTY(()) tree aarch64_simd_intXI_type_node = NULL_TREE;
731
732 /* The user-visible __fp16 type, and a pointer to that type. Used
733 across the back-end. */
734 tree aarch64_fp16_type_node = NULL_TREE;
735 tree aarch64_fp16_ptr_type_node = NULL_TREE;
736
737 /* Back-end node type for brain float (bfloat) types. */
738 tree aarch64_bf16_type_node = NULL_TREE;
739 tree aarch64_bf16_ptr_type_node = NULL_TREE;
740
741 /* Wrapper around add_builtin_function. NAME is the name of the built-in
742 function, TYPE is the function type, CODE is the function subcode
743 (relative to AARCH64_BUILTIN_GENERAL), and ATTRS is the function
744 attributes. */
745 static tree
746 aarch64_general_add_builtin (const char *name, tree type, unsigned int code,
747 tree attrs = NULL_TREE)
748 {
749 code = (code << AARCH64_BUILTIN_SHIFT) | AARCH64_BUILTIN_GENERAL;
750 return add_builtin_function (name, type, code, BUILT_IN_MD,
751 NULL, attrs);
752 }
753
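/* Like aarch64_general_add_builtin, but declare the function through
   simulate_builtin_function_decl, as though it had appeared directly in
   the user's source at the current location; used when builtins are
   registered from a pragma.  */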
754 static tree
755 aarch64_general_simulate_builtin (const char *name, tree fntype,
756 unsigned int code,
757 tree attrs = NULL_TREE)
758 {
759 code = (code << AARCH64_BUILTIN_SHIFT) | AARCH64_BUILTIN_GENERAL;
760 return simulate_builtin_function_decl (input_location, name, fntype,
761 code, NULL, attrs);
762 }
763
764 static const char *
765 aarch64_mangle_builtin_scalar_type (const_tree type)
766 {
767 int i = 0;
768
769 while (aarch64_scalar_builtin_types[i] != NULL)
770 {
771 const char *name = aarch64_scalar_builtin_types[i];
772
773 if (TREE_CODE (TYPE_NAME (type)) == TYPE_DECL
774 && DECL_NAME (TYPE_NAME (type))
775 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type))), name))
776 return aarch64_scalar_builtin_types[i];
777 i++;
778 }
779 return NULL;
780 }
781
782 static const char *
783 aarch64_mangle_builtin_vector_type (const_tree type)
784 {
785 tree attrs = TYPE_ATTRIBUTES (type);
786 if (tree attr = lookup_attribute ("Advanced SIMD type", attrs))
787 {
788 tree mangled_name = TREE_VALUE (TREE_VALUE (attr));
789 return IDENTIFIER_POINTER (mangled_name);
790 }
791
792 return NULL;
793 }
794
795 const char *
796 aarch64_general_mangle_builtin_type (const_tree type)
797 {
798 const char *mangle;
799 /* Walk through all the AArch64 builtin type tables to look up the
800 incoming type. */
801 if ((mangle = aarch64_mangle_builtin_vector_type (type))
802 || (mangle = aarch64_mangle_builtin_scalar_type (type)))
803 return mangle;
804
805 return NULL;
806 }
807
808 static tree
809 aarch64_simd_builtin_std_type (machine_mode mode,
810 enum aarch64_type_qualifiers q)
811 {
812 #define QUAL_TYPE(M) \
813 ((q == qualifier_none) ? int##M##_type_node : unsigned_int##M##_type_node);
814 switch (mode)
815 {
816 case E_QImode:
817 return QUAL_TYPE (QI);
818 case E_HImode:
819 return QUAL_TYPE (HI);
820 case E_SImode:
821 return QUAL_TYPE (SI);
822 case E_DImode:
823 return QUAL_TYPE (DI);
824 case E_TImode:
825 return QUAL_TYPE (TI);
826 case E_OImode:
827 return aarch64_simd_intOI_type_node;
828 case E_CImode:
829 return aarch64_simd_intCI_type_node;
830 case E_XImode:
831 return aarch64_simd_intXI_type_node;
832 case E_HFmode:
833 return aarch64_fp16_type_node;
834 case E_SFmode:
835 return float_type_node;
836 case E_DFmode:
837 return double_type_node;
838 case E_BFmode:
839 return aarch64_bf16_type_node;
840 default:
841 gcc_unreachable ();
842 }
843 #undef QUAL_TYPE
844 }
845
846 static tree
847 aarch64_lookup_simd_builtin_type (machine_mode mode,
848 enum aarch64_type_qualifiers q)
849 {
850 int i;
851 int nelts = sizeof (aarch64_simd_types) / sizeof (aarch64_simd_types[0]);
852
853 /* Non-poly scalar modes map to standard types not in the table. */
854 if (q != qualifier_poly && !VECTOR_MODE_P (mode))
855 return aarch64_simd_builtin_std_type (mode, q);
856
857 for (i = 0; i < nelts; i++)
858 {
859 if (aarch64_simd_types[i].mode == mode
860 && aarch64_simd_types[i].q == q)
861 return aarch64_simd_types[i].itype;
862 if (aarch64_simd_tuple_types[i][0] != NULL_TREE)
863 for (int j = 0; j < 3; j++)
864 if (aarch64_simd_tuple_modes[i][j] == mode
865 && aarch64_simd_types[i].q == q)
866 return aarch64_simd_tuple_types[i][j];
867 }
868
869 return NULL_TREE;
870 }
871
872 static tree
873 aarch64_simd_builtin_type (machine_mode mode,
874 bool unsigned_p, bool poly_p)
875 {
876 if (poly_p)
877 return aarch64_lookup_simd_builtin_type (mode, qualifier_poly);
878 else if (unsigned_p)
879 return aarch64_lookup_simd_builtin_type (mode, qualifier_unsigned);
880 else
881 return aarch64_lookup_simd_builtin_type (mode, qualifier_none);
882 }
883
884 static void
885 aarch64_init_simd_builtin_types (void)
886 {
887 int i;
888 int nelts = sizeof (aarch64_simd_types) / sizeof (aarch64_simd_types[0]);
889 tree tdecl;
890
891 /* Init all the element types built by the front-end. */
892 aarch64_simd_types[Int8x8_t].eltype = intQI_type_node;
893 aarch64_simd_types[Int8x16_t].eltype = intQI_type_node;
894 aarch64_simd_types[Int16x4_t].eltype = intHI_type_node;
895 aarch64_simd_types[Int16x8_t].eltype = intHI_type_node;
896 aarch64_simd_types[Int32x2_t].eltype = intSI_type_node;
897 aarch64_simd_types[Int32x4_t].eltype = intSI_type_node;
898 aarch64_simd_types[Int64x1_t].eltype = intDI_type_node;
899 aarch64_simd_types[Int64x2_t].eltype = intDI_type_node;
900 aarch64_simd_types[Uint8x8_t].eltype = unsigned_intQI_type_node;
901 aarch64_simd_types[Uint8x16_t].eltype = unsigned_intQI_type_node;
902 aarch64_simd_types[Uint16x4_t].eltype = unsigned_intHI_type_node;
903 aarch64_simd_types[Uint16x8_t].eltype = unsigned_intHI_type_node;
904 aarch64_simd_types[Uint32x2_t].eltype = unsigned_intSI_type_node;
905 aarch64_simd_types[Uint32x4_t].eltype = unsigned_intSI_type_node;
906 aarch64_simd_types[Uint64x1_t].eltype = unsigned_intDI_type_node;
907 aarch64_simd_types[Uint64x2_t].eltype = unsigned_intDI_type_node;
908
909 /* Poly types are a world of their own. */
910 aarch64_simd_types[Poly8_t].eltype = aarch64_simd_types[Poly8_t].itype =
911 build_distinct_type_copy (unsigned_intQI_type_node);
912 /* Prevent front-ends from transforming Poly8_t arrays into string
913 literals. */
914 TYPE_STRING_FLAG (aarch64_simd_types[Poly8_t].eltype) = false;
915
916 aarch64_simd_types[Poly16_t].eltype = aarch64_simd_types[Poly16_t].itype =
917 build_distinct_type_copy (unsigned_intHI_type_node);
918 aarch64_simd_types[Poly64_t].eltype = aarch64_simd_types[Poly64_t].itype =
919 build_distinct_type_copy (unsigned_intDI_type_node);
920 aarch64_simd_types[Poly128_t].eltype = aarch64_simd_types[Poly128_t].itype =
921 build_distinct_type_copy (unsigned_intTI_type_node);
922 /* Init poly vector element types with scalar poly types. */
923 aarch64_simd_types[Poly8x8_t].eltype = aarch64_simd_types[Poly8_t].itype;
924 aarch64_simd_types[Poly8x16_t].eltype = aarch64_simd_types[Poly8_t].itype;
925 aarch64_simd_types[Poly16x4_t].eltype = aarch64_simd_types[Poly16_t].itype;
926 aarch64_simd_types[Poly16x8_t].eltype = aarch64_simd_types[Poly16_t].itype;
927 aarch64_simd_types[Poly64x1_t].eltype = aarch64_simd_types[Poly64_t].itype;
928 aarch64_simd_types[Poly64x2_t].eltype = aarch64_simd_types[Poly64_t].itype;
929
930 /* Continue with standard types. */
931 aarch64_simd_types[Float16x4_t].eltype = aarch64_fp16_type_node;
932 aarch64_simd_types[Float16x8_t].eltype = aarch64_fp16_type_node;
933 aarch64_simd_types[Float32x2_t].eltype = float_type_node;
934 aarch64_simd_types[Float32x4_t].eltype = float_type_node;
935 aarch64_simd_types[Float64x1_t].eltype = double_type_node;
936 aarch64_simd_types[Float64x2_t].eltype = double_type_node;
937
938 /* Init Bfloat vector types with underlying __bf16 type. */
939 aarch64_simd_types[Bfloat16x4_t].eltype = aarch64_bf16_type_node;
940 aarch64_simd_types[Bfloat16x8_t].eltype = aarch64_bf16_type_node;
941
942 for (i = 0; i < nelts; i++)
943 {
944 tree eltype = aarch64_simd_types[i].eltype;
945 machine_mode mode = aarch64_simd_types[i].mode;
946
947 if (aarch64_simd_types[i].itype == NULL)
948 {
949 tree type = build_vector_type (eltype, GET_MODE_NUNITS (mode));
950 type = build_distinct_type_copy (type);
951 SET_TYPE_STRUCTURAL_EQUALITY (type);
952
953 tree mangled_name = get_identifier (aarch64_simd_types[i].mangle);
954 tree value = tree_cons (NULL_TREE, mangled_name, NULL_TREE);
955 TYPE_ATTRIBUTES (type)
956 = tree_cons (get_identifier ("Advanced SIMD type"), value,
957 TYPE_ATTRIBUTES (type));
958 aarch64_simd_types[i].itype = type;
959 }
960
961 tdecl = add_builtin_type (aarch64_simd_types[i].name,
962 aarch64_simd_types[i].itype);
963 TYPE_NAME (aarch64_simd_types[i].itype) = tdecl;
964 }
965
966 #define AARCH64_BUILD_SIGNED_TYPE(mode) \
967 make_signed_type (GET_MODE_PRECISION (mode));
968 aarch64_simd_intOI_type_node = AARCH64_BUILD_SIGNED_TYPE (OImode);
969 aarch64_simd_intCI_type_node = AARCH64_BUILD_SIGNED_TYPE (CImode);
970 aarch64_simd_intXI_type_node = AARCH64_BUILD_SIGNED_TYPE (XImode);
971 #undef AARCH64_BUILD_SIGNED_TYPE
972
973 tdecl = add_builtin_type
974 ("__builtin_aarch64_simd_oi" , aarch64_simd_intOI_type_node);
975 TYPE_NAME (aarch64_simd_intOI_type_node) = tdecl;
976 tdecl = add_builtin_type
977 ("__builtin_aarch64_simd_ci" , aarch64_simd_intCI_type_node);
978 TYPE_NAME (aarch64_simd_intCI_type_node) = tdecl;
979 tdecl = add_builtin_type
980 ("__builtin_aarch64_simd_xi" , aarch64_simd_intXI_type_node);
981 TYPE_NAME (aarch64_simd_intXI_type_node) = tdecl;
982 }
983
984 static void
985 aarch64_init_simd_builtin_scalar_types (void)
986 {
987 /* Define typedefs for all the standard scalar types. */
988 (*lang_hooks.types.register_builtin_type) (intQI_type_node,
989 "__builtin_aarch64_simd_qi");
990 (*lang_hooks.types.register_builtin_type) (intHI_type_node,
991 "__builtin_aarch64_simd_hi");
992 (*lang_hooks.types.register_builtin_type) (aarch64_fp16_type_node,
993 "__builtin_aarch64_simd_hf");
994 (*lang_hooks.types.register_builtin_type) (intSI_type_node,
995 "__builtin_aarch64_simd_si");
996 (*lang_hooks.types.register_builtin_type) (float_type_node,
997 "__builtin_aarch64_simd_sf");
998 (*lang_hooks.types.register_builtin_type) (intDI_type_node,
999 "__builtin_aarch64_simd_di");
1000 (*lang_hooks.types.register_builtin_type) (double_type_node,
1001 "__builtin_aarch64_simd_df");
1002 (*lang_hooks.types.register_builtin_type) (unsigned_intQI_type_node,
1003 "__builtin_aarch64_simd_poly8");
1004 (*lang_hooks.types.register_builtin_type) (unsigned_intHI_type_node,
1005 "__builtin_aarch64_simd_poly16");
1006 (*lang_hooks.types.register_builtin_type) (unsigned_intDI_type_node,
1007 "__builtin_aarch64_simd_poly64");
1008 (*lang_hooks.types.register_builtin_type) (unsigned_intTI_type_node,
1009 "__builtin_aarch64_simd_poly128");
1010 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
1011 "__builtin_aarch64_simd_ti");
1012 (*lang_hooks.types.register_builtin_type) (aarch64_bf16_type_node,
1013 "__builtin_aarch64_simd_bf");
1014 /* Unsigned integer types for various mode sizes. */
1015 (*lang_hooks.types.register_builtin_type) (unsigned_intQI_type_node,
1016 "__builtin_aarch64_simd_uqi");
1017 (*lang_hooks.types.register_builtin_type) (unsigned_intHI_type_node,
1018 "__builtin_aarch64_simd_uhi");
1019 (*lang_hooks.types.register_builtin_type) (unsigned_intSI_type_node,
1020 "__builtin_aarch64_simd_usi");
1021 (*lang_hooks.types.register_builtin_type) (unsigned_intDI_type_node,
1022 "__builtin_aarch64_simd_udi");
1023 }
1024
1025 /* Return a set of FLAG_* flags derived from FLAGS
1026 that describe what a function with result MODE could do,
1027 taking the command-line flags into account. */
1028 static unsigned int
1029 aarch64_call_properties (unsigned int flags, machine_mode mode)
1030 {
1031 if (!(flags & FLAG_AUTO_FP) && FLOAT_MODE_P (mode))
1032 flags |= FLAG_FP;
1033
1034 /* -fno-trapping-math means that we can assume any FP exceptions
1035 are not user-visible. */
1036 if (!flag_trapping_math)
1037 flags &= ~FLAG_RAISE_FP_EXCEPTIONS;
1038
1039 return flags;
1040 }
1041
1042 /* Return true if calls to a function with flags F and mode MODE
1043 could modify some form of global state. */
1044 static bool
1045 aarch64_modifies_global_state_p (unsigned int f, machine_mode mode)
1046 {
1047 unsigned int flags = aarch64_call_properties (f, mode);
1048
1049 if (flags & FLAG_RAISE_FP_EXCEPTIONS)
1050 return true;
1051
1052 if (flags & FLAG_PREFETCH_MEMORY)
1053 return true;
1054
1055 return flags & FLAG_WRITE_MEMORY;
1056 }
1057
1058 /* Return true if calls to a function with flags F and mode MODE
1059 could read some form of global state. */
1060 static bool
1061 aarch64_reads_global_state_p (unsigned int f, machine_mode mode)
1062 {
1063 unsigned int flags = aarch64_call_properties (f, mode);
1064
1065 if (flags & FLAG_READ_FPCR)
1066 return true;
1067
1068 return flags & FLAG_READ_MEMORY;
1069 }
1070
1071 /* Return true if calls to a function with flags F and mode MODE
1072 could raise a signal. */
1073 static bool
1074 aarch64_could_trap_p (unsigned int f, machine_mode mode)
1075 {
1076 unsigned int flags = aarch64_call_properties (f, mode);
1077
1078 if (flags & FLAG_RAISE_FP_EXCEPTIONS)
1079 return true;
1080
1081 if (flags & (FLAG_READ_MEMORY | FLAG_WRITE_MEMORY))
1082 return true;
1083
1084 return false;
1085 }
1086
1087 /* Add attribute NAME to ATTRS. */
1088 static tree
1089 aarch64_add_attribute (const char *name, tree attrs)
1090 {
1091 return tree_cons (get_identifier (name), NULL_TREE, attrs);
1092 }
1093
1094 /* Return the appropriate attributes for a function that has
1095 flags F and mode MODE. */
1096 static tree
1097 aarch64_get_attributes (unsigned int f, machine_mode mode)
1098 {
1099 tree attrs = NULL_TREE;
1100
1101 if (!aarch64_modifies_global_state_p (f, mode))
1102 {
1103 if (aarch64_reads_global_state_p (f, mode))
1104 attrs = aarch64_add_attribute ("pure", attrs);
1105 else
1106 attrs = aarch64_add_attribute ("const", attrs);
1107 }
1108
1109 if (!flag_non_call_exceptions || !aarch64_could_trap_p (f, mode))
1110 attrs = aarch64_add_attribute ("nothrow", attrs);
1111
1112 return aarch64_add_attribute ("leaf", attrs);
1113 }
1114
1115 static bool aarch64_simd_builtins_initialized_p = false;
1116
1117 /* The architecture does not provide a lane variant of the FCMLA instructions
1118 here, so we can't use the standard SIMD builtin expansion code, but we still
1119 want the majority of the validation that would normally be done. */
1120
1121 void
1122 aarch64_init_fcmla_laneq_builtins (void)
1123 {
1124 unsigned int i = 0;
1125
1126 for (i = 0; i < ARRAY_SIZE (aarch64_fcmla_lane_builtin_data); ++i)
1127 {
1128 aarch64_fcmla_laneq_builtin_datum* d
1129 = &aarch64_fcmla_lane_builtin_data[i];
1130 tree argtype = aarch64_lookup_simd_builtin_type (d->mode, qualifier_none);
1131 machine_mode quadmode = GET_MODE_2XWIDER_MODE (d->mode).require ();
1132 tree quadtype
1133 = aarch64_lookup_simd_builtin_type (quadmode, qualifier_none);
1134 tree lanetype
1135 = aarch64_simd_builtin_std_type (SImode, qualifier_lane_pair_index);
1136 tree ftype = build_function_type_list (argtype, argtype, argtype,
1137 quadtype, lanetype, NULL_TREE);
1138 tree attrs = aarch64_get_attributes (FLAG_FP, d->mode);
1139 tree fndecl
1140 = aarch64_general_add_builtin (d->name, ftype, d->fcode, attrs);
1141
1142 aarch64_builtin_decls[d->fcode] = fndecl;
1143 }
1144 }
1145
1146 void
1147 aarch64_init_simd_builtin_functions (bool called_from_pragma)
1148 {
1149 unsigned int i, fcode = AARCH64_SIMD_PATTERN_START;
1150
1151 if (!called_from_pragma)
1152 {
1153 tree lane_check_fpr = build_function_type_list (void_type_node,
1154 size_type_node,
1155 size_type_node,
1156 intSI_type_node,
1157 NULL);
1158 aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_LANE_CHECK]
1159 = aarch64_general_add_builtin ("__builtin_aarch64_im_lane_boundsi",
1160 lane_check_fpr,
1161 AARCH64_SIMD_BUILTIN_LANE_CHECK);
1162 }
1163
1164 for (i = 0; i < ARRAY_SIZE (aarch64_simd_builtin_data); i++, fcode++)
1165 {
1166 bool print_type_signature_p = false;
1167 char type_signature[SIMD_MAX_BUILTIN_ARGS + 1] = { 0 };
1168 aarch64_simd_builtin_datum *d = &aarch64_simd_builtin_data[i];
1169 char namebuf[60];
1170 tree ftype = NULL;
1171 tree fndecl = NULL;
1172
1173 d->fcode = fcode;
1174
1175 /* We must track two variables here. op_num is
1176 the operand number as in the RTL pattern. This is
1177 required to access the mode (e.g. V4SF mode) of the
1178 argument, from which the base type can be derived.
1179 arg_num is an index into the qualifiers data, which
1180 gives qualifiers to the type (e.g. const unsigned).
1181 The reason these two variables may differ by one is the
1182 void return type. While all return types take the 0th entry
1183 in the qualifiers array, there is no operand for them in the
1184 RTL pattern. */
1185 int op_num = insn_data[d->code].n_operands - 1;
1186 int arg_num = d->qualifiers[0] & qualifier_void
1187 ? op_num + 1
1188 : op_num;
1189 tree return_type = void_type_node, args = void_list_node;
1190 tree eltype;
1191
1192 int struct_mode_args = 0;
1193 for (int j = op_num; j >= 0; j--)
1194 {
1195 machine_mode op_mode = insn_data[d->code].operand[j].mode;
1196 if (aarch64_advsimd_struct_mode_p (op_mode))
1197 struct_mode_args++;
1198 }
1199
1200 if ((called_from_pragma && struct_mode_args == 0)
1201 || (!called_from_pragma && struct_mode_args > 0))
1202 continue;
1203
1204 /* Build a function type directly from the insn_data for this
1205 builtin. The build_function_type () function takes care of
1206 removing duplicates for us. */
1207 for (; op_num >= 0; arg_num--, op_num--)
1208 {
1209 machine_mode op_mode = insn_data[d->code].operand[op_num].mode;
1210 enum aarch64_type_qualifiers qualifiers = d->qualifiers[arg_num];
1211
1212 if (qualifiers & qualifier_unsigned)
1213 {
1214 type_signature[op_num] = 'u';
1215 print_type_signature_p = true;
1216 }
1217 else if (qualifiers & qualifier_poly)
1218 {
1219 type_signature[op_num] = 'p';
1220 print_type_signature_p = true;
1221 }
1222 else
1223 type_signature[op_num] = 's';
1224
1225 /* Skip an internal operand for vget_{low, high}. */
1226 if (qualifiers & qualifier_internal)
1227 continue;
1228
1229 /* Some builtins have different user-facing types
1230 for certain arguments, encoded in d->mode. */
1231 if (qualifiers & qualifier_map_mode)
1232 op_mode = d->mode;
1233
1234 /* For pointers, we want a pointer to the basic type
1235 of the vector. */
1236 if (qualifiers & qualifier_pointer && VECTOR_MODE_P (op_mode))
1237 op_mode = GET_MODE_INNER (op_mode);
1238
1239 eltype = aarch64_simd_builtin_type
1240 (op_mode,
1241 (qualifiers & qualifier_unsigned) != 0,
1242 (qualifiers & qualifier_poly) != 0);
1243 gcc_assert (eltype != NULL);
1244
1245 /* Add qualifiers. */
1246 if (qualifiers & qualifier_const)
1247 eltype = build_qualified_type (eltype, TYPE_QUAL_CONST);
1248
1249 if (qualifiers & qualifier_pointer)
1250 eltype = build_pointer_type (eltype);
1251
1252 /* If we have reached arg_num == 0, we are at a non-void
1253 return type. Otherwise, we are still processing
1254 arguments. */
1255 if (arg_num == 0)
1256 return_type = eltype;
1257 else
1258 args = tree_cons (NULL_TREE, eltype, args);
1259 }
1260
1261 ftype = build_function_type (return_type, args);
1262
1263 gcc_assert (ftype != NULL);
1264
1265 if (print_type_signature_p)
1266 snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s_%s",
1267 d->name, type_signature);
1268 else
1269 snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s",
1270 d->name);
1271
1272 tree attrs = aarch64_get_attributes (d->flags, d->mode);
1273
1274 if (called_from_pragma)
1275 {
1276 unsigned int raw_code
1277 = (fcode << AARCH64_BUILTIN_SHIFT) | AARCH64_BUILTIN_GENERAL;
1278 fndecl = simulate_builtin_function_decl (input_location, namebuf,
1279 ftype, raw_code, NULL,
1280 attrs);
1281 }
1282 else
1283 fndecl = aarch64_general_add_builtin (namebuf, ftype, fcode, attrs);
1284
1285 aarch64_builtin_decls[fcode] = fndecl;
1286 }
1287 }
1288
1289 /* Register the tuple type that contains NUM_VECTORS of the AdvSIMD type
1290 indexed by TYPE_INDEX. */
1291 static void
1292 register_tuple_type (unsigned int num_vectors, unsigned int type_index)
1293 {
1294 aarch64_simd_type_info *type = &aarch64_simd_types[type_index];
1295
1296 /* Synthesize the name of the user-visible vector tuple type. */
1297 const char *vector_type_name = type->name;
1298 char tuple_type_name[sizeof ("bfloat16x4x2_t")];
1299 snprintf (tuple_type_name, sizeof (tuple_type_name), "%.*sx%d_t",
1300 (int) strlen (vector_type_name) - 4, vector_type_name + 2,
1301 num_vectors);
1302 tuple_type_name[0] = TOLOWER (tuple_type_name[0]);
1303
1304 tree vector_type = type->itype;
1305 tree array_type = build_array_type_nelts (vector_type, num_vectors);
1306 if (type->mode == DImode)
1307 {
1308 if (num_vectors == 2)
1309 SET_TYPE_MODE (array_type, V2x1DImode);
1310 else if (num_vectors == 3)
1311 SET_TYPE_MODE (array_type, V3x1DImode);
1312 else if (num_vectors == 4)
1313 SET_TYPE_MODE (array_type, V4x1DImode);
1314 }
1315
1316 unsigned int alignment
1317 = known_eq (GET_MODE_SIZE (type->mode), 16) ? 128 : 64;
1318 machine_mode tuple_mode = TYPE_MODE_RAW (array_type);
1319 gcc_assert (VECTOR_MODE_P (tuple_mode)
1320 && TYPE_MODE (array_type) == tuple_mode
1321 && TYPE_ALIGN (array_type) == alignment);
1322
1323 tree field = build_decl (input_location, FIELD_DECL,
1324 get_identifier ("val"), array_type);
1325
1326 tree t = lang_hooks.types.simulate_record_decl (input_location,
1327 tuple_type_name,
1328 make_array_slice (&field,
1329 1));
1330 gcc_assert (TYPE_MODE_RAW (t) == TYPE_MODE (t)
1331 && (flag_pack_struct
1332 || maximum_field_alignment
1333 || (TYPE_MODE_RAW (t) == tuple_mode
1334 && TYPE_ALIGN (t) == alignment)));
1335
1336 aarch64_simd_tuple_modes[type_index][num_vectors - 2] = tuple_mode;
1337 aarch64_simd_tuple_types[type_index][num_vectors - 2] = t;
1338 }
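/* For instance, register_tuple_type (2, Int8x8_t) derives the user-visible
   name "int8x8x2_t" from the internal name "__Int8x8_t" (drop the leading
   "__" and trailing "_t", append "x2_t", lower-case the first letter) and
   registers a struct whose single "val" field is an array of two int8x8_t
   vectors.  */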
1339
1340 static bool
1341 aarch64_scalar_builtin_type_p (aarch64_simd_type t)
1342 {
1343 return (t == Poly8_t || t == Poly16_t || t == Poly64_t || t == Poly128_t);
1344 }
1345
1346 /* Enable AARCH64_FL_* flags EXTRA_FLAGS on top of the base Advanced SIMD
1347 set. */
1348 aarch64_simd_switcher::aarch64_simd_switcher (unsigned int extra_flags)
1349 : m_old_isa_flags (aarch64_isa_flags),
1350 m_old_general_regs_only (TARGET_GENERAL_REGS_ONLY)
1351 {
1352 /* Changing the ISA flags should be enough here. We shouldn't need to
1353 pay the compile-time cost of a full target switch. */
1354 aarch64_isa_flags = AARCH64_FL_FP | AARCH64_FL_SIMD | extra_flags;
1355 global_options.x_target_flags &= ~MASK_GENERAL_REGS_ONLY;
1356 }
1357
1358 aarch64_simd_switcher::~aarch64_simd_switcher ()
1359 {
1360 if (m_old_general_regs_only)
1361 global_options.x_target_flags |= MASK_GENERAL_REGS_ONLY;
1362 aarch64_isa_flags = m_old_isa_flags;
1363 }
1364
1365 /* Implement #pragma GCC aarch64 "arm_neon.h". */
1366 void
1367 handle_arm_neon_h (void)
1368 {
1369 aarch64_simd_switcher simd;
1370
1371 /* Register the AdvSIMD vector tuple types. */
1372 for (unsigned int i = 0; i < ARM_NEON_H_TYPES_LAST; i++)
1373 for (unsigned int count = 2; count <= 4; ++count)
1374 if (!aarch64_scalar_builtin_type_p (aarch64_simd_types[i].type))
1375 register_tuple_type (count, i);
1376
1377 aarch64_init_simd_builtin_functions (true);
1378 }
1379
1380 void
1381 aarch64_init_simd_builtins (void)
1382 {
1383 if (aarch64_simd_builtins_initialized_p)
1384 return;
1385
1386 aarch64_simd_builtins_initialized_p = true;
1387
1388 aarch64_init_simd_builtin_types ();
1389
1390 /* Strong-typing hasn't been implemented for all AdvSIMD builtin intrinsics.
1391 Therefore we need to preserve the old __builtin scalar types. These can be
1392 removed once all the intrinsics become strongly typed using the qualifier
1393 system. */
1394 aarch64_init_simd_builtin_scalar_types ();
1395
1396 aarch64_init_simd_builtin_functions (false);
1397 if (in_lto_p)
1398 handle_arm_neon_h ();
1399
1400 /* Initialize the remaining fcmla_laneq intrinsics. */
1401 aarch64_init_fcmla_laneq_builtins ();
1402 }
1403
1404 static void
1405 aarch64_init_crc32_builtins ()
1406 {
1407 tree usi_type = aarch64_simd_builtin_std_type (SImode, qualifier_unsigned);
1408 unsigned int i = 0;
1409
1410 for (i = 0; i < ARRAY_SIZE (aarch64_crc_builtin_data); ++i)
1411 {
1412 aarch64_crc_builtin_datum* d = &aarch64_crc_builtin_data[i];
1413 tree argtype = aarch64_simd_builtin_std_type (d->mode,
1414 qualifier_unsigned);
1415 tree ftype = build_function_type_list (usi_type, usi_type, argtype, NULL_TREE);
1416 tree attrs = aarch64_get_attributes (FLAG_NONE, d->mode);
1417 tree fndecl
1418 = aarch64_general_add_builtin (d->name, ftype, d->fcode, attrs);
1419
1420 aarch64_builtin_decls[d->fcode] = fndecl;
1421 }
1422 }
1423
1424 /* Add builtins for reciprocal square root. */
1425
1426 void
1427 aarch64_init_builtin_rsqrt (void)
1428 {
1429 tree fndecl = NULL;
1430 tree ftype = NULL;
1431
1432 tree V2SF_type_node = build_vector_type (float_type_node, 2);
1433 tree V2DF_type_node = build_vector_type (double_type_node, 2);
1434 tree V4SF_type_node = build_vector_type (float_type_node, 4);
1435
1436 struct builtin_decls_data
1437 {
1438 tree type_node;
1439 const char *builtin_name;
1440 int function_code;
1441 };
1442
1443 builtin_decls_data bdda[] =
1444 {
1445 { double_type_node, "__builtin_aarch64_rsqrt_df", AARCH64_BUILTIN_RSQRT_DF },
1446 { float_type_node, "__builtin_aarch64_rsqrt_sf", AARCH64_BUILTIN_RSQRT_SF },
1447 { V2DF_type_node, "__builtin_aarch64_rsqrt_v2df", AARCH64_BUILTIN_RSQRT_V2DF },
1448 { V2SF_type_node, "__builtin_aarch64_rsqrt_v2sf", AARCH64_BUILTIN_RSQRT_V2SF },
1449 { V4SF_type_node, "__builtin_aarch64_rsqrt_v4sf", AARCH64_BUILTIN_RSQRT_V4SF }
1450 };
1451
1452 builtin_decls_data *bdd = bdda;
1453 builtin_decls_data *bdd_end = bdd + (sizeof (bdda) / sizeof (builtin_decls_data));
1454
1455 for (; bdd < bdd_end; bdd++)
1456 {
1457 ftype = build_function_type_list (bdd->type_node, bdd->type_node, NULL_TREE);
1458 tree attrs = aarch64_get_attributes (FLAG_FP, TYPE_MODE (bdd->type_node));
1459 fndecl = aarch64_general_add_builtin (bdd->builtin_name,
1460 ftype, bdd->function_code, attrs);
1461 aarch64_builtin_decls[bdd->function_code] = fndecl;
1462 }
1463 }
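
/* Illustrative sketch (not part of this file): each of these builtins maps a
   value to a reciprocal square-root approximation in the same mode, e.g.

     double
     inv_norm (double x)
     {
       return __builtin_aarch64_rsqrt_df (x);   // approximately 1.0 / sqrt (x)
     }
*/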
1464
1465 /* Initialize the backend types that support the user-visible __fp16
1466 type, and also initialize a pointer type to it, for use when
1467 forming HFAs.  */
1468
1469 static void
1470 aarch64_init_fp16_types (void)
1471 {
1472 aarch64_fp16_type_node = make_node (REAL_TYPE);
1473 TYPE_PRECISION (aarch64_fp16_type_node) = 16;
1474 layout_type (aarch64_fp16_type_node);
1475
1476 (*lang_hooks.types.register_builtin_type) (aarch64_fp16_type_node, "__fp16");
1477 aarch64_fp16_ptr_type_node = build_pointer_type (aarch64_fp16_type_node);
1478 }
1479
1480 /* Initialize the backend REAL_TYPE type supporting bfloat types. */
1481 static void
1482 aarch64_init_bf16_types (void)
1483 {
1484 aarch64_bf16_type_node = make_node (REAL_TYPE);
1485 TYPE_PRECISION (aarch64_bf16_type_node) = 16;
1486 SET_TYPE_MODE (aarch64_bf16_type_node, BFmode);
1487 layout_type (aarch64_bf16_type_node);
1488
1489 lang_hooks.types.register_builtin_type (aarch64_bf16_type_node, "__bf16");
1490 aarch64_bf16_ptr_type_node = build_pointer_type (aarch64_bf16_type_node);
1491 }
1492
1493 /* Pointer authentication builtins that become NOPs on legacy platforms.
1494 Currently, these builtins are for internal use only (libgcc EH unwinder).  */
1495
1496 void
1497 aarch64_init_pauth_hint_builtins (void)
1498 {
1499 /* Pointer Authentication builtins. */
1500 tree ftype_pointer_auth
1501 = build_function_type_list (ptr_type_node, ptr_type_node,
1502 unsigned_intDI_type_node, NULL_TREE);
1503 tree ftype_pointer_strip
1504 = build_function_type_list (ptr_type_node, ptr_type_node, NULL_TREE);
1505
1506 aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_AUTIA1716]
1507 = aarch64_general_add_builtin ("__builtin_aarch64_autia1716",
1508 ftype_pointer_auth,
1509 AARCH64_PAUTH_BUILTIN_AUTIA1716);
1510 aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_PACIA1716]
1511 = aarch64_general_add_builtin ("__builtin_aarch64_pacia1716",
1512 ftype_pointer_auth,
1513 AARCH64_PAUTH_BUILTIN_PACIA1716);
1514 aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_AUTIB1716]
1515 = aarch64_general_add_builtin ("__builtin_aarch64_autib1716",
1516 ftype_pointer_auth,
1517 AARCH64_PAUTH_BUILTIN_AUTIB1716);
1518 aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_PACIB1716]
1519 = aarch64_general_add_builtin ("__builtin_aarch64_pacib1716",
1520 ftype_pointer_auth,
1521 AARCH64_PAUTH_BUILTIN_PACIB1716);
1522 aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_XPACLRI]
1523 = aarch64_general_add_builtin ("__builtin_aarch64_xpaclri",
1524 ftype_pointer_strip,
1525 AARCH64_PAUTH_BUILTIN_XPACLRI);
1526 }
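
/* Illustrative sketch (not part of this file): the libgcc unwinder calls
   these roughly as below to strip or authenticate a signed return address;
   "ra" and "cfa" are placeholders for the saved return address and the
   frame's CFA.

     void *ra_nopac = __builtin_aarch64_xpaclri (ra);
     void *ra_auth  = __builtin_aarch64_autia1716 (ra, (uint64_t) cfa);
*/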
1527
1528 /* Initialize the transactional memory extension (TME) builtins. */
1529 static void
1530 aarch64_init_tme_builtins (void)
1531 {
1532 tree ftype_uint64_void
1533 = build_function_type_list (uint64_type_node, NULL);
1534 tree ftype_void_void
1535 = build_function_type_list (void_type_node, NULL);
1536 tree ftype_void_uint64
1537 = build_function_type_list (void_type_node, uint64_type_node, NULL);
1538
1539 aarch64_builtin_decls[AARCH64_TME_BUILTIN_TSTART]
1540 = aarch64_general_add_builtin ("__builtin_aarch64_tstart",
1541 ftype_uint64_void,
1542 AARCH64_TME_BUILTIN_TSTART);
1543 aarch64_builtin_decls[AARCH64_TME_BUILTIN_TTEST]
1544 = aarch64_general_add_builtin ("__builtin_aarch64_ttest",
1545 ftype_uint64_void,
1546 AARCH64_TME_BUILTIN_TTEST);
1547 aarch64_builtin_decls[AARCH64_TME_BUILTIN_TCOMMIT]
1548 = aarch64_general_add_builtin ("__builtin_aarch64_tcommit",
1549 ftype_void_void,
1550 AARCH64_TME_BUILTIN_TCOMMIT);
1551 aarch64_builtin_decls[AARCH64_TME_BUILTIN_TCANCEL]
1552 = aarch64_general_add_builtin ("__builtin_aarch64_tcancel",
1553 ftype_void_uint64,
1554 AARCH64_TME_BUILTIN_TCANCEL);
1555 }
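
/* Illustrative sketch (not part of this file): typical use of the TME
   builtins registered above (the user-facing names in <arm_acle.h> are
   __tstart, __tcommit, __tcancel and __ttest).

     uint64_t status = __builtin_aarch64_tstart ();
     if (status == 0)
       {
	 // ... transactional region ...
	 __builtin_aarch64_tcommit ();
       }
     else
       {
	 // status holds the failure cause; fall back to a lock, retry, etc.
       }
*/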
1556
1557 /* Add builtins for Random Number instructions. */
1558
1559 static void
1560 aarch64_init_rng_builtins (void)
1561 {
1562 tree unsigned_ptr_type = build_pointer_type (unsigned_intDI_type_node);
1563 tree ftype
1564 = build_function_type_list (integer_type_node, unsigned_ptr_type, NULL);
1565 aarch64_builtin_decls[AARCH64_BUILTIN_RNG_RNDR]
1566 = aarch64_general_add_builtin ("__builtin_aarch64_rndr", ftype,
1567 AARCH64_BUILTIN_RNG_RNDR);
1568 aarch64_builtin_decls[AARCH64_BUILTIN_RNG_RNDRRS]
1569 = aarch64_general_add_builtin ("__builtin_aarch64_rndrrs", ftype,
1570 AARCH64_BUILTIN_RNG_RNDRRS);
1571 }
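
/* Illustrative sketch (not part of this file): both builtins write a random
   value through the pointer argument and return 0 on success, non-zero if
   the hardware could not produce one.

     uint64_t seed;
     if (__builtin_aarch64_rndr (&seed) != 0)
       seed = 0;   // RNDR failed; caller picks a fallback
*/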
1572
1573 /* Initialize the memory tagging extension (MTE) builtins. */
1574 struct aarch64_mte
1575 {
1576 tree ftype;
1577 enum insn_code icode;
1578 } aarch64_memtag_builtin_data[AARCH64_MEMTAG_BUILTIN_END -
1579 AARCH64_MEMTAG_BUILTIN_START - 1];
1580
1581 static void
1582 aarch64_init_memtag_builtins (void)
1583 {
1584 tree fntype = NULL;
1585
1586 #define AARCH64_INIT_MEMTAG_BUILTINS_DECL(F, N, I, T) \
1587 aarch64_builtin_decls[AARCH64_MEMTAG_BUILTIN_##F] \
1588 = aarch64_general_add_builtin ("__builtin_aarch64_memtag_"#N, \
1589 T, AARCH64_MEMTAG_BUILTIN_##F); \
1590 aarch64_memtag_builtin_data[AARCH64_MEMTAG_BUILTIN_##F - \
1591 AARCH64_MEMTAG_BUILTIN_START - 1] = \
1592 {T, CODE_FOR_##I};
1593
1594 fntype = build_function_type_list (ptr_type_node, ptr_type_node,
1595 uint64_type_node, NULL);
1596 AARCH64_INIT_MEMTAG_BUILTINS_DECL (IRG, irg, irg, fntype);
1597
1598 fntype = build_function_type_list (uint64_type_node, ptr_type_node,
1599 uint64_type_node, NULL);
1600 AARCH64_INIT_MEMTAG_BUILTINS_DECL (GMI, gmi, gmi, fntype);
1601
1602 fntype = build_function_type_list (ptrdiff_type_node, ptr_type_node,
1603 ptr_type_node, NULL);
1604 AARCH64_INIT_MEMTAG_BUILTINS_DECL (SUBP, subp, subp, fntype);
1605
1606 fntype = build_function_type_list (ptr_type_node, ptr_type_node,
1607 unsigned_type_node, NULL);
1608 AARCH64_INIT_MEMTAG_BUILTINS_DECL (INC_TAG, inc_tag, addg, fntype);
1609
1610 fntype = build_function_type_list (void_type_node, ptr_type_node, NULL);
1611 AARCH64_INIT_MEMTAG_BUILTINS_DECL (SET_TAG, set_tag, stg, fntype);
1612
1613 fntype = build_function_type_list (ptr_type_node, ptr_type_node, NULL);
1614 AARCH64_INIT_MEMTAG_BUILTINS_DECL (GET_TAG, get_tag, ldg, fntype);
1615
1616 #undef AARCH64_INIT_MEMTAG_BUILTINS_DECL
1617 }
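
/* Illustrative sketch (not part of this file): a tagging sequence using the
   builtins registered above (the <arm_acle.h> wrappers are the __arm_mte_*
   functions).  "p" is a placeholder pointer to a 16-byte granule.

     void *tagged = __builtin_aarch64_memtag_irg (p, 0);       // random tag
     __builtin_aarch64_memtag_set_tag (tagged);                // store tag
     void *check = __builtin_aarch64_memtag_get_tag (tagged);  // reload tag
*/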
1618
1619 /* Add builtins for Load/store 64 Byte instructions. */
1620
1621 typedef struct
1622 {
1623 const char *name;
1624 unsigned int code;
1625 tree type;
1626 } ls64_builtins_data;
1627
1628 static GTY(()) tree ls64_arm_data_t = NULL_TREE;
1629
1630 static void
1631 aarch64_init_ls64_builtins_types (void)
1632 {
1633 /* Synthesize:
1634
1635 typedef struct {
1636 uint64_t val[8];
1637 } __arm_data512_t; */
1638 const char *tuple_type_name = "__arm_data512_t";
1639 tree node_type = get_typenode_from_name (UINT64_TYPE);
1640 tree array_type = build_array_type_nelts (node_type, 8);
1641 SET_TYPE_MODE (array_type, V8DImode);
1642
1643 gcc_assert (TYPE_MODE_RAW (array_type) == TYPE_MODE (array_type));
1644 gcc_assert (TYPE_ALIGN (array_type) == 64);
1645
1646 tree field = build_decl (input_location, FIELD_DECL,
1647 get_identifier ("val"), array_type);
1648
1649 ls64_arm_data_t = lang_hooks.types.simulate_record_decl (input_location,
1650 tuple_type_name,
1651 make_array_slice (&field, 1));
1652
1653 gcc_assert (TYPE_MODE (ls64_arm_data_t) == V8DImode);
1654 gcc_assert (TYPE_MODE_RAW (ls64_arm_data_t) == TYPE_MODE (ls64_arm_data_t));
1655 gcc_assert (TYPE_ALIGN (ls64_arm_data_t) == 64);
1656 }
1657
1658 static void
1659 aarch64_init_ls64_builtins (void)
1660 {
1661 aarch64_init_ls64_builtins_types ();
1662
1663 ls64_builtins_data data[4] = {
1664 {"__arm_ld64b", AARCH64_LS64_BUILTIN_LD64B,
1665 build_function_type_list (ls64_arm_data_t,
1666 const_ptr_type_node, NULL_TREE)},
1667 {"__arm_st64b", AARCH64_LS64_BUILTIN_ST64B,
1668 build_function_type_list (void_type_node, ptr_type_node,
1669 ls64_arm_data_t, NULL_TREE)},
1670 {"__arm_st64bv", AARCH64_LS64_BUILTIN_ST64BV,
1671 build_function_type_list (uint64_type_node, ptr_type_node,
1672 ls64_arm_data_t, NULL_TREE)},
1673 {"__arm_st64bv0", AARCH64_LS64_BUILTIN_ST64BV0,
1674 build_function_type_list (uint64_type_node, ptr_type_node,
1675 ls64_arm_data_t, NULL_TREE)},
1676 };
1677
1678 for (size_t i = 0; i < ARRAY_SIZE (data); ++i)
1679 aarch64_builtin_decls[data[i].code]
1680 = aarch64_general_simulate_builtin (data[i].name, data[i].type,
1681 data[i].code);
1682 }
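
/* Illustrative sketch (not part of this file): the LS64 builtins move a whole
   64-byte __arm_data512_t between registers and an address in one operation.
   "src" and "dst" are placeholders for suitably aligned (typically device)
   addresses.

     __arm_data512_t blk = __arm_ld64b (src);   // single 64-byte load
     __arm_st64b (dst, blk);                    // single 64-byte store
     uint64_t st = __arm_st64bv (dst, blk);     // store, returning status
*/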
1683
1684 static void
1685 aarch64_init_data_intrinsics (void)
1686 {
1687 tree uint32_fntype = build_function_type_list (uint32_type_node,
1688 uint32_type_node, NULL_TREE);
1689 tree ulong_fntype = build_function_type_list (long_unsigned_type_node,
1690 long_unsigned_type_node,
1691 NULL_TREE);
1692 tree uint64_fntype = build_function_type_list (uint64_type_node,
1693 uint64_type_node, NULL_TREE);
1694 aarch64_builtin_decls[AARCH64_REV16]
1695 = aarch64_general_add_builtin ("__builtin_aarch64_rev16", uint32_fntype,
1696 AARCH64_REV16);
1697 aarch64_builtin_decls[AARCH64_REV16L]
1698 = aarch64_general_add_builtin ("__builtin_aarch64_rev16l", ulong_fntype,
1699 AARCH64_REV16L);
1700 aarch64_builtin_decls[AARCH64_REV16LL]
1701 = aarch64_general_add_builtin ("__builtin_aarch64_rev16ll", uint64_fntype,
1702 AARCH64_REV16LL);
1703 aarch64_builtin_decls[AARCH64_RBIT]
1704 = aarch64_general_add_builtin ("__builtin_aarch64_rbit", uint32_fntype,
1705 AARCH64_RBIT);
1706 aarch64_builtin_decls[AARCH64_RBITL]
1707 = aarch64_general_add_builtin ("__builtin_aarch64_rbitl", ulong_fntype,
1708 AARCH64_RBITL);
1709 aarch64_builtin_decls[AARCH64_RBITLL]
1710 = aarch64_general_add_builtin ("__builtin_aarch64_rbitll", uint64_fntype,
1711 AARCH64_RBITLL);
1712 }
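
/* Illustrative sketch (not part of this file): these builtins underlie the
   ACLE __rev16* and __rbit* data intrinsics, e.g.

     uint32_t a = __builtin_aarch64_rev16 (0x11223344u);  // 0x22114433
     uint32_t b = __builtin_aarch64_rbit (0x1u);          // 0x80000000
*/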
1713
1714 /* Implement #pragma GCC aarch64 "arm_acle.h". */
1715 void
1716 handle_arm_acle_h (void)
1717 {
1718 if (TARGET_LS64)
1719 aarch64_init_ls64_builtins ();
1720 }
1721
1722 /* Initialize fpsr fpcr getters and setters. */
1723
1724 static void
1725 aarch64_init_fpsr_fpcr_builtins (void)
1726 {
1727 tree ftype_set
1728 = build_function_type_list (void_type_node, unsigned_type_node, NULL);
1729 tree ftype_get
1730 = build_function_type_list (unsigned_type_node, NULL);
1731
1732 aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR]
1733 = aarch64_general_add_builtin ("__builtin_aarch64_get_fpcr",
1734 ftype_get,
1735 AARCH64_BUILTIN_GET_FPCR);
1736 aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR]
1737 = aarch64_general_add_builtin ("__builtin_aarch64_set_fpcr",
1738 ftype_set,
1739 AARCH64_BUILTIN_SET_FPCR);
1740 aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR]
1741 = aarch64_general_add_builtin ("__builtin_aarch64_get_fpsr",
1742 ftype_get,
1743 AARCH64_BUILTIN_GET_FPSR);
1744 aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR]
1745 = aarch64_general_add_builtin ("__builtin_aarch64_set_fpsr",
1746 ftype_set,
1747 AARCH64_BUILTIN_SET_FPSR);
1748
1749 ftype_set
1750 = build_function_type_list (void_type_node, long_long_unsigned_type_node,
1751 NULL);
1752 ftype_get
1753 = build_function_type_list (long_long_unsigned_type_node, NULL);
1754
1755 aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR64]
1756 = aarch64_general_add_builtin ("__builtin_aarch64_get_fpcr64",
1757 ftype_get,
1758 AARCH64_BUILTIN_GET_FPCR64);
1759 aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR64]
1760 = aarch64_general_add_builtin ("__builtin_aarch64_set_fpcr64",
1761 ftype_set,
1762 AARCH64_BUILTIN_SET_FPCR64);
1763 aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR64]
1764 = aarch64_general_add_builtin ("__builtin_aarch64_get_fpsr64",
1765 ftype_get,
1766 AARCH64_BUILTIN_GET_FPSR64);
1767 aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR64]
1768 = aarch64_general_add_builtin ("__builtin_aarch64_set_fpsr64",
1769 ftype_set,
1770 AARCH64_BUILTIN_SET_FPSR64);
1771 }
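
/* Illustrative sketch (not part of this file): the getters and setters are
   used like ordinary functions, e.g. to set the FZ (flush-to-zero) bit:

     unsigned fpcr = __builtin_aarch64_get_fpcr ();
     __builtin_aarch64_set_fpcr (fpcr | (1u << 24));   // FZ is bit 24

   The *64 variants read and write the full 64-bit system registers.  */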
1772
1773 /* Initialize all builtins in the AARCH64_BUILTIN_GENERAL group. */
1774
1775 void
1776 aarch64_general_init_builtins (void)
1777 {
1778 aarch64_init_fpsr_fpcr_builtins ();
1779
1780 aarch64_init_fp16_types ();
1781
1782 aarch64_init_bf16_types ();
1783
1784 {
1785 aarch64_simd_switcher simd;
1786 aarch64_init_simd_builtins ();
1787 }
1788
1789 aarch64_init_crc32_builtins ();
1790 aarch64_init_builtin_rsqrt ();
1791 aarch64_init_rng_builtins ();
1792 aarch64_init_data_intrinsics ();
1793
1794 tree ftype_jcvt
1795 = build_function_type_list (intSI_type_node, double_type_node, NULL);
1796 aarch64_builtin_decls[AARCH64_JSCVT]
1797 = aarch64_general_add_builtin ("__builtin_aarch64_jcvtzs", ftype_jcvt,
1798 AARCH64_JSCVT);
1799
1800 /* Initialize pointer authentication builtins which are backed by instructions
1801 in NOP encoding space.
1802
1803 NOTE: these builtins are intended for use by the libgcc unwinder only.  As
1804 there is no support for return address signing under ILP32, we don't
1805 register them there.  */
1806 if (!TARGET_ILP32)
1807 aarch64_init_pauth_hint_builtins ();
1808
1809 if (TARGET_TME)
1810 aarch64_init_tme_builtins ();
1811
1812 if (TARGET_MEMTAG)
1813 aarch64_init_memtag_builtins ();
1814
1815 if (in_lto_p)
1816 handle_arm_acle_h ();
1817 }
1818
1819 /* Implement TARGET_BUILTIN_DECL for the AARCH64_BUILTIN_GENERAL group. */
1820 tree
1821 aarch64_general_builtin_decl (unsigned code, bool)
1822 {
1823 if (code >= AARCH64_BUILTIN_MAX)
1824 return error_mark_node;
1825
1826 return aarch64_builtin_decls[code];
1827 }
1828
1829 typedef enum
1830 {
1831 SIMD_ARG_COPY_TO_REG,
1832 SIMD_ARG_CONSTANT,
1833 SIMD_ARG_LANE_INDEX,
1834 SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX,
1835 SIMD_ARG_LANE_PAIR_INDEX,
1836 SIMD_ARG_LANE_QUADTUP_INDEX,
1837 SIMD_ARG_STOP
1838 } builtin_simd_arg;
1839
1840
1841 static rtx
1842 aarch64_simd_expand_args (rtx target, int icode, int have_retval,
1843 tree exp, builtin_simd_arg *args,
1844 machine_mode builtin_mode)
1845 {
1846 rtx pat;
1847 rtx op[SIMD_MAX_BUILTIN_ARGS + 1]; /* First element for result operand. */
1848 int opc = 0;
1849
1850 if (have_retval)
1851 {
1852 machine_mode tmode = insn_data[icode].operand[0].mode;
1853 if (!target
1854 || GET_MODE (target) != tmode
1855 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
1856 target = gen_reg_rtx (tmode);
1857 op[opc++] = target;
1858 }
1859
1860 for (;;)
1861 {
1862 builtin_simd_arg thisarg = args[opc - have_retval];
1863
1864 if (thisarg == SIMD_ARG_STOP)
1865 break;
1866 else
1867 {
1868 tree arg = CALL_EXPR_ARG (exp, opc - have_retval);
1869 machine_mode mode = insn_data[icode].operand[opc].mode;
1870 op[opc] = expand_normal (arg);
1871
1872 switch (thisarg)
1873 {
1874 case SIMD_ARG_COPY_TO_REG:
1875 if (POINTER_TYPE_P (TREE_TYPE (arg)))
1876 op[opc] = convert_memory_address (Pmode, op[opc]);
1877 /* gcc_assert (GET_MODE (op[opc]) == mode);  */
1878 if (!(*insn_data[icode].operand[opc].predicate)
1879 (op[opc], mode))
1880 op[opc] = copy_to_mode_reg (mode, op[opc]);
1881 break;
1882
1883 case SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX:
1884 gcc_assert (opc > 1);
1885 if (CONST_INT_P (op[opc]))
1886 {
1887 unsigned int nunits
1888 = GET_MODE_NUNITS (builtin_mode).to_constant ();
1889 aarch64_simd_lane_bounds (op[opc], 0, nunits, exp);
1890 /* Keep to GCC-vector-extension lane indices in the RTL. */
1891 op[opc] = aarch64_endian_lane_rtx (builtin_mode,
1892 INTVAL (op[opc]));
1893 }
1894 goto constant_arg;
1895
1896 case SIMD_ARG_LANE_INDEX:
1897 /* Must be a previous operand into which this is an index. */
1898 gcc_assert (opc > 0);
1899 if (CONST_INT_P (op[opc]))
1900 {
1901 machine_mode vmode = insn_data[icode].operand[opc - 1].mode;
1902 unsigned int nunits
1903 = GET_MODE_NUNITS (vmode).to_constant ();
1904 aarch64_simd_lane_bounds (op[opc], 0, nunits, exp);
1905 /* Keep to GCC-vector-extension lane indices in the RTL. */
1906 op[opc] = aarch64_endian_lane_rtx (vmode, INTVAL (op[opc]));
1907 }
1908 /* If the lane index isn't a constant then error out. */
1909 goto constant_arg;
1910
1911 case SIMD_ARG_LANE_PAIR_INDEX:
1912 /* Must be a previous operand into which this is an index and
1913 index is restricted to nunits / 2. */
1914 gcc_assert (opc > 0);
1915 if (CONST_INT_P (op[opc]))
1916 {
1917 machine_mode vmode = insn_data[icode].operand[opc - 1].mode;
1918 unsigned int nunits
1919 = GET_MODE_NUNITS (vmode).to_constant ();
1920 aarch64_simd_lane_bounds (op[opc], 0, nunits / 2, exp);
1921 /* Keep to GCC-vector-extension lane indices in the RTL. */
1922 int lane = INTVAL (op[opc]);
1923 op[opc] = gen_int_mode (ENDIAN_LANE_N (nunits / 2, lane),
1924 SImode);
1925 }
1926 /* If the lane index isn't a constant then error out. */
1927 goto constant_arg;
1928 case SIMD_ARG_LANE_QUADTUP_INDEX:
1929 /* Must be a previous operand into which this is an index and
1930 index is restricted to nunits / 4. */
1931 gcc_assert (opc > 0);
1932 if (CONST_INT_P (op[opc]))
1933 {
1934 machine_mode vmode = insn_data[icode].operand[opc - 1].mode;
1935 unsigned int nunits
1936 = GET_MODE_NUNITS (vmode).to_constant ();
1937 aarch64_simd_lane_bounds (op[opc], 0, nunits / 4, exp);
1938 /* Keep to GCC-vector-extension lane indices in the RTL. */
1939 int lane = INTVAL (op[opc]);
1940 op[opc] = gen_int_mode (ENDIAN_LANE_N (nunits / 4, lane),
1941 SImode);
1942 }
1943 /* If the lane index isn't a constant then error out. */
1944 goto constant_arg;
1945 case SIMD_ARG_CONSTANT:
1946 constant_arg:
1947 if (!(*insn_data[icode].operand[opc].predicate)
1948 (op[opc], mode))
1949 {
1950 error_at (EXPR_LOCATION (exp),
1951 "argument %d must be a constant immediate",
1952 opc + 1 - have_retval);
1953 return const0_rtx;
1954 }
1955 break;
1956
1957 case SIMD_ARG_STOP:
1958 gcc_unreachable ();
1959 }
1960
1961 opc++;
1962 }
1963 }
1964
1965 switch (opc)
1966 {
1967 case 1:
1968 pat = GEN_FCN (icode) (op[0]);
1969 break;
1970
1971 case 2:
1972 pat = GEN_FCN (icode) (op[0], op[1]);
1973 break;
1974
1975 case 3:
1976 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
1977 break;
1978
1979 case 4:
1980 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
1981 break;
1982
1983 case 5:
1984 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
1985 break;
1986
1987 case 6:
1988 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
1989 break;
1990
1991 default:
1992 gcc_unreachable ();
1993 }
1994
1995 if (!pat)
1996 return NULL_RTX;
1997
1998 emit_insn (pat);
1999
2000 return target;
2001 }
2002
2003 /* Expand an AArch64 AdvSIMD builtin (intrinsic).  */
2004 rtx
2005 aarch64_simd_expand_builtin (int fcode, tree exp, rtx target)
2006 {
2007 if (fcode == AARCH64_SIMD_BUILTIN_LANE_CHECK)
2008 {
2009 rtx totalsize = expand_normal (CALL_EXPR_ARG (exp, 0));
2010 rtx elementsize = expand_normal (CALL_EXPR_ARG (exp, 1));
2011 if (CONST_INT_P (totalsize) && CONST_INT_P (elementsize)
2012 && UINTVAL (elementsize) != 0
2013 && UINTVAL (totalsize) != 0)
2014 {
2015 rtx lane_idx = expand_normal (CALL_EXPR_ARG (exp, 2));
2016 if (CONST_INT_P (lane_idx))
2017 aarch64_simd_lane_bounds (lane_idx, 0,
2018 UINTVAL (totalsize)
2019 / UINTVAL (elementsize),
2020 exp);
2021 else
2022 error_at (EXPR_LOCATION (exp),
2023 "lane index must be a constant immediate");
2024 }
2025 else
2026 error_at (EXPR_LOCATION (exp),
2027 "total size and element size must be a nonzero "
2028 "constant immediate");
2029 /* Don't generate any RTL. */
2030 return const0_rtx;
2031 }
2032 aarch64_simd_builtin_datum *d =
2033 &aarch64_simd_builtin_data[fcode - AARCH64_SIMD_PATTERN_START];
2034 enum insn_code icode = d->code;
2035 builtin_simd_arg args[SIMD_MAX_BUILTIN_ARGS + 1];
2036 int num_args = insn_data[d->code].n_operands;
2037 int is_void = 0;
2038 int k;
2039
2040 is_void = !!(d->qualifiers[0] & qualifier_void);
2041
2042 num_args += is_void;
2043
2044 for (k = 1; k < num_args; k++)
2045 {
2046 /* We have four arrays of data, each indexed in a different fashion.
2047 qualifiers - element 0 always describes the function return type.
2048 operands - element 0 is either the operand for return value (if
2049 the function has a non-void return type) or the operand for the
2050 first argument.
2051 expr_args - element 0 always holds the first argument.
2052 args - element 0 is always used for the return type. */
2053 int qualifiers_k = k;
2054 int operands_k = k - is_void;
2055 int expr_args_k = k - 1;
2056
2057 if (d->qualifiers[qualifiers_k] & qualifier_lane_index)
2058 args[k] = SIMD_ARG_LANE_INDEX;
2059 else if (d->qualifiers[qualifiers_k] & qualifier_lane_pair_index)
2060 args[k] = SIMD_ARG_LANE_PAIR_INDEX;
2061 else if (d->qualifiers[qualifiers_k] & qualifier_lane_quadtup_index)
2062 args[k] = SIMD_ARG_LANE_QUADTUP_INDEX;
2063 else if (d->qualifiers[qualifiers_k] & qualifier_struct_load_store_lane_index)
2064 args[k] = SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX;
2065 else if (d->qualifiers[qualifiers_k] & qualifier_immediate)
2066 args[k] = SIMD_ARG_CONSTANT;
2067 else if (d->qualifiers[qualifiers_k] & qualifier_maybe_immediate)
2068 {
2069 rtx arg
2070 = expand_normal (CALL_EXPR_ARG (exp,
2071 (expr_args_k)));
2072 /* Handle constants only if the predicate allows it. */
2073 bool op_const_int_p =
2074 (CONST_INT_P (arg)
2075 && (*insn_data[icode].operand[operands_k].predicate)
2076 (arg, insn_data[icode].operand[operands_k].mode));
2077 args[k] = op_const_int_p ? SIMD_ARG_CONSTANT : SIMD_ARG_COPY_TO_REG;
2078 }
2079 else
2080 args[k] = SIMD_ARG_COPY_TO_REG;
2081
2082 }
2083 args[k] = SIMD_ARG_STOP;
2084
2085 /* The interface to aarch64_simd_expand_args expects a 0 if
2086 the function is void, and a 1 if it is not. */
2087 return aarch64_simd_expand_args
2088 (target, icode, !is_void, exp, &args[1], d->mode);
2089 }
2090
2091 rtx
2092 aarch64_crc32_expand_builtin (int fcode, tree exp, rtx target)
2093 {
2094 rtx pat;
2095 aarch64_crc_builtin_datum *d
2096 = &aarch64_crc_builtin_data[fcode - (AARCH64_CRC32_BUILTIN_BASE + 1)];
2097 enum insn_code icode = d->icode;
2098 tree arg0 = CALL_EXPR_ARG (exp, 0);
2099 tree arg1 = CALL_EXPR_ARG (exp, 1);
2100 rtx op0 = expand_normal (arg0);
2101 rtx op1 = expand_normal (arg1);
2102 machine_mode tmode = insn_data[icode].operand[0].mode;
2103 machine_mode mode0 = insn_data[icode].operand[1].mode;
2104 machine_mode mode1 = insn_data[icode].operand[2].mode;
2105
2106 if (! target
2107 || GET_MODE (target) != tmode
2108 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
2109 target = gen_reg_rtx (tmode);
2110
2111 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
2112 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
2113
2114 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
2115 op0 = copy_to_mode_reg (mode0, op0);
2116 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
2117 op1 = copy_to_mode_reg (mode1, op1);
2118
2119 pat = GEN_FCN (icode) (target, op0, op1);
2120 if (!pat)
2121 return NULL_RTX;
2122
2123 emit_insn (pat);
2124 return target;
2125 }
2126
2127 /* Function to expand reciprocal square root builtins. */
2128
2129 static rtx
2130 aarch64_expand_builtin_rsqrt (int fcode, tree exp, rtx target)
2131 {
2132 tree arg0 = CALL_EXPR_ARG (exp, 0);
2133 rtx op0 = expand_normal (arg0);
2134
2135 rtx (*gen) (rtx, rtx);
2136
2137 switch (fcode)
2138 {
2139 case AARCH64_BUILTIN_RSQRT_DF:
2140 gen = gen_rsqrtdf2;
2141 break;
2142 case AARCH64_BUILTIN_RSQRT_SF:
2143 gen = gen_rsqrtsf2;
2144 break;
2145 case AARCH64_BUILTIN_RSQRT_V2DF:
2146 gen = gen_rsqrtv2df2;
2147 break;
2148 case AARCH64_BUILTIN_RSQRT_V2SF:
2149 gen = gen_rsqrtv2sf2;
2150 break;
2151 case AARCH64_BUILTIN_RSQRT_V4SF:
2152 gen = gen_rsqrtv4sf2;
2153 break;
2154 default: gcc_unreachable ();
2155 }
2156
2157 if (!target)
2158 target = gen_reg_rtx (GET_MODE (op0));
2159
2160 emit_insn (gen (target, op0));
2161
2162 return target;
2163 }
2164
2165 /* Expand a FCMLA lane expression EXP with code FCODE and
2166 result going to TARGET if that is convenient. */
2167
2168 rtx
2169 aarch64_expand_fcmla_builtin (tree exp, rtx target, int fcode)
2170 {
2171 int bcode = fcode - AARCH64_SIMD_FCMLA_LANEQ_BUILTIN_BASE - 1;
2172 aarch64_fcmla_laneq_builtin_datum* d
2173 = &aarch64_fcmla_lane_builtin_data[bcode];
2174 machine_mode quadmode = GET_MODE_2XWIDER_MODE (d->mode).require ();
2175 rtx op0 = force_reg (d->mode, expand_normal (CALL_EXPR_ARG (exp, 0)));
2176 rtx op1 = force_reg (d->mode, expand_normal (CALL_EXPR_ARG (exp, 1)));
2177 rtx op2 = force_reg (quadmode, expand_normal (CALL_EXPR_ARG (exp, 2)));
2178 tree tmp = CALL_EXPR_ARG (exp, 3);
2179 rtx lane_idx = expand_expr (tmp, NULL_RTX, VOIDmode, EXPAND_INITIALIZER);
2180
2181 /* Validate that the lane index is a constant. */
2182 if (!CONST_INT_P (lane_idx))
2183 {
2184 error_at (EXPR_LOCATION (exp),
2185 "argument %d must be a constant immediate", 4);
2186 return const0_rtx;
2187 }
2188
2189 /* Validate that the index is within the expected range. */
2190 int nunits = GET_MODE_NUNITS (quadmode).to_constant ();
2191 aarch64_simd_lane_bounds (lane_idx, 0, nunits / 2, exp);
2192
2193 /* Generate the correct register and mode. */
2194 int lane = INTVAL (lane_idx);
2195
2196 if (lane < nunits / 4)
2197 op2 = simplify_gen_subreg (d->mode, op2, quadmode,
2198 subreg_lowpart_offset (d->mode, quadmode));
2199 else
2200 {
2201 /* Select the upper 64 bits, either a V2SF or V4HF.  This is somewhat
2202 messy: although the operation is conceptually simple, it doesn't
2203 map onto a single RTL pattern and seems hard to express as one.
2204 The target generic version gen_highpart_mode generates code that
2205 isn't optimal.  */
2206 rtx temp1 = gen_reg_rtx (d->mode);
2207 rtx temp2 = gen_reg_rtx (DImode);
2208 temp1 = simplify_gen_subreg (d->mode, op2, quadmode,
2209 subreg_lowpart_offset (d->mode, quadmode));
2210 temp1 = simplify_gen_subreg (V2DImode, temp1, d->mode, 0);
2211 if (BYTES_BIG_ENDIAN)
2212 emit_insn (gen_aarch64_get_lanev2di (temp2, temp1, const0_rtx));
2213 else
2214 emit_insn (gen_aarch64_get_lanev2di (temp2, temp1, const1_rtx));
2215 op2 = simplify_gen_subreg (d->mode, temp2, GET_MODE (temp2), 0);
2216
2217 /* And recalculate the index. */
2218 lane -= nunits / 4;
2219 }
2220
2221 /* Keep to GCC-vector-extension lane indices in the RTL; only nunits / 4
2222 indices (the maximum in the range check above) are valid, i.e. only 0-1,
2223 so we only need to know the lane order within a V2 mode.  */
2224 lane_idx = aarch64_endian_lane_rtx (V2DImode, lane);
2225
2226 if (!target
2227 || !REG_P (target)
2228 || GET_MODE (target) != d->mode)
2229 target = gen_reg_rtx (d->mode);
2230
2231 rtx pat = NULL_RTX;
2232
2233 if (d->lane)
2234 pat = GEN_FCN (d->icode) (target, op0, op1, op2, lane_idx);
2235 else
2236 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
2237
2238 if (!pat)
2239 return NULL_RTX;
2240
2241 emit_insn (pat);
2242 return target;
2243 }
2244
2245 /* Function to expand an expression EXP which calls one of the Transactional
2246 Memory Extension (TME) builtins FCODE with the result going to TARGET. */
2247 static rtx
2248 aarch64_expand_builtin_tme (int fcode, tree exp, rtx target)
2249 {
2250 switch (fcode)
2251 {
2252 case AARCH64_TME_BUILTIN_TSTART:
2253 target = gen_reg_rtx (DImode);
2254 emit_insn (GEN_FCN (CODE_FOR_tstart) (target));
2255 break;
2256
2257 case AARCH64_TME_BUILTIN_TTEST:
2258 target = gen_reg_rtx (DImode);
2259 emit_insn (GEN_FCN (CODE_FOR_ttest) (target));
2260 break;
2261
2262 case AARCH64_TME_BUILTIN_TCOMMIT:
2263 emit_insn (GEN_FCN (CODE_FOR_tcommit) ());
2264 break;
2265
2266 case AARCH64_TME_BUILTIN_TCANCEL:
2267 {
2268 tree arg0 = CALL_EXPR_ARG (exp, 0);
2269 rtx op0 = expand_normal (arg0);
2270 if (CONST_INT_P (op0) && UINTVAL (op0) < 65536)
2271 emit_insn (GEN_FCN (CODE_FOR_tcancel) (op0));
2272 else
2273 {
2274 error_at (EXPR_LOCATION (exp),
2275 "argument must be a 16-bit constant immediate");
2276 return const0_rtx;
2277 }
2278 }
2279 break;
2280
2281 default :
2282 gcc_unreachable ();
2283 }
2284 return target;
2285 }
2286
2287 /* Function to expand an expression EXP which calls one of the Load/Store
2288 64 Byte extension (LS64) builtins FCODE with the result going to TARGET. */
2289 static rtx
2290 aarch64_expand_builtin_ls64 (int fcode, tree exp, rtx target)
2291 {
2292 expand_operand ops[3];
2293
2294 switch (fcode)
2295 {
2296 case AARCH64_LS64_BUILTIN_LD64B:
2297 {
2298 rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
2299 create_output_operand (&ops[0], target, V8DImode);
2300 create_input_operand (&ops[1], op0, DImode);
2301 expand_insn (CODE_FOR_ld64b, 2, ops);
2302 return ops[0].value;
2303 }
2304 case AARCH64_LS64_BUILTIN_ST64B:
2305 {
2306 rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
2307 rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));
2308 create_input_operand (&ops[0], op0, DImode);
2309 create_input_operand (&ops[1], op1, V8DImode);
2310 expand_insn (CODE_FOR_st64b, 2, ops);
2311 return const0_rtx;
2312 }
2313 case AARCH64_LS64_BUILTIN_ST64BV:
2314 {
2315 rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
2316 rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));
2317 create_output_operand (&ops[0], target, DImode);
2318 create_input_operand (&ops[1], op0, DImode);
2319 create_input_operand (&ops[2], op1, V8DImode);
2320 expand_insn (CODE_FOR_st64bv, 3, ops);
2321 return ops[0].value;
2322 }
2323 case AARCH64_LS64_BUILTIN_ST64BV0:
2324 {
2325 rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
2326 rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));
2327 create_output_operand (&ops[0], target, DImode);
2328 create_input_operand (&ops[1], op0, DImode);
2329 create_input_operand (&ops[2], op1, V8DImode);
2330 expand_insn (CODE_FOR_st64bv0, 3, ops);
2331 return ops[0].value;
2332 }
2333 }
2334
2335 gcc_unreachable ();
2336 }
2337
2338 /* Expand a random number builtin EXP with code FCODE, putting the result
2339 in TARGET.  If IGNORE is true the return value is ignored.  */
2340
2341 rtx
2342 aarch64_expand_rng_builtin (tree exp, rtx target, int fcode, int ignore)
2343 {
2344 rtx pat;
2345 enum insn_code icode;
2346 if (fcode == AARCH64_BUILTIN_RNG_RNDR)
2347 icode = CODE_FOR_aarch64_rndr;
2348 else if (fcode == AARCH64_BUILTIN_RNG_RNDRRS)
2349 icode = CODE_FOR_aarch64_rndrrs;
2350 else
2351 gcc_unreachable ();
2352
2353 rtx rand = gen_reg_rtx (DImode);
2354 pat = GEN_FCN (icode) (rand);
2355 if (!pat)
2356 return NULL_RTX;
2357
2358 tree arg0 = CALL_EXPR_ARG (exp, 0);
2359 rtx res_addr = expand_normal (arg0);
2360 res_addr = convert_memory_address (Pmode, res_addr);
2361 rtx res_mem = gen_rtx_MEM (DImode, res_addr);
2362 emit_insn (pat);
2363 emit_move_insn (res_mem, rand);
2364 /* If the status result is unused don't generate the CSET code. */
2365 if (ignore)
2366 return target;
2367
2368 rtx cc_reg = gen_rtx_REG (CC_Zmode, CC_REGNUM);
2369 rtx cmp_rtx = gen_rtx_fmt_ee (EQ, SImode, cc_reg, const0_rtx);
2370 emit_insn (gen_aarch64_cstoresi (target, cmp_rtx, cc_reg));
2371 return target;
2372 }
2373
2374 /* Expand an expression EXP that calls a MEMTAG built-in FCODE
2375 with result going to TARGET. */
2376 static rtx
2377 aarch64_expand_builtin_memtag (int fcode, tree exp, rtx target)
2378 {
2379 if (TARGET_ILP32)
2380 {
2381 error ("Memory Tagging Extension does not support %<-mabi=ilp32%>");
2382 return const0_rtx;
2383 }
2384
2385 rtx pat = NULL;
2386 enum insn_code icode = aarch64_memtag_builtin_data[fcode -
2387 AARCH64_MEMTAG_BUILTIN_START - 1].icode;
2388
2389 rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
2390 machine_mode mode0 = GET_MODE (op0);
2391 op0 = force_reg (mode0 == VOIDmode ? DImode : mode0, op0);
2392 op0 = convert_to_mode (DImode, op0, true);
2393
2394 switch (fcode)
2395 {
2396 case AARCH64_MEMTAG_BUILTIN_IRG:
2397 case AARCH64_MEMTAG_BUILTIN_GMI:
2398 case AARCH64_MEMTAG_BUILTIN_SUBP:
2399 case AARCH64_MEMTAG_BUILTIN_INC_TAG:
2400 {
2401 if (! target
2402 || GET_MODE (target) != DImode
2403 || ! (*insn_data[icode].operand[0].predicate) (target, DImode))
2404 target = gen_reg_rtx (DImode);
2405
2406 if (fcode == AARCH64_MEMTAG_BUILTIN_INC_TAG)
2407 {
2408 rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));
2409
2410 if ((*insn_data[icode].operand[3].predicate) (op1, QImode))
2411 {
2412 pat = GEN_FCN (icode) (target, op0, const0_rtx, op1);
2413 break;
2414 }
2415 error_at (EXPR_LOCATION (exp),
2416 "argument %d must be a constant immediate "
2417 "in range [0,15]", 2);
2418 return const0_rtx;
2419 }
2420 else
2421 {
2422 rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));
2423 machine_mode mode1 = GET_MODE (op1);
2424 op1 = force_reg (mode1 == VOIDmode ? DImode : mode1, op1);
2425 op1 = convert_to_mode (DImode, op1, true);
2426 pat = GEN_FCN (icode) (target, op0, op1);
2427 }
2428 break;
2429 }
2430 case AARCH64_MEMTAG_BUILTIN_GET_TAG:
2431 target = op0;
2432 pat = GEN_FCN (icode) (target, op0, const0_rtx);
2433 break;
2434 case AARCH64_MEMTAG_BUILTIN_SET_TAG:
2435 pat = GEN_FCN (icode) (op0, op0, const0_rtx);
2436 break;
2437 default:
2438 gcc_unreachable ();
2439 }
2440
2441 if (!pat)
2442 return NULL_RTX;
2443
2444 emit_insn (pat);
2445 return target;
2446 }
2447
2448 /* Function to expand an expression EXP which calls one of the ACLE Data
2449 Intrinsic builtins FCODE with the result going to TARGET. */
2450 static rtx
2451 aarch64_expand_builtin_data_intrinsic (unsigned int fcode, tree exp, rtx target)
2452 {
2453 expand_operand ops[2];
2454 machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
2455 create_output_operand (&ops[0], target, mode);
2456 create_input_operand (&ops[1], expand_normal (CALL_EXPR_ARG (exp, 0)), mode);
2457 enum insn_code icode;
2458
2459 switch (fcode)
2460 {
2461 case AARCH64_REV16:
2462 case AARCH64_REV16L:
2463 case AARCH64_REV16LL:
2464 icode = code_for_aarch64_rev16 (mode);
2465 break;
2466 case AARCH64_RBIT:
2467 case AARCH64_RBITL:
2468 case AARCH64_RBITLL:
2469 icode = code_for_aarch64_rbit (mode);
2470 break;
2471 default:
2472 gcc_unreachable ();
2473 }
2474
2475 expand_insn (icode, 2, ops);
2476 return ops[0].value;
2477 }
2478
2479 /* Expand an expression EXP as fpsr or fpcr setter (depending on
2480 UNSPEC) using MODE. */
2481 static void
2482 aarch64_expand_fpsr_fpcr_setter (int unspec, machine_mode mode, tree exp)
2483 {
2484 tree arg = CALL_EXPR_ARG (exp, 0);
2485 rtx op = force_reg (mode, expand_normal (arg));
2486 emit_insn (gen_aarch64_set (unspec, mode, op));
2487 }
2488
2489 /* Expand a fpsr or fpcr getter (depending on UNSPEC) using MODE.
2490 Return the target. */
2491 static rtx
2492 aarch64_expand_fpsr_fpcr_getter (enum insn_code icode, machine_mode mode,
2493 rtx target)
2494 {
2495 expand_operand op;
2496 create_output_operand (&op, target, mode);
2497 expand_insn (icode, 1, &op);
2498 return op.value;
2499 }
2500
2501 /* Expand an expression EXP that calls built-in function FCODE,
2502 with result going to TARGET if that's convenient. IGNORE is true
2503 if the result of the builtin is ignored. */
2504 rtx
2505 aarch64_general_expand_builtin (unsigned int fcode, tree exp, rtx target,
2506 int ignore)
2507 {
2508 int icode;
2509 rtx op0;
2510 tree arg0;
2511
2512 switch (fcode)
2513 {
2514 case AARCH64_BUILTIN_GET_FPCR:
2515 return aarch64_expand_fpsr_fpcr_getter (CODE_FOR_aarch64_get_fpcrsi,
2516 SImode, target);
2517 case AARCH64_BUILTIN_SET_FPCR:
2518 aarch64_expand_fpsr_fpcr_setter (UNSPECV_SET_FPCR, SImode, exp);
2519 return target;
2520 case AARCH64_BUILTIN_GET_FPSR:
2521 return aarch64_expand_fpsr_fpcr_getter (CODE_FOR_aarch64_get_fpsrsi,
2522 SImode, target);
2523 case AARCH64_BUILTIN_SET_FPSR:
2524 aarch64_expand_fpsr_fpcr_setter (UNSPECV_SET_FPSR, SImode, exp);
2525 return target;
2526 case AARCH64_BUILTIN_GET_FPCR64:
2527 return aarch64_expand_fpsr_fpcr_getter (CODE_FOR_aarch64_get_fpcrdi,
2528 DImode, target);
2529 case AARCH64_BUILTIN_SET_FPCR64:
2530 aarch64_expand_fpsr_fpcr_setter (UNSPECV_SET_FPCR, DImode, exp);
2531 return target;
2532 case AARCH64_BUILTIN_GET_FPSR64:
2533 return aarch64_expand_fpsr_fpcr_getter (CODE_FOR_aarch64_get_fpsrdi,
2534 DImode, target);
2535 case AARCH64_BUILTIN_SET_FPSR64:
2536 aarch64_expand_fpsr_fpcr_setter (UNSPECV_SET_FPSR, DImode, exp);
2537 return target;
2538 case AARCH64_PAUTH_BUILTIN_AUTIA1716:
2539 case AARCH64_PAUTH_BUILTIN_PACIA1716:
2540 case AARCH64_PAUTH_BUILTIN_AUTIB1716:
2541 case AARCH64_PAUTH_BUILTIN_PACIB1716:
2542 case AARCH64_PAUTH_BUILTIN_XPACLRI:
2543 arg0 = CALL_EXPR_ARG (exp, 0);
2544 op0 = force_reg (Pmode, expand_normal (arg0));
2545
2546 if (fcode == AARCH64_PAUTH_BUILTIN_XPACLRI)
2547 {
2548 rtx lr = gen_rtx_REG (Pmode, R30_REGNUM);
2549 icode = CODE_FOR_xpaclri;
2550 emit_move_insn (lr, op0);
2551 emit_insn (GEN_FCN (icode) ());
2552 return lr;
2553 }
2554 else
2555 {
2556 tree arg1 = CALL_EXPR_ARG (exp, 1);
2557 rtx op1 = force_reg (Pmode, expand_normal (arg1));
2558 switch (fcode)
2559 {
2560 case AARCH64_PAUTH_BUILTIN_AUTIA1716:
2561 icode = CODE_FOR_autia1716;
2562 break;
2563 case AARCH64_PAUTH_BUILTIN_AUTIB1716:
2564 icode = CODE_FOR_autib1716;
2565 break;
2566 case AARCH64_PAUTH_BUILTIN_PACIA1716:
2567 icode = CODE_FOR_pacia1716;
2568 break;
2569 case AARCH64_PAUTH_BUILTIN_PACIB1716:
2570 icode = CODE_FOR_pacib1716;
2571 break;
2572 default:
2573 icode = 0;
2574 gcc_unreachable ();
2575 }
2576
2577 rtx x16_reg = gen_rtx_REG (Pmode, R16_REGNUM);
2578 rtx x17_reg = gen_rtx_REG (Pmode, R17_REGNUM);
2579 emit_move_insn (x17_reg, op0);
2580 emit_move_insn (x16_reg, op1);
2581 emit_insn (GEN_FCN (icode) ());
2582 return x17_reg;
2583 }
2584
2585 case AARCH64_JSCVT:
2586 {
2587 expand_operand ops[2];
2588 create_output_operand (&ops[0], target, SImode);
2589 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
2590 create_input_operand (&ops[1], op0, DFmode);
2591 expand_insn (CODE_FOR_aarch64_fjcvtzs, 2, ops);
2592 return ops[0].value;
2593 }
2594
2595 case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ0_V2SF:
2596 case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ90_V2SF:
2597 case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ180_V2SF:
2598 case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ270_V2SF:
2599 case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ0_V4HF:
2600 case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ90_V4HF:
2601 case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ180_V4HF:
2602 case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ270_V4HF:
2603 return aarch64_expand_fcmla_builtin (exp, target, fcode);
2604 case AARCH64_BUILTIN_RNG_RNDR:
2605 case AARCH64_BUILTIN_RNG_RNDRRS:
2606 return aarch64_expand_rng_builtin (exp, target, fcode, ignore);
2607 }
2608
2609 if (fcode >= AARCH64_SIMD_BUILTIN_BASE && fcode <= AARCH64_SIMD_BUILTIN_MAX)
2610 return aarch64_simd_expand_builtin (fcode, exp, target);
2611 else if (fcode >= AARCH64_CRC32_BUILTIN_BASE && fcode <= AARCH64_CRC32_BUILTIN_MAX)
2612 return aarch64_crc32_expand_builtin (fcode, exp, target);
2613
2614 if (fcode == AARCH64_BUILTIN_RSQRT_DF
2615 || fcode == AARCH64_BUILTIN_RSQRT_SF
2616 || fcode == AARCH64_BUILTIN_RSQRT_V2DF
2617 || fcode == AARCH64_BUILTIN_RSQRT_V2SF
2618 || fcode == AARCH64_BUILTIN_RSQRT_V4SF)
2619 return aarch64_expand_builtin_rsqrt (fcode, exp, target);
2620
2621 if (fcode == AARCH64_TME_BUILTIN_TSTART
2622 || fcode == AARCH64_TME_BUILTIN_TCOMMIT
2623 || fcode == AARCH64_TME_BUILTIN_TTEST
2624 || fcode == AARCH64_TME_BUILTIN_TCANCEL)
2625 return aarch64_expand_builtin_tme (fcode, exp, target);
2626
2627 if (fcode == AARCH64_LS64_BUILTIN_LD64B
2628 || fcode == AARCH64_LS64_BUILTIN_ST64B
2629 || fcode == AARCH64_LS64_BUILTIN_ST64BV
2630 || fcode == AARCH64_LS64_BUILTIN_ST64BV0)
2631 return aarch64_expand_builtin_ls64 (fcode, exp, target);
2632
2633 if (fcode >= AARCH64_MEMTAG_BUILTIN_START
2634 && fcode <= AARCH64_MEMTAG_BUILTIN_END)
2635 return aarch64_expand_builtin_memtag (fcode, exp, target);
2636 if (fcode >= AARCH64_REV16
2637 && fcode <= AARCH64_RBITLL)
2638 return aarch64_expand_builtin_data_intrinsic (fcode, exp, target);
2639
2640 gcc_unreachable ();
2641 }
2642
2643 tree
2644 aarch64_builtin_vectorized_function (unsigned int fn, tree type_out,
2645 tree type_in)
2646 {
2647 machine_mode in_mode, out_mode;
2648
2649 if (TREE_CODE (type_out) != VECTOR_TYPE
2650 || TREE_CODE (type_in) != VECTOR_TYPE)
2651 return NULL_TREE;
2652
2653 out_mode = TYPE_MODE (type_out);
2654 in_mode = TYPE_MODE (type_in);
2655
2656 #undef AARCH64_CHECK_BUILTIN_MODE
2657 #define AARCH64_CHECK_BUILTIN_MODE(C, N) 1
2658 #define AARCH64_FIND_FRINT_VARIANT(N) \
2659 (AARCH64_CHECK_BUILTIN_MODE (2, D) \
2660 ? aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_##N##v2df] \
2661 : (AARCH64_CHECK_BUILTIN_MODE (4, S) \
2662 ? aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_##N##v4sf] \
2663 : (AARCH64_CHECK_BUILTIN_MODE (2, S) \
2664 ? aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_##N##v2sf] \
2665 : NULL_TREE)))
2666 switch (fn)
2667 {
2668 #undef AARCH64_CHECK_BUILTIN_MODE
2669 #define AARCH64_CHECK_BUILTIN_MODE(C, N) \
2670 (out_mode == V##C##N##Fmode && in_mode == V##C##N##Fmode)
2671 CASE_CFN_FLOOR:
2672 return AARCH64_FIND_FRINT_VARIANT (floor);
2673 CASE_CFN_CEIL:
2674 return AARCH64_FIND_FRINT_VARIANT (ceil);
2675 CASE_CFN_TRUNC:
2676 return AARCH64_FIND_FRINT_VARIANT (btrunc);
2677 CASE_CFN_ROUND:
2678 return AARCH64_FIND_FRINT_VARIANT (round);
2679 CASE_CFN_NEARBYINT:
2680 return AARCH64_FIND_FRINT_VARIANT (nearbyint);
2681 CASE_CFN_SQRT:
2682 return AARCH64_FIND_FRINT_VARIANT (sqrt);
2683 #undef AARCH64_CHECK_BUILTIN_MODE
2684 #define AARCH64_CHECK_BUILTIN_MODE(C, N) \
2685 (out_mode == V##C##SImode && in_mode == V##C##N##Imode)
2686 CASE_CFN_CLZ:
2687 {
2688 if (AARCH64_CHECK_BUILTIN_MODE (4, S))
2689 return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_clzv4si];
2690 return NULL_TREE;
2691 }
2692 CASE_CFN_CTZ:
2693 {
2694 if (AARCH64_CHECK_BUILTIN_MODE (2, S))
2695 return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_ctzv2si];
2696 else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
2697 return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_ctzv4si];
2698 return NULL_TREE;
2699 }
2700 #undef AARCH64_CHECK_BUILTIN_MODE
2701 #define AARCH64_CHECK_BUILTIN_MODE(C, N) \
2702 (out_mode == V##C##N##Imode && in_mode == V##C##N##Fmode)
2703 CASE_CFN_IFLOOR:
2704 CASE_CFN_LFLOOR:
2705 CASE_CFN_LLFLOOR:
2706 {
2707 enum aarch64_builtins builtin;
2708 if (AARCH64_CHECK_BUILTIN_MODE (2, D))
2709 builtin = AARCH64_SIMD_BUILTIN_UNOP_lfloorv2dfv2di;
2710 else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
2711 builtin = AARCH64_SIMD_BUILTIN_UNOP_lfloorv4sfv4si;
2712 else if (AARCH64_CHECK_BUILTIN_MODE (2, S))
2713 builtin = AARCH64_SIMD_BUILTIN_UNOP_lfloorv2sfv2si;
2714 else
2715 return NULL_TREE;
2716
2717 return aarch64_builtin_decls[builtin];
2718 }
2719 CASE_CFN_ICEIL:
2720 CASE_CFN_LCEIL:
2721 CASE_CFN_LLCEIL:
2722 {
2723 enum aarch64_builtins builtin;
2724 if (AARCH64_CHECK_BUILTIN_MODE (2, D))
2725 builtin = AARCH64_SIMD_BUILTIN_UNOP_lceilv2dfv2di;
2726 else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
2727 builtin = AARCH64_SIMD_BUILTIN_UNOP_lceilv4sfv4si;
2728 else if (AARCH64_CHECK_BUILTIN_MODE (2, S))
2729 builtin = AARCH64_SIMD_BUILTIN_UNOP_lceilv2sfv2si;
2730 else
2731 return NULL_TREE;
2732
2733 return aarch64_builtin_decls[builtin];
2734 }
2735 CASE_CFN_IROUND:
2736 CASE_CFN_LROUND:
2737 CASE_CFN_LLROUND:
2738 {
2739 enum aarch64_builtins builtin;
2740 if (AARCH64_CHECK_BUILTIN_MODE (2, D))
2741 builtin = AARCH64_SIMD_BUILTIN_UNOP_lroundv2dfv2di;
2742 else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
2743 builtin = AARCH64_SIMD_BUILTIN_UNOP_lroundv4sfv4si;
2744 else if (AARCH64_CHECK_BUILTIN_MODE (2, S))
2745 builtin = AARCH64_SIMD_BUILTIN_UNOP_lroundv2sfv2si;
2746 else
2747 return NULL_TREE;
2748
2749 return aarch64_builtin_decls[builtin];
2750 }
2751 default:
2752 return NULL_TREE;
2753 }
2754
2755 return NULL_TREE;
2756 }
2757
2758 /* Return builtin for reciprocal square root. */
2759
2760 tree
2761 aarch64_general_builtin_rsqrt (unsigned int fn)
2762 {
2763 if (fn == AARCH64_SIMD_BUILTIN_UNOP_sqrtv2df)
2764 return aarch64_builtin_decls[AARCH64_BUILTIN_RSQRT_V2DF];
2765 if (fn == AARCH64_SIMD_BUILTIN_UNOP_sqrtv2sf)
2766 return aarch64_builtin_decls[AARCH64_BUILTIN_RSQRT_V2SF];
2767 if (fn == AARCH64_SIMD_BUILTIN_UNOP_sqrtv4sf)
2768 return aarch64_builtin_decls[AARCH64_BUILTIN_RSQRT_V4SF];
2769 return NULL_TREE;
2770 }
2771
2772 /* Return true if the lane check can be removed because no error is
2773 going to be emitted.  */
2774 static bool
2775 aarch64_fold_builtin_lane_check (tree arg0, tree arg1, tree arg2)
2776 {
2777 if (TREE_CODE (arg0) != INTEGER_CST)
2778 return false;
2779 if (TREE_CODE (arg1) != INTEGER_CST)
2780 return false;
2781 if (TREE_CODE (arg2) != INTEGER_CST)
2782 return false;
2783
2784 auto totalsize = wi::to_widest (arg0);
2785 auto elementsize = wi::to_widest (arg1);
2786 if (totalsize == 0 || elementsize == 0)
2787 return false;
2788 auto lane = wi::to_widest (arg2);
2789 auto high = wi::udiv_trunc (totalsize, elementsize);
2790 return wi::ltu_p (lane, high);
2791 }
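
/* For illustration: for a 128-bit vector of 32-bit elements, arm_neon.h
   typically emits the lane check with a total size of 16 bytes and an
   element size of 4 bytes, so lanes 0-3 satisfy wi::ltu_p (lane, 16 / 4)
   above and the check folds away, while lane 4 fails and the error is left
   to be reported at expand time.  */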
2792
2793 #undef VAR1
2794 #define VAR1(T, N, MAP, FLAG, A) \
2795 case AARCH64_SIMD_BUILTIN_##T##_##N##A:
2796
2797 /* Try to fold a call to the built-in function with subcode FCODE. The
2798 function is passed the N_ARGS arguments in ARGS and it returns a value
2799 of type TYPE. Return the new expression on success and NULL_TREE on
2800 failure. */
2801 tree
2802 aarch64_general_fold_builtin (unsigned int fcode, tree type,
2803 unsigned int n_args ATTRIBUTE_UNUSED, tree *args)
2804 {
2805 switch (fcode)
2806 {
2807 BUILTIN_VDQF (UNOP, abs, 2, ALL)
2808 return fold_build1 (ABS_EXPR, type, args[0]);
2809 VAR1 (UNOP, floatv2si, 2, ALL, v2sf)
2810 VAR1 (UNOP, floatv4si, 2, ALL, v4sf)
2811 VAR1 (UNOP, floatv2di, 2, ALL, v2df)
2812 return fold_build1 (FLOAT_EXPR, type, args[0]);
2813 case AARCH64_SIMD_BUILTIN_LANE_CHECK:
2814 gcc_assert (n_args == 3);
2815 if (aarch64_fold_builtin_lane_check (args[0], args[1], args[2]))
2816 return void_node;
2817 break;
2818 default:
2819 break;
2820 }
2821
2822 return NULL_TREE;
2823 }
2824
2825 enum aarch64_simd_type
2826 get_mem_type_for_load_store (unsigned int fcode)
2827 {
2828 switch (fcode)
2829 {
2830 VAR1 (LOAD1, ld1, 0, LOAD, v8qi)
2831 VAR1 (STORE1, st1, 0, STORE, v8qi)
2832 return Int8x8_t;
2833 VAR1 (LOAD1, ld1, 0, LOAD, v16qi)
2834 VAR1 (STORE1, st1, 0, STORE, v16qi)
2835 return Int8x16_t;
2836 VAR1 (LOAD1, ld1, 0, LOAD, v4hi)
2837 VAR1 (STORE1, st1, 0, STORE, v4hi)
2838 return Int16x4_t;
2839 VAR1 (LOAD1, ld1, 0, LOAD, v8hi)
2840 VAR1 (STORE1, st1, 0, STORE, v8hi)
2841 return Int16x8_t;
2842 VAR1 (LOAD1, ld1, 0, LOAD, v2si)
2843 VAR1 (STORE1, st1, 0, STORE, v2si)
2844 return Int32x2_t;
2845 VAR1 (LOAD1, ld1, 0, LOAD, v4si)
2846 VAR1 (STORE1, st1, 0, STORE, v4si)
2847 return Int32x4_t;
2848 VAR1 (LOAD1, ld1, 0, LOAD, v2di)
2849 VAR1 (STORE1, st1, 0, STORE, v2di)
2850 return Int64x2_t;
2851 VAR1 (LOAD1_U, ld1, 0, LOAD, v8qi)
2852 VAR1 (STORE1_U, st1, 0, STORE, v8qi)
2853 return Uint8x8_t;
2854 VAR1 (LOAD1_U, ld1, 0, LOAD, v16qi)
2855 VAR1 (STORE1_U, st1, 0, STORE, v16qi)
2856 return Uint8x16_t;
2857 VAR1 (LOAD1_U, ld1, 0, LOAD, v4hi)
2858 VAR1 (STORE1_U, st1, 0, STORE, v4hi)
2859 return Uint16x4_t;
2860 VAR1 (LOAD1_U, ld1, 0, LOAD, v8hi)
2861 VAR1 (STORE1_U, st1, 0, STORE, v8hi)
2862 return Uint16x8_t;
2863 VAR1 (LOAD1_U, ld1, 0, LOAD, v2si)
2864 VAR1 (STORE1_U, st1, 0, STORE, v2si)
2865 return Uint32x2_t;
2866 VAR1 (LOAD1_U, ld1, 0, LOAD, v4si)
2867 VAR1 (STORE1_U, st1, 0, STORE, v4si)
2868 return Uint32x4_t;
2869 VAR1 (LOAD1_U, ld1, 0, LOAD, v2di)
2870 VAR1 (STORE1_U, st1, 0, STORE, v2di)
2871 return Uint64x2_t;
2872 VAR1 (LOAD1_P, ld1, 0, LOAD, v8qi)
2873 VAR1 (STORE1_P, st1, 0, STORE, v8qi)
2874 return Poly8x8_t;
2875 VAR1 (LOAD1_P, ld1, 0, LOAD, v16qi)
2876 VAR1 (STORE1_P, st1, 0, STORE, v16qi)
2877 return Poly8x16_t;
2878 VAR1 (LOAD1_P, ld1, 0, LOAD, v4hi)
2879 VAR1 (STORE1_P, st1, 0, STORE, v4hi)
2880 return Poly16x4_t;
2881 VAR1 (LOAD1_P, ld1, 0, LOAD, v8hi)
2882 VAR1 (STORE1_P, st1, 0, STORE, v8hi)
2883 return Poly16x8_t;
2884 VAR1 (LOAD1_P, ld1, 0, LOAD, v2di)
2885 VAR1 (STORE1_P, st1, 0, STORE, v2di)
2886 return Poly64x2_t;
2887 VAR1 (LOAD1, ld1, 0, LOAD, v4hf)
2888 VAR1 (STORE1, st1, 0, STORE, v4hf)
2889 return Float16x4_t;
2890 VAR1 (LOAD1, ld1, 0, LOAD, v8hf)
2891 VAR1 (STORE1, st1, 0, STORE, v8hf)
2892 return Float16x8_t;
2893 VAR1 (LOAD1, ld1, 0, LOAD, v4bf)
2894 VAR1 (STORE1, st1, 0, STORE, v4bf)
2895 return Bfloat16x4_t;
2896 VAR1 (LOAD1, ld1, 0, LOAD, v8bf)
2897 VAR1 (STORE1, st1, 0, STORE, v8bf)
2898 return Bfloat16x8_t;
2899 VAR1 (LOAD1, ld1, 0, LOAD, v2sf)
2900 VAR1 (STORE1, st1, 0, STORE, v2sf)
2901 return Float32x2_t;
2902 VAR1 (LOAD1, ld1, 0, LOAD, v4sf)
2903 VAR1 (STORE1, st1, 0, STORE, v4sf)
2904 return Float32x4_t;
2905 VAR1 (LOAD1, ld1, 0, LOAD, v2df)
2906 VAR1 (STORE1, st1, 0, STORE, v2df)
2907 return Float64x2_t;
2908 default:
2909 gcc_unreachable ();
2910 break;
2911 }
2912 }
2913
2914 /* We've seen a vector load from address ADDR. Record it in
2915 vector_load_decls, if appropriate. */
2916 static void
2917 aarch64_record_vector_load_arg (tree addr)
2918 {
2919 tree decl = aarch64_vector_load_decl (addr);
2920 if (!decl)
2921 return;
2922 if (!cfun->machine->vector_load_decls)
2923 cfun->machine->vector_load_decls = hash_set<tree>::create_ggc (31);
2924 cfun->machine->vector_load_decls->add (decl);
2925 }
2926
2927 /* Try to fold STMT, given that it's a call to the built-in function with
2928 subcode FCODE. Return the new statement on success and null on
2929 failure. */
2930 gimple *
2931 aarch64_general_gimple_fold_builtin (unsigned int fcode, gcall *stmt,
2932 gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED)
2933 {
2934 gimple *new_stmt = NULL;
2935 unsigned nargs = gimple_call_num_args (stmt);
2936 tree *args = (nargs > 0
2937 ? gimple_call_arg_ptr (stmt, 0)
2938 : &error_mark_node);
2939
2940 /* We use gimple's IFN_REDUC_(PLUS|MIN|MAX) internal functions for float,
2941 signed int and unsigned int; the variant is distinguished by the types
2942 of the arguments to the __builtin.  */
2943 switch (fcode)
2944 {
2945 BUILTIN_VALL (UNOP, reduc_plus_scal_, 10, ALL)
2946 new_stmt = gimple_build_call_internal (IFN_REDUC_PLUS,
2947 1, args[0]);
2948 gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
2949 break;
2950
2951 /* Lower sqrt builtins to gimple/internal function sqrt. */
2952 BUILTIN_VHSDF_DF (UNOP, sqrt, 2, FP)
2953 new_stmt = gimple_build_call_internal (IFN_SQRT,
2954 1, args[0]);
2955 gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
2956 break;
2957
2958 /* Lower store and load Neon builtins to gimple.  */
2959 BUILTIN_VALL_F16 (LOAD1, ld1, 0, LOAD)
2960 BUILTIN_VDQ_I (LOAD1_U, ld1, 0, LOAD)
2961 BUILTIN_VALLP_NO_DI (LOAD1_P, ld1, 0, LOAD)
2962 /* Punt until after inlining, so that we stand more chance of
2963 recording something meaningful in vector_load_decls. */
2964 if (!cfun->after_inlining)
2965 break;
2966 aarch64_record_vector_load_arg (args[0]);
2967 if (!BYTES_BIG_ENDIAN)
2968 {
2969 enum aarch64_simd_type mem_type
2970 = get_mem_type_for_load_store (fcode);
2971 aarch64_simd_type_info simd_type
2972 = aarch64_simd_types[mem_type];
2973 tree elt_ptr_type = build_pointer_type_for_mode (simd_type.eltype,
2974 VOIDmode, true);
2975 tree zero = build_zero_cst (elt_ptr_type);
2976 /* Use element type alignment. */
2977 tree access_type
2978 = build_aligned_type (simd_type.itype,
2979 TYPE_ALIGN (simd_type.eltype));
2980 new_stmt
2981 = gimple_build_assign (gimple_get_lhs (stmt),
2982 fold_build2 (MEM_REF,
2983 access_type,
2984 args[0], zero));
2985 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
2986 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
2987 }
2988 break;
2989
2990 BUILTIN_VALL_F16 (STORE1, st1, 0, STORE)
2991 BUILTIN_VDQ_I (STORE1_U, st1, 0, STORE)
2992 BUILTIN_VALLP_NO_DI (STORE1_P, st1, 0, STORE)
2993 if (!BYTES_BIG_ENDIAN)
2994 {
2995 enum aarch64_simd_type mem_type
2996 = get_mem_type_for_load_store (fcode);
2997 aarch64_simd_type_info simd_type
2998 = aarch64_simd_types[mem_type];
2999 tree elt_ptr_type = build_pointer_type_for_mode (simd_type.eltype,
3000 VOIDmode, true);
3001 tree zero = build_zero_cst (elt_ptr_type);
3002 /* Use element type alignment. */
3003 tree access_type
3004 = build_aligned_type (simd_type.itype,
3005 TYPE_ALIGN (simd_type.eltype));
3006 new_stmt
3007 = gimple_build_assign (fold_build2 (MEM_REF, access_type,
3008 args[0], zero),
3009 args[1]);
3010 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
3011 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
3012 }
3013 break;
3014
3015 BUILTIN_VDQIF (UNOP, reduc_smax_scal_, 10, ALL)
3016 BUILTIN_VDQ_BHSI (UNOPU, reduc_umax_scal_, 10, ALL)
3017 new_stmt = gimple_build_call_internal (IFN_REDUC_MAX,
3018 1, args[0]);
3019 gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
3020 break;
3021 BUILTIN_VDQIF (UNOP, reduc_smin_scal_, 10, ALL)
3022 BUILTIN_VDQ_BHSI (UNOPU, reduc_umin_scal_, 10, ALL)
3023 new_stmt = gimple_build_call_internal (IFN_REDUC_MIN,
3024 1, args[0]);
3025 gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
3026 break;
3027 BUILTIN_VSDQ_I_DI (BINOP, ashl, 3, NONE)
3028 if (TREE_CODE (args[1]) == INTEGER_CST
3029 && wi::ltu_p (wi::to_wide (args[1]), element_precision (args[0])))
3030 new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
3031 LSHIFT_EXPR, args[0], args[1]);
3032 break;
3033 BUILTIN_VSDQ_I_DI (BINOP, sshl, 0, NONE)
3034 BUILTIN_VSDQ_I_DI (BINOP_UUS, ushl, 0, NONE)
3035 {
3036 tree cst = args[1];
3037 tree ctype = TREE_TYPE (cst);
3038 /* Left shifts can be either scalar or vector, e.g. uint64x1_t is
3039 treated as a scalar type, not a vector one.  */
3040 if ((cst = uniform_integer_cst_p (cst)) != NULL_TREE)
3041 {
3042 wide_int wcst = wi::to_wide (cst);
3043 tree unit_ty = TREE_TYPE (cst);
3044
3045 wide_int abs_cst = wi::abs (wcst);
3046 if (wi::geu_p (abs_cst, element_precision (args[0])))
3047 break;
3048
3049 if (wi::neg_p (wcst, TYPE_SIGN (ctype)))
3050 {
3051 tree final_cst;
3052 final_cst = wide_int_to_tree (unit_ty, abs_cst);
3053 if (TREE_CODE (cst) != INTEGER_CST)
3054 final_cst = build_uniform_cst (ctype, final_cst);
3055
3056 new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
3057 RSHIFT_EXPR, args[0],
3058 final_cst);
3059 }
3060 else
3061 new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
3062 LSHIFT_EXPR, args[0], args[1]);
3063 }
3064 }
3065 break;
      BUILTIN_VDQ_I (SHIFTIMM, ashr, 3, NONE)
      VAR1 (SHIFTIMM, ashr_simd, 0, NONE, di)
      BUILTIN_VDQ_I (USHIFTIMM, lshr, 3, NONE)
      VAR1 (USHIFTIMM, lshr_simd, 0, NONE, di)
	if (TREE_CODE (args[1]) == INTEGER_CST
	    && wi::ltu_p (wi::to_wide (args[1]), element_precision (args[0])))
	  new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
					  RSHIFT_EXPR, args[0], args[1]);
	break;
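      /* fmulx multiplies like an ordinary FP multiply except that
	 (+/-0) * (+/-Inf) yields +/-2.0 rather than a NaN.  With both
	 operands constant that special value is built directly; with only
	 one constant operand, folding to MULT_EXPR is done only when that
	 operand is provably neither zero nor infinite, so the special case
	 cannot be hit.  */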
      BUILTIN_GPF (BINOP, fmulx, 0, ALL)
	{
	  gcc_assert (nargs == 2);
	  bool a0_cst_p = TREE_CODE (args[0]) == REAL_CST;
	  bool a1_cst_p = TREE_CODE (args[1]) == REAL_CST;
	  if (a0_cst_p || a1_cst_p)
	    {
	      if (a0_cst_p && a1_cst_p)
		{
		  tree t0 = TREE_TYPE (args[0]);
		  real_value a0 = (TREE_REAL_CST (args[0]));
		  real_value a1 = (TREE_REAL_CST (args[1]));
		  if (real_equal (&a1, &dconst0))
		    std::swap (a0, a1);
		  /* According to real_equal (), +0 equals -0.  */
		  if (real_equal (&a0, &dconst0) && real_isinf (&a1))
		    {
		      real_value res = dconst2;
		      res.sign = a0.sign ^ a1.sign;
		      new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
						      REAL_CST,
						      build_real (t0, res));
		    }
		  else
		    new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
						    MULT_EXPR,
						    args[0], args[1]);
		}
	      else /* a0_cst_p ^ a1_cst_p.  */
		{
		  real_value const_part = a0_cst_p
		    ? TREE_REAL_CST (args[0]) : TREE_REAL_CST (args[1]);
		  if (!real_equal (&const_part, &dconst0)
		      && !real_isinf (&const_part))
		    new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
						    MULT_EXPR, args[0],
						    args[1]);
		}
	    }
	  if (new_stmt)
	    {
	      gimple_set_vuse (new_stmt, gimple_vuse (stmt));
	      gimple_set_vdef (new_stmt, gimple_vdef (stmt));
	    }
	  break;
	}
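      /* The lane-check builtin exists only to diagnose out-of-range lane
	 indices; once aarch64_fold_builtin_lane_check accepts the
	 arguments the call has no effect and is replaced by a nop.  */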
      case AARCH64_SIMD_BUILTIN_LANE_CHECK:
	if (aarch64_fold_builtin_lane_check (args[0], args[1], args[2]))
	  {
	    unlink_stmt_vdef (stmt);
	    release_defs (stmt);
	    new_stmt = gimple_build_nop ();
	  }
	break;
      default:
	break;
    }
  return new_stmt;
}

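/* Expand the HOLD, CLEAR and UPDATE trees requested by the
   TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook for C11 atomic compound
   assignments on floating-point types: HOLD saves FPCR/FPSR and runs the
   operation with exception traps masked and flags cleared, CLEAR re-clears
   the flags before a retry, and UPDATE restores the saved FPSR and
   re-raises via __atomic_feraiseexcept any exceptions recorded during the
   operation.  */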
void
aarch64_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  const unsigned AARCH64_FE_INVALID = 1;
  const unsigned AARCH64_FE_DIVBYZERO = 2;
  const unsigned AARCH64_FE_OVERFLOW = 4;
  const unsigned AARCH64_FE_UNDERFLOW = 8;
  const unsigned AARCH64_FE_INEXACT = 16;
  const unsigned HOST_WIDE_INT AARCH64_FE_ALL_EXCEPT = (AARCH64_FE_INVALID
							| AARCH64_FE_DIVBYZERO
							| AARCH64_FE_OVERFLOW
							| AARCH64_FE_UNDERFLOW
							| AARCH64_FE_INEXACT);
  const unsigned HOST_WIDE_INT AARCH64_FE_EXCEPT_SHIFT = 8;
  tree fenv_cr, fenv_sr, get_fpcr, set_fpcr, mask_cr, mask_sr;
  tree ld_fenv_cr, ld_fenv_sr, masked_fenv_cr, masked_fenv_sr, hold_fnclex_cr;
  tree hold_fnclex_sr, new_fenv_var, reload_fenv, restore_fnenv, get_fpsr, set_fpsr;
  tree update_call, atomic_feraiseexcept, hold_fnclex, masked_fenv, ld_fenv;

  /* Generate the equivalent of:
       unsigned int fenv_cr;
       fenv_cr = __builtin_aarch64_get_fpcr ();

       unsigned int fenv_sr;
       fenv_sr = __builtin_aarch64_get_fpsr ();

       Now set all exceptions to non-stop:
       unsigned int mask_cr
	 = ~(AARCH64_FE_ALL_EXCEPT << AARCH64_FE_EXCEPT_SHIFT);
       unsigned int masked_cr;
       masked_cr = fenv_cr & mask_cr;

       And clear all exception flags:
       unsigned int mask_sr = ~AARCH64_FE_ALL_EXCEPT;
       unsigned int masked_sr;
       masked_sr = fenv_sr & mask_sr;

       __builtin_aarch64_set_fpcr (masked_cr);
       __builtin_aarch64_set_fpsr (masked_sr);  */

  fenv_cr = create_tmp_var_raw (unsigned_type_node);
  fenv_sr = create_tmp_var_raw (unsigned_type_node);

  get_fpcr = aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR];
  set_fpcr = aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR];
  get_fpsr = aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR];
  set_fpsr = aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR];

  mask_cr = build_int_cst (unsigned_type_node,
			   ~(AARCH64_FE_ALL_EXCEPT << AARCH64_FE_EXCEPT_SHIFT));
  mask_sr = build_int_cst (unsigned_type_node,
			   ~(AARCH64_FE_ALL_EXCEPT));

  ld_fenv_cr = build4 (TARGET_EXPR, unsigned_type_node,
		       fenv_cr, build_call_expr (get_fpcr, 0),
		       NULL_TREE, NULL_TREE);
  ld_fenv_sr = build4 (TARGET_EXPR, unsigned_type_node,
		       fenv_sr, build_call_expr (get_fpsr, 0),
		       NULL_TREE, NULL_TREE);

  masked_fenv_cr = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_cr, mask_cr);
  masked_fenv_sr = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_sr, mask_sr);

  hold_fnclex_cr = build_call_expr (set_fpcr, 1, masked_fenv_cr);
  hold_fnclex_sr = build_call_expr (set_fpsr, 1, masked_fenv_sr);

  hold_fnclex = build2 (COMPOUND_EXPR, void_type_node, hold_fnclex_cr,
			hold_fnclex_sr);
  masked_fenv = build2 (COMPOUND_EXPR, void_type_node, masked_fenv_cr,
			masked_fenv_sr);
  ld_fenv = build2 (COMPOUND_EXPR, void_type_node, ld_fenv_cr, ld_fenv_sr);

  *hold = build2 (COMPOUND_EXPR, void_type_node,
		  build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv),
		  hold_fnclex);

  /* Store the value of masked_fenv to clear the exceptions:
     __builtin_aarch64_set_fpsr (masked_fenv_sr);  */

  *clear = build_call_expr (set_fpsr, 1, masked_fenv_sr);

  /* Generate the equivalent of:
       unsigned int new_fenv_var;
       new_fenv_var = __builtin_aarch64_get_fpsr ();

       __builtin_aarch64_set_fpsr (fenv_sr);

       __atomic_feraiseexcept (new_fenv_var);  */

  new_fenv_var = create_tmp_var_raw (unsigned_type_node);
  reload_fenv = build4 (TARGET_EXPR, unsigned_type_node,
			new_fenv_var, build_call_expr (get_fpsr, 0),
			NULL_TREE, NULL_TREE);
  restore_fnenv = build_call_expr (set_fpsr, 1, fenv_sr);
  atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
  update_call = build_call_expr (atomic_feraiseexcept, 1,
				 fold_convert (integer_type_node, new_fenv_var));
  *update = build2 (COMPOUND_EXPR, void_type_node,
		    build2 (COMPOUND_EXPR, void_type_node,
			    reload_fenv, restore_fnenv), update_call);
}

/* Resolve overloaded MEMTAG built-in functions.  */
#define AARCH64_BUILTIN_SUBCODE(F) \
  (DECL_MD_FUNCTION_CODE (F) >> AARCH64_BUILTIN_SHIFT)

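/* The MEMTAG builtins are declared up front with generic pointer
   parameters; this resolver looks at the argument types actually supplied
   at a call site and rewrites TREE_TYPE (fndecl) so that the user's
   pointer type flows through the signature (and, for most of the
   builtins, into the return type).  On a mismatch the originally declared
   type is restored and normal argument checking takes over.  Returning
   NULL_TREE tells the front end to continue with the (possibly retyped)
   declaration.  */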
static tree
aarch64_resolve_overloaded_memtag (location_t loc,
				   tree fndecl, void *pass_params)
{
  vec<tree, va_gc> *params = static_cast<vec<tree, va_gc> *> (pass_params);
  unsigned param_num = params ? params->length() : 0;
  unsigned int fcode = AARCH64_BUILTIN_SUBCODE (fndecl);
  tree inittype = aarch64_memtag_builtin_data[
		    fcode - AARCH64_MEMTAG_BUILTIN_START - 1].ftype;
  unsigned arg_num = list_length (TYPE_ARG_TYPES (inittype)) - 1;

  if (param_num != arg_num)
    {
      TREE_TYPE (fndecl) = inittype;
      return NULL_TREE;
    }
  tree retype = NULL;

  if (fcode == AARCH64_MEMTAG_BUILTIN_SUBP)
    {
      tree t0 = TREE_TYPE ((*params)[0]);
      tree t1 = TREE_TYPE ((*params)[1]);

      if (t0 == error_mark_node || TREE_CODE (t0) != POINTER_TYPE)
	t0 = ptr_type_node;
      if (t1 == error_mark_node || TREE_CODE (t1) != POINTER_TYPE)
	t1 = ptr_type_node;

      if (TYPE_MODE (t0) != DImode)
	warning_at (loc, 1, "expected 64-bit address but argument 1 is %d-bit",
		    (int)tree_to_shwi (DECL_SIZE ((*params)[0])));

      if (TYPE_MODE (t1) != DImode)
	warning_at (loc, 1, "expected 64-bit address but argument 2 is %d-bit",
		    (int)tree_to_shwi (DECL_SIZE ((*params)[1])));

      retype = build_function_type_list (ptrdiff_type_node, t0, t1, NULL);
    }
  else
    {
      tree t0 = TREE_TYPE ((*params)[0]);

      if (t0 == error_mark_node || TREE_CODE (t0) != POINTER_TYPE)
	{
	  TREE_TYPE (fndecl) = inittype;
	  return NULL_TREE;
	}

      if (TYPE_MODE (t0) != DImode)
	warning_at (loc, 1, "expected 64-bit address but argument 1 is %d-bit",
		    (int)tree_to_shwi (DECL_SIZE ((*params)[0])));

      switch (fcode)
	{
	case AARCH64_MEMTAG_BUILTIN_IRG:
	  retype = build_function_type_list (t0, t0, uint64_type_node, NULL);
	  break;
	case AARCH64_MEMTAG_BUILTIN_GMI:
	  retype = build_function_type_list (uint64_type_node, t0,
					     uint64_type_node, NULL);
	  break;
	case AARCH64_MEMTAG_BUILTIN_INC_TAG:
	  retype = build_function_type_list (t0, t0, unsigned_type_node, NULL);
	  break;
	case AARCH64_MEMTAG_BUILTIN_SET_TAG:
	  retype = build_function_type_list (void_type_node, t0, NULL);
	  break;
	case AARCH64_MEMTAG_BUILTIN_GET_TAG:
	  retype = build_function_type_list (t0, t0, NULL);
	  break;
	default:
	  return NULL_TREE;
	}
    }

  if (!retype || retype == error_mark_node)
    TREE_TYPE (fndecl) = inittype;
  else
    TREE_TYPE (fndecl) = retype;

  return NULL_TREE;
}

/* Called from aarch64_resolve_overloaded_builtin in aarch64-c.cc.  */
tree
aarch64_resolve_overloaded_builtin_general (location_t loc, tree function,
					    void *pass_params)
{
  unsigned int fcode = AARCH64_BUILTIN_SUBCODE (function);

  if (fcode >= AARCH64_MEMTAG_BUILTIN_START
      && fcode <= AARCH64_MEMTAG_BUILTIN_END)
    return aarch64_resolve_overloaded_memtag (loc, function, pass_params);

  return NULL_TREE;
}
3337
3338 #undef AARCH64_CHECK_BUILTIN_MODE
3339 #undef AARCH64_FIND_FRINT_VARIANT
3340 #undef CF0
3341 #undef CF1
3342 #undef CF2
3343 #undef CF3
3344 #undef CF4
3345 #undef CF10
3346 #undef VAR1
3347 #undef VAR2
3348 #undef VAR3
3349 #undef VAR4
3350 #undef VAR5
3351 #undef VAR6
3352 #undef VAR7
3353 #undef VAR8
3354 #undef VAR9
3355 #undef VAR10
3356 #undef VAR11
3357
3358 #include "gt-aarch64-builtins.h"
3359