1 /* Builtins' description for AArch64 SIMD architecture. 2 Copyright (C) 2011-2017 Free Software Foundation, Inc. 3 Contributed by ARM Ltd. 4 5 This file is part of GCC. 6 7 GCC is free software; you can redistribute it and/or modify it 8 under the terms of the GNU General Public License as published by 9 the Free Software Foundation; either version 3, or (at your option) 10 any later version. 11 12 GCC is distributed in the hope that it will be useful, but 13 WITHOUT ANY WARRANTY; without even the implied warranty of 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 General Public License for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with GCC; see the file COPYING3. If not see 19 <http://www.gnu.org/licenses/>. */ 20 21 #include "config.h" 22 #include "system.h" 23 #include "coretypes.h" 24 #include "tm.h" 25 #include "function.h" 26 #include "basic-block.h" 27 #include "rtl.h" 28 #include "tree.h" 29 #include "gimple.h" 30 #include "memmodel.h" 31 #include "tm_p.h" 32 #include "expmed.h" 33 #include "optabs.h" 34 #include "recog.h" 35 #include "diagnostic-core.h" 36 #include "fold-const.h" 37 #include "stor-layout.h" 38 #include "explow.h" 39 #include "expr.h" 40 #include "langhooks.h" 41 #include "gimple-iterator.h" 42 #include "case-cfn-macros.h" 43 44 #define v8qi_UP V8QImode 45 #define v4hi_UP V4HImode 46 #define v4hf_UP V4HFmode 47 #define v2si_UP V2SImode 48 #define v2sf_UP V2SFmode 49 #define v1df_UP V1DFmode 50 #define di_UP DImode 51 #define df_UP DFmode 52 #define v16qi_UP V16QImode 53 #define v8hi_UP V8HImode 54 #define v8hf_UP V8HFmode 55 #define v4si_UP V4SImode 56 #define v4sf_UP V4SFmode 57 #define v2di_UP V2DImode 58 #define v2df_UP V2DFmode 59 #define ti_UP TImode 60 #define oi_UP OImode 61 #define ci_UP CImode 62 #define xi_UP XImode 63 #define si_UP SImode 64 #define sf_UP SFmode 65 #define hi_UP HImode 66 #define hf_UP HFmode 67 #define qi_UP QImode 68 #define UP(X) X##_UP 69 70 #define SIMD_MAX_BUILTIN_ARGS 5 71 72 enum aarch64_type_qualifiers 73 { 74 /* T foo. */ 75 qualifier_none = 0x0, 76 /* unsigned T foo. */ 77 qualifier_unsigned = 0x1, /* 1 << 0 */ 78 /* const T foo. */ 79 qualifier_const = 0x2, /* 1 << 1 */ 80 /* T *foo. */ 81 qualifier_pointer = 0x4, /* 1 << 2 */ 82 /* Used when expanding arguments if an operand could 83 be an immediate. */ 84 qualifier_immediate = 0x8, /* 1 << 3 */ 85 qualifier_maybe_immediate = 0x10, /* 1 << 4 */ 86 /* void foo (...). */ 87 qualifier_void = 0x20, /* 1 << 5 */ 88 /* Some patterns may have internal operands, this qualifier is an 89 instruction to the initialisation code to skip this operand. */ 90 qualifier_internal = 0x40, /* 1 << 6 */ 91 /* Some builtins should use the T_*mode* encoded in a simd_builtin_datum 92 rather than using the type of the operand. */ 93 qualifier_map_mode = 0x80, /* 1 << 7 */ 94 /* qualifier_pointer | qualifier_map_mode */ 95 qualifier_pointer_map_mode = 0x84, 96 /* qualifier_const | qualifier_pointer | qualifier_map_mode */ 97 qualifier_const_pointer_map_mode = 0x86, 98 /* Polynomial types. */ 99 qualifier_poly = 0x100, 100 /* Lane indices - must be in range, and flipped for bigendian. */ 101 qualifier_lane_index = 0x200, 102 /* Lane indices for single lane structure loads and stores. */ 103 qualifier_struct_load_store_lane_index = 0x400 104 }; 105 106 typedef struct 107 { 108 const char *name; 109 machine_mode mode; 110 const enum insn_code code; 111 unsigned int fcode; 112 enum aarch64_type_qualifiers *qualifiers; 113 } aarch64_simd_builtin_datum; 114 115 static enum aarch64_type_qualifiers 116 aarch64_types_unop_qualifiers[SIMD_MAX_BUILTIN_ARGS] 117 = { qualifier_none, qualifier_none }; 118 #define TYPES_UNOP (aarch64_types_unop_qualifiers) 119 static enum aarch64_type_qualifiers 120 aarch64_types_unopu_qualifiers[SIMD_MAX_BUILTIN_ARGS] 121 = { qualifier_unsigned, qualifier_unsigned }; 122 #define TYPES_UNOPU (aarch64_types_unopu_qualifiers) 123 static enum aarch64_type_qualifiers 124 aarch64_types_unopus_qualifiers[SIMD_MAX_BUILTIN_ARGS] 125 = { qualifier_unsigned, qualifier_none }; 126 #define TYPES_UNOPUS (aarch64_types_unopus_qualifiers) 127 static enum aarch64_type_qualifiers 128 aarch64_types_binop_qualifiers[SIMD_MAX_BUILTIN_ARGS] 129 = { qualifier_none, qualifier_none, qualifier_maybe_immediate }; 130 #define TYPES_BINOP (aarch64_types_binop_qualifiers) 131 static enum aarch64_type_qualifiers 132 aarch64_types_binopu_qualifiers[SIMD_MAX_BUILTIN_ARGS] 133 = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned }; 134 #define TYPES_BINOPU (aarch64_types_binopu_qualifiers) 135 static enum aarch64_type_qualifiers 136 aarch64_types_binop_uus_qualifiers[SIMD_MAX_BUILTIN_ARGS] 137 = { qualifier_unsigned, qualifier_unsigned, qualifier_none }; 138 #define TYPES_BINOP_UUS (aarch64_types_binop_uus_qualifiers) 139 static enum aarch64_type_qualifiers 140 aarch64_types_binop_ssu_qualifiers[SIMD_MAX_BUILTIN_ARGS] 141 = { qualifier_none, qualifier_none, qualifier_unsigned }; 142 #define TYPES_BINOP_SSU (aarch64_types_binop_ssu_qualifiers) 143 static enum aarch64_type_qualifiers 144 aarch64_types_binop_uss_qualifiers[SIMD_MAX_BUILTIN_ARGS] 145 = { qualifier_unsigned, qualifier_none, qualifier_none }; 146 #define TYPES_BINOP_USS (aarch64_types_binop_uss_qualifiers) 147 static enum aarch64_type_qualifiers 148 aarch64_types_binopp_qualifiers[SIMD_MAX_BUILTIN_ARGS] 149 = { qualifier_poly, qualifier_poly, qualifier_poly }; 150 #define TYPES_BINOPP (aarch64_types_binopp_qualifiers) 151 152 static enum aarch64_type_qualifiers 153 aarch64_types_ternop_qualifiers[SIMD_MAX_BUILTIN_ARGS] 154 = { qualifier_none, qualifier_none, qualifier_none, qualifier_none }; 155 #define TYPES_TERNOP (aarch64_types_ternop_qualifiers) 156 static enum aarch64_type_qualifiers 157 aarch64_types_ternop_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS] 158 = { qualifier_none, qualifier_none, qualifier_none, qualifier_lane_index }; 159 #define TYPES_TERNOP_LANE (aarch64_types_ternop_lane_qualifiers) 160 static enum aarch64_type_qualifiers 161 aarch64_types_ternopu_qualifiers[SIMD_MAX_BUILTIN_ARGS] 162 = { qualifier_unsigned, qualifier_unsigned, 163 qualifier_unsigned, qualifier_unsigned }; 164 #define TYPES_TERNOPU (aarch64_types_ternopu_qualifiers) 165 166 static enum aarch64_type_qualifiers 167 aarch64_types_quadop_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS] 168 = { qualifier_none, qualifier_none, qualifier_none, 169 qualifier_none, qualifier_lane_index }; 170 #define TYPES_QUADOP_LANE (aarch64_types_quadop_lane_qualifiers) 171 172 static enum aarch64_type_qualifiers 173 aarch64_types_binop_imm_p_qualifiers[SIMD_MAX_BUILTIN_ARGS] 174 = { qualifier_poly, qualifier_none, qualifier_immediate }; 175 #define TYPES_GETREGP (aarch64_types_binop_imm_p_qualifiers) 176 static enum aarch64_type_qualifiers 177 aarch64_types_binop_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS] 178 = { qualifier_none, qualifier_none, qualifier_immediate }; 179 #define TYPES_GETREG (aarch64_types_binop_imm_qualifiers) 180 #define TYPES_SHIFTIMM (aarch64_types_binop_imm_qualifiers) 181 static enum aarch64_type_qualifiers 182 aarch64_types_shift_to_unsigned_qualifiers[SIMD_MAX_BUILTIN_ARGS] 183 = { qualifier_unsigned, qualifier_none, qualifier_immediate }; 184 #define TYPES_SHIFTIMM_USS (aarch64_types_shift_to_unsigned_qualifiers) 185 static enum aarch64_type_qualifiers 186 aarch64_types_fcvt_from_unsigned_qualifiers[SIMD_MAX_BUILTIN_ARGS] 187 = { qualifier_none, qualifier_unsigned, qualifier_immediate }; 188 #define TYPES_FCVTIMM_SUS (aarch64_types_fcvt_from_unsigned_qualifiers) 189 static enum aarch64_type_qualifiers 190 aarch64_types_unsigned_shift_qualifiers[SIMD_MAX_BUILTIN_ARGS] 191 = { qualifier_unsigned, qualifier_unsigned, qualifier_immediate }; 192 #define TYPES_USHIFTIMM (aarch64_types_unsigned_shift_qualifiers) 193 194 static enum aarch64_type_qualifiers 195 aarch64_types_ternop_s_imm_p_qualifiers[SIMD_MAX_BUILTIN_ARGS] 196 = { qualifier_none, qualifier_none, qualifier_poly, qualifier_immediate}; 197 #define TYPES_SETREGP (aarch64_types_ternop_s_imm_p_qualifiers) 198 static enum aarch64_type_qualifiers 199 aarch64_types_ternop_s_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS] 200 = { qualifier_none, qualifier_none, qualifier_none, qualifier_immediate}; 201 #define TYPES_SETREG (aarch64_types_ternop_s_imm_qualifiers) 202 #define TYPES_SHIFTINSERT (aarch64_types_ternop_s_imm_qualifiers) 203 #define TYPES_SHIFTACC (aarch64_types_ternop_s_imm_qualifiers) 204 205 static enum aarch64_type_qualifiers 206 aarch64_types_ternop_p_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS] 207 = { qualifier_poly, qualifier_poly, qualifier_poly, qualifier_immediate}; 208 #define TYPES_SHIFTINSERTP (aarch64_types_ternop_p_imm_qualifiers) 209 210 static enum aarch64_type_qualifiers 211 aarch64_types_unsigned_shiftacc_qualifiers[SIMD_MAX_BUILTIN_ARGS] 212 = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned, 213 qualifier_immediate }; 214 #define TYPES_USHIFTACC (aarch64_types_unsigned_shiftacc_qualifiers) 215 216 217 static enum aarch64_type_qualifiers 218 aarch64_types_combine_qualifiers[SIMD_MAX_BUILTIN_ARGS] 219 = { qualifier_none, qualifier_none, qualifier_none }; 220 #define TYPES_COMBINE (aarch64_types_combine_qualifiers) 221 222 static enum aarch64_type_qualifiers 223 aarch64_types_combine_p_qualifiers[SIMD_MAX_BUILTIN_ARGS] 224 = { qualifier_poly, qualifier_poly, qualifier_poly }; 225 #define TYPES_COMBINEP (aarch64_types_combine_p_qualifiers) 226 227 static enum aarch64_type_qualifiers 228 aarch64_types_load1_qualifiers[SIMD_MAX_BUILTIN_ARGS] 229 = { qualifier_none, qualifier_const_pointer_map_mode }; 230 #define TYPES_LOAD1 (aarch64_types_load1_qualifiers) 231 #define TYPES_LOADSTRUCT (aarch64_types_load1_qualifiers) 232 static enum aarch64_type_qualifiers 233 aarch64_types_loadstruct_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS] 234 = { qualifier_none, qualifier_const_pointer_map_mode, 235 qualifier_none, qualifier_struct_load_store_lane_index }; 236 #define TYPES_LOADSTRUCT_LANE (aarch64_types_loadstruct_lane_qualifiers) 237 238 static enum aarch64_type_qualifiers 239 aarch64_types_bsl_p_qualifiers[SIMD_MAX_BUILTIN_ARGS] 240 = { qualifier_poly, qualifier_unsigned, 241 qualifier_poly, qualifier_poly }; 242 #define TYPES_BSL_P (aarch64_types_bsl_p_qualifiers) 243 static enum aarch64_type_qualifiers 244 aarch64_types_bsl_s_qualifiers[SIMD_MAX_BUILTIN_ARGS] 245 = { qualifier_none, qualifier_unsigned, 246 qualifier_none, qualifier_none }; 247 #define TYPES_BSL_S (aarch64_types_bsl_s_qualifiers) 248 static enum aarch64_type_qualifiers 249 aarch64_types_bsl_u_qualifiers[SIMD_MAX_BUILTIN_ARGS] 250 = { qualifier_unsigned, qualifier_unsigned, 251 qualifier_unsigned, qualifier_unsigned }; 252 #define TYPES_BSL_U (aarch64_types_bsl_u_qualifiers) 253 254 /* The first argument (return type) of a store should be void type, 255 which we represent with qualifier_void. Their first operand will be 256 a DImode pointer to the location to store to, so we must use 257 qualifier_map_mode | qualifier_pointer to build a pointer to the 258 element type of the vector. */ 259 static enum aarch64_type_qualifiers 260 aarch64_types_store1_p_qualifiers[SIMD_MAX_BUILTIN_ARGS] 261 = { qualifier_void, qualifier_pointer_map_mode, qualifier_poly }; 262 #define TYPES_STORE1P (aarch64_types_store1_p_qualifiers) 263 static enum aarch64_type_qualifiers 264 aarch64_types_store1_qualifiers[SIMD_MAX_BUILTIN_ARGS] 265 = { qualifier_void, qualifier_pointer_map_mode, qualifier_none }; 266 #define TYPES_STORE1 (aarch64_types_store1_qualifiers) 267 #define TYPES_STORESTRUCT (aarch64_types_store1_qualifiers) 268 static enum aarch64_type_qualifiers 269 aarch64_types_storestruct_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS] 270 = { qualifier_void, qualifier_pointer_map_mode, 271 qualifier_none, qualifier_struct_load_store_lane_index }; 272 #define TYPES_STORESTRUCT_LANE (aarch64_types_storestruct_lane_qualifiers) 273 274 #define CF0(N, X) CODE_FOR_aarch64_##N##X 275 #define CF1(N, X) CODE_FOR_##N##X##1 276 #define CF2(N, X) CODE_FOR_##N##X##2 277 #define CF3(N, X) CODE_FOR_##N##X##3 278 #define CF4(N, X) CODE_FOR_##N##X##4 279 #define CF10(N, X) CODE_FOR_##N##X 280 281 #define VAR1(T, N, MAP, A) \ 282 {#N #A, UP (A), CF##MAP (N, A), 0, TYPES_##T}, 283 #define VAR2(T, N, MAP, A, B) \ 284 VAR1 (T, N, MAP, A) \ 285 VAR1 (T, N, MAP, B) 286 #define VAR3(T, N, MAP, A, B, C) \ 287 VAR2 (T, N, MAP, A, B) \ 288 VAR1 (T, N, MAP, C) 289 #define VAR4(T, N, MAP, A, B, C, D) \ 290 VAR3 (T, N, MAP, A, B, C) \ 291 VAR1 (T, N, MAP, D) 292 #define VAR5(T, N, MAP, A, B, C, D, E) \ 293 VAR4 (T, N, MAP, A, B, C, D) \ 294 VAR1 (T, N, MAP, E) 295 #define VAR6(T, N, MAP, A, B, C, D, E, F) \ 296 VAR5 (T, N, MAP, A, B, C, D, E) \ 297 VAR1 (T, N, MAP, F) 298 #define VAR7(T, N, MAP, A, B, C, D, E, F, G) \ 299 VAR6 (T, N, MAP, A, B, C, D, E, F) \ 300 VAR1 (T, N, MAP, G) 301 #define VAR8(T, N, MAP, A, B, C, D, E, F, G, H) \ 302 VAR7 (T, N, MAP, A, B, C, D, E, F, G) \ 303 VAR1 (T, N, MAP, H) 304 #define VAR9(T, N, MAP, A, B, C, D, E, F, G, H, I) \ 305 VAR8 (T, N, MAP, A, B, C, D, E, F, G, H) \ 306 VAR1 (T, N, MAP, I) 307 #define VAR10(T, N, MAP, A, B, C, D, E, F, G, H, I, J) \ 308 VAR9 (T, N, MAP, A, B, C, D, E, F, G, H, I) \ 309 VAR1 (T, N, MAP, J) 310 #define VAR11(T, N, MAP, A, B, C, D, E, F, G, H, I, J, K) \ 311 VAR10 (T, N, MAP, A, B, C, D, E, F, G, H, I, J) \ 312 VAR1 (T, N, MAP, K) 313 #define VAR12(T, N, MAP, A, B, C, D, E, F, G, H, I, J, K, L) \ 314 VAR11 (T, N, MAP, A, B, C, D, E, F, G, H, I, J, K) \ 315 VAR1 (T, N, MAP, L) 316 #define VAR13(T, N, MAP, A, B, C, D, E, F, G, H, I, J, K, L, M) \ 317 VAR12 (T, N, MAP, A, B, C, D, E, F, G, H, I, J, K, L) \ 318 VAR1 (T, N, MAP, M) 319 #define VAR14(T, X, MAP, A, B, C, D, E, F, G, H, I, J, K, L, M, N) \ 320 VAR13 (T, X, MAP, A, B, C, D, E, F, G, H, I, J, K, L, M) \ 321 VAR1 (T, X, MAP, N) 322 323 #include "aarch64-builtin-iterators.h" 324 325 static aarch64_simd_builtin_datum aarch64_simd_builtin_data[] = { 326 #include "aarch64-simd-builtins.def" 327 }; 328 329 /* There's only 8 CRC32 builtins. Probably not worth their own .def file. */ 330 #define AARCH64_CRC32_BUILTINS \ 331 CRC32_BUILTIN (crc32b, QI) \ 332 CRC32_BUILTIN (crc32h, HI) \ 333 CRC32_BUILTIN (crc32w, SI) \ 334 CRC32_BUILTIN (crc32x, DI) \ 335 CRC32_BUILTIN (crc32cb, QI) \ 336 CRC32_BUILTIN (crc32ch, HI) \ 337 CRC32_BUILTIN (crc32cw, SI) \ 338 CRC32_BUILTIN (crc32cx, DI) 339 340 typedef struct 341 { 342 const char *name; 343 machine_mode mode; 344 const enum insn_code icode; 345 unsigned int fcode; 346 } aarch64_crc_builtin_datum; 347 348 #define CRC32_BUILTIN(N, M) \ 349 AARCH64_BUILTIN_##N, 350 351 #undef VAR1 352 #define VAR1(T, N, MAP, A) \ 353 AARCH64_SIMD_BUILTIN_##T##_##N##A, 354 355 enum aarch64_builtins 356 { 357 AARCH64_BUILTIN_MIN, 358 359 AARCH64_BUILTIN_GET_FPCR, 360 AARCH64_BUILTIN_SET_FPCR, 361 AARCH64_BUILTIN_GET_FPSR, 362 AARCH64_BUILTIN_SET_FPSR, 363 364 AARCH64_BUILTIN_RSQRT_DF, 365 AARCH64_BUILTIN_RSQRT_SF, 366 AARCH64_BUILTIN_RSQRT_V2DF, 367 AARCH64_BUILTIN_RSQRT_V2SF, 368 AARCH64_BUILTIN_RSQRT_V4SF, 369 AARCH64_SIMD_BUILTIN_BASE, 370 AARCH64_SIMD_BUILTIN_LANE_CHECK, 371 #include "aarch64-simd-builtins.def" 372 /* The first enum element which is based on an insn_data pattern. */ 373 AARCH64_SIMD_PATTERN_START = AARCH64_SIMD_BUILTIN_LANE_CHECK + 1, 374 AARCH64_SIMD_BUILTIN_MAX = AARCH64_SIMD_PATTERN_START 375 + ARRAY_SIZE (aarch64_simd_builtin_data) - 1, 376 AARCH64_CRC32_BUILTIN_BASE, 377 AARCH64_CRC32_BUILTINS 378 AARCH64_CRC32_BUILTIN_MAX, 379 /* ARMv8.3-A Pointer Authentication Builtins. */ 380 AARCH64_PAUTH_BUILTIN_AUTIA1716, 381 AARCH64_PAUTH_BUILTIN_PACIA1716, 382 AARCH64_PAUTH_BUILTIN_XPACLRI, 383 AARCH64_BUILTIN_MAX 384 }; 385 386 #undef CRC32_BUILTIN 387 #define CRC32_BUILTIN(N, M) \ 388 {"__builtin_aarch64_"#N, M##mode, CODE_FOR_aarch64_##N, AARCH64_BUILTIN_##N}, 389 390 static aarch64_crc_builtin_datum aarch64_crc_builtin_data[] = { 391 AARCH64_CRC32_BUILTINS 392 }; 393 394 #undef CRC32_BUILTIN 395 396 static GTY(()) tree aarch64_builtin_decls[AARCH64_BUILTIN_MAX]; 397 398 #define NUM_DREG_TYPES 6 399 #define NUM_QREG_TYPES 6 400 401 /* Internal scalar builtin types. These types are used to support 402 neon intrinsic builtins. They are _not_ user-visible types. Therefore 403 the mangling for these types are implementation defined. */ 404 const char *aarch64_scalar_builtin_types[] = { 405 "__builtin_aarch64_simd_qi", 406 "__builtin_aarch64_simd_hi", 407 "__builtin_aarch64_simd_si", 408 "__builtin_aarch64_simd_hf", 409 "__builtin_aarch64_simd_sf", 410 "__builtin_aarch64_simd_di", 411 "__builtin_aarch64_simd_df", 412 "__builtin_aarch64_simd_poly8", 413 "__builtin_aarch64_simd_poly16", 414 "__builtin_aarch64_simd_poly64", 415 "__builtin_aarch64_simd_poly128", 416 "__builtin_aarch64_simd_ti", 417 "__builtin_aarch64_simd_uqi", 418 "__builtin_aarch64_simd_uhi", 419 "__builtin_aarch64_simd_usi", 420 "__builtin_aarch64_simd_udi", 421 "__builtin_aarch64_simd_ei", 422 "__builtin_aarch64_simd_oi", 423 "__builtin_aarch64_simd_ci", 424 "__builtin_aarch64_simd_xi", 425 NULL 426 }; 427 428 #define ENTRY(E, M, Q, G) E, 429 enum aarch64_simd_type 430 { 431 #include "aarch64-simd-builtin-types.def" 432 ARM_NEON_H_TYPES_LAST 433 }; 434 #undef ENTRY 435 436 struct aarch64_simd_type_info 437 { 438 enum aarch64_simd_type type; 439 440 /* Internal type name. */ 441 const char *name; 442 443 /* Internal type name(mangled). The mangled names conform to the 444 AAPCS64 (see "Procedure Call Standard for the ARM 64-bit Architecture", 445 Appendix A). To qualify for emission with the mangled names defined in 446 that document, a vector type must not only be of the correct mode but also 447 be of the correct internal AdvSIMD vector type (e.g. __Int8x8_t); these 448 types are registered by aarch64_init_simd_builtin_types (). In other 449 words, vector types defined in other ways e.g. via vector_size attribute 450 will get default mangled names. */ 451 const char *mangle; 452 453 /* Internal type. */ 454 tree itype; 455 456 /* Element type. */ 457 tree eltype; 458 459 /* Machine mode the internal type maps to. */ 460 enum machine_mode mode; 461 462 /* Qualifiers. */ 463 enum aarch64_type_qualifiers q; 464 }; 465 466 #define ENTRY(E, M, Q, G) \ 467 {E, "__" #E, #G "__" #E, NULL_TREE, NULL_TREE, M##mode, qualifier_##Q}, 468 static struct aarch64_simd_type_info aarch64_simd_types [] = { 469 #include "aarch64-simd-builtin-types.def" 470 }; 471 #undef ENTRY 472 473 static tree aarch64_simd_intOI_type_node = NULL_TREE; 474 static tree aarch64_simd_intCI_type_node = NULL_TREE; 475 static tree aarch64_simd_intXI_type_node = NULL_TREE; 476 477 /* The user-visible __fp16 type, and a pointer to that type. Used 478 across the back-end. */ 479 tree aarch64_fp16_type_node = NULL_TREE; 480 tree aarch64_fp16_ptr_type_node = NULL_TREE; 481 482 static const char * 483 aarch64_mangle_builtin_scalar_type (const_tree type) 484 { 485 int i = 0; 486 487 while (aarch64_scalar_builtin_types[i] != NULL) 488 { 489 const char *name = aarch64_scalar_builtin_types[i]; 490 491 if (TREE_CODE (TYPE_NAME (type)) == TYPE_DECL 492 && DECL_NAME (TYPE_NAME (type)) 493 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type))), name)) 494 return aarch64_scalar_builtin_types[i]; 495 i++; 496 } 497 return NULL; 498 } 499 500 static const char * 501 aarch64_mangle_builtin_vector_type (const_tree type) 502 { 503 int i; 504 int nelts = sizeof (aarch64_simd_types) / sizeof (aarch64_simd_types[0]); 505 506 for (i = 0; i < nelts; i++) 507 if (aarch64_simd_types[i].mode == TYPE_MODE (type) 508 && TYPE_NAME (type) 509 && TREE_CODE (TYPE_NAME (type)) == TYPE_DECL 510 && DECL_NAME (TYPE_NAME (type)) 511 && !strcmp 512 (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type))), 513 aarch64_simd_types[i].name)) 514 return aarch64_simd_types[i].mangle; 515 516 return NULL; 517 } 518 519 const char * 520 aarch64_mangle_builtin_type (const_tree type) 521 { 522 const char *mangle; 523 /* Walk through all the AArch64 builtins types tables to filter out the 524 incoming type. */ 525 if ((mangle = aarch64_mangle_builtin_vector_type (type)) 526 || (mangle = aarch64_mangle_builtin_scalar_type (type))) 527 return mangle; 528 529 return NULL; 530 } 531 532 static tree 533 aarch64_simd_builtin_std_type (enum machine_mode mode, 534 enum aarch64_type_qualifiers q) 535 { 536 #define QUAL_TYPE(M) \ 537 ((q == qualifier_none) ? int##M##_type_node : unsigned_int##M##_type_node); 538 switch (mode) 539 { 540 case QImode: 541 return QUAL_TYPE (QI); 542 case HImode: 543 return QUAL_TYPE (HI); 544 case SImode: 545 return QUAL_TYPE (SI); 546 case DImode: 547 return QUAL_TYPE (DI); 548 case TImode: 549 return QUAL_TYPE (TI); 550 case OImode: 551 return aarch64_simd_intOI_type_node; 552 case CImode: 553 return aarch64_simd_intCI_type_node; 554 case XImode: 555 return aarch64_simd_intXI_type_node; 556 case HFmode: 557 return aarch64_fp16_type_node; 558 case SFmode: 559 return float_type_node; 560 case DFmode: 561 return double_type_node; 562 default: 563 gcc_unreachable (); 564 } 565 #undef QUAL_TYPE 566 } 567 568 static tree 569 aarch64_lookup_simd_builtin_type (enum machine_mode mode, 570 enum aarch64_type_qualifiers q) 571 { 572 int i; 573 int nelts = sizeof (aarch64_simd_types) / sizeof (aarch64_simd_types[0]); 574 575 /* Non-poly scalar modes map to standard types not in the table. */ 576 if (q != qualifier_poly && !VECTOR_MODE_P (mode)) 577 return aarch64_simd_builtin_std_type (mode, q); 578 579 for (i = 0; i < nelts; i++) 580 if (aarch64_simd_types[i].mode == mode 581 && aarch64_simd_types[i].q == q) 582 return aarch64_simd_types[i].itype; 583 584 return NULL_TREE; 585 } 586 587 static tree 588 aarch64_simd_builtin_type (enum machine_mode mode, 589 bool unsigned_p, bool poly_p) 590 { 591 if (poly_p) 592 return aarch64_lookup_simd_builtin_type (mode, qualifier_poly); 593 else if (unsigned_p) 594 return aarch64_lookup_simd_builtin_type (mode, qualifier_unsigned); 595 else 596 return aarch64_lookup_simd_builtin_type (mode, qualifier_none); 597 } 598 599 static void 600 aarch64_init_simd_builtin_types (void) 601 { 602 int i; 603 int nelts = sizeof (aarch64_simd_types) / sizeof (aarch64_simd_types[0]); 604 tree tdecl; 605 606 /* Init all the element types built by the front-end. */ 607 aarch64_simd_types[Int8x8_t].eltype = intQI_type_node; 608 aarch64_simd_types[Int8x16_t].eltype = intQI_type_node; 609 aarch64_simd_types[Int16x4_t].eltype = intHI_type_node; 610 aarch64_simd_types[Int16x8_t].eltype = intHI_type_node; 611 aarch64_simd_types[Int32x2_t].eltype = intSI_type_node; 612 aarch64_simd_types[Int32x4_t].eltype = intSI_type_node; 613 aarch64_simd_types[Int64x1_t].eltype = intDI_type_node; 614 aarch64_simd_types[Int64x2_t].eltype = intDI_type_node; 615 aarch64_simd_types[Uint8x8_t].eltype = unsigned_intQI_type_node; 616 aarch64_simd_types[Uint8x16_t].eltype = unsigned_intQI_type_node; 617 aarch64_simd_types[Uint16x4_t].eltype = unsigned_intHI_type_node; 618 aarch64_simd_types[Uint16x8_t].eltype = unsigned_intHI_type_node; 619 aarch64_simd_types[Uint32x2_t].eltype = unsigned_intSI_type_node; 620 aarch64_simd_types[Uint32x4_t].eltype = unsigned_intSI_type_node; 621 aarch64_simd_types[Uint64x1_t].eltype = unsigned_intDI_type_node; 622 aarch64_simd_types[Uint64x2_t].eltype = unsigned_intDI_type_node; 623 624 /* Poly types are a world of their own. */ 625 aarch64_simd_types[Poly8_t].eltype = aarch64_simd_types[Poly8_t].itype = 626 build_distinct_type_copy (unsigned_intQI_type_node); 627 aarch64_simd_types[Poly16_t].eltype = aarch64_simd_types[Poly16_t].itype = 628 build_distinct_type_copy (unsigned_intHI_type_node); 629 aarch64_simd_types[Poly64_t].eltype = aarch64_simd_types[Poly64_t].itype = 630 build_distinct_type_copy (unsigned_intDI_type_node); 631 aarch64_simd_types[Poly128_t].eltype = aarch64_simd_types[Poly128_t].itype = 632 build_distinct_type_copy (unsigned_intTI_type_node); 633 /* Init poly vector element types with scalar poly types. */ 634 aarch64_simd_types[Poly8x8_t].eltype = aarch64_simd_types[Poly8_t].itype; 635 aarch64_simd_types[Poly8x16_t].eltype = aarch64_simd_types[Poly8_t].itype; 636 aarch64_simd_types[Poly16x4_t].eltype = aarch64_simd_types[Poly16_t].itype; 637 aarch64_simd_types[Poly16x8_t].eltype = aarch64_simd_types[Poly16_t].itype; 638 aarch64_simd_types[Poly64x1_t].eltype = aarch64_simd_types[Poly64_t].itype; 639 aarch64_simd_types[Poly64x2_t].eltype = aarch64_simd_types[Poly64_t].itype; 640 641 /* Continue with standard types. */ 642 aarch64_simd_types[Float16x4_t].eltype = aarch64_fp16_type_node; 643 aarch64_simd_types[Float16x8_t].eltype = aarch64_fp16_type_node; 644 aarch64_simd_types[Float32x2_t].eltype = float_type_node; 645 aarch64_simd_types[Float32x4_t].eltype = float_type_node; 646 aarch64_simd_types[Float64x1_t].eltype = double_type_node; 647 aarch64_simd_types[Float64x2_t].eltype = double_type_node; 648 649 for (i = 0; i < nelts; i++) 650 { 651 tree eltype = aarch64_simd_types[i].eltype; 652 enum machine_mode mode = aarch64_simd_types[i].mode; 653 654 if (aarch64_simd_types[i].itype == NULL) 655 { 656 aarch64_simd_types[i].itype 657 = build_distinct_type_copy 658 (build_vector_type (eltype, GET_MODE_NUNITS (mode))); 659 SET_TYPE_STRUCTURAL_EQUALITY (aarch64_simd_types[i].itype); 660 } 661 662 tdecl = add_builtin_type (aarch64_simd_types[i].name, 663 aarch64_simd_types[i].itype); 664 TYPE_NAME (aarch64_simd_types[i].itype) = tdecl; 665 } 666 667 #define AARCH64_BUILD_SIGNED_TYPE(mode) \ 668 make_signed_type (GET_MODE_PRECISION (mode)); 669 aarch64_simd_intOI_type_node = AARCH64_BUILD_SIGNED_TYPE (OImode); 670 aarch64_simd_intCI_type_node = AARCH64_BUILD_SIGNED_TYPE (CImode); 671 aarch64_simd_intXI_type_node = AARCH64_BUILD_SIGNED_TYPE (XImode); 672 #undef AARCH64_BUILD_SIGNED_TYPE 673 674 tdecl = add_builtin_type 675 ("__builtin_aarch64_simd_oi" , aarch64_simd_intOI_type_node); 676 TYPE_NAME (aarch64_simd_intOI_type_node) = tdecl; 677 tdecl = add_builtin_type 678 ("__builtin_aarch64_simd_ci" , aarch64_simd_intCI_type_node); 679 TYPE_NAME (aarch64_simd_intCI_type_node) = tdecl; 680 tdecl = add_builtin_type 681 ("__builtin_aarch64_simd_xi" , aarch64_simd_intXI_type_node); 682 TYPE_NAME (aarch64_simd_intXI_type_node) = tdecl; 683 } 684 685 static void 686 aarch64_init_simd_builtin_scalar_types (void) 687 { 688 /* Define typedefs for all the standard scalar types. */ 689 (*lang_hooks.types.register_builtin_type) (intQI_type_node, 690 "__builtin_aarch64_simd_qi"); 691 (*lang_hooks.types.register_builtin_type) (intHI_type_node, 692 "__builtin_aarch64_simd_hi"); 693 (*lang_hooks.types.register_builtin_type) (aarch64_fp16_type_node, 694 "__builtin_aarch64_simd_hf"); 695 (*lang_hooks.types.register_builtin_type) (intSI_type_node, 696 "__builtin_aarch64_simd_si"); 697 (*lang_hooks.types.register_builtin_type) (float_type_node, 698 "__builtin_aarch64_simd_sf"); 699 (*lang_hooks.types.register_builtin_type) (intDI_type_node, 700 "__builtin_aarch64_simd_di"); 701 (*lang_hooks.types.register_builtin_type) (double_type_node, 702 "__builtin_aarch64_simd_df"); 703 (*lang_hooks.types.register_builtin_type) (unsigned_intQI_type_node, 704 "__builtin_aarch64_simd_poly8"); 705 (*lang_hooks.types.register_builtin_type) (unsigned_intHI_type_node, 706 "__builtin_aarch64_simd_poly16"); 707 (*lang_hooks.types.register_builtin_type) (unsigned_intDI_type_node, 708 "__builtin_aarch64_simd_poly64"); 709 (*lang_hooks.types.register_builtin_type) (unsigned_intTI_type_node, 710 "__builtin_aarch64_simd_poly128"); 711 (*lang_hooks.types.register_builtin_type) (intTI_type_node, 712 "__builtin_aarch64_simd_ti"); 713 /* Unsigned integer types for various mode sizes. */ 714 (*lang_hooks.types.register_builtin_type) (unsigned_intQI_type_node, 715 "__builtin_aarch64_simd_uqi"); 716 (*lang_hooks.types.register_builtin_type) (unsigned_intHI_type_node, 717 "__builtin_aarch64_simd_uhi"); 718 (*lang_hooks.types.register_builtin_type) (unsigned_intSI_type_node, 719 "__builtin_aarch64_simd_usi"); 720 (*lang_hooks.types.register_builtin_type) (unsigned_intDI_type_node, 721 "__builtin_aarch64_simd_udi"); 722 } 723 724 static bool aarch64_simd_builtins_initialized_p = false; 725 726 void 727 aarch64_init_simd_builtins (void) 728 { 729 unsigned int i, fcode = AARCH64_SIMD_PATTERN_START; 730 731 if (aarch64_simd_builtins_initialized_p) 732 return; 733 734 aarch64_simd_builtins_initialized_p = true; 735 736 aarch64_init_simd_builtin_types (); 737 738 /* Strong-typing hasn't been implemented for all AdvSIMD builtin intrinsics. 739 Therefore we need to preserve the old __builtin scalar types. It can be 740 removed once all the intrinsics become strongly typed using the qualifier 741 system. */ 742 aarch64_init_simd_builtin_scalar_types (); 743 744 tree lane_check_fpr = build_function_type_list (void_type_node, 745 size_type_node, 746 size_type_node, 747 intSI_type_node, 748 NULL); 749 aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_LANE_CHECK] = 750 add_builtin_function ("__builtin_aarch64_im_lane_boundsi", lane_check_fpr, 751 AARCH64_SIMD_BUILTIN_LANE_CHECK, BUILT_IN_MD, 752 NULL, NULL_TREE); 753 754 for (i = 0; i < ARRAY_SIZE (aarch64_simd_builtin_data); i++, fcode++) 755 { 756 bool print_type_signature_p = false; 757 char type_signature[SIMD_MAX_BUILTIN_ARGS] = { 0 }; 758 aarch64_simd_builtin_datum *d = &aarch64_simd_builtin_data[i]; 759 char namebuf[60]; 760 tree ftype = NULL; 761 tree fndecl = NULL; 762 763 d->fcode = fcode; 764 765 /* We must track two variables here. op_num is 766 the operand number as in the RTL pattern. This is 767 required to access the mode (e.g. V4SF mode) of the 768 argument, from which the base type can be derived. 769 arg_num is an index in to the qualifiers data, which 770 gives qualifiers to the type (e.g. const unsigned). 771 The reason these two variables may differ by one is the 772 void return type. While all return types take the 0th entry 773 in the qualifiers array, there is no operand for them in the 774 RTL pattern. */ 775 int op_num = insn_data[d->code].n_operands - 1; 776 int arg_num = d->qualifiers[0] & qualifier_void 777 ? op_num + 1 778 : op_num; 779 tree return_type = void_type_node, args = void_list_node; 780 tree eltype; 781 782 /* Build a function type directly from the insn_data for this 783 builtin. The build_function_type () function takes care of 784 removing duplicates for us. */ 785 for (; op_num >= 0; arg_num--, op_num--) 786 { 787 machine_mode op_mode = insn_data[d->code].operand[op_num].mode; 788 enum aarch64_type_qualifiers qualifiers = d->qualifiers[arg_num]; 789 790 if (qualifiers & qualifier_unsigned) 791 { 792 type_signature[op_num] = 'u'; 793 print_type_signature_p = true; 794 } 795 else if (qualifiers & qualifier_poly) 796 { 797 type_signature[op_num] = 'p'; 798 print_type_signature_p = true; 799 } 800 else 801 type_signature[op_num] = 's'; 802 803 /* Skip an internal operand for vget_{low, high}. */ 804 if (qualifiers & qualifier_internal) 805 continue; 806 807 /* Some builtins have different user-facing types 808 for certain arguments, encoded in d->mode. */ 809 if (qualifiers & qualifier_map_mode) 810 op_mode = d->mode; 811 812 /* For pointers, we want a pointer to the basic type 813 of the vector. */ 814 if (qualifiers & qualifier_pointer && VECTOR_MODE_P (op_mode)) 815 op_mode = GET_MODE_INNER (op_mode); 816 817 eltype = aarch64_simd_builtin_type 818 (op_mode, 819 (qualifiers & qualifier_unsigned) != 0, 820 (qualifiers & qualifier_poly) != 0); 821 gcc_assert (eltype != NULL); 822 823 /* Add qualifiers. */ 824 if (qualifiers & qualifier_const) 825 eltype = build_qualified_type (eltype, TYPE_QUAL_CONST); 826 827 if (qualifiers & qualifier_pointer) 828 eltype = build_pointer_type (eltype); 829 830 /* If we have reached arg_num == 0, we are at a non-void 831 return type. Otherwise, we are still processing 832 arguments. */ 833 if (arg_num == 0) 834 return_type = eltype; 835 else 836 args = tree_cons (NULL_TREE, eltype, args); 837 } 838 839 ftype = build_function_type (return_type, args); 840 841 gcc_assert (ftype != NULL); 842 843 if (print_type_signature_p) 844 snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s_%s", 845 d->name, type_signature); 846 else 847 snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s", 848 d->name); 849 850 fndecl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, 851 NULL, NULL_TREE); 852 aarch64_builtin_decls[fcode] = fndecl; 853 } 854 } 855 856 static void 857 aarch64_init_crc32_builtins () 858 { 859 tree usi_type = aarch64_simd_builtin_std_type (SImode, qualifier_unsigned); 860 unsigned int i = 0; 861 862 for (i = 0; i < ARRAY_SIZE (aarch64_crc_builtin_data); ++i) 863 { 864 aarch64_crc_builtin_datum* d = &aarch64_crc_builtin_data[i]; 865 tree argtype = aarch64_simd_builtin_std_type (d->mode, 866 qualifier_unsigned); 867 tree ftype = build_function_type_list (usi_type, usi_type, argtype, NULL_TREE); 868 tree fndecl = add_builtin_function (d->name, ftype, d->fcode, 869 BUILT_IN_MD, NULL, NULL_TREE); 870 871 aarch64_builtin_decls[d->fcode] = fndecl; 872 } 873 } 874 875 /* Add builtins for reciprocal square root. */ 876 877 void 878 aarch64_init_builtin_rsqrt (void) 879 { 880 tree fndecl = NULL; 881 tree ftype = NULL; 882 883 tree V2SF_type_node = build_vector_type (float_type_node, 2); 884 tree V2DF_type_node = build_vector_type (double_type_node, 2); 885 tree V4SF_type_node = build_vector_type (float_type_node, 4); 886 887 struct builtin_decls_data 888 { 889 tree type_node; 890 const char *builtin_name; 891 int function_code; 892 }; 893 894 builtin_decls_data bdda[] = 895 { 896 { double_type_node, "__builtin_aarch64_rsqrt_df", AARCH64_BUILTIN_RSQRT_DF }, 897 { float_type_node, "__builtin_aarch64_rsqrt_sf", AARCH64_BUILTIN_RSQRT_SF }, 898 { V2DF_type_node, "__builtin_aarch64_rsqrt_v2df", AARCH64_BUILTIN_RSQRT_V2DF }, 899 { V2SF_type_node, "__builtin_aarch64_rsqrt_v2sf", AARCH64_BUILTIN_RSQRT_V2SF }, 900 { V4SF_type_node, "__builtin_aarch64_rsqrt_v4sf", AARCH64_BUILTIN_RSQRT_V4SF } 901 }; 902 903 builtin_decls_data *bdd = bdda; 904 builtin_decls_data *bdd_end = bdd + (sizeof (bdda) / sizeof (builtin_decls_data)); 905 906 for (; bdd < bdd_end; bdd++) 907 { 908 ftype = build_function_type_list (bdd->type_node, bdd->type_node, NULL_TREE); 909 fndecl = add_builtin_function (bdd->builtin_name, 910 ftype, bdd->function_code, BUILT_IN_MD, NULL, NULL_TREE); 911 aarch64_builtin_decls[bdd->function_code] = fndecl; 912 } 913 } 914 915 /* Initialize the backend types that support the user-visible __fp16 916 type, also initialize a pointer to that type, to be used when 917 forming HFAs. */ 918 919 static void 920 aarch64_init_fp16_types (void) 921 { 922 aarch64_fp16_type_node = make_node (REAL_TYPE); 923 TYPE_PRECISION (aarch64_fp16_type_node) = 16; 924 layout_type (aarch64_fp16_type_node); 925 926 (*lang_hooks.types.register_builtin_type) (aarch64_fp16_type_node, "__fp16"); 927 aarch64_fp16_ptr_type_node = build_pointer_type (aarch64_fp16_type_node); 928 } 929 930 /* Pointer authentication builtins that will become NOP on legacy platform. 931 Currently, these builtins are for internal use only (libgcc EH unwinder). */ 932 933 void 934 aarch64_init_pauth_hint_builtins (void) 935 { 936 /* Pointer Authentication builtins. */ 937 tree ftype_pointer_auth 938 = build_function_type_list (ptr_type_node, ptr_type_node, 939 unsigned_intDI_type_node, NULL_TREE); 940 tree ftype_pointer_strip 941 = build_function_type_list (ptr_type_node, ptr_type_node, NULL_TREE); 942 943 aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_AUTIA1716] 944 = add_builtin_function ("__builtin_aarch64_autia1716", ftype_pointer_auth, 945 AARCH64_PAUTH_BUILTIN_AUTIA1716, BUILT_IN_MD, NULL, 946 NULL_TREE); 947 aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_PACIA1716] 948 = add_builtin_function ("__builtin_aarch64_pacia1716", ftype_pointer_auth, 949 AARCH64_PAUTH_BUILTIN_PACIA1716, BUILT_IN_MD, NULL, 950 NULL_TREE); 951 aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_XPACLRI] 952 = add_builtin_function ("__builtin_aarch64_xpaclri", ftype_pointer_strip, 953 AARCH64_PAUTH_BUILTIN_XPACLRI, BUILT_IN_MD, NULL, 954 NULL_TREE); 955 } 956 957 void 958 aarch64_init_builtins (void) 959 { 960 tree ftype_set_fpr 961 = build_function_type_list (void_type_node, unsigned_type_node, NULL); 962 tree ftype_get_fpr 963 = build_function_type_list (unsigned_type_node, NULL); 964 965 aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR] 966 = add_builtin_function ("__builtin_aarch64_get_fpcr", ftype_get_fpr, 967 AARCH64_BUILTIN_GET_FPCR, BUILT_IN_MD, NULL, NULL_TREE); 968 aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR] 969 = add_builtin_function ("__builtin_aarch64_set_fpcr", ftype_set_fpr, 970 AARCH64_BUILTIN_SET_FPCR, BUILT_IN_MD, NULL, NULL_TREE); 971 aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR] 972 = add_builtin_function ("__builtin_aarch64_get_fpsr", ftype_get_fpr, 973 AARCH64_BUILTIN_GET_FPSR, BUILT_IN_MD, NULL, NULL_TREE); 974 aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR] 975 = add_builtin_function ("__builtin_aarch64_set_fpsr", ftype_set_fpr, 976 AARCH64_BUILTIN_SET_FPSR, BUILT_IN_MD, NULL, NULL_TREE); 977 978 aarch64_init_fp16_types (); 979 980 if (TARGET_SIMD) 981 aarch64_init_simd_builtins (); 982 983 aarch64_init_crc32_builtins (); 984 aarch64_init_builtin_rsqrt (); 985 986 /* Initialize pointer authentication builtins which are backed by instructions 987 in NOP encoding space. 988 989 NOTE: these builtins are supposed to be used by libgcc unwinder only, as 990 there is no support on return address signing under ILP32, we don't 991 register them. */ 992 if (!TARGET_ILP32) 993 aarch64_init_pauth_hint_builtins (); 994 } 995 996 tree 997 aarch64_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED) 998 { 999 if (code >= AARCH64_BUILTIN_MAX) 1000 return error_mark_node; 1001 1002 return aarch64_builtin_decls[code]; 1003 } 1004 1005 typedef enum 1006 { 1007 SIMD_ARG_COPY_TO_REG, 1008 SIMD_ARG_CONSTANT, 1009 SIMD_ARG_LANE_INDEX, 1010 SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX, 1011 SIMD_ARG_STOP 1012 } builtin_simd_arg; 1013 1014 1015 static rtx 1016 aarch64_simd_expand_args (rtx target, int icode, int have_retval, 1017 tree exp, builtin_simd_arg *args, 1018 enum machine_mode builtin_mode) 1019 { 1020 rtx pat; 1021 rtx op[SIMD_MAX_BUILTIN_ARGS + 1]; /* First element for result operand. */ 1022 int opc = 0; 1023 1024 if (have_retval) 1025 { 1026 machine_mode tmode = insn_data[icode].operand[0].mode; 1027 if (!target 1028 || GET_MODE (target) != tmode 1029 || !(*insn_data[icode].operand[0].predicate) (target, tmode)) 1030 target = gen_reg_rtx (tmode); 1031 op[opc++] = target; 1032 } 1033 1034 for (;;) 1035 { 1036 builtin_simd_arg thisarg = args[opc - have_retval]; 1037 1038 if (thisarg == SIMD_ARG_STOP) 1039 break; 1040 else 1041 { 1042 tree arg = CALL_EXPR_ARG (exp, opc - have_retval); 1043 enum machine_mode mode = insn_data[icode].operand[opc].mode; 1044 op[opc] = expand_normal (arg); 1045 1046 switch (thisarg) 1047 { 1048 case SIMD_ARG_COPY_TO_REG: 1049 if (POINTER_TYPE_P (TREE_TYPE (arg))) 1050 op[opc] = convert_memory_address (Pmode, op[opc]); 1051 /*gcc_assert (GET_MODE (op[opc]) == mode); */ 1052 if (!(*insn_data[icode].operand[opc].predicate) 1053 (op[opc], mode)) 1054 op[opc] = copy_to_mode_reg (mode, op[opc]); 1055 break; 1056 1057 case SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX: 1058 gcc_assert (opc > 1); 1059 if (CONST_INT_P (op[opc])) 1060 { 1061 aarch64_simd_lane_bounds (op[opc], 0, 1062 GET_MODE_NUNITS (builtin_mode), 1063 exp); 1064 /* Keep to GCC-vector-extension lane indices in the RTL. */ 1065 op[opc] = 1066 GEN_INT (ENDIAN_LANE_N (builtin_mode, INTVAL (op[opc]))); 1067 } 1068 goto constant_arg; 1069 1070 case SIMD_ARG_LANE_INDEX: 1071 /* Must be a previous operand into which this is an index. */ 1072 gcc_assert (opc > 0); 1073 if (CONST_INT_P (op[opc])) 1074 { 1075 machine_mode vmode = insn_data[icode].operand[opc - 1].mode; 1076 aarch64_simd_lane_bounds (op[opc], 1077 0, GET_MODE_NUNITS (vmode), exp); 1078 /* Keep to GCC-vector-extension lane indices in the RTL. */ 1079 op[opc] = GEN_INT (ENDIAN_LANE_N (vmode, INTVAL (op[opc]))); 1080 } 1081 /* Fall through - if the lane index isn't a constant then 1082 the next case will error. */ 1083 /* FALLTHRU */ 1084 case SIMD_ARG_CONSTANT: 1085 constant_arg: 1086 if (!(*insn_data[icode].operand[opc].predicate) 1087 (op[opc], mode)) 1088 { 1089 error ("%Kargument %d must be a constant immediate", 1090 exp, opc + 1 - have_retval); 1091 return const0_rtx; 1092 } 1093 break; 1094 1095 case SIMD_ARG_STOP: 1096 gcc_unreachable (); 1097 } 1098 1099 opc++; 1100 } 1101 } 1102 1103 switch (opc) 1104 { 1105 case 1: 1106 pat = GEN_FCN (icode) (op[0]); 1107 break; 1108 1109 case 2: 1110 pat = GEN_FCN (icode) (op[0], op[1]); 1111 break; 1112 1113 case 3: 1114 pat = GEN_FCN (icode) (op[0], op[1], op[2]); 1115 break; 1116 1117 case 4: 1118 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]); 1119 break; 1120 1121 case 5: 1122 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]); 1123 break; 1124 1125 case 6: 1126 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]); 1127 break; 1128 1129 default: 1130 gcc_unreachable (); 1131 } 1132 1133 if (!pat) 1134 return NULL_RTX; 1135 1136 emit_insn (pat); 1137 1138 return target; 1139 } 1140 1141 /* Expand an AArch64 AdvSIMD builtin(intrinsic). */ 1142 rtx 1143 aarch64_simd_expand_builtin (int fcode, tree exp, rtx target) 1144 { 1145 if (fcode == AARCH64_SIMD_BUILTIN_LANE_CHECK) 1146 { 1147 rtx totalsize = expand_normal (CALL_EXPR_ARG (exp, 0)); 1148 rtx elementsize = expand_normal (CALL_EXPR_ARG (exp, 1)); 1149 if (CONST_INT_P (totalsize) && CONST_INT_P (elementsize) 1150 && UINTVAL (elementsize) != 0 1151 && UINTVAL (totalsize) != 0) 1152 { 1153 rtx lane_idx = expand_normal (CALL_EXPR_ARG (exp, 2)); 1154 if (CONST_INT_P (lane_idx)) 1155 aarch64_simd_lane_bounds (lane_idx, 0, 1156 UINTVAL (totalsize) 1157 / UINTVAL (elementsize), 1158 exp); 1159 else 1160 error ("%Klane index must be a constant immediate", exp); 1161 } 1162 else 1163 error ("%Ktotal size and element size must be a non-zero constant immediate", exp); 1164 /* Don't generate any RTL. */ 1165 return const0_rtx; 1166 } 1167 aarch64_simd_builtin_datum *d = 1168 &aarch64_simd_builtin_data[fcode - AARCH64_SIMD_PATTERN_START]; 1169 enum insn_code icode = d->code; 1170 builtin_simd_arg args[SIMD_MAX_BUILTIN_ARGS + 1]; 1171 int num_args = insn_data[d->code].n_operands; 1172 int is_void = 0; 1173 int k; 1174 1175 is_void = !!(d->qualifiers[0] & qualifier_void); 1176 1177 num_args += is_void; 1178 1179 for (k = 1; k < num_args; k++) 1180 { 1181 /* We have four arrays of data, each indexed in a different fashion. 1182 qualifiers - element 0 always describes the function return type. 1183 operands - element 0 is either the operand for return value (if 1184 the function has a non-void return type) or the operand for the 1185 first argument. 1186 expr_args - element 0 always holds the first argument. 1187 args - element 0 is always used for the return type. */ 1188 int qualifiers_k = k; 1189 int operands_k = k - is_void; 1190 int expr_args_k = k - 1; 1191 1192 if (d->qualifiers[qualifiers_k] & qualifier_lane_index) 1193 args[k] = SIMD_ARG_LANE_INDEX; 1194 else if (d->qualifiers[qualifiers_k] & qualifier_struct_load_store_lane_index) 1195 args[k] = SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX; 1196 else if (d->qualifiers[qualifiers_k] & qualifier_immediate) 1197 args[k] = SIMD_ARG_CONSTANT; 1198 else if (d->qualifiers[qualifiers_k] & qualifier_maybe_immediate) 1199 { 1200 rtx arg 1201 = expand_normal (CALL_EXPR_ARG (exp, 1202 (expr_args_k))); 1203 /* Handle constants only if the predicate allows it. */ 1204 bool op_const_int_p = 1205 (CONST_INT_P (arg) 1206 && (*insn_data[icode].operand[operands_k].predicate) 1207 (arg, insn_data[icode].operand[operands_k].mode)); 1208 args[k] = op_const_int_p ? SIMD_ARG_CONSTANT : SIMD_ARG_COPY_TO_REG; 1209 } 1210 else 1211 args[k] = SIMD_ARG_COPY_TO_REG; 1212 1213 } 1214 args[k] = SIMD_ARG_STOP; 1215 1216 /* The interface to aarch64_simd_expand_args expects a 0 if 1217 the function is void, and a 1 if it is not. */ 1218 return aarch64_simd_expand_args 1219 (target, icode, !is_void, exp, &args[1], d->mode); 1220 } 1221 1222 rtx 1223 aarch64_crc32_expand_builtin (int fcode, tree exp, rtx target) 1224 { 1225 rtx pat; 1226 aarch64_crc_builtin_datum *d 1227 = &aarch64_crc_builtin_data[fcode - (AARCH64_CRC32_BUILTIN_BASE + 1)]; 1228 enum insn_code icode = d->icode; 1229 tree arg0 = CALL_EXPR_ARG (exp, 0); 1230 tree arg1 = CALL_EXPR_ARG (exp, 1); 1231 rtx op0 = expand_normal (arg0); 1232 rtx op1 = expand_normal (arg1); 1233 machine_mode tmode = insn_data[icode].operand[0].mode; 1234 machine_mode mode0 = insn_data[icode].operand[1].mode; 1235 machine_mode mode1 = insn_data[icode].operand[2].mode; 1236 1237 if (! target 1238 || GET_MODE (target) != tmode 1239 || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) 1240 target = gen_reg_rtx (tmode); 1241 1242 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode) 1243 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)); 1244 1245 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) 1246 op0 = copy_to_mode_reg (mode0, op0); 1247 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) 1248 op1 = copy_to_mode_reg (mode1, op1); 1249 1250 pat = GEN_FCN (icode) (target, op0, op1); 1251 if (!pat) 1252 return NULL_RTX; 1253 1254 emit_insn (pat); 1255 return target; 1256 } 1257 1258 /* Function to expand reciprocal square root builtins. */ 1259 1260 static rtx 1261 aarch64_expand_builtin_rsqrt (int fcode, tree exp, rtx target) 1262 { 1263 tree arg0 = CALL_EXPR_ARG (exp, 0); 1264 rtx op0 = expand_normal (arg0); 1265 1266 rtx (*gen) (rtx, rtx); 1267 1268 switch (fcode) 1269 { 1270 case AARCH64_BUILTIN_RSQRT_DF: 1271 gen = gen_rsqrtdf2; 1272 break; 1273 case AARCH64_BUILTIN_RSQRT_SF: 1274 gen = gen_rsqrtsf2; 1275 break; 1276 case AARCH64_BUILTIN_RSQRT_V2DF: 1277 gen = gen_rsqrtv2df2; 1278 break; 1279 case AARCH64_BUILTIN_RSQRT_V2SF: 1280 gen = gen_rsqrtv2sf2; 1281 break; 1282 case AARCH64_BUILTIN_RSQRT_V4SF: 1283 gen = gen_rsqrtv4sf2; 1284 break; 1285 default: gcc_unreachable (); 1286 } 1287 1288 if (!target) 1289 target = gen_reg_rtx (GET_MODE (op0)); 1290 1291 emit_insn (gen (target, op0)); 1292 1293 return target; 1294 } 1295 1296 /* Expand an expression EXP that calls a built-in function, 1297 with result going to TARGET if that's convenient. */ 1298 rtx 1299 aarch64_expand_builtin (tree exp, 1300 rtx target, 1301 rtx subtarget ATTRIBUTE_UNUSED, 1302 machine_mode mode ATTRIBUTE_UNUSED, 1303 int ignore ATTRIBUTE_UNUSED) 1304 { 1305 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); 1306 int fcode = DECL_FUNCTION_CODE (fndecl); 1307 int icode; 1308 rtx pat, op0; 1309 tree arg0; 1310 1311 switch (fcode) 1312 { 1313 case AARCH64_BUILTIN_GET_FPCR: 1314 case AARCH64_BUILTIN_SET_FPCR: 1315 case AARCH64_BUILTIN_GET_FPSR: 1316 case AARCH64_BUILTIN_SET_FPSR: 1317 if ((fcode == AARCH64_BUILTIN_GET_FPCR) 1318 || (fcode == AARCH64_BUILTIN_GET_FPSR)) 1319 { 1320 icode = (fcode == AARCH64_BUILTIN_GET_FPSR) ? 1321 CODE_FOR_get_fpsr : CODE_FOR_get_fpcr; 1322 target = gen_reg_rtx (SImode); 1323 pat = GEN_FCN (icode) (target); 1324 } 1325 else 1326 { 1327 target = NULL_RTX; 1328 icode = (fcode == AARCH64_BUILTIN_SET_FPSR) ? 1329 CODE_FOR_set_fpsr : CODE_FOR_set_fpcr; 1330 arg0 = CALL_EXPR_ARG (exp, 0); 1331 op0 = force_reg (SImode, expand_normal (arg0)); 1332 pat = GEN_FCN (icode) (op0); 1333 } 1334 emit_insn (pat); 1335 return target; 1336 1337 case AARCH64_PAUTH_BUILTIN_AUTIA1716: 1338 case AARCH64_PAUTH_BUILTIN_PACIA1716: 1339 case AARCH64_PAUTH_BUILTIN_XPACLRI: 1340 arg0 = CALL_EXPR_ARG (exp, 0); 1341 op0 = force_reg (Pmode, expand_normal (arg0)); 1342 1343 if (!target) 1344 target = gen_reg_rtx (Pmode); 1345 else 1346 target = force_reg (Pmode, target); 1347 1348 emit_move_insn (target, op0); 1349 1350 if (fcode == AARCH64_PAUTH_BUILTIN_XPACLRI) 1351 { 1352 rtx lr = gen_rtx_REG (Pmode, R30_REGNUM); 1353 icode = CODE_FOR_xpaclri; 1354 emit_move_insn (lr, op0); 1355 emit_insn (GEN_FCN (icode) ()); 1356 emit_move_insn (target, lr); 1357 } 1358 else 1359 { 1360 tree arg1 = CALL_EXPR_ARG (exp, 1); 1361 rtx op1 = force_reg (Pmode, expand_normal (arg1)); 1362 icode = (fcode == AARCH64_PAUTH_BUILTIN_PACIA1716 1363 ? CODE_FOR_paci1716 : CODE_FOR_auti1716); 1364 1365 rtx x16_reg = gen_rtx_REG (Pmode, R16_REGNUM); 1366 rtx x17_reg = gen_rtx_REG (Pmode, R17_REGNUM); 1367 emit_move_insn (x17_reg, op0); 1368 emit_move_insn (x16_reg, op1); 1369 emit_insn (GEN_FCN (icode) ()); 1370 emit_move_insn (target, x17_reg); 1371 } 1372 1373 return target; 1374 } 1375 1376 if (fcode >= AARCH64_SIMD_BUILTIN_BASE && fcode <= AARCH64_SIMD_BUILTIN_MAX) 1377 return aarch64_simd_expand_builtin (fcode, exp, target); 1378 else if (fcode >= AARCH64_CRC32_BUILTIN_BASE && fcode <= AARCH64_CRC32_BUILTIN_MAX) 1379 return aarch64_crc32_expand_builtin (fcode, exp, target); 1380 1381 if (fcode == AARCH64_BUILTIN_RSQRT_DF 1382 || fcode == AARCH64_BUILTIN_RSQRT_SF 1383 || fcode == AARCH64_BUILTIN_RSQRT_V2DF 1384 || fcode == AARCH64_BUILTIN_RSQRT_V2SF 1385 || fcode == AARCH64_BUILTIN_RSQRT_V4SF) 1386 return aarch64_expand_builtin_rsqrt (fcode, exp, target); 1387 1388 gcc_unreachable (); 1389 } 1390 1391 tree 1392 aarch64_builtin_vectorized_function (unsigned int fn, tree type_out, 1393 tree type_in) 1394 { 1395 machine_mode in_mode, out_mode; 1396 int in_n, out_n; 1397 1398 if (TREE_CODE (type_out) != VECTOR_TYPE 1399 || TREE_CODE (type_in) != VECTOR_TYPE) 1400 return NULL_TREE; 1401 1402 out_mode = TYPE_MODE (TREE_TYPE (type_out)); 1403 out_n = TYPE_VECTOR_SUBPARTS (type_out); 1404 in_mode = TYPE_MODE (TREE_TYPE (type_in)); 1405 in_n = TYPE_VECTOR_SUBPARTS (type_in); 1406 1407 #undef AARCH64_CHECK_BUILTIN_MODE 1408 #define AARCH64_CHECK_BUILTIN_MODE(C, N) 1 1409 #define AARCH64_FIND_FRINT_VARIANT(N) \ 1410 (AARCH64_CHECK_BUILTIN_MODE (2, D) \ 1411 ? aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_##N##v2df] \ 1412 : (AARCH64_CHECK_BUILTIN_MODE (4, S) \ 1413 ? aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_##N##v4sf] \ 1414 : (AARCH64_CHECK_BUILTIN_MODE (2, S) \ 1415 ? aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_##N##v2sf] \ 1416 : NULL_TREE))) 1417 switch (fn) 1418 { 1419 #undef AARCH64_CHECK_BUILTIN_MODE 1420 #define AARCH64_CHECK_BUILTIN_MODE(C, N) \ 1421 (out_mode == N##Fmode && out_n == C \ 1422 && in_mode == N##Fmode && in_n == C) 1423 CASE_CFN_FLOOR: 1424 return AARCH64_FIND_FRINT_VARIANT (floor); 1425 CASE_CFN_CEIL: 1426 return AARCH64_FIND_FRINT_VARIANT (ceil); 1427 CASE_CFN_TRUNC: 1428 return AARCH64_FIND_FRINT_VARIANT (btrunc); 1429 CASE_CFN_ROUND: 1430 return AARCH64_FIND_FRINT_VARIANT (round); 1431 CASE_CFN_NEARBYINT: 1432 return AARCH64_FIND_FRINT_VARIANT (nearbyint); 1433 CASE_CFN_SQRT: 1434 return AARCH64_FIND_FRINT_VARIANT (sqrt); 1435 #undef AARCH64_CHECK_BUILTIN_MODE 1436 #define AARCH64_CHECK_BUILTIN_MODE(C, N) \ 1437 (out_mode == SImode && out_n == C \ 1438 && in_mode == N##Imode && in_n == C) 1439 CASE_CFN_CLZ: 1440 { 1441 if (AARCH64_CHECK_BUILTIN_MODE (4, S)) 1442 return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_clzv4si]; 1443 return NULL_TREE; 1444 } 1445 CASE_CFN_CTZ: 1446 { 1447 if (AARCH64_CHECK_BUILTIN_MODE (2, S)) 1448 return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_ctzv2si]; 1449 else if (AARCH64_CHECK_BUILTIN_MODE (4, S)) 1450 return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_ctzv4si]; 1451 return NULL_TREE; 1452 } 1453 #undef AARCH64_CHECK_BUILTIN_MODE 1454 #define AARCH64_CHECK_BUILTIN_MODE(C, N) \ 1455 (out_mode == N##Imode && out_n == C \ 1456 && in_mode == N##Fmode && in_n == C) 1457 CASE_CFN_IFLOOR: 1458 CASE_CFN_LFLOOR: 1459 CASE_CFN_LLFLOOR: 1460 { 1461 enum aarch64_builtins builtin; 1462 if (AARCH64_CHECK_BUILTIN_MODE (2, D)) 1463 builtin = AARCH64_SIMD_BUILTIN_UNOP_lfloorv2dfv2di; 1464 else if (AARCH64_CHECK_BUILTIN_MODE (4, S)) 1465 builtin = AARCH64_SIMD_BUILTIN_UNOP_lfloorv4sfv4si; 1466 else if (AARCH64_CHECK_BUILTIN_MODE (2, S)) 1467 builtin = AARCH64_SIMD_BUILTIN_UNOP_lfloorv2sfv2si; 1468 else 1469 return NULL_TREE; 1470 1471 return aarch64_builtin_decls[builtin]; 1472 } 1473 CASE_CFN_ICEIL: 1474 CASE_CFN_LCEIL: 1475 CASE_CFN_LLCEIL: 1476 { 1477 enum aarch64_builtins builtin; 1478 if (AARCH64_CHECK_BUILTIN_MODE (2, D)) 1479 builtin = AARCH64_SIMD_BUILTIN_UNOP_lceilv2dfv2di; 1480 else if (AARCH64_CHECK_BUILTIN_MODE (4, S)) 1481 builtin = AARCH64_SIMD_BUILTIN_UNOP_lceilv4sfv4si; 1482 else if (AARCH64_CHECK_BUILTIN_MODE (2, S)) 1483 builtin = AARCH64_SIMD_BUILTIN_UNOP_lceilv2sfv2si; 1484 else 1485 return NULL_TREE; 1486 1487 return aarch64_builtin_decls[builtin]; 1488 } 1489 CASE_CFN_IROUND: 1490 CASE_CFN_LROUND: 1491 CASE_CFN_LLROUND: 1492 { 1493 enum aarch64_builtins builtin; 1494 if (AARCH64_CHECK_BUILTIN_MODE (2, D)) 1495 builtin = AARCH64_SIMD_BUILTIN_UNOP_lroundv2dfv2di; 1496 else if (AARCH64_CHECK_BUILTIN_MODE (4, S)) 1497 builtin = AARCH64_SIMD_BUILTIN_UNOP_lroundv4sfv4si; 1498 else if (AARCH64_CHECK_BUILTIN_MODE (2, S)) 1499 builtin = AARCH64_SIMD_BUILTIN_UNOP_lroundv2sfv2si; 1500 else 1501 return NULL_TREE; 1502 1503 return aarch64_builtin_decls[builtin]; 1504 } 1505 case CFN_BUILT_IN_BSWAP16: 1506 #undef AARCH64_CHECK_BUILTIN_MODE 1507 #define AARCH64_CHECK_BUILTIN_MODE(C, N) \ 1508 (out_mode == N##Imode && out_n == C \ 1509 && in_mode == N##Imode && in_n == C) 1510 if (AARCH64_CHECK_BUILTIN_MODE (4, H)) 1511 return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv4hi]; 1512 else if (AARCH64_CHECK_BUILTIN_MODE (8, H)) 1513 return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv8hi]; 1514 else 1515 return NULL_TREE; 1516 case CFN_BUILT_IN_BSWAP32: 1517 if (AARCH64_CHECK_BUILTIN_MODE (2, S)) 1518 return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv2si]; 1519 else if (AARCH64_CHECK_BUILTIN_MODE (4, S)) 1520 return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv4si]; 1521 else 1522 return NULL_TREE; 1523 case CFN_BUILT_IN_BSWAP64: 1524 if (AARCH64_CHECK_BUILTIN_MODE (2, D)) 1525 return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv2di]; 1526 else 1527 return NULL_TREE; 1528 default: 1529 return NULL_TREE; 1530 } 1531 1532 return NULL_TREE; 1533 } 1534 1535 /* Return builtin for reciprocal square root. */ 1536 1537 tree 1538 aarch64_builtin_rsqrt (unsigned int fn) 1539 { 1540 if (fn == AARCH64_SIMD_BUILTIN_UNOP_sqrtv2df) 1541 return aarch64_builtin_decls[AARCH64_BUILTIN_RSQRT_V2DF]; 1542 if (fn == AARCH64_SIMD_BUILTIN_UNOP_sqrtv2sf) 1543 return aarch64_builtin_decls[AARCH64_BUILTIN_RSQRT_V2SF]; 1544 if (fn == AARCH64_SIMD_BUILTIN_UNOP_sqrtv4sf) 1545 return aarch64_builtin_decls[AARCH64_BUILTIN_RSQRT_V4SF]; 1546 return NULL_TREE; 1547 } 1548 1549 #undef VAR1 1550 #define VAR1(T, N, MAP, A) \ 1551 case AARCH64_SIMD_BUILTIN_##T##_##N##A: 1552 1553 tree 1554 aarch64_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, tree *args, 1555 bool ignore ATTRIBUTE_UNUSED) 1556 { 1557 int fcode = DECL_FUNCTION_CODE (fndecl); 1558 tree type = TREE_TYPE (TREE_TYPE (fndecl)); 1559 1560 switch (fcode) 1561 { 1562 BUILTIN_VDQF (UNOP, abs, 2) 1563 return fold_build1 (ABS_EXPR, type, args[0]); 1564 VAR1 (UNOP, floatv2si, 2, v2sf) 1565 VAR1 (UNOP, floatv4si, 2, v4sf) 1566 VAR1 (UNOP, floatv2di, 2, v2df) 1567 return fold_build1 (FLOAT_EXPR, type, args[0]); 1568 default: 1569 break; 1570 } 1571 1572 return NULL_TREE; 1573 } 1574 1575 bool 1576 aarch64_gimple_fold_builtin (gimple_stmt_iterator *gsi) 1577 { 1578 bool changed = false; 1579 gimple *stmt = gsi_stmt (*gsi); 1580 tree call = gimple_call_fn (stmt); 1581 tree fndecl; 1582 gimple *new_stmt = NULL; 1583 1584 if (call) 1585 { 1586 fndecl = gimple_call_fndecl (stmt); 1587 if (fndecl) 1588 { 1589 int fcode = DECL_FUNCTION_CODE (fndecl); 1590 unsigned nargs = gimple_call_num_args (stmt); 1591 tree *args = (nargs > 0 1592 ? gimple_call_arg_ptr (stmt, 0) 1593 : &error_mark_node); 1594 1595 /* We use gimple's REDUC_(PLUS|MIN|MAX)_EXPRs for float, signed int 1596 and unsigned int; it will distinguish according to the types of 1597 the arguments to the __builtin. */ 1598 switch (fcode) 1599 { 1600 BUILTIN_VALL (UNOP, reduc_plus_scal_, 10) 1601 new_stmt = gimple_build_assign (gimple_call_lhs (stmt), 1602 REDUC_PLUS_EXPR, args[0]); 1603 break; 1604 BUILTIN_VDQIF (UNOP, reduc_smax_scal_, 10) 1605 BUILTIN_VDQ_BHSI (UNOPU, reduc_umax_scal_, 10) 1606 new_stmt = gimple_build_assign (gimple_call_lhs (stmt), 1607 REDUC_MAX_EXPR, args[0]); 1608 break; 1609 BUILTIN_VDQIF (UNOP, reduc_smin_scal_, 10) 1610 BUILTIN_VDQ_BHSI (UNOPU, reduc_umin_scal_, 10) 1611 new_stmt = gimple_build_assign (gimple_call_lhs (stmt), 1612 REDUC_MIN_EXPR, args[0]); 1613 break; 1614 BUILTIN_GPF (BINOP, fmulx, 0) 1615 { 1616 gcc_assert (nargs == 2); 1617 bool a0_cst_p = TREE_CODE (args[0]) == REAL_CST; 1618 bool a1_cst_p = TREE_CODE (args[1]) == REAL_CST; 1619 if (a0_cst_p || a1_cst_p) 1620 { 1621 if (a0_cst_p && a1_cst_p) 1622 { 1623 tree t0 = TREE_TYPE (args[0]); 1624 real_value a0 = (TREE_REAL_CST (args[0])); 1625 real_value a1 = (TREE_REAL_CST (args[1])); 1626 if (real_equal (&a1, &dconst0)) 1627 std::swap (a0, a1); 1628 /* According to real_equal (), +0 equals -0. */ 1629 if (real_equal (&a0, &dconst0) && real_isinf (&a1)) 1630 { 1631 real_value res = dconst2; 1632 res.sign = a0.sign ^ a1.sign; 1633 new_stmt = 1634 gimple_build_assign (gimple_call_lhs (stmt), 1635 REAL_CST, 1636 build_real (t0, res)); 1637 } 1638 else 1639 new_stmt = 1640 gimple_build_assign (gimple_call_lhs (stmt), 1641 MULT_EXPR, 1642 args[0], args[1]); 1643 } 1644 else /* a0_cst_p ^ a1_cst_p. */ 1645 { 1646 real_value const_part = a0_cst_p 1647 ? TREE_REAL_CST (args[0]) : TREE_REAL_CST (args[1]); 1648 if (!real_equal (&const_part, &dconst0) 1649 && !real_isinf (&const_part)) 1650 new_stmt = 1651 gimple_build_assign (gimple_call_lhs (stmt), 1652 MULT_EXPR, args[0], args[1]); 1653 } 1654 } 1655 if (new_stmt) 1656 { 1657 gimple_set_vuse (new_stmt, gimple_vuse (stmt)); 1658 gimple_set_vdef (new_stmt, gimple_vdef (stmt)); 1659 } 1660 break; 1661 } 1662 default: 1663 break; 1664 } 1665 } 1666 } 1667 1668 if (new_stmt) 1669 { 1670 gsi_replace (gsi, new_stmt, true); 1671 changed = true; 1672 } 1673 1674 return changed; 1675 } 1676 1677 void 1678 aarch64_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) 1679 { 1680 const unsigned AARCH64_FE_INVALID = 1; 1681 const unsigned AARCH64_FE_DIVBYZERO = 2; 1682 const unsigned AARCH64_FE_OVERFLOW = 4; 1683 const unsigned AARCH64_FE_UNDERFLOW = 8; 1684 const unsigned AARCH64_FE_INEXACT = 16; 1685 const unsigned HOST_WIDE_INT AARCH64_FE_ALL_EXCEPT = (AARCH64_FE_INVALID 1686 | AARCH64_FE_DIVBYZERO 1687 | AARCH64_FE_OVERFLOW 1688 | AARCH64_FE_UNDERFLOW 1689 | AARCH64_FE_INEXACT); 1690 const unsigned HOST_WIDE_INT AARCH64_FE_EXCEPT_SHIFT = 8; 1691 tree fenv_cr, fenv_sr, get_fpcr, set_fpcr, mask_cr, mask_sr; 1692 tree ld_fenv_cr, ld_fenv_sr, masked_fenv_cr, masked_fenv_sr, hold_fnclex_cr; 1693 tree hold_fnclex_sr, new_fenv_var, reload_fenv, restore_fnenv, get_fpsr, set_fpsr; 1694 tree update_call, atomic_feraiseexcept, hold_fnclex, masked_fenv, ld_fenv; 1695 1696 /* Generate the equivalence of : 1697 unsigned int fenv_cr; 1698 fenv_cr = __builtin_aarch64_get_fpcr (); 1699 1700 unsigned int fenv_sr; 1701 fenv_sr = __builtin_aarch64_get_fpsr (); 1702 1703 Now set all exceptions to non-stop 1704 unsigned int mask_cr 1705 = ~(AARCH64_FE_ALL_EXCEPT << AARCH64_FE_EXCEPT_SHIFT); 1706 unsigned int masked_cr; 1707 masked_cr = fenv_cr & mask_cr; 1708 1709 And clear all exception flags 1710 unsigned int maske_sr = ~AARCH64_FE_ALL_EXCEPT; 1711 unsigned int masked_cr; 1712 masked_sr = fenv_sr & mask_sr; 1713 1714 __builtin_aarch64_set_cr (masked_cr); 1715 __builtin_aarch64_set_sr (masked_sr); */ 1716 1717 fenv_cr = create_tmp_var_raw (unsigned_type_node); 1718 fenv_sr = create_tmp_var_raw (unsigned_type_node); 1719 1720 get_fpcr = aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR]; 1721 set_fpcr = aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR]; 1722 get_fpsr = aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR]; 1723 set_fpsr = aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR]; 1724 1725 mask_cr = build_int_cst (unsigned_type_node, 1726 ~(AARCH64_FE_ALL_EXCEPT << AARCH64_FE_EXCEPT_SHIFT)); 1727 mask_sr = build_int_cst (unsigned_type_node, 1728 ~(AARCH64_FE_ALL_EXCEPT)); 1729 1730 ld_fenv_cr = build2 (MODIFY_EXPR, unsigned_type_node, 1731 fenv_cr, build_call_expr (get_fpcr, 0)); 1732 ld_fenv_sr = build2 (MODIFY_EXPR, unsigned_type_node, 1733 fenv_sr, build_call_expr (get_fpsr, 0)); 1734 1735 masked_fenv_cr = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_cr, mask_cr); 1736 masked_fenv_sr = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_sr, mask_sr); 1737 1738 hold_fnclex_cr = build_call_expr (set_fpcr, 1, masked_fenv_cr); 1739 hold_fnclex_sr = build_call_expr (set_fpsr, 1, masked_fenv_sr); 1740 1741 hold_fnclex = build2 (COMPOUND_EXPR, void_type_node, hold_fnclex_cr, 1742 hold_fnclex_sr); 1743 masked_fenv = build2 (COMPOUND_EXPR, void_type_node, masked_fenv_cr, 1744 masked_fenv_sr); 1745 ld_fenv = build2 (COMPOUND_EXPR, void_type_node, ld_fenv_cr, ld_fenv_sr); 1746 1747 *hold = build2 (COMPOUND_EXPR, void_type_node, 1748 build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv), 1749 hold_fnclex); 1750 1751 /* Store the value of masked_fenv to clear the exceptions: 1752 __builtin_aarch64_set_fpsr (masked_fenv_sr); */ 1753 1754 *clear = build_call_expr (set_fpsr, 1, masked_fenv_sr); 1755 1756 /* Generate the equivalent of : 1757 unsigned int new_fenv_var; 1758 new_fenv_var = __builtin_aarch64_get_fpsr (); 1759 1760 __builtin_aarch64_set_fpsr (fenv_sr); 1761 1762 __atomic_feraiseexcept (new_fenv_var); */ 1763 1764 new_fenv_var = create_tmp_var_raw (unsigned_type_node); 1765 reload_fenv = build2 (MODIFY_EXPR, unsigned_type_node, 1766 new_fenv_var, build_call_expr (get_fpsr, 0)); 1767 restore_fnenv = build_call_expr (set_fpsr, 1, fenv_sr); 1768 atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT); 1769 update_call = build_call_expr (atomic_feraiseexcept, 1, 1770 fold_convert (integer_type_node, new_fenv_var)); 1771 *update = build2 (COMPOUND_EXPR, void_type_node, 1772 build2 (COMPOUND_EXPR, void_type_node, 1773 reload_fenv, restore_fnenv), update_call); 1774 } 1775 1776 1777 #undef AARCH64_CHECK_BUILTIN_MODE 1778 #undef AARCH64_FIND_FRINT_VARIANT 1779 #undef CF0 1780 #undef CF1 1781 #undef CF2 1782 #undef CF3 1783 #undef CF4 1784 #undef CF10 1785 #undef VAR1 1786 #undef VAR2 1787 #undef VAR3 1788 #undef VAR4 1789 #undef VAR5 1790 #undef VAR6 1791 #undef VAR7 1792 #undef VAR8 1793 #undef VAR9 1794 #undef VAR10 1795 #undef VAR11 1796 1797 #include "gt-aarch64-builtins.h" 1798