/* Builtins' description for AArch64 SIMD architecture.
   Copyright (C) 2011-2020 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "function.h"
#include "basic-block.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "memmodel.h"
#include "tm_p.h"
#include "expmed.h"
#include "optabs.h"
#include "recog.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "explow.h"
#include "expr.h"
#include "langhooks.h"
#include "gimple-iterator.h"
#include "case-cfn-macros.h"
#include "emit-rtl.h"
#include "stringpool.h"
#include "attribs.h"

#define v8qi_UP  E_V8QImode
#define v4hi_UP  E_V4HImode
#define v4hf_UP  E_V4HFmode
#define v2si_UP  E_V2SImode
#define v2sf_UP  E_V2SFmode
#define v1df_UP  E_V1DFmode
#define di_UP    E_DImode
#define df_UP    E_DFmode
#define v16qi_UP E_V16QImode
#define v8hi_UP  E_V8HImode
#define v8hf_UP  E_V8HFmode
#define v4si_UP  E_V4SImode
#define v4sf_UP  E_V4SFmode
#define v2di_UP  E_V2DImode
#define v2df_UP  E_V2DFmode
#define ti_UP    E_TImode
#define oi_UP    E_OImode
#define ci_UP    E_CImode
#define xi_UP    E_XImode
#define si_UP    E_SImode
#define sf_UP    E_SFmode
#define hi_UP    E_HImode
#define hf_UP    E_HFmode
#define qi_UP    E_QImode
#define bf_UP    E_BFmode
#define v4bf_UP  E_V4BFmode
#define v8bf_UP  E_V8BFmode
#define UP(X) X##_UP

#define SIMD_MAX_BUILTIN_ARGS 5

enum aarch64_type_qualifiers
{
  /* T foo.  */
  qualifier_none = 0x0,
  /* unsigned T foo.  */
  qualifier_unsigned = 0x1, /* 1 << 0  */
  /* const T foo.  */
  qualifier_const = 0x2, /* 1 << 1  */
  /* T *foo.  */
  qualifier_pointer = 0x4, /* 1 << 2  */
  /* Used when expanding arguments if an operand could
     be an immediate.  */
  qualifier_immediate = 0x8, /* 1 << 3  */
  qualifier_maybe_immediate = 0x10, /* 1 << 4  */
  /* void foo (...).  */
  qualifier_void = 0x20, /* 1 << 5  */
  /* Some patterns may have internal operands, this qualifier is an
     instruction to the initialisation code to skip this operand.  */
  qualifier_internal = 0x40, /* 1 << 6  */
  /* Some builtins should use the T_*mode* encoded in a simd_builtin_datum
     rather than using the type of the operand.  */
  qualifier_map_mode = 0x80, /* 1 << 7  */
  /* qualifier_pointer | qualifier_map_mode  */
  qualifier_pointer_map_mode = 0x84,
  /* qualifier_const | qualifier_pointer | qualifier_map_mode  */
  qualifier_const_pointer_map_mode = 0x86,
  /* Polynomial types.  */
  qualifier_poly = 0x100,
  /* Lane indices - must be in range, and flipped for bigendian.  */
  qualifier_lane_index = 0x200,
  /* Lane indices for single lane structure loads and stores.  */
  qualifier_struct_load_store_lane_index = 0x400,
  /* Lane indices selected in pairs - must be in range, and flipped for
     bigendian.  */
  qualifier_lane_pair_index = 0x800,
  /* Lane indices selected in quadtuplets - must be in range, and flipped for
     bigendian.  */
  qualifier_lane_quadtup_index = 0x1000,
};

typedef struct
{
  const char *name;
  machine_mode mode;
  const enum insn_code code;
  unsigned int fcode;
  enum aarch64_type_qualifiers *qualifiers;
} aarch64_simd_builtin_datum;

static enum aarch64_type_qualifiers
aarch64_types_unop_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none };
#define TYPES_UNOP (aarch64_types_unop_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_unopu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned };
#define TYPES_UNOPU (aarch64_types_unopu_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_unopus_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_none };
#define TYPES_UNOPUS (aarch64_types_unopus_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_binop_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_maybe_immediate };
#define TYPES_BINOP (aarch64_types_binop_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_binopu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned };
#define TYPES_BINOPU (aarch64_types_binopu_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_binop_uus_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned, qualifier_none };
#define TYPES_BINOP_UUS (aarch64_types_binop_uus_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_binop_ssu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_unsigned };
#define TYPES_BINOP_SSU (aarch64_types_binop_ssu_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_binop_uss_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_none, qualifier_none };
#define TYPES_BINOP_USS (aarch64_types_binop_uss_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_binopp_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_poly, qualifier_poly, qualifier_poly };
#define TYPES_BINOPP (aarch64_types_binopp_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_ternop_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_none, qualifier_none };
#define TYPES_TERNOP (aarch64_types_ternop_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_ternop_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_none, qualifier_lane_index };
#define TYPES_TERNOP_LANE (aarch64_types_ternop_lane_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_ternopu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned,
      qualifier_unsigned, qualifier_unsigned };
#define TYPES_TERNOPU (aarch64_types_ternopu_qualifiers)
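/* Reading a qualifier list: element 0 always describes the return type and
   the remaining elements describe the arguments.  As an informal,
   illustrative example (not itself part of the table machinery),
   TYPES_BINOP_USS above corresponds to a builtin of the shape

     unsigned T foo (T, T);

   while TYPES_BINOP corresponds to T foo (T, T), where the second argument
   may additionally be accepted as an immediate.  */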
static enum aarch64_type_qualifiers
aarch64_types_ternopu_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned,
      qualifier_unsigned, qualifier_lane_index };
#define TYPES_TERNOPU_LANE (aarch64_types_ternopu_lane_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_ternopu_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned,
      qualifier_unsigned, qualifier_immediate };
#define TYPES_TERNOPUI (aarch64_types_ternopu_imm_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_ternop_ssus_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_unsigned, qualifier_none };
#define TYPES_TERNOP_SSUS (aarch64_types_ternop_ssus_qualifiers)


static enum aarch64_type_qualifiers
aarch64_types_quadop_lane_pair_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_none,
      qualifier_none, qualifier_lane_pair_index };
#define TYPES_QUADOP_LANE_PAIR (aarch64_types_quadop_lane_pair_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_quadop_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_none,
      qualifier_none, qualifier_lane_index };
#define TYPES_QUADOP_LANE (aarch64_types_quadop_lane_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_quadopu_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned,
      qualifier_unsigned, qualifier_lane_index };
#define TYPES_QUADOPU_LANE (aarch64_types_quadopu_lane_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_quadopssus_lane_quadtup_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_unsigned,
      qualifier_none, qualifier_lane_quadtup_index };
#define TYPES_QUADOPSSUS_LANE_QUADTUP \
  (aarch64_types_quadopssus_lane_quadtup_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_quadopsssu_lane_quadtup_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_none,
      qualifier_unsigned, qualifier_lane_quadtup_index };
#define TYPES_QUADOPSSSU_LANE_QUADTUP \
  (aarch64_types_quadopsssu_lane_quadtup_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_quadopu_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned,
      qualifier_unsigned, qualifier_immediate };
#define TYPES_QUADOPUI (aarch64_types_quadopu_imm_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_binop_imm_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_poly, qualifier_none, qualifier_immediate };
#define TYPES_GETREGP (aarch64_types_binop_imm_p_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_binop_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_immediate };
#define TYPES_GETREG (aarch64_types_binop_imm_qualifiers)
#define TYPES_SHIFTIMM (aarch64_types_binop_imm_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_shift_to_unsigned_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_none, qualifier_immediate };
#define TYPES_SHIFTIMM_USS (aarch64_types_shift_to_unsigned_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_fcvt_from_unsigned_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_unsigned, qualifier_immediate };
#define TYPES_FCVTIMM_SUS (aarch64_types_fcvt_from_unsigned_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_unsigned_shift_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned, qualifier_immediate };
#define TYPES_USHIFTIMM (aarch64_types_unsigned_shift_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_ternop_s_imm_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_poly, qualifier_immediate };
#define TYPES_SETREGP (aarch64_types_ternop_s_imm_p_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_ternop_s_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_none, qualifier_immediate };
#define TYPES_SETREG (aarch64_types_ternop_s_imm_qualifiers)
#define TYPES_SHIFTINSERT (aarch64_types_ternop_s_imm_qualifiers)
#define TYPES_SHIFTACC (aarch64_types_ternop_s_imm_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_ternop_p_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_poly, qualifier_poly, qualifier_poly, qualifier_immediate };
#define TYPES_SHIFTINSERTP (aarch64_types_ternop_p_imm_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_unsigned_shiftacc_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned,
      qualifier_immediate };
#define TYPES_USHIFTACC (aarch64_types_unsigned_shiftacc_qualifiers)


static enum aarch64_type_qualifiers
aarch64_types_combine_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_none };
#define TYPES_COMBINE (aarch64_types_combine_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_combine_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_poly, qualifier_poly, qualifier_poly };
#define TYPES_COMBINEP (aarch64_types_combine_p_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_load1_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_const_pointer_map_mode };
#define TYPES_LOAD1 (aarch64_types_load1_qualifiers)
#define TYPES_LOADSTRUCT (aarch64_types_load1_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_loadstruct_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_const_pointer_map_mode,
      qualifier_none, qualifier_struct_load_store_lane_index };
#define TYPES_LOADSTRUCT_LANE (aarch64_types_loadstruct_lane_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_bsl_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_poly, qualifier_unsigned,
      qualifier_poly, qualifier_poly };
#define TYPES_BSL_P (aarch64_types_bsl_p_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_bsl_s_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_unsigned,
      qualifier_none, qualifier_none };
#define TYPES_BSL_S (aarch64_types_bsl_s_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_bsl_u_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned,
      qualifier_unsigned, qualifier_unsigned };
#define TYPES_BSL_U (aarch64_types_bsl_u_qualifiers)

/* The first argument (return type) of a store should be void type,
   which we represent with qualifier_void.  Its first operand will be
   a DImode pointer to the location to store to, so we must use
   qualifier_map_mode | qualifier_pointer to build a pointer to the
   element type of the vector.  */
static enum aarch64_type_qualifiers
aarch64_types_store1_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_void, qualifier_pointer_map_mode, qualifier_poly };
#define TYPES_STORE1P (aarch64_types_store1_p_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_store1_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_void, qualifier_pointer_map_mode, qualifier_none };
#define TYPES_STORE1 (aarch64_types_store1_qualifiers)
#define TYPES_STORESTRUCT (aarch64_types_store1_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_storestruct_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_void, qualifier_pointer_map_mode,
      qualifier_none, qualifier_struct_load_store_lane_index };
#define TYPES_STORESTRUCT_LANE (aarch64_types_storestruct_lane_qualifiers)

#define CF0(N, X) CODE_FOR_aarch64_##N##X
#define CF1(N, X) CODE_FOR_##N##X##1
#define CF2(N, X) CODE_FOR_##N##X##2
#define CF3(N, X) CODE_FOR_##N##X##3
#define CF4(N, X) CODE_FOR_##N##X##4
#define CF10(N, X) CODE_FOR_##N##X

#define VAR1(T, N, MAP, A) \
  {#N #A, UP (A), CF##MAP (N, A), 0, TYPES_##T},
#define VAR2(T, N, MAP, A, B) \
  VAR1 (T, N, MAP, A) \
  VAR1 (T, N, MAP, B)
#define VAR3(T, N, MAP, A, B, C) \
  VAR2 (T, N, MAP, A, B) \
  VAR1 (T, N, MAP, C)
#define VAR4(T, N, MAP, A, B, C, D) \
  VAR3 (T, N, MAP, A, B, C) \
  VAR1 (T, N, MAP, D)
#define VAR5(T, N, MAP, A, B, C, D, E) \
  VAR4 (T, N, MAP, A, B, C, D) \
  VAR1 (T, N, MAP, E)
#define VAR6(T, N, MAP, A, B, C, D, E, F) \
  VAR5 (T, N, MAP, A, B, C, D, E) \
  VAR1 (T, N, MAP, F)
#define VAR7(T, N, MAP, A, B, C, D, E, F, G) \
  VAR6 (T, N, MAP, A, B, C, D, E, F) \
  VAR1 (T, N, MAP, G)
#define VAR8(T, N, MAP, A, B, C, D, E, F, G, H) \
  VAR7 (T, N, MAP, A, B, C, D, E, F, G) \
  VAR1 (T, N, MAP, H)
#define VAR9(T, N, MAP, A, B, C, D, E, F, G, H, I) \
  VAR8 (T, N, MAP, A, B, C, D, E, F, G, H) \
  VAR1 (T, N, MAP, I)
#define VAR10(T, N, MAP, A, B, C, D, E, F, G, H, I, J) \
  VAR9 (T, N, MAP, A, B, C, D, E, F, G, H, I) \
  VAR1 (T, N, MAP, J)
#define VAR11(T, N, MAP, A, B, C, D, E, F, G, H, I, J, K) \
  VAR10 (T, N, MAP, A, B, C, D, E, F, G, H, I, J) \
  VAR1 (T, N, MAP, K)
#define VAR12(T, N, MAP, A, B, C, D, E, F, G, H, I, J, K, L) \
  VAR11 (T, N, MAP, A, B, C, D, E, F, G, H, I, J, K) \
  VAR1 (T, N, MAP, L)
#define VAR13(T, N, MAP, A, B, C, D, E, F, G, H, I, J, K, L, M) \
  VAR12 (T, N, MAP, A, B, C, D, E, F, G, H, I, J, K, L) \
  VAR1 (T, N, MAP, M)
#define VAR14(T, X, MAP, A, B, C, D, E, F, G, H, I, J, K, L, M, N) \
  VAR13 (T, X, MAP, A, B, C, D, E, F, G, H, I, J, K, L, M) \
  VAR1 (T, X, MAP, N)
#define VAR15(T, X, MAP, A, B, C, D, E, F, G, H, I, J, K, L, M, N, O) \
  VAR14 (T, X, MAP, A, B, C, D, E, F, G, H, I, J, K, L, M, N) \
  VAR1 (T, X, MAP, O)
#define VAR16(T, X, MAP, A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \
  VAR15 (T, X, MAP, A, B, C, D, E, F, G, H, I, J, K, L, M, N, O) \
  VAR1 (T, X, MAP, P)

#include "aarch64-builtin-iterators.h"

static aarch64_simd_builtin_datum aarch64_simd_builtin_data[] = {
#include "aarch64-simd-builtins.def"
};
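/* For illustration only (this is a hypothetical entry, not one taken from
   the .def file): a VAR1 (BINOP, fmulx, 0, v2sf) line would expand, via the
   macros above, to the datum

     {"fmulxv2sf", E_V2SFmode, CODE_FOR_aarch64_fmulxv2sf, 0, TYPES_BINOP},

   i.e. the builtin name is the pattern name followed by the mode suffix,
   the MAP digit selects which CF<n> macro forms the insn code, and the T
   argument selects one of the TYPES_* qualifier lists above.  */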
/* There are only 8 CRC32 builtins.  Probably not worth their own .def file.  */
#define AARCH64_CRC32_BUILTINS \
  CRC32_BUILTIN (crc32b, QI) \
  CRC32_BUILTIN (crc32h, HI) \
  CRC32_BUILTIN (crc32w, SI) \
  CRC32_BUILTIN (crc32x, DI) \
  CRC32_BUILTIN (crc32cb, QI) \
  CRC32_BUILTIN (crc32ch, HI) \
  CRC32_BUILTIN (crc32cw, SI) \
  CRC32_BUILTIN (crc32cx, DI)

/* The next 8 FCMLA intrinsics require some special handling compared to the
   normal simd intrinsics.  */
#define AARCH64_SIMD_FCMLA_LANEQ_BUILTINS \
  FCMLA_LANEQ_BUILTIN (0, v2sf, fcmla, V2SF, false) \
  FCMLA_LANEQ_BUILTIN (90, v2sf, fcmla, V2SF, false) \
  FCMLA_LANEQ_BUILTIN (180, v2sf, fcmla, V2SF, false) \
  FCMLA_LANEQ_BUILTIN (270, v2sf, fcmla, V2SF, false) \
  FCMLA_LANEQ_BUILTIN (0, v4hf, fcmla_laneq, V4HF, true) \
  FCMLA_LANEQ_BUILTIN (90, v4hf, fcmla_laneq, V4HF, true) \
  FCMLA_LANEQ_BUILTIN (180, v4hf, fcmla_laneq, V4HF, true) \
  FCMLA_LANEQ_BUILTIN (270, v4hf, fcmla_laneq, V4HF, true) \

typedef struct
{
  const char *name;
  machine_mode mode;
  const enum insn_code icode;
  unsigned int fcode;
} aarch64_crc_builtin_datum;

/* Hold information about how to expand the FCMLA_LANEQ builtins.  */
typedef struct
{
  const char *name;
  machine_mode mode;
  const enum insn_code icode;
  unsigned int fcode;
  bool lane;
} aarch64_fcmla_laneq_builtin_datum;

#define CRC32_BUILTIN(N, M) \
  AARCH64_BUILTIN_##N,

#define FCMLA_LANEQ_BUILTIN(I, N, X, M, T) \
  AARCH64_SIMD_BUILTIN_FCMLA_LANEQ##I##_##M,

#undef VAR1
#define VAR1(T, N, MAP, A) \
  AARCH64_SIMD_BUILTIN_##T##_##N##A,

enum aarch64_builtins
{
  AARCH64_BUILTIN_MIN,

  AARCH64_BUILTIN_GET_FPCR,
  AARCH64_BUILTIN_SET_FPCR,
  AARCH64_BUILTIN_GET_FPSR,
  AARCH64_BUILTIN_SET_FPSR,

  AARCH64_BUILTIN_RSQRT_DF,
  AARCH64_BUILTIN_RSQRT_SF,
  AARCH64_BUILTIN_RSQRT_V2DF,
  AARCH64_BUILTIN_RSQRT_V2SF,
  AARCH64_BUILTIN_RSQRT_V4SF,
  AARCH64_SIMD_BUILTIN_BASE,
  AARCH64_SIMD_BUILTIN_LANE_CHECK,
#include "aarch64-simd-builtins.def"
  /* The first enum element which is based on an insn_data pattern.  */
  AARCH64_SIMD_PATTERN_START = AARCH64_SIMD_BUILTIN_LANE_CHECK + 1,
  AARCH64_SIMD_BUILTIN_MAX = AARCH64_SIMD_PATTERN_START
			     + ARRAY_SIZE (aarch64_simd_builtin_data) - 1,
  AARCH64_CRC32_BUILTIN_BASE,
  AARCH64_CRC32_BUILTINS
  AARCH64_CRC32_BUILTIN_MAX,
  /* ARMv8.3-A Pointer Authentication Builtins.  */
  AARCH64_PAUTH_BUILTIN_AUTIA1716,
  AARCH64_PAUTH_BUILTIN_PACIA1716,
  AARCH64_PAUTH_BUILTIN_AUTIB1716,
  AARCH64_PAUTH_BUILTIN_PACIB1716,
  AARCH64_PAUTH_BUILTIN_XPACLRI,
  /* Special cased Armv8.3-A Complex FMA by Lane quad Builtins.  */
  AARCH64_SIMD_FCMLA_LANEQ_BUILTIN_BASE,
  AARCH64_SIMD_FCMLA_LANEQ_BUILTINS
  /* Builtin for the Armv8.3-A JavaScript conversion instruction.  */
  AARCH64_JSCVT,
  /* TME builtins.  */
  AARCH64_TME_BUILTIN_TSTART,
  AARCH64_TME_BUILTIN_TCOMMIT,
  AARCH64_TME_BUILTIN_TTEST,
  AARCH64_TME_BUILTIN_TCANCEL,
  /* Armv8.5-A RNG instruction builtins.  */
  AARCH64_BUILTIN_RNG_RNDR,
  AARCH64_BUILTIN_RNG_RNDRRS,
  /* MEMTAG builtins.  */
  AARCH64_MEMTAG_BUILTIN_START,
  AARCH64_MEMTAG_BUILTIN_IRG,
  AARCH64_MEMTAG_BUILTIN_GMI,
  AARCH64_MEMTAG_BUILTIN_SUBP,
  AARCH64_MEMTAG_BUILTIN_INC_TAG,
  AARCH64_MEMTAG_BUILTIN_SET_TAG,
  AARCH64_MEMTAG_BUILTIN_GET_TAG,
  AARCH64_MEMTAG_BUILTIN_END,
  AARCH64_BUILTIN_MAX
};

#undef CRC32_BUILTIN
#define CRC32_BUILTIN(N, M) \
  {"__builtin_aarch64_"#N, E_##M##mode, CODE_FOR_aarch64_##N, AARCH64_BUILTIN_##N},

static aarch64_crc_builtin_datum aarch64_crc_builtin_data[] = {
  AARCH64_CRC32_BUILTINS
};


#undef FCMLA_LANEQ_BUILTIN
#define FCMLA_LANEQ_BUILTIN(I, N, X, M, T) \
  {"__builtin_aarch64_fcmla_laneq"#I#N, E_##M##mode, CODE_FOR_aarch64_##X##I##N, \
   AARCH64_SIMD_BUILTIN_FCMLA_LANEQ##I##_##M, T},

/* This structure describes the mapping from the builtin to the instruction
   to generate in the backend and how to invoke the instruction.  */
static aarch64_fcmla_laneq_builtin_datum aarch64_fcmla_lane_builtin_data[] = {
  AARCH64_SIMD_FCMLA_LANEQ_BUILTINS
};

#undef CRC32_BUILTIN

static GTY(()) tree aarch64_builtin_decls[AARCH64_BUILTIN_MAX];

#define NUM_DREG_TYPES 6
#define NUM_QREG_TYPES 6

/* Internal scalar builtin types.  These types are used to support
   neon intrinsic builtins.  They are _not_ user-visible types.  Therefore
   the mangling for these types is implementation defined.  */
const char *aarch64_scalar_builtin_types[] = {
  "__builtin_aarch64_simd_qi",
  "__builtin_aarch64_simd_hi",
  "__builtin_aarch64_simd_si",
  "__builtin_aarch64_simd_hf",
  "__builtin_aarch64_simd_sf",
  "__builtin_aarch64_simd_di",
  "__builtin_aarch64_simd_df",
  "__builtin_aarch64_simd_poly8",
  "__builtin_aarch64_simd_poly16",
  "__builtin_aarch64_simd_poly64",
  "__builtin_aarch64_simd_poly128",
  "__builtin_aarch64_simd_ti",
  "__builtin_aarch64_simd_uqi",
  "__builtin_aarch64_simd_uhi",
  "__builtin_aarch64_simd_usi",
  "__builtin_aarch64_simd_udi",
  "__builtin_aarch64_simd_ei",
  "__builtin_aarch64_simd_oi",
  "__builtin_aarch64_simd_ci",
  "__builtin_aarch64_simd_xi",
  "__builtin_aarch64_simd_bf",
  NULL
};

#define ENTRY(E, M, Q, G) E,
enum aarch64_simd_type
{
#include "aarch64-simd-builtin-types.def"
  ARM_NEON_H_TYPES_LAST
};
#undef ENTRY

struct aarch64_simd_type_info
{
  enum aarch64_simd_type type;

  /* Internal type name.  */
  const char *name;

  /* Internal type name (mangled).  The mangled names conform to the
     AAPCS64 (see "Procedure Call Standard for the ARM 64-bit Architecture",
     Appendix A).  To qualify for emission with the mangled names defined in
     that document, a vector type must not only be of the correct mode but also
     be of the correct internal AdvSIMD vector type (e.g. __Int8x8_t); these
     types are registered by aarch64_init_simd_builtin_types ().  In other
     words, vector types defined in other ways e.g. via vector_size attribute
     will get default mangled names.  */
  const char *mangle;

  /* Internal type.  */
  tree itype;

  /* Element type.  */
  tree eltype;

  /* Machine mode the internal type maps to.  */
  enum machine_mode mode;

  /* Qualifiers.  */
  enum aarch64_type_qualifiers q;
};

#define ENTRY(E, M, Q, G) \
  {E, "__" #E, #G "__" #E, NULL_TREE, NULL_TREE, E_##M##mode, qualifier_##Q},
static struct aarch64_simd_type_info aarch64_simd_types [] = {
#include "aarch64-simd-builtin-types.def"
};
#undef ENTRY

static tree aarch64_simd_intOI_type_node = NULL_TREE;
static tree aarch64_simd_intCI_type_node = NULL_TREE;
static tree aarch64_simd_intXI_type_node = NULL_TREE;

/* The user-visible __fp16 type, and a pointer to that type.  Used
   across the back-end.  */
tree aarch64_fp16_type_node = NULL_TREE;
tree aarch64_fp16_ptr_type_node = NULL_TREE;

/* Back-end node type for brain float (bfloat) types.  */
tree aarch64_bf16_type_node = NULL_TREE;
tree aarch64_bf16_ptr_type_node = NULL_TREE;

/* Wrapper around add_builtin_function.  NAME is the name of the built-in
   function, TYPE is the function type, and CODE is the function subcode
   (relative to AARCH64_BUILTIN_GENERAL).  */
static tree
aarch64_general_add_builtin (const char *name, tree type, unsigned int code)
{
  code = (code << AARCH64_BUILTIN_SHIFT) | AARCH64_BUILTIN_GENERAL;
  return add_builtin_function (name, type, code, BUILT_IN_MD,
			       NULL, NULL_TREE);
}
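/* Note: the subcode packing above mirrors the unpacking done when the
   builtin is later looked up or expanded.  Informally (a sketch only; the
   authoritative dispatch lives in the target hooks in aarch64.c), the
   group-specific code is recovered with something like

     unsigned int subcode = code >> AARCH64_BUILTIN_SHIFT;

   after first dispatching on the builtin class held in the low bits.  */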
static const char *
aarch64_mangle_builtin_scalar_type (const_tree type)
{
  int i = 0;

  while (aarch64_scalar_builtin_types[i] != NULL)
    {
      const char *name = aarch64_scalar_builtin_types[i];

      if (TREE_CODE (TYPE_NAME (type)) == TYPE_DECL
	  && DECL_NAME (TYPE_NAME (type))
	  && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type))), name))
	return aarch64_scalar_builtin_types[i];
      i++;
    }
  return NULL;
}

static const char *
aarch64_mangle_builtin_vector_type (const_tree type)
{
  int i;
  int nelts = sizeof (aarch64_simd_types) / sizeof (aarch64_simd_types[0]);

  for (i = 0; i < nelts; i++)
    if (aarch64_simd_types[i].mode == TYPE_MODE (type)
	&& TYPE_NAME (type)
	&& TREE_CODE (TYPE_NAME (type)) == TYPE_DECL
	&& DECL_NAME (TYPE_NAME (type))
	&& !strcmp
	     (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type))),
	      aarch64_simd_types[i].name))
      return aarch64_simd_types[i].mangle;

  return NULL;
}

const char *
aarch64_general_mangle_builtin_type (const_tree type)
{
  const char *mangle;
  /* Walk through all the AArch64 builtins types tables to filter out the
     incoming type.  */
  if ((mangle = aarch64_mangle_builtin_vector_type (type))
      || (mangle = aarch64_mangle_builtin_scalar_type (type)))
    return mangle;

  return NULL;
}

static tree
aarch64_simd_builtin_std_type (machine_mode mode,
			       enum aarch64_type_qualifiers q)
{
#define QUAL_TYPE(M) \
  ((q == qualifier_none) ? int##M##_type_node : unsigned_int##M##_type_node);
  switch (mode)
    {
    case E_QImode:
      return QUAL_TYPE (QI);
    case E_HImode:
      return QUAL_TYPE (HI);
    case E_SImode:
      return QUAL_TYPE (SI);
    case E_DImode:
      return QUAL_TYPE (DI);
    case E_TImode:
      return QUAL_TYPE (TI);
    case E_OImode:
      return aarch64_simd_intOI_type_node;
    case E_CImode:
      return aarch64_simd_intCI_type_node;
    case E_XImode:
      return aarch64_simd_intXI_type_node;
    case E_HFmode:
      return aarch64_fp16_type_node;
    case E_SFmode:
      return float_type_node;
    case E_DFmode:
      return double_type_node;
    case E_BFmode:
      return aarch64_bf16_type_node;
    default:
      gcc_unreachable ();
    }
#undef QUAL_TYPE
}

static tree
aarch64_lookup_simd_builtin_type (machine_mode mode,
				  enum aarch64_type_qualifiers q)
{
  int i;
  int nelts = sizeof (aarch64_simd_types) / sizeof (aarch64_simd_types[0]);

  /* Non-poly scalar modes map to standard types not in the table.  */
  if (q != qualifier_poly && !VECTOR_MODE_P (mode))
    return aarch64_simd_builtin_std_type (mode, q);

  for (i = 0; i < nelts; i++)
    if (aarch64_simd_types[i].mode == mode
	&& aarch64_simd_types[i].q == q)
      return aarch64_simd_types[i].itype;

  return NULL_TREE;
}

static tree
aarch64_simd_builtin_type (machine_mode mode,
			   bool unsigned_p, bool poly_p)
{
  if (poly_p)
    return aarch64_lookup_simd_builtin_type (mode, qualifier_poly);
  else if (unsigned_p)
    return aarch64_lookup_simd_builtin_type (mode, qualifier_unsigned);
  else
    return aarch64_lookup_simd_builtin_type (mode, qualifier_none);
}

static void
aarch64_init_simd_builtin_types (void)
{
  int i;
  int nelts = sizeof (aarch64_simd_types) / sizeof (aarch64_simd_types[0]);
  tree tdecl;

  /* Init all the element types built by the front-end.  */
  aarch64_simd_types[Int8x8_t].eltype = intQI_type_node;
  aarch64_simd_types[Int8x16_t].eltype = intQI_type_node;
  aarch64_simd_types[Int16x4_t].eltype = intHI_type_node;
  aarch64_simd_types[Int16x8_t].eltype = intHI_type_node;
  aarch64_simd_types[Int32x2_t].eltype = intSI_type_node;
  aarch64_simd_types[Int32x4_t].eltype = intSI_type_node;
  aarch64_simd_types[Int64x1_t].eltype = intDI_type_node;
  aarch64_simd_types[Int64x2_t].eltype = intDI_type_node;
  aarch64_simd_types[Uint8x8_t].eltype = unsigned_intQI_type_node;
  aarch64_simd_types[Uint8x16_t].eltype = unsigned_intQI_type_node;
  aarch64_simd_types[Uint16x4_t].eltype = unsigned_intHI_type_node;
  aarch64_simd_types[Uint16x8_t].eltype = unsigned_intHI_type_node;
  aarch64_simd_types[Uint32x2_t].eltype = unsigned_intSI_type_node;
  aarch64_simd_types[Uint32x4_t].eltype = unsigned_intSI_type_node;
  aarch64_simd_types[Uint64x1_t].eltype = unsigned_intDI_type_node;
  aarch64_simd_types[Uint64x2_t].eltype = unsigned_intDI_type_node;

  /* Poly types are a world of their own.  */
  aarch64_simd_types[Poly8_t].eltype = aarch64_simd_types[Poly8_t].itype =
    build_distinct_type_copy (unsigned_intQI_type_node);
  /* Prevent front-ends from transforming Poly8_t arrays into string
     literals.  */
  TYPE_STRING_FLAG (aarch64_simd_types[Poly8_t].eltype) = false;

  aarch64_simd_types[Poly16_t].eltype = aarch64_simd_types[Poly16_t].itype =
    build_distinct_type_copy (unsigned_intHI_type_node);
  aarch64_simd_types[Poly64_t].eltype = aarch64_simd_types[Poly64_t].itype =
    build_distinct_type_copy (unsigned_intDI_type_node);
  aarch64_simd_types[Poly128_t].eltype = aarch64_simd_types[Poly128_t].itype =
    build_distinct_type_copy (unsigned_intTI_type_node);
  /* Init poly vector element types with scalar poly types.  */
  aarch64_simd_types[Poly8x8_t].eltype = aarch64_simd_types[Poly8_t].itype;
  aarch64_simd_types[Poly8x16_t].eltype = aarch64_simd_types[Poly8_t].itype;
  aarch64_simd_types[Poly16x4_t].eltype = aarch64_simd_types[Poly16_t].itype;
  aarch64_simd_types[Poly16x8_t].eltype = aarch64_simd_types[Poly16_t].itype;
  aarch64_simd_types[Poly64x1_t].eltype = aarch64_simd_types[Poly64_t].itype;
  aarch64_simd_types[Poly64x2_t].eltype = aarch64_simd_types[Poly64_t].itype;

  /* Continue with standard types.  */
  aarch64_simd_types[Float16x4_t].eltype = aarch64_fp16_type_node;
  aarch64_simd_types[Float16x8_t].eltype = aarch64_fp16_type_node;
  aarch64_simd_types[Float32x2_t].eltype = float_type_node;
  aarch64_simd_types[Float32x4_t].eltype = float_type_node;
  aarch64_simd_types[Float64x1_t].eltype = double_type_node;
  aarch64_simd_types[Float64x2_t].eltype = double_type_node;

  /* Init Bfloat vector types with underlying __bf16 type.  */
  aarch64_simd_types[Bfloat16x4_t].eltype = aarch64_bf16_type_node;
  aarch64_simd_types[Bfloat16x8_t].eltype = aarch64_bf16_type_node;

  for (i = 0; i < nelts; i++)
    {
      tree eltype = aarch64_simd_types[i].eltype;
      machine_mode mode = aarch64_simd_types[i].mode;

      if (aarch64_simd_types[i].itype == NULL)
	{
	  tree type = build_vector_type (eltype, GET_MODE_NUNITS (mode));
	  type = build_distinct_type_copy (type);
	  SET_TYPE_STRUCTURAL_EQUALITY (type);

	  TYPE_ATTRIBUTES (type)
	    = tree_cons (get_identifier ("Advanced SIMD type"),
			 NULL_TREE, TYPE_ATTRIBUTES (type));
	  aarch64_simd_types[i].itype = type;
	}

      tdecl = add_builtin_type (aarch64_simd_types[i].name,
				aarch64_simd_types[i].itype);
      TYPE_NAME (aarch64_simd_types[i].itype) = tdecl;
    }

#define AARCH64_BUILD_SIGNED_TYPE(mode) \
  make_signed_type (GET_MODE_PRECISION (mode));
  aarch64_simd_intOI_type_node = AARCH64_BUILD_SIGNED_TYPE (OImode);
  aarch64_simd_intCI_type_node = AARCH64_BUILD_SIGNED_TYPE (CImode);
  aarch64_simd_intXI_type_node = AARCH64_BUILD_SIGNED_TYPE (XImode);
#undef AARCH64_BUILD_SIGNED_TYPE

  tdecl = add_builtin_type
	    ("__builtin_aarch64_simd_oi" , aarch64_simd_intOI_type_node);
  TYPE_NAME (aarch64_simd_intOI_type_node) = tdecl;
  tdecl = add_builtin_type
	    ("__builtin_aarch64_simd_ci" , aarch64_simd_intCI_type_node);
  TYPE_NAME (aarch64_simd_intCI_type_node) = tdecl;
  tdecl = add_builtin_type
	    ("__builtin_aarch64_simd_xi" , aarch64_simd_intXI_type_node);
  TYPE_NAME (aarch64_simd_intXI_type_node) = tdecl;
}

static void
aarch64_init_simd_builtin_scalar_types (void)
{
  /* Define typedefs for all the standard scalar types.  */
  (*lang_hooks.types.register_builtin_type) (intQI_type_node,
					     "__builtin_aarch64_simd_qi");
  (*lang_hooks.types.register_builtin_type) (intHI_type_node,
					     "__builtin_aarch64_simd_hi");
  (*lang_hooks.types.register_builtin_type) (aarch64_fp16_type_node,
					     "__builtin_aarch64_simd_hf");
  (*lang_hooks.types.register_builtin_type) (intSI_type_node,
					     "__builtin_aarch64_simd_si");
  (*lang_hooks.types.register_builtin_type) (float_type_node,
					     "__builtin_aarch64_simd_sf");
  (*lang_hooks.types.register_builtin_type) (intDI_type_node,
					     "__builtin_aarch64_simd_di");
  (*lang_hooks.types.register_builtin_type) (double_type_node,
					     "__builtin_aarch64_simd_df");
  (*lang_hooks.types.register_builtin_type) (unsigned_intQI_type_node,
					     "__builtin_aarch64_simd_poly8");
  (*lang_hooks.types.register_builtin_type) (unsigned_intHI_type_node,
					     "__builtin_aarch64_simd_poly16");
  (*lang_hooks.types.register_builtin_type) (unsigned_intDI_type_node,
					     "__builtin_aarch64_simd_poly64");
  (*lang_hooks.types.register_builtin_type) (unsigned_intTI_type_node,
					     "__builtin_aarch64_simd_poly128");
  (*lang_hooks.types.register_builtin_type) (intTI_type_node,
					     "__builtin_aarch64_simd_ti");
  (*lang_hooks.types.register_builtin_type) (aarch64_bf16_type_node,
					     "__builtin_aarch64_simd_bf");
  /* Unsigned integer types for various mode sizes.  */
  (*lang_hooks.types.register_builtin_type) (unsigned_intQI_type_node,
					     "__builtin_aarch64_simd_uqi");
  (*lang_hooks.types.register_builtin_type) (unsigned_intHI_type_node,
					     "__builtin_aarch64_simd_uhi");
  (*lang_hooks.types.register_builtin_type) (unsigned_intSI_type_node,
					     "__builtin_aarch64_simd_usi");
  (*lang_hooks.types.register_builtin_type) (unsigned_intDI_type_node,
					     "__builtin_aarch64_simd_udi");
}

static bool aarch64_simd_builtins_initialized_p = false;

/* Because the architecture does not provide a lane variant of the FCMLA
   instructions, we can't use the standard SIMD builtin expansion code, but
   we still want the majority of the validation that would normally be
   done.  */

void
aarch64_init_fcmla_laneq_builtins (void)
{
  unsigned int i = 0;

  for (i = 0; i < ARRAY_SIZE (aarch64_fcmla_lane_builtin_data); ++i)
    {
      aarch64_fcmla_laneq_builtin_datum* d
	= &aarch64_fcmla_lane_builtin_data[i];
      tree argtype = aarch64_lookup_simd_builtin_type (d->mode, qualifier_none);
      machine_mode quadmode = GET_MODE_2XWIDER_MODE (d->mode).require ();
      tree quadtype
	= aarch64_lookup_simd_builtin_type (quadmode, qualifier_none);
      tree lanetype
	= aarch64_simd_builtin_std_type (SImode, qualifier_lane_pair_index);
      tree ftype = build_function_type_list (argtype, argtype, argtype,
					     quadtype, lanetype, NULL_TREE);
      tree fndecl = aarch64_general_add_builtin (d->name, ftype, d->fcode);

      aarch64_builtin_decls[d->fcode] = fndecl;
    }
}
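/* For the V2SF variant, for example, the function type built by the loop
   above is roughly (signature shown for illustration only; the exact lane
   index type comes from aarch64_simd_builtin_std_type)

     float32x2_t __builtin_aarch64_fcmla_laneq0v2sf (float32x2_t, float32x2_t,
						     float32x4_t, int lane);

   i.e. the first two vector arguments use the base mode while the third
   uses the twice-as-wide "quad" mode.  */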
void
aarch64_init_simd_builtins (void)
{
  unsigned int i, fcode = AARCH64_SIMD_PATTERN_START;

  if (aarch64_simd_builtins_initialized_p)
    return;

  aarch64_simd_builtins_initialized_p = true;

  aarch64_init_simd_builtin_types ();

  /* Strong-typing hasn't been implemented for all AdvSIMD builtin intrinsics.
     Therefore we need to preserve the old __builtin scalar types.  It can be
     removed once all the intrinsics become strongly typed using the qualifier
     system.  */
  aarch64_init_simd_builtin_scalar_types ();

  tree lane_check_fpr = build_function_type_list (void_type_node,
						  size_type_node,
						  size_type_node,
						  intSI_type_node,
						  NULL);
  aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_LANE_CHECK]
    = aarch64_general_add_builtin ("__builtin_aarch64_im_lane_boundsi",
				   lane_check_fpr,
				   AARCH64_SIMD_BUILTIN_LANE_CHECK);

  for (i = 0; i < ARRAY_SIZE (aarch64_simd_builtin_data); i++, fcode++)
    {
      bool print_type_signature_p = false;
      char type_signature[SIMD_MAX_BUILTIN_ARGS + 1] = { 0 };
      aarch64_simd_builtin_datum *d = &aarch64_simd_builtin_data[i];
      char namebuf[60];
      tree ftype = NULL;
      tree fndecl = NULL;

      d->fcode = fcode;

      /* We must track two variables here.  op_num is
	 the operand number as in the RTL pattern.  This is
	 required to access the mode (e.g. V4SF mode) of the
	 argument, from which the base type can be derived.
	 arg_num is an index into the qualifiers data, which
	 gives qualifiers to the type (e.g. const unsigned).
	 The reason these two variables may differ by one is the
	 void return type.  While all return types take the 0th entry
	 in the qualifiers array, there is no operand for them in the
	 RTL pattern.  */
      int op_num = insn_data[d->code].n_operands - 1;
      int arg_num = d->qualifiers[0] & qualifier_void
		      ? op_num + 1
		      : op_num;
      tree return_type = void_type_node, args = void_list_node;
      tree eltype;

      /* Build a function type directly from the insn_data for this
	 builtin.  The build_function_type () function takes care of
	 removing duplicates for us.  */
      for (; op_num >= 0; arg_num--, op_num--)
	{
	  machine_mode op_mode = insn_data[d->code].operand[op_num].mode;
	  enum aarch64_type_qualifiers qualifiers = d->qualifiers[arg_num];

	  if (qualifiers & qualifier_unsigned)
	    {
	      type_signature[op_num] = 'u';
	      print_type_signature_p = true;
	    }
	  else if (qualifiers & qualifier_poly)
	    {
	      type_signature[op_num] = 'p';
	      print_type_signature_p = true;
	    }
	  else
	    type_signature[op_num] = 's';

	  /* Skip an internal operand for vget_{low, high}.  */
	  if (qualifiers & qualifier_internal)
	    continue;

	  /* Some builtins have different user-facing types
	     for certain arguments, encoded in d->mode.  */
	  if (qualifiers & qualifier_map_mode)
	    op_mode = d->mode;

	  /* For pointers, we want a pointer to the basic type
	     of the vector.  */
	  if (qualifiers & qualifier_pointer && VECTOR_MODE_P (op_mode))
	    op_mode = GET_MODE_INNER (op_mode);

	  eltype = aarch64_simd_builtin_type
		     (op_mode,
		      (qualifiers & qualifier_unsigned) != 0,
		      (qualifiers & qualifier_poly) != 0);
	  gcc_assert (eltype != NULL);

	  /* Add qualifiers.  */
	  if (qualifiers & qualifier_const)
	    eltype = build_qualified_type (eltype, TYPE_QUAL_CONST);

	  if (qualifiers & qualifier_pointer)
	    eltype = build_pointer_type (eltype);

	  /* If we have reached arg_num == 0, we are at a non-void
	     return type.  Otherwise, we are still processing
	     arguments.  */
	  if (arg_num == 0)
	    return_type = eltype;
	  else
	    args = tree_cons (NULL_TREE, eltype, args);
	}

      ftype = build_function_type (return_type, args);

      gcc_assert (ftype != NULL);

      if (print_type_signature_p)
	snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s_%s",
		  d->name, type_signature);
      else
	snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s",
		  d->name);

      fndecl = aarch64_general_add_builtin (namebuf, ftype, fcode);
      aarch64_builtin_decls[fcode] = fndecl;
    }

  /* Initialize the remaining fcmla_laneq intrinsics.  */
  aarch64_init_fcmla_laneq_builtins ();
}

static void
aarch64_init_crc32_builtins ()
{
  tree usi_type = aarch64_simd_builtin_std_type (SImode, qualifier_unsigned);
  unsigned int i = 0;

  for (i = 0; i < ARRAY_SIZE (aarch64_crc_builtin_data); ++i)
    {
      aarch64_crc_builtin_datum* d = &aarch64_crc_builtin_data[i];
      tree argtype = aarch64_simd_builtin_std_type (d->mode,
						    qualifier_unsigned);
      tree ftype = build_function_type_list (usi_type, usi_type, argtype, NULL_TREE);
      tree fndecl = aarch64_general_add_builtin (d->name, ftype, d->fcode);

      aarch64_builtin_decls[d->fcode] = fndecl;
    }
}

/* Add builtins for reciprocal square root.  */

void
aarch64_init_builtin_rsqrt (void)
{
  tree fndecl = NULL;
  tree ftype = NULL;

  tree V2SF_type_node = build_vector_type (float_type_node, 2);
  tree V2DF_type_node = build_vector_type (double_type_node, 2);
  tree V4SF_type_node = build_vector_type (float_type_node, 4);

  struct builtin_decls_data
  {
    tree type_node;
    const char *builtin_name;
    int function_code;
  };

  builtin_decls_data bdda[] =
  {
    { double_type_node, "__builtin_aarch64_rsqrt_df", AARCH64_BUILTIN_RSQRT_DF },
    { float_type_node, "__builtin_aarch64_rsqrt_sf", AARCH64_BUILTIN_RSQRT_SF },
    { V2DF_type_node, "__builtin_aarch64_rsqrt_v2df", AARCH64_BUILTIN_RSQRT_V2DF },
    { V2SF_type_node, "__builtin_aarch64_rsqrt_v2sf", AARCH64_BUILTIN_RSQRT_V2SF },
    { V4SF_type_node, "__builtin_aarch64_rsqrt_v4sf", AARCH64_BUILTIN_RSQRT_V4SF }
  };

  builtin_decls_data *bdd = bdda;
  builtin_decls_data *bdd_end = bdd + (sizeof (bdda) / sizeof (builtin_decls_data));

  for (; bdd < bdd_end; bdd++)
    {
      ftype = build_function_type_list (bdd->type_node, bdd->type_node, NULL_TREE);
      fndecl = aarch64_general_add_builtin (bdd->builtin_name,
					    ftype, bdd->function_code);
      aarch64_builtin_decls[bdd->function_code] = fndecl;
    }
}

/* Initialize the backend types that support the user-visible __fp16
   type, also initialize a pointer to that type, to be used when
   forming HFAs.  */

static void
aarch64_init_fp16_types (void)
{
  aarch64_fp16_type_node = make_node (REAL_TYPE);
  TYPE_PRECISION (aarch64_fp16_type_node) = 16;
  layout_type (aarch64_fp16_type_node);

  (*lang_hooks.types.register_builtin_type) (aarch64_fp16_type_node, "__fp16");
  aarch64_fp16_ptr_type_node = build_pointer_type (aarch64_fp16_type_node);
}

/* Initialize the backend REAL_TYPE type supporting bfloat types.  */
static void
aarch64_init_bf16_types (void)
{
  aarch64_bf16_type_node = make_node (REAL_TYPE);
  TYPE_PRECISION (aarch64_bf16_type_node) = 16;
  SET_TYPE_MODE (aarch64_bf16_type_node, BFmode);
  layout_type (aarch64_bf16_type_node);

  lang_hooks.types.register_builtin_type (aarch64_bf16_type_node, "__bf16");
  aarch64_bf16_ptr_type_node = build_pointer_type (aarch64_bf16_type_node);
}

/* Pointer authentication builtins that will become NOPs on legacy platforms.
   Currently, these builtins are for internal use only (libgcc EH unwinder).  */

void
aarch64_init_pauth_hint_builtins (void)
{
  /* Pointer Authentication builtins.  */
  tree ftype_pointer_auth
    = build_function_type_list (ptr_type_node, ptr_type_node,
				unsigned_intDI_type_node, NULL_TREE);
  tree ftype_pointer_strip
    = build_function_type_list (ptr_type_node, ptr_type_node, NULL_TREE);

  aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_AUTIA1716]
    = aarch64_general_add_builtin ("__builtin_aarch64_autia1716",
				   ftype_pointer_auth,
				   AARCH64_PAUTH_BUILTIN_AUTIA1716);
  aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_PACIA1716]
    = aarch64_general_add_builtin ("__builtin_aarch64_pacia1716",
				   ftype_pointer_auth,
				   AARCH64_PAUTH_BUILTIN_PACIA1716);
  aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_AUTIB1716]
    = aarch64_general_add_builtin ("__builtin_aarch64_autib1716",
				   ftype_pointer_auth,
				   AARCH64_PAUTH_BUILTIN_AUTIB1716);
  aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_PACIB1716]
    = aarch64_general_add_builtin ("__builtin_aarch64_pacib1716",
				   ftype_pointer_auth,
				   AARCH64_PAUTH_BUILTIN_PACIB1716);
  aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_XPACLRI]
    = aarch64_general_add_builtin ("__builtin_aarch64_xpaclri",
				   ftype_pointer_strip,
				   AARCH64_PAUTH_BUILTIN_XPACLRI);
}

/* Initialize the transactional memory extension (TME) builtins.  */
static void
aarch64_init_tme_builtins (void)
{
  tree ftype_uint64_void
    = build_function_type_list (uint64_type_node, NULL);
  tree ftype_void_void
    = build_function_type_list (void_type_node, NULL);
  tree ftype_void_uint64
    = build_function_type_list (void_type_node, uint64_type_node, NULL);

  aarch64_builtin_decls[AARCH64_TME_BUILTIN_TSTART]
    = aarch64_general_add_builtin ("__builtin_aarch64_tstart",
				   ftype_uint64_void,
				   AARCH64_TME_BUILTIN_TSTART);
  aarch64_builtin_decls[AARCH64_TME_BUILTIN_TTEST]
    = aarch64_general_add_builtin ("__builtin_aarch64_ttest",
				   ftype_uint64_void,
				   AARCH64_TME_BUILTIN_TTEST);
  aarch64_builtin_decls[AARCH64_TME_BUILTIN_TCOMMIT]
    = aarch64_general_add_builtin ("__builtin_aarch64_tcommit",
				   ftype_void_void,
				   AARCH64_TME_BUILTIN_TCOMMIT);
  aarch64_builtin_decls[AARCH64_TME_BUILTIN_TCANCEL]
    = aarch64_general_add_builtin ("__builtin_aarch64_tcancel",
				   ftype_void_uint64,
				   AARCH64_TME_BUILTIN_TCANCEL);
}
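/* These TME builtins are normally reached through the ACLE wrappers in
   arm_acle.h rather than called directly; as an illustration only (the
   exact wrapper definitions live in that header), a call such as
   __tcancel (reason) is expected to reduce to
   __builtin_aarch64_tcancel (reason), and __tstart () to
   __builtin_aarch64_tstart ().  */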
/* Add builtins for Random Number instructions.  */

static void
aarch64_init_rng_builtins (void)
{
  tree unsigned_ptr_type = build_pointer_type (unsigned_intDI_type_node);
  tree ftype
    = build_function_type_list (integer_type_node, unsigned_ptr_type, NULL);
  aarch64_builtin_decls[AARCH64_BUILTIN_RNG_RNDR]
    = aarch64_general_add_builtin ("__builtin_aarch64_rndr", ftype,
				   AARCH64_BUILTIN_RNG_RNDR);
  aarch64_builtin_decls[AARCH64_BUILTIN_RNG_RNDRRS]
    = aarch64_general_add_builtin ("__builtin_aarch64_rndrrs", ftype,
				   AARCH64_BUILTIN_RNG_RNDRRS);
}

/* Initialize the memory tagging extension (MTE) builtins.  */
struct aarch64_mte
{
  tree ftype;
  enum insn_code icode;
} aarch64_memtag_builtin_data[AARCH64_MEMTAG_BUILTIN_END -
			      AARCH64_MEMTAG_BUILTIN_START - 1];

static void
aarch64_init_memtag_builtins (void)
{
  tree fntype = NULL;

#define AARCH64_INIT_MEMTAG_BUILTINS_DECL(F, N, I, T) \
  aarch64_builtin_decls[AARCH64_MEMTAG_BUILTIN_##F] \
    = aarch64_general_add_builtin ("__builtin_aarch64_memtag_"#N, \
				   T, AARCH64_MEMTAG_BUILTIN_##F); \
  aarch64_memtag_builtin_data[AARCH64_MEMTAG_BUILTIN_##F - \
			      AARCH64_MEMTAG_BUILTIN_START - 1].ftype = T; \
  aarch64_memtag_builtin_data[AARCH64_MEMTAG_BUILTIN_##F - \
			      AARCH64_MEMTAG_BUILTIN_START - 1].icode = CODE_FOR_##I;

  fntype = build_function_type_list (ptr_type_node, ptr_type_node,
				     uint64_type_node, NULL);
  AARCH64_INIT_MEMTAG_BUILTINS_DECL (IRG, irg, irg, fntype);

  fntype = build_function_type_list (uint64_type_node, ptr_type_node,
				     uint64_type_node, NULL);
  AARCH64_INIT_MEMTAG_BUILTINS_DECL (GMI, gmi, gmi, fntype);

  fntype = build_function_type_list (ptrdiff_type_node, ptr_type_node,
				     ptr_type_node, NULL);
  AARCH64_INIT_MEMTAG_BUILTINS_DECL (SUBP, subp, subp, fntype);

  fntype = build_function_type_list (ptr_type_node, ptr_type_node,
				     unsigned_type_node, NULL);
  AARCH64_INIT_MEMTAG_BUILTINS_DECL (INC_TAG, inc_tag, addg, fntype);

  fntype = build_function_type_list (void_type_node, ptr_type_node, NULL);
  AARCH64_INIT_MEMTAG_BUILTINS_DECL (SET_TAG, set_tag, stg, fntype);

  fntype = build_function_type_list (ptr_type_node, ptr_type_node, NULL);
  AARCH64_INIT_MEMTAG_BUILTINS_DECL (GET_TAG, get_tag, ldg, fntype);

#undef AARCH64_INIT_MEMTAG_BUILTINS_DECL
}

/* Initialize all builtins in the AARCH64_BUILTIN_GENERAL group.  */

void
aarch64_general_init_builtins (void)
{
  tree ftype_set_fpr
    = build_function_type_list (void_type_node, unsigned_type_node, NULL);
  tree ftype_get_fpr
    = build_function_type_list (unsigned_type_node, NULL);

  aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR]
    = aarch64_general_add_builtin ("__builtin_aarch64_get_fpcr",
				   ftype_get_fpr,
				   AARCH64_BUILTIN_GET_FPCR);
  aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR]
    = aarch64_general_add_builtin ("__builtin_aarch64_set_fpcr",
				   ftype_set_fpr,
				   AARCH64_BUILTIN_SET_FPCR);
  aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR]
    = aarch64_general_add_builtin ("__builtin_aarch64_get_fpsr",
				   ftype_get_fpr,
				   AARCH64_BUILTIN_GET_FPSR);
  aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR]
    = aarch64_general_add_builtin ("__builtin_aarch64_set_fpsr",
				   ftype_set_fpr,
				   AARCH64_BUILTIN_SET_FPSR);

  aarch64_init_fp16_types ();

  aarch64_init_bf16_types ();

  if (TARGET_SIMD)
    aarch64_init_simd_builtins ();

  aarch64_init_crc32_builtins ();
  aarch64_init_builtin_rsqrt ();
  aarch64_init_rng_builtins ();

  tree ftype_jcvt
    = build_function_type_list (intSI_type_node, double_type_node, NULL);
  aarch64_builtin_decls[AARCH64_JSCVT]
    = aarch64_general_add_builtin ("__builtin_aarch64_jcvtzs", ftype_jcvt,
				   AARCH64_JSCVT);

  /* Initialize pointer authentication builtins which are backed by
     instructions in NOP encoding space.

     NOTE: these builtins are supposed to be used by the libgcc unwinder only;
     since there is no support for return address signing under ILP32, we
     don't register them there.  */
  if (!TARGET_ILP32)
    aarch64_init_pauth_hint_builtins ();

  if (TARGET_TME)
    aarch64_init_tme_builtins ();

  if (TARGET_MEMTAG)
    aarch64_init_memtag_builtins ();
}

/* Implement TARGET_BUILTIN_DECL for the AARCH64_BUILTIN_GENERAL group.  */
tree
aarch64_general_builtin_decl (unsigned code, bool)
{
  if (code >= AARCH64_BUILTIN_MAX)
    return error_mark_node;

  return aarch64_builtin_decls[code];
}

typedef enum
{
  SIMD_ARG_COPY_TO_REG,
  SIMD_ARG_CONSTANT,
  SIMD_ARG_LANE_INDEX,
  SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX,
  SIMD_ARG_LANE_PAIR_INDEX,
  SIMD_ARG_LANE_QUADTUP_INDEX,
  SIMD_ARG_STOP
} builtin_simd_arg;


static rtx
aarch64_simd_expand_args (rtx target, int icode, int have_retval,
			  tree exp, builtin_simd_arg *args,
			  machine_mode builtin_mode)
{
  rtx pat;
  rtx op[SIMD_MAX_BUILTIN_ARGS + 1]; /* First element for result operand.  */
  int opc = 0;

  if (have_retval)
    {
      machine_mode tmode = insn_data[icode].operand[0].mode;
      if (!target
	  || GET_MODE (target) != tmode
	  || !(*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      op[opc++] = target;
    }

  for (;;)
    {
      builtin_simd_arg thisarg = args[opc - have_retval];

      if (thisarg == SIMD_ARG_STOP)
	break;
      else
	{
	  tree arg = CALL_EXPR_ARG (exp, opc - have_retval);
	  machine_mode mode = insn_data[icode].operand[opc].mode;
	  op[opc] = expand_normal (arg);

	  switch (thisarg)
	    {
	    case SIMD_ARG_COPY_TO_REG:
	      if (POINTER_TYPE_P (TREE_TYPE (arg)))
		op[opc] = convert_memory_address (Pmode, op[opc]);
	      /*gcc_assert (GET_MODE (op[opc]) == mode); */
	      if (!(*insn_data[icode].operand[opc].predicate)
		  (op[opc], mode))
		op[opc] = copy_to_mode_reg (mode, op[opc]);
	      break;

	    case SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX:
	      gcc_assert (opc > 1);
	      if (CONST_INT_P (op[opc]))
		{
		  unsigned int nunits
		    = GET_MODE_NUNITS (builtin_mode).to_constant ();
		  aarch64_simd_lane_bounds (op[opc], 0, nunits, exp);
		  /* Keep to GCC-vector-extension lane indices in the RTL.  */
		  op[opc] = aarch64_endian_lane_rtx (builtin_mode,
						     INTVAL (op[opc]));
		}
	      goto constant_arg;

	    case SIMD_ARG_LANE_INDEX:
	      /* Must be a previous operand into which this is an index.  */
	      gcc_assert (opc > 0);
	      if (CONST_INT_P (op[opc]))
		{
		  machine_mode vmode = insn_data[icode].operand[opc - 1].mode;
		  unsigned int nunits
		    = GET_MODE_NUNITS (vmode).to_constant ();
		  aarch64_simd_lane_bounds (op[opc], 0, nunits, exp);
		  /* Keep to GCC-vector-extension lane indices in the RTL.  */
		  op[opc] = aarch64_endian_lane_rtx (vmode, INTVAL (op[opc]));
		}
	      /* If the lane index isn't a constant then error out.  */
	      goto constant_arg;

	    case SIMD_ARG_LANE_PAIR_INDEX:
	      /* Must be a previous operand into which this is an index and
		 index is restricted to nunits / 2.  */
	      gcc_assert (opc > 0);
	      if (CONST_INT_P (op[opc]))
		{
		  machine_mode vmode = insn_data[icode].operand[opc - 1].mode;
		  unsigned int nunits
		    = GET_MODE_NUNITS (vmode).to_constant ();
		  aarch64_simd_lane_bounds (op[opc], 0, nunits / 2, exp);
		  /* Keep to GCC-vector-extension lane indices in the RTL.  */
		  int lane = INTVAL (op[opc]);
		  op[opc] = gen_int_mode (ENDIAN_LANE_N (nunits / 2, lane),
					  SImode);
		}
	      /* If the lane index isn't a constant then error out.  */
	      goto constant_arg;
	    case SIMD_ARG_LANE_QUADTUP_INDEX:
	      /* Must be a previous operand into which this is an index and
		 index is restricted to nunits / 4.  */
	      gcc_assert (opc > 0);
	      if (CONST_INT_P (op[opc]))
		{
		  machine_mode vmode = insn_data[icode].operand[opc - 1].mode;
		  unsigned int nunits
		    = GET_MODE_NUNITS (vmode).to_constant ();
		  aarch64_simd_lane_bounds (op[opc], 0, nunits / 4, exp);
		  /* Keep to GCC-vector-extension lane indices in the RTL.  */
		  int lane = INTVAL (op[opc]);
		  op[opc] = gen_int_mode (ENDIAN_LANE_N (nunits / 4, lane),
					  SImode);
		}
	      /* If the lane index isn't a constant then error out.  */
	      goto constant_arg;

	    case SIMD_ARG_CONSTANT:
	    constant_arg:
	      if (!(*insn_data[icode].operand[opc].predicate)
		  (op[opc], mode))
		{
		  error ("%Kargument %d must be a constant immediate",
			 exp, opc + 1 - have_retval);
		  return const0_rtx;
		}
	      break;

	    case SIMD_ARG_STOP:
	      gcc_unreachable ();
	    }

	  opc++;
	}
    }

  switch (opc)
    {
    case 1:
      pat = GEN_FCN (icode) (op[0]);
      break;

    case 2:
      pat = GEN_FCN (icode) (op[0], op[1]);
      break;

    case 3:
      pat = GEN_FCN (icode) (op[0], op[1], op[2]);
      break;

    case 4:
      pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
      break;

    case 5:
      pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
      break;

    case 6:
      pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
      break;

    default:
      gcc_unreachable ();
    }

  if (!pat)
    return NULL_RTX;

  emit_insn (pat);

  return target;
}

/* Expand an AArch64 AdvSIMD builtin (intrinsic).  */
rtx
aarch64_simd_expand_builtin (int fcode, tree exp, rtx target)
{
  if (fcode == AARCH64_SIMD_BUILTIN_LANE_CHECK)
    {
      rtx totalsize = expand_normal (CALL_EXPR_ARG (exp, 0));
      rtx elementsize = expand_normal (CALL_EXPR_ARG (exp, 1));
      if (CONST_INT_P (totalsize) && CONST_INT_P (elementsize)
	  && UINTVAL (elementsize) != 0
	  && UINTVAL (totalsize) != 0)
	{
	  rtx lane_idx = expand_normal (CALL_EXPR_ARG (exp, 2));
	  if (CONST_INT_P (lane_idx))
	    aarch64_simd_lane_bounds (lane_idx, 0,
				      UINTVAL (totalsize)
				       / UINTVAL (elementsize),
				      exp);
	  else
	    error ("%Klane index must be a constant immediate", exp);
	}
      else
	error ("%Ktotal size and element size must be a non-zero constant immediate", exp);
      /* Don't generate any RTL.  */
      return const0_rtx;
    }
  aarch64_simd_builtin_datum *d =
		&aarch64_simd_builtin_data[fcode - AARCH64_SIMD_PATTERN_START];
  enum insn_code icode = d->code;
  builtin_simd_arg args[SIMD_MAX_BUILTIN_ARGS + 1];
  int num_args = insn_data[d->code].n_operands;
  int is_void = 0;
  int k;

  is_void = !!(d->qualifiers[0] & qualifier_void);

  num_args += is_void;

  for (k = 1; k < num_args; k++)
    {
      /* We have four arrays of data, each indexed in a different fashion.
	 qualifiers - element 0 always describes the function return type.
	 operands - element 0 is either the operand for return value (if
	   the function has a non-void return type) or the operand for the
	   first argument.
	 expr_args - element 0 always holds the first argument.
	 args - element 0 is always used for the return type.  */
      int qualifiers_k = k;
      int operands_k = k - is_void;
      int expr_args_k = k - 1;

      if (d->qualifiers[qualifiers_k] & qualifier_lane_index)
        args[k] = SIMD_ARG_LANE_INDEX;
      else if (d->qualifiers[qualifiers_k] & qualifier_lane_pair_index)
        args[k] = SIMD_ARG_LANE_PAIR_INDEX;
      else if (d->qualifiers[qualifiers_k] & qualifier_lane_quadtup_index)
        args[k] = SIMD_ARG_LANE_QUADTUP_INDEX;
      else if (d->qualifiers[qualifiers_k] & qualifier_struct_load_store_lane_index)
        args[k] = SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX;
      else if (d->qualifiers[qualifiers_k] & qualifier_immediate)
        args[k] = SIMD_ARG_CONSTANT;
      else if (d->qualifiers[qualifiers_k] & qualifier_maybe_immediate)
        {
          rtx arg
            = expand_normal (CALL_EXPR_ARG (exp,
                                            (expr_args_k)));
          /* Handle constants only if the predicate allows it.  */
          bool op_const_int_p =
            (CONST_INT_P (arg)
             && (*insn_data[icode].operand[operands_k].predicate)
                (arg, insn_data[icode].operand[operands_k].mode));
          args[k] = op_const_int_p ? SIMD_ARG_CONSTANT : SIMD_ARG_COPY_TO_REG;
        }
      else
        args[k] = SIMD_ARG_COPY_TO_REG;

    }
  args[k] = SIMD_ARG_STOP;

  /* The interface to aarch64_simd_expand_args expects a 0 if
     the function is void, and a 1 if it is not.  */
  return aarch64_simd_expand_args
          (target, icode, !is_void, exp, &args[1], d->mode);
}

/* Expand a call EXP to the CRC32 builtin with code FCODE, putting the
   result in TARGET if that is convenient.  */
rtx
aarch64_crc32_expand_builtin (int fcode, tree exp, rtx target)
{
  rtx pat;
  aarch64_crc_builtin_datum *d
    = &aarch64_crc_builtin_data[fcode - (AARCH64_CRC32_BUILTIN_BASE + 1)];
  enum insn_code icode = d->icode;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  machine_mode tmode = insn_data[icode].operand[0].mode;
  machine_mode mode0 = insn_data[icode].operand[1].mode;
  machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
              && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (!pat)
    return NULL_RTX;

  emit_insn (pat);
  return target;
}
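
/* A sketch of how the CRC32 builtins are reached from user code (via the
   arm_acle.h intrinsics), assuming the usual intrinsic spelling:

     uint32_t acc2 = __crc32b (acc, byte);   maps to __builtin_aarch64_crc32b

   Each such call is expanded by aarch64_crc32_expand_builtin above into the
   matching CRC32 instruction pattern, forcing the operands into registers
   when the pattern's predicates require it.  */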

/* Function to expand reciprocal square root builtins.  */

static rtx
aarch64_expand_builtin_rsqrt (int fcode, tree exp, rtx target)
{
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op0 = expand_normal (arg0);

  rtx (*gen) (rtx, rtx);

  switch (fcode)
    {
    case AARCH64_BUILTIN_RSQRT_DF:
      gen = gen_rsqrtdf2;
      break;
    case AARCH64_BUILTIN_RSQRT_SF:
      gen = gen_rsqrtsf2;
      break;
    case AARCH64_BUILTIN_RSQRT_V2DF:
      gen = gen_rsqrtv2df2;
      break;
    case AARCH64_BUILTIN_RSQRT_V2SF:
      gen = gen_rsqrtv2sf2;
      break;
    case AARCH64_BUILTIN_RSQRT_V4SF:
      gen = gen_rsqrtv4sf2;
      break;
    default: gcc_unreachable ();
    }

  if (!target)
    target = gen_reg_rtx (GET_MODE (op0));

  emit_insn (gen (target, op0));

  return target;
}
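
/* The rsqrt builtins above simply expand to the rsqrt<mode>2 patterns named
   by the gen_* functions in the switch; see aarch64_general_builtin_rsqrt
   further down for how the scalar and vector sqrt builtins are mapped onto
   them.  */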

/* Expand a FCMLA lane expression EXP with code FCODE and
   result going to TARGET if that is convenient.  */

rtx
aarch64_expand_fcmla_builtin (tree exp, rtx target, int fcode)
{
  int bcode = fcode - AARCH64_SIMD_FCMLA_LANEQ_BUILTIN_BASE - 1;
  aarch64_fcmla_laneq_builtin_datum* d
    = &aarch64_fcmla_lane_builtin_data[bcode];
  machine_mode quadmode = GET_MODE_2XWIDER_MODE (d->mode).require ();
  rtx op0 = force_reg (d->mode, expand_normal (CALL_EXPR_ARG (exp, 0)));
  rtx op1 = force_reg (d->mode, expand_normal (CALL_EXPR_ARG (exp, 1)));
  rtx op2 = force_reg (quadmode, expand_normal (CALL_EXPR_ARG (exp, 2)));
  tree tmp = CALL_EXPR_ARG (exp, 3);
  rtx lane_idx = expand_expr (tmp, NULL_RTX, VOIDmode, EXPAND_INITIALIZER);

  /* Validate that the lane index is a constant.  */
  if (!CONST_INT_P (lane_idx))
    {
      error ("%Kargument %d must be a constant immediate", exp, 4);
      return const0_rtx;
    }

  /* Validate that the index is within the expected range.  */
  int nunits = GET_MODE_NUNITS (quadmode).to_constant ();
  aarch64_simd_lane_bounds (lane_idx, 0, nunits / 2, exp);

  /* Generate the correct register and mode.  */
  int lane = INTVAL (lane_idx);

  if (lane < nunits / 4)
    op2 = simplify_gen_subreg (d->mode, op2, quadmode,
                               subreg_lowpart_offset (d->mode, quadmode));
  else
    {
      /* Select the upper 64 bits, either a V2SF or V4HF.  This is quite
         messy: although the operation is simple, it does not have a simple
         RTL pattern and seems hard to define with a single RTL pattern.
         The target-generic gen_highpart_mode generates code that isn't
         optimal.  */
      rtx temp1 = gen_reg_rtx (d->mode);
      rtx temp2 = gen_reg_rtx (DImode);
      temp1 = simplify_gen_subreg (d->mode, op2, quadmode,
                                   subreg_lowpart_offset (d->mode, quadmode));
      temp1 = simplify_gen_subreg (V2DImode, temp1, d->mode, 0);
      if (BYTES_BIG_ENDIAN)
        emit_insn (gen_aarch64_get_lanev2di (temp2, temp1, const0_rtx));
      else
        emit_insn (gen_aarch64_get_lanev2di (temp2, temp1, const1_rtx));
      op2 = simplify_gen_subreg (d->mode, temp2, GET_MODE (temp2), 0);

      /* And recalculate the index.  */
      lane -= nunits / 4;
    }

  /* Keep to GCC-vector-extension lane indices in the RTL.  Only nunits / 4
     lanes (the maximum in the range check above) are valid, which means
     only 0-1, so we only need to know the order in a V2 mode.  */
  lane_idx = aarch64_endian_lane_rtx (V2DImode, lane);

  if (!target)
    target = gen_reg_rtx (d->mode);
  else
    target = force_reg (d->mode, target);

  rtx pat = NULL_RTX;

  if (d->lane)
    pat = GEN_FCN (d->icode) (target, op0, op1, op2, lane_idx);
  else
    pat = GEN_FCN (d->icode) (target, op0, op1, op2);

  if (!pat)
    return NULL_RTX;

  emit_insn (pat);
  return target;
}

/* Function to expand an expression EXP which calls one of the Transactional
   Memory Extension (TME) builtins FCODE with the result going to TARGET.  */
static rtx
aarch64_expand_builtin_tme (int fcode, tree exp, rtx target)
{
  switch (fcode)
    {
    case AARCH64_TME_BUILTIN_TSTART:
      target = gen_reg_rtx (DImode);
      emit_insn (GEN_FCN (CODE_FOR_tstart) (target));
      break;

    case AARCH64_TME_BUILTIN_TTEST:
      target = gen_reg_rtx (DImode);
      emit_insn (GEN_FCN (CODE_FOR_ttest) (target));
      break;

    case AARCH64_TME_BUILTIN_TCOMMIT:
      emit_insn (GEN_FCN (CODE_FOR_tcommit) ());
      break;

    case AARCH64_TME_BUILTIN_TCANCEL:
      {
        tree arg0 = CALL_EXPR_ARG (exp, 0);
        rtx op0 = expand_normal (arg0);
        if (CONST_INT_P (op0) && UINTVAL (op0) <= 65535)
          emit_insn (GEN_FCN (CODE_FOR_tcancel) (op0));
        else
          {
            error ("%Kargument must be a 16-bit constant immediate", exp);
            return const0_rtx;
          }
      }
      break;

    default:
      gcc_unreachable ();
    }
  return target;
}

/* Expand a random number builtin EXP with code FCODE, putting the result
   into TARGET.  If IGNORE is true the return value is ignored.  */

rtx
aarch64_expand_rng_builtin (tree exp, rtx target, int fcode, int ignore)
{
  rtx pat;
  enum insn_code icode;
  if (fcode == AARCH64_BUILTIN_RNG_RNDR)
    icode = CODE_FOR_aarch64_rndr;
  else if (fcode == AARCH64_BUILTIN_RNG_RNDRRS)
    icode = CODE_FOR_aarch64_rndrrs;
  else
    gcc_unreachable ();

  rtx rand = gen_reg_rtx (DImode);
  pat = GEN_FCN (icode) (rand);
  if (!pat)
    return NULL_RTX;

  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx res_addr = expand_normal (arg0);
  res_addr = convert_memory_address (Pmode, res_addr);
  rtx res_mem = gen_rtx_MEM (DImode, res_addr);
  emit_insn (pat);
  emit_move_insn (res_mem, rand);
  /* If the status result is unused don't generate the CSET code.  */
  if (ignore)
    return target;

  rtx cc_reg = gen_rtx_REG (CC_Zmode, CC_REGNUM);
  rtx cmp_rtx = gen_rtx_fmt_ee (EQ, SImode, cc_reg, const0_rtx);
  emit_insn (gen_aarch64_cstoresi (target, cmp_rtx, cc_reg));
  return target;
}
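
/* For instance, a call such as __builtin_aarch64_rndr (&val) stores the
   generated random number through the pointer argument and, unless the
   result is ignored, materialises the status flags set by the RNDR
   instruction into the integer return value via the CSET sequence above.  */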

/* Expand an expression EXP that calls a MEMTAG built-in FCODE
   with result going to TARGET.  */
static rtx
aarch64_expand_builtin_memtag (int fcode, tree exp, rtx target)
{
  if (TARGET_ILP32)
    {
      error ("Memory Tagging Extension does not support %<-mabi=ilp32%>");
      return const0_rtx;
    }

  rtx pat = NULL;
  enum insn_code icode = aarch64_memtag_builtin_data[fcode -
                           AARCH64_MEMTAG_BUILTIN_START - 1].icode;

  rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
  machine_mode mode0 = GET_MODE (op0);
  op0 = force_reg (mode0 == VOIDmode ? DImode : mode0, op0);
  op0 = convert_to_mode (DImode, op0, true);

  switch (fcode)
    {
    case AARCH64_MEMTAG_BUILTIN_IRG:
    case AARCH64_MEMTAG_BUILTIN_GMI:
    case AARCH64_MEMTAG_BUILTIN_SUBP:
    case AARCH64_MEMTAG_BUILTIN_INC_TAG:
      {
        if (! target
            || GET_MODE (target) != DImode
            || ! (*insn_data[icode].operand[0].predicate) (target, DImode))
          target = gen_reg_rtx (DImode);

        if (fcode == AARCH64_MEMTAG_BUILTIN_INC_TAG)
          {
            rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));

            if ((*insn_data[icode].operand[3].predicate) (op1, QImode))
              {
                pat = GEN_FCN (icode) (target, op0, const0_rtx, op1);
                break;
              }
            error ("%Kargument %d must be a constant immediate "
                   "in range [0,15]", exp, 2);
            return const0_rtx;
          }
        else
          {
            rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));
            machine_mode mode1 = GET_MODE (op1);
            op1 = force_reg (mode1 == VOIDmode ? DImode : mode1, op1);
            op1 = convert_to_mode (DImode, op1, true);
            pat = GEN_FCN (icode) (target, op0, op1);
          }
        break;
      }
    case AARCH64_MEMTAG_BUILTIN_GET_TAG:
      target = op0;
      pat = GEN_FCN (icode) (target, op0, const0_rtx);
      break;
    case AARCH64_MEMTAG_BUILTIN_SET_TAG:
      pat = GEN_FCN (icode) (op0, op0, const0_rtx);
      break;
    default:
      gcc_unreachable ();
    }

  if (!pat)
    return NULL_RTX;

  emit_insn (pat);
  return target;
}
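
/* The MEMTAG builtins are normally reached through the MTE intrinsics in
   arm_acle.h; for example __arm_mte_create_random_tag is expected to map to
   __builtin_aarch64_memtag_irg and therefore to the IRG case above.  */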

/* Expand an expression EXP that calls built-in function FCODE,
   with result going to TARGET if that's convenient.  IGNORE is true
   if the result of the builtin is ignored.  */
rtx
aarch64_general_expand_builtin (unsigned int fcode, tree exp, rtx target,
                                int ignore)
{
  int icode;
  rtx pat, op0;
  tree arg0;

  switch (fcode)
    {
    case AARCH64_BUILTIN_GET_FPCR:
    case AARCH64_BUILTIN_SET_FPCR:
    case AARCH64_BUILTIN_GET_FPSR:
    case AARCH64_BUILTIN_SET_FPSR:
      if ((fcode == AARCH64_BUILTIN_GET_FPCR)
          || (fcode == AARCH64_BUILTIN_GET_FPSR))
        {
          icode = (fcode == AARCH64_BUILTIN_GET_FPSR) ?
            CODE_FOR_get_fpsr : CODE_FOR_get_fpcr;
          target = gen_reg_rtx (SImode);
          pat = GEN_FCN (icode) (target);
        }
      else
        {
          target = NULL_RTX;
          icode = (fcode == AARCH64_BUILTIN_SET_FPSR) ?
            CODE_FOR_set_fpsr : CODE_FOR_set_fpcr;
          arg0 = CALL_EXPR_ARG (exp, 0);
          op0 = force_reg (SImode, expand_normal (arg0));
          pat = GEN_FCN (icode) (op0);
        }
      emit_insn (pat);
      return target;

    case AARCH64_PAUTH_BUILTIN_AUTIA1716:
    case AARCH64_PAUTH_BUILTIN_PACIA1716:
    case AARCH64_PAUTH_BUILTIN_AUTIB1716:
    case AARCH64_PAUTH_BUILTIN_PACIB1716:
    case AARCH64_PAUTH_BUILTIN_XPACLRI:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = force_reg (Pmode, expand_normal (arg0));

      if (!target)
        target = gen_reg_rtx (Pmode);
      else
        target = force_reg (Pmode, target);

      emit_move_insn (target, op0);

      if (fcode == AARCH64_PAUTH_BUILTIN_XPACLRI)
        {
          rtx lr = gen_rtx_REG (Pmode, R30_REGNUM);
          icode = CODE_FOR_xpaclri;
          emit_move_insn (lr, op0);
          emit_insn (GEN_FCN (icode) ());
          emit_move_insn (target, lr);
        }
      else
        {
          tree arg1 = CALL_EXPR_ARG (exp, 1);
          rtx op1 = force_reg (Pmode, expand_normal (arg1));
          switch (fcode)
            {
            case AARCH64_PAUTH_BUILTIN_AUTIA1716:
              icode = CODE_FOR_autia1716;
              break;
            case AARCH64_PAUTH_BUILTIN_AUTIB1716:
              icode = CODE_FOR_autib1716;
              break;
            case AARCH64_PAUTH_BUILTIN_PACIA1716:
              icode = CODE_FOR_pacia1716;
              break;
            case AARCH64_PAUTH_BUILTIN_PACIB1716:
              icode = CODE_FOR_pacib1716;
              break;
            default:
              icode = 0;
              gcc_unreachable ();
            }

          rtx x16_reg = gen_rtx_REG (Pmode, R16_REGNUM);
          rtx x17_reg = gen_rtx_REG (Pmode, R17_REGNUM);
          emit_move_insn (x17_reg, op0);
          emit_move_insn (x16_reg, op1);
          emit_insn (GEN_FCN (icode) ());
          emit_move_insn (target, x17_reg);
        }

      return target;

    case AARCH64_JSCVT:
      {
        expand_operand ops[2];
        create_output_operand (&ops[0], target, SImode);
        op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
        create_input_operand (&ops[1], op0, DFmode);
        expand_insn (CODE_FOR_aarch64_fjcvtzs, 2, ops);
        return ops[0].value;
      }

    case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ0_V2SF:
    case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ90_V2SF:
    case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ180_V2SF:
    case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ270_V2SF:
    case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ0_V4HF:
    case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ90_V4HF:
    case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ180_V4HF:
    case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ270_V4HF:
      return aarch64_expand_fcmla_builtin (exp, target, fcode);
    case AARCH64_BUILTIN_RNG_RNDR:
    case AARCH64_BUILTIN_RNG_RNDRRS:
      return aarch64_expand_rng_builtin (exp, target, fcode, ignore);
    }

  if (fcode >= AARCH64_SIMD_BUILTIN_BASE && fcode <= AARCH64_SIMD_BUILTIN_MAX)
    return aarch64_simd_expand_builtin (fcode, exp, target);
  else if (fcode >= AARCH64_CRC32_BUILTIN_BASE
           && fcode <= AARCH64_CRC32_BUILTIN_MAX)
    return aarch64_crc32_expand_builtin (fcode, exp, target);

  if (fcode == AARCH64_BUILTIN_RSQRT_DF
      || fcode == AARCH64_BUILTIN_RSQRT_SF
      || fcode == AARCH64_BUILTIN_RSQRT_V2DF
      || fcode == AARCH64_BUILTIN_RSQRT_V2SF
      || fcode == AARCH64_BUILTIN_RSQRT_V4SF)
    return aarch64_expand_builtin_rsqrt (fcode, exp, target);

  if (fcode == AARCH64_TME_BUILTIN_TSTART
      || fcode == AARCH64_TME_BUILTIN_TCOMMIT
      || fcode == AARCH64_TME_BUILTIN_TTEST
      || fcode == AARCH64_TME_BUILTIN_TCANCEL)
    return aarch64_expand_builtin_tme (fcode, exp, target);

  if (fcode >= AARCH64_MEMTAG_BUILTIN_START
      && fcode <= AARCH64_MEMTAG_BUILTIN_END)
    return aarch64_expand_builtin_memtag (fcode, exp, target);

  gcc_unreachable ();
}
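
/* Return the builtin decl that implements the vectorized version of the
   combined function FN, taking a vector of type TYPE_IN and returning a
   vector of type TYPE_OUT, or NULL_TREE if there is no such builtin.  */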
tree
aarch64_builtin_vectorized_function (unsigned int fn, tree type_out,
                                     tree type_in)
{
  machine_mode in_mode, out_mode;

  if (TREE_CODE (type_out) != VECTOR_TYPE
      || TREE_CODE (type_in) != VECTOR_TYPE)
    return NULL_TREE;

  out_mode = TYPE_MODE (type_out);
  in_mode = TYPE_MODE (type_in);

#undef AARCH64_CHECK_BUILTIN_MODE
#define AARCH64_CHECK_BUILTIN_MODE(C, N) 1
#define AARCH64_FIND_FRINT_VARIANT(N) \
  (AARCH64_CHECK_BUILTIN_MODE (2, D) \
    ? aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_##N##v2df] \
    : (AARCH64_CHECK_BUILTIN_MODE (4, S) \
       ? aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_##N##v4sf] \
       : (AARCH64_CHECK_BUILTIN_MODE (2, S) \
          ? aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_##N##v2sf] \
          : NULL_TREE)))
  switch (fn)
    {
#undef AARCH64_CHECK_BUILTIN_MODE
#define AARCH64_CHECK_BUILTIN_MODE(C, N) \
  (out_mode == V##C##N##Fmode && in_mode == V##C##N##Fmode)
    CASE_CFN_FLOOR:
      return AARCH64_FIND_FRINT_VARIANT (floor);
    CASE_CFN_CEIL:
      return AARCH64_FIND_FRINT_VARIANT (ceil);
    CASE_CFN_TRUNC:
      return AARCH64_FIND_FRINT_VARIANT (btrunc);
    CASE_CFN_ROUND:
      return AARCH64_FIND_FRINT_VARIANT (round);
    CASE_CFN_NEARBYINT:
      return AARCH64_FIND_FRINT_VARIANT (nearbyint);
    CASE_CFN_SQRT:
      return AARCH64_FIND_FRINT_VARIANT (sqrt);
#undef AARCH64_CHECK_BUILTIN_MODE
#define AARCH64_CHECK_BUILTIN_MODE(C, N) \
  (out_mode == V##C##SImode && in_mode == V##C##N##Imode)
    CASE_CFN_CLZ:
      {
        if (AARCH64_CHECK_BUILTIN_MODE (4, S))
          return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_clzv4si];
        return NULL_TREE;
      }
    CASE_CFN_CTZ:
      {
        if (AARCH64_CHECK_BUILTIN_MODE (2, S))
          return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_ctzv2si];
        else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
          return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_ctzv4si];
        return NULL_TREE;
      }
#undef AARCH64_CHECK_BUILTIN_MODE
#define AARCH64_CHECK_BUILTIN_MODE(C, N) \
  (out_mode == V##C##N##Imode && in_mode == V##C##N##Fmode)
    CASE_CFN_IFLOOR:
    CASE_CFN_LFLOOR:
    CASE_CFN_LLFLOOR:
      {
        enum aarch64_builtins builtin;
        if (AARCH64_CHECK_BUILTIN_MODE (2, D))
          builtin = AARCH64_SIMD_BUILTIN_UNOP_lfloorv2dfv2di;
        else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
          builtin = AARCH64_SIMD_BUILTIN_UNOP_lfloorv4sfv4si;
        else if (AARCH64_CHECK_BUILTIN_MODE (2, S))
          builtin = AARCH64_SIMD_BUILTIN_UNOP_lfloorv2sfv2si;
        else
          return NULL_TREE;

        return aarch64_builtin_decls[builtin];
      }
    CASE_CFN_ICEIL:
    CASE_CFN_LCEIL:
    CASE_CFN_LLCEIL:
      {
        enum aarch64_builtins builtin;
        if (AARCH64_CHECK_BUILTIN_MODE (2, D))
          builtin = AARCH64_SIMD_BUILTIN_UNOP_lceilv2dfv2di;
        else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
          builtin = AARCH64_SIMD_BUILTIN_UNOP_lceilv4sfv4si;
        else if (AARCH64_CHECK_BUILTIN_MODE (2, S))
          builtin = AARCH64_SIMD_BUILTIN_UNOP_lceilv2sfv2si;
        else
          return NULL_TREE;

        return aarch64_builtin_decls[builtin];
      }
    CASE_CFN_IROUND:
    CASE_CFN_LROUND:
    CASE_CFN_LLROUND:
      {
        enum aarch64_builtins builtin;
        if (AARCH64_CHECK_BUILTIN_MODE (2, D))
          builtin = AARCH64_SIMD_BUILTIN_UNOP_lroundv2dfv2di;
        else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
          builtin = AARCH64_SIMD_BUILTIN_UNOP_lroundv4sfv4si;
        else if (AARCH64_CHECK_BUILTIN_MODE (2, S))
          builtin = AARCH64_SIMD_BUILTIN_UNOP_lroundv2sfv2si;
        else
          return NULL_TREE;

        return aarch64_builtin_decls[builtin];
      }
    default:
      return NULL_TREE;
    }

  return NULL_TREE;
}

/* Return builtin for reciprocal square root.  */

tree
aarch64_general_builtin_rsqrt (unsigned int fn)
{
  if (fn == AARCH64_SIMD_BUILTIN_UNOP_sqrtv2df)
    return aarch64_builtin_decls[AARCH64_BUILTIN_RSQRT_V2DF];
  if (fn == AARCH64_SIMD_BUILTIN_UNOP_sqrtv2sf)
    return aarch64_builtin_decls[AARCH64_BUILTIN_RSQRT_V2SF];
  if (fn == AARCH64_SIMD_BUILTIN_UNOP_sqrtv4sf)
    return aarch64_builtin_decls[AARCH64_BUILTIN_RSQRT_V4SF];
  return NULL_TREE;
}

#undef VAR1
#define VAR1(T, N, MAP, A) \
  case AARCH64_SIMD_BUILTIN_##T##_##N##A:

/* Try to fold a call to the built-in function with subcode FCODE.  The
   function is passed the N_ARGS arguments in ARGS and it returns a value
   of type TYPE.  Return the new expression on success and NULL_TREE on
   failure.  */
tree
aarch64_general_fold_builtin (unsigned int fcode, tree type,
                              unsigned int n_args ATTRIBUTE_UNUSED, tree *args)
{
  switch (fcode)
    {
      BUILTIN_VDQF (UNOP, abs, 2)
        return fold_build1 (ABS_EXPR, type, args[0]);
      VAR1 (UNOP, floatv2si, 2, v2sf)
      VAR1 (UNOP, floatv4si, 2, v4sf)
      VAR1 (UNOP, floatv2di, 2, v2df)
        return fold_build1 (FLOAT_EXPR, type, args[0]);
      default:
        break;
    }

  return NULL_TREE;
}

/* Try to fold STMT, given that it's a call to the built-in function with
   subcode FCODE.  Return the new statement on success and null on
   failure.  */
gimple *
aarch64_general_gimple_fold_builtin (unsigned int fcode, gcall *stmt)
{
  gimple *new_stmt = NULL;
  unsigned nargs = gimple_call_num_args (stmt);
  tree *args = (nargs > 0
                ? gimple_call_arg_ptr (stmt, 0)
                : &error_mark_node);

  /* We use gimple's IFN_REDUC_(PLUS|MIN|MAX)s for float, signed int
     and unsigned int; it will distinguish according to the types of
     the arguments to the __builtin.  */
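  /* For example, a call to __builtin_aarch64_reduc_plus_scal_v4si should be
     replaced by a call to the internal function IFN_REDUC_PLUS on the same
     V4SI argument.  */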
  switch (fcode)
    {
      BUILTIN_VALL (UNOP, reduc_plus_scal_, 10)
        new_stmt = gimple_build_call_internal (IFN_REDUC_PLUS,
                                               1, args[0]);
        gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
        break;
      BUILTIN_VDQIF (UNOP, reduc_smax_scal_, 10)
      BUILTIN_VDQ_BHSI (UNOPU, reduc_umax_scal_, 10)
        new_stmt = gimple_build_call_internal (IFN_REDUC_MAX,
                                               1, args[0]);
        gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
        break;
      BUILTIN_VDQIF (UNOP, reduc_smin_scal_, 10)
      BUILTIN_VDQ_BHSI (UNOPU, reduc_umin_scal_, 10)
        new_stmt = gimple_build_call_internal (IFN_REDUC_MIN,
                                               1, args[0]);
        gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
        break;
      BUILTIN_GPF (BINOP, fmulx, 0)
        {
          gcc_assert (nargs == 2);
          bool a0_cst_p = TREE_CODE (args[0]) == REAL_CST;
          bool a1_cst_p = TREE_CODE (args[1]) == REAL_CST;
          if (a0_cst_p || a1_cst_p)
            {
              if (a0_cst_p && a1_cst_p)
                {
                  tree t0 = TREE_TYPE (args[0]);
                  real_value a0 = (TREE_REAL_CST (args[0]));
                  real_value a1 = (TREE_REAL_CST (args[1]));
                  if (real_equal (&a1, &dconst0))
                    std::swap (a0, a1);
                  /* According to real_equal (), +0 equals -0.  */
                  if (real_equal (&a0, &dconst0) && real_isinf (&a1))
                    {
                      real_value res = dconst2;
                      res.sign = a0.sign ^ a1.sign;
                      new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
                                                      REAL_CST,
                                                      build_real (t0, res));
                    }
                  else
                    new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
                                                    MULT_EXPR,
                                                    args[0], args[1]);
                }
              else /* a0_cst_p ^ a1_cst_p.  */
                {
                  real_value const_part = a0_cst_p
                    ? TREE_REAL_CST (args[0]) : TREE_REAL_CST (args[1]);
                  if (!real_equal (&const_part, &dconst0)
                      && !real_isinf (&const_part))
                    new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
                                                    MULT_EXPR, args[0],
                                                    args[1]);
                }
            }
          if (new_stmt)
            {
              gimple_set_vuse (new_stmt, gimple_vuse (stmt));
              gimple_set_vdef (new_stmt, gimple_vdef (stmt));
            }
          break;
        }
      default:
        break;
    }
  return new_stmt;
}
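
/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV: build the *HOLD, *CLEAR and
   *UPDATE sequences described in the comments below, using the FPCR/FPSR
   builtins to save, mask and restore the floating-point environment.  */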
void
aarch64_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  const unsigned AARCH64_FE_INVALID = 1;
  const unsigned AARCH64_FE_DIVBYZERO = 2;
  const unsigned AARCH64_FE_OVERFLOW = 4;
  const unsigned AARCH64_FE_UNDERFLOW = 8;
  const unsigned AARCH64_FE_INEXACT = 16;
  const unsigned HOST_WIDE_INT AARCH64_FE_ALL_EXCEPT = (AARCH64_FE_INVALID
                                                        | AARCH64_FE_DIVBYZERO
                                                        | AARCH64_FE_OVERFLOW
                                                        | AARCH64_FE_UNDERFLOW
                                                        | AARCH64_FE_INEXACT);
  const unsigned HOST_WIDE_INT AARCH64_FE_EXCEPT_SHIFT = 8;
  tree fenv_cr, fenv_sr, get_fpcr, set_fpcr, mask_cr, mask_sr;
  tree ld_fenv_cr, ld_fenv_sr, masked_fenv_cr, masked_fenv_sr, hold_fnclex_cr;
  tree hold_fnclex_sr, new_fenv_var, reload_fenv, restore_fnenv, get_fpsr, set_fpsr;
  tree update_call, atomic_feraiseexcept, hold_fnclex, masked_fenv, ld_fenv;

  /* Generate the equivalent of:
       unsigned int fenv_cr;
       fenv_cr = __builtin_aarch64_get_fpcr ();

       unsigned int fenv_sr;
       fenv_sr = __builtin_aarch64_get_fpsr ();

     Now set all exceptions to non-stop:
       unsigned int mask_cr
         = ~(AARCH64_FE_ALL_EXCEPT << AARCH64_FE_EXCEPT_SHIFT);
       unsigned int masked_cr;
       masked_cr = fenv_cr & mask_cr;

     And clear all exception flags:
       unsigned int mask_sr = ~AARCH64_FE_ALL_EXCEPT;
       unsigned int masked_sr;
       masked_sr = fenv_sr & mask_sr;

       __builtin_aarch64_set_fpcr (masked_cr);
       __builtin_aarch64_set_fpsr (masked_sr);  */

  fenv_cr = create_tmp_var_raw (unsigned_type_node);
  fenv_sr = create_tmp_var_raw (unsigned_type_node);

  get_fpcr = aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR];
  set_fpcr = aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR];
  get_fpsr = aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR];
  set_fpsr = aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR];

  mask_cr = build_int_cst (unsigned_type_node,
                           ~(AARCH64_FE_ALL_EXCEPT << AARCH64_FE_EXCEPT_SHIFT));
  mask_sr = build_int_cst (unsigned_type_node,
                           ~(AARCH64_FE_ALL_EXCEPT));

  ld_fenv_cr = build4 (TARGET_EXPR, unsigned_type_node,
                       fenv_cr, build_call_expr (get_fpcr, 0),
                       NULL_TREE, NULL_TREE);
  ld_fenv_sr = build4 (TARGET_EXPR, unsigned_type_node,
                       fenv_sr, build_call_expr (get_fpsr, 0),
                       NULL_TREE, NULL_TREE);

  masked_fenv_cr = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_cr, mask_cr);
  masked_fenv_sr = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_sr, mask_sr);

  hold_fnclex_cr = build_call_expr (set_fpcr, 1, masked_fenv_cr);
  hold_fnclex_sr = build_call_expr (set_fpsr, 1, masked_fenv_sr);

  hold_fnclex = build2 (COMPOUND_EXPR, void_type_node, hold_fnclex_cr,
                        hold_fnclex_sr);
  masked_fenv = build2 (COMPOUND_EXPR, void_type_node, masked_fenv_cr,
                        masked_fenv_sr);
  ld_fenv = build2 (COMPOUND_EXPR, void_type_node, ld_fenv_cr, ld_fenv_sr);

  *hold = build2 (COMPOUND_EXPR, void_type_node,
                  build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv),
                  hold_fnclex);

  /* Store the value of masked_fenv to clear the exceptions:
       __builtin_aarch64_set_fpsr (masked_fenv_sr);  */

  *clear = build_call_expr (set_fpsr, 1, masked_fenv_sr);

  /* Generate the equivalent of:
       unsigned int new_fenv_var;
       new_fenv_var = __builtin_aarch64_get_fpsr ();

       __builtin_aarch64_set_fpsr (fenv_sr);

       __atomic_feraiseexcept (new_fenv_var);  */

  new_fenv_var = create_tmp_var_raw (unsigned_type_node);
  reload_fenv = build4 (TARGET_EXPR, unsigned_type_node,
                        new_fenv_var, build_call_expr (get_fpsr, 0),
                        NULL_TREE, NULL_TREE);
  restore_fnenv = build_call_expr (set_fpsr, 1, fenv_sr);
  atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
  update_call = build_call_expr (atomic_feraiseexcept, 1,
                                 fold_convert (integer_type_node, new_fenv_var));
  *update = build2 (COMPOUND_EXPR, void_type_node,
                    build2 (COMPOUND_EXPR, void_type_node,
                            reload_fenv, restore_fnenv), update_call);
}

/* Resolve overloaded MEMTAG built-in functions.  */
#define AARCH64_BUILTIN_SUBCODE(F) \
  (DECL_MD_FUNCTION_CODE (F) >> AARCH64_BUILTIN_SHIFT)

static tree
aarch64_resolve_overloaded_memtag (location_t loc,
                                   tree fndecl, void *pass_params)
{
  vec<tree, va_gc> *params = static_cast<vec<tree, va_gc> *> (pass_params);
  unsigned param_num = params ? params->length() : 0;
  unsigned int fcode = AARCH64_BUILTIN_SUBCODE (fndecl);
  tree inittype = aarch64_memtag_builtin_data[
                    fcode - AARCH64_MEMTAG_BUILTIN_START - 1].ftype;
  unsigned arg_num = list_length (TYPE_ARG_TYPES (inittype)) - 1;

  if (param_num != arg_num)
    {
      TREE_TYPE (fndecl) = inittype;
      return NULL_TREE;
    }
  tree retype = NULL;

  if (fcode == AARCH64_MEMTAG_BUILTIN_SUBP)
    {
      tree t0 = TREE_TYPE ((*params)[0]);
      tree t1 = TREE_TYPE ((*params)[1]);

      if (t0 == error_mark_node || TREE_CODE (t0) != POINTER_TYPE)
        t0 = ptr_type_node;
      if (t1 == error_mark_node || TREE_CODE (t1) != POINTER_TYPE)
        t1 = ptr_type_node;

      if (TYPE_MODE (t0) != DImode)
        warning_at (loc, 1, "expected 64-bit address but argument 1 is %d-bit",
                    (int)tree_to_shwi (DECL_SIZE ((*params)[0])));

      if (TYPE_MODE (t1) != DImode)
        warning_at (loc, 1, "expected 64-bit address but argument 2 is %d-bit",
                    (int)tree_to_shwi (DECL_SIZE ((*params)[1])));

      retype = build_function_type_list (ptrdiff_type_node, t0, t1, NULL);
    }
  else
    {
      tree t0 = TREE_TYPE ((*params)[0]);

      if (t0 == error_mark_node || TREE_CODE (t0) != POINTER_TYPE)
        {
          TREE_TYPE (fndecl) = inittype;
          return NULL_TREE;
        }

      if (TYPE_MODE (t0) != DImode)
        warning_at (loc, 1, "expected 64-bit address but argument 1 is %d-bit",
                    (int)tree_to_shwi (DECL_SIZE ((*params)[0])));

      switch (fcode)
        {
        case AARCH64_MEMTAG_BUILTIN_IRG:
          retype = build_function_type_list (t0, t0, uint64_type_node, NULL);
          break;
        case AARCH64_MEMTAG_BUILTIN_GMI:
          retype = build_function_type_list (uint64_type_node, t0,
                                             uint64_type_node, NULL);
          break;
        case AARCH64_MEMTAG_BUILTIN_INC_TAG:
          retype = build_function_type_list (t0, t0, unsigned_type_node, NULL);
          break;
        case AARCH64_MEMTAG_BUILTIN_SET_TAG:
          retype = build_function_type_list (void_type_node, t0, NULL);
          break;
        case AARCH64_MEMTAG_BUILTIN_GET_TAG:
          retype = build_function_type_list (t0, t0, NULL);
          break;
        default:
          return NULL_TREE;
        }
    }

  if (!retype || retype == error_mark_node)
    TREE_TYPE (fndecl) = inittype;
  else
    TREE_TYPE (fndecl) = retype;

  return NULL_TREE;
}

/* Called from aarch64_resolve_overloaded_builtin in aarch64-c.c.  */
tree
aarch64_resolve_overloaded_builtin_general (location_t loc, tree function,
                                            void *pass_params)
{
  unsigned int fcode = AARCH64_BUILTIN_SUBCODE (function);

  if (fcode >= AARCH64_MEMTAG_BUILTIN_START
      && fcode <= AARCH64_MEMTAG_BUILTIN_END)
    return aarch64_resolve_overloaded_memtag (loc, function, pass_params);

  return NULL_TREE;
}

#undef AARCH64_CHECK_BUILTIN_MODE
#undef AARCH64_FIND_FRINT_VARIANT
#undef CF0
#undef CF1
#undef CF2
#undef CF3
#undef CF4
#undef CF10
#undef VAR1
#undef VAR2
#undef VAR3
#undef VAR4
#undef VAR5
#undef VAR6
#undef VAR7
#undef VAR8
#undef VAR9
#undef VAR10
#undef VAR11

#include "gt-aarch64-builtins.h"