;; Machine description for AArch64 AdvSIMD architecture.
;; Copyright (C) 2011-2019 Free Software Foundation, Inc.
;; Contributed by ARM Ltd.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

;; Vector move expander for all vector modes (including FP16 vectors).
(define_expand "mov<mode>"
  [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
	(match_operand:VALL_F16 1 "general_operand" ""))]
  "TARGET_SIMD"
  "
  /* Force the operand into a register if it is not an
     immediate whose use can be replaced with xzr.
     If the mode is 16 bytes wide, then we will be doing
     a stp in DI mode, so we check the validity of that.
     If the mode is 8 bytes wide, then we will be doing a
     normal str, so the check need not apply.  */
  if (GET_CODE (operands[0]) == MEM
      && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
	   && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
		&& aarch64_mem_pair_operand (operands[0], DImode))
	       || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
    operands[1] = force_reg (<MODE>mode, operands[1]);
  "
)

(define_expand "movmisalign<mode>"
  [(set (match_operand:VALL 0 "nonimmediate_operand" "")
	(match_operand:VALL 1 "general_operand" ""))]
  "TARGET_SIMD"
{
  /* This pattern is not permitted to fail during expansion: if both arguments
     are non-registers (e.g. memory := constant, which can be created by the
     auto-vectorizer), force operand 1 into a register.  */
  if (!register_operand (operands[0], <MODE>mode)
      && !register_operand (operands[1], <MODE>mode))
    operands[1] = force_reg (<MODE>mode, operands[1]);
})

;; Duplicate a scalar (FP register or GP register) into every lane of a
;; vector register.
(define_insn "aarch64_simd_dup<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
	(vec_duplicate:VDQ_I
	  (match_operand:<VEL> 1 "register_operand" "w,?r")))]
  "TARGET_SIMD"
  "@
   dup\\t%0.<Vtype>, %1.<Vetype>[0]
   dup\\t%0.<Vtype>, %<vw>1"
  [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
)

(define_insn "aarch64_simd_dup<mode>"
  [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
	(vec_duplicate:VDQF_F16
	  (match_operand:<VEL> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon_dup<q>")]
)

;; Duplicate one lane of a vector into every lane of the result.  The lane
;; index is corrected for endianness before printing.
(define_insn "aarch64_dup_lane<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
	(vec_duplicate:VALL_F16
	  (vec_select:<VEL>
	    (match_operand:VALL_F16 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")])
          )))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_dup<q>")]
)

;; As above, but the source vector has the opposite width (64 vs 128 bit)
;; from the destination.
(define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
  [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
	(vec_duplicate:VALL_F16_NO_V2Q
	  (vec_select:<VEL>
	    (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")])
          )))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_dup<q>")]
)

;; 64-bit vector move: load, store (including storing zero via xzr),
;; register-register moves in either file, and immediate moves.
(define_insn "*aarch64_simd_mov<VD:mode>"
  [(set (match_operand:VD 0 "nonimmediate_operand"
		"=w, m,  m,  w, ?r, ?w, ?r, w")
	(match_operand:VD 1 "general_operand"
		"m,  Dz, w,  w,  w,  r,  r,  Dn"))]
  "TARGET_SIMD
   && (register_operand (operands[0], <MODE>mode)
       || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
{
   switch (which_alternative)
     {
     case 0: return "ldr\t%d0, %1";
     case 1: return "str\txzr, %0";
     case 2: return "str\t%d1, %0";
     case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
     case 4: return "umov\t%0, %1.d[0]";
     case 5: return "fmov\t%d0, %1";
     case 6: return "mov\t%0, %1";
     case 7:
	return aarch64_output_simd_mov_immediate (operands[1], 64);
     default: gcc_unreachable ();
     }
}
  [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
		     neon_logic<q>, neon_to_gp<q>, f_mcr,\
		     mov_reg, neon_move<q>")]
)

;; 128-bit vector move.  Cross-file and GP<->GP moves need two instructions
;; and are split after reload (see the define_splits below), hence the "#"
;; templates and length 8.
(define_insn "*aarch64_simd_mov<VQ:mode>"
  [(set (match_operand:VQ 0 "nonimmediate_operand"
		"=w, Umn,  m,  w, ?r, ?w, ?r, w")
	(match_operand:VQ 1 "general_operand"
		"m,  Dz, w,  w,  w,  r,  r,  Dn"))]
  "TARGET_SIMD
   && (register_operand (operands[0], <MODE>mode)
       || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
{
  switch (which_alternative)
    {
    case 0:
	return "ldr\t%q0, %1";
    case 1:
	return "stp\txzr, xzr, %0";
    case 2:
	return "str\t%q1, %0";
    case 3:
	return "mov\t%0.<Vbtype>, %1.<Vbtype>";
    case 4:
    case 5:
    case 6:
	return "#";
    case 7:
	return aarch64_output_simd_mov_immediate (operands[1], 128);
    default:
	gcc_unreachable ();
    }
}
  [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
		     neon_logic<q>, multiple, multiple,\
		     multiple, neon_move<q>")
   (set_attr "length" "4,4,4,4,8,8,8,4")]
)

;; When storing lane zero we can use the normal STR and its more permissive
;; addressing modes.

(define_insn "aarch64_store_lane0<mode>"
  [(set (match_operand:<VEL> 0 "memory_operand" "=m")
	(vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
			(parallel [(match_operand 2 "const_int_operand" "n")])))]
  "TARGET_SIMD
   && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
  "str\\t%<Vetype>1, %0"
  [(set_attr "type" "neon_store1_1reg<q>")]
)

;; Load/store-pair patterns for 64-bit vector registers.  The insn condition
;; checks that the second address is exactly the first plus the mode size,
;; i.e. the two memory operands are adjacent.
(define_insn "load_pair<DREG:mode><DREG2:mode>"
  [(set (match_operand:DREG 0 "register_operand" "=w")
	(match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
   (set (match_operand:DREG2 2 "register_operand" "=w")
	(match_operand:DREG2 3 "memory_operand" "m"))]
  "TARGET_SIMD
   && rtx_equal_p (XEXP (operands[3], 0),
		   plus_constant (Pmode,
				  XEXP (operands[1], 0),
				  GET_MODE_SIZE (<DREG:MODE>mode)))"
  "ldp\\t%d0, %d2, %1"
  [(set_attr "type" "neon_ldp")]
)

(define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
  [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump")
	(match_operand:DREG 1 "register_operand" "w"))
   (set (match_operand:DREG2 2 "memory_operand" "=m")
	(match_operand:DREG2 3 "register_operand" "w"))]
  "TARGET_SIMD
   && rtx_equal_p (XEXP (operands[2], 0),
		   plus_constant (Pmode,
				  XEXP (operands[0], 0),
				  GET_MODE_SIZE (<DREG:MODE>mode)))"
  "stp\\t%d1, %d3, %0"
  [(set_attr "type" "neon_stp")]
)

;; Load/store-pair patterns for 128-bit vector registers.
(define_insn "load_pair<VQ:mode><VQ2:mode>"
  [(set (match_operand:VQ 0 "register_operand" "=w")
	(match_operand:VQ 1 "aarch64_mem_pair_operand" "Ump"))
   (set (match_operand:VQ2 2 "register_operand" "=w")
	(match_operand:VQ2 3 "memory_operand" "m"))]
  "TARGET_SIMD
    && rtx_equal_p (XEXP (operands[3], 0),
		    plus_constant (Pmode,
			       XEXP (operands[1], 0),
			       GET_MODE_SIZE (<VQ:MODE>mode)))"
  "ldp\\t%q0, %q2, %1"
  [(set_attr "type" "neon_ldp_q")]
)

(define_insn "vec_store_pair<VQ:mode><VQ2:mode>"
  [(set (match_operand:VQ 0 "aarch64_mem_pair_operand" "=Ump")
	(match_operand:VQ 1 "register_operand" "w"))
   (set (match_operand:VQ2 2 "memory_operand" "=m")
	(match_operand:VQ2 3 "register_operand" "w"))]
  "TARGET_SIMD && rtx_equal_p (XEXP (operands[2], 0),
			       plus_constant (Pmode,
					      XEXP (operands[0], 0),
					      GET_MODE_SIZE (<VQ:MODE>mode)))"
  "stp\\t%q1, %q3, %0"
  [(set_attr "type" "neon_stp_q")]
)


;; Split a 128-bit vector move between two GP registers into a pair of
;; DImode register moves after reload.
(define_split
  [(set (match_operand:VQ 0 "register_operand" "")
      (match_operand:VQ 1 "register_operand" ""))]
  "TARGET_SIMD && reload_completed
   && GP_REGNUM_P (REGNO (operands[0]))
   && GP_REGNUM_P (REGNO (operands[1]))"
  [(const_int 0)]
{
  aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
  DONE;
})

;; Split a 128-bit vector move that crosses between the FP and GP register
;; files after reload.
(define_split
  [(set (match_operand:VQ 0 "register_operand" "")
        (match_operand:VQ 1 "register_operand" ""))]
  "TARGET_SIMD && reload_completed
   && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
       || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
  [(const_int 0)]
{
  aarch64_split_simd_move (operands[0], operands[1]);
  DONE;
})

;; Expand a cross-file 128-bit vector move as two half-width moves:
;; GP -> FP goes via move_lo_quad/move_hi_quad, FP -> GP goes via the
;; mov_from_<mode>low/high extractions below.
(define_expand "@aarch64_split_simd_mov<mode>"
  [(set (match_operand:VQ 0)
	(match_operand:VQ 1))]
  "TARGET_SIMD"
  {
    rtx dst = operands[0];
    rtx src = operands[1];

    if (GP_REGNUM_P (REGNO (src)))
      {
	rtx src_low_part = gen_lowpart (<VHALF>mode, src);
	rtx src_high_part = gen_highpart (<VHALF>mode, src);

	emit_insn
	  (gen_move_lo_quad_<mode> (dst, src_low_part));
	emit_insn
	  (gen_move_hi_quad_<mode> (dst, src_high_part));
      }

    else
      {
	rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
	rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
	rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
	rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);

	emit_insn
	  (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
	emit_insn
	  (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
      }
    DONE;
  }
)

;; Extract the low/high 64-bit half of a vector into a GP register.
(define_insn "aarch64_simd_mov_from_<mode>low"
  [(set (match_operand:<VHALF> 0 "register_operand" "=r")
        (vec_select:<VHALF>
          (match_operand:VQ 1 "register_operand" "w")
          (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
  "TARGET_SIMD && reload_completed"
  "umov\t%0, %1.d[0]"
  [(set_attr "type" "neon_to_gp<q>")
   (set_attr "length" "4")
  ])

(define_insn "aarch64_simd_mov_from_<mode>high"
  [(set (match_operand:<VHALF> 0 "register_operand" "=r")
        (vec_select:<VHALF>
          (match_operand:VQ 1 "register_operand" "w")
          (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
  "TARGET_SIMD && reload_completed"
  "umov\t%0, %1.d[1]"
  [(set_attr "type" "neon_to_gp<q>")
   (set_attr "length" "4")
  ])

;; NOTE: ORN/BIC take the inverted operand second in the assembly syntax,
;; so operand 1 (the NOT-ed operand in the RTL) prints last.
(define_insn "orn<mode>3"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
		(match_operand:VDQ_I 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "bic<mode>3"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
		(match_operand:VDQ_I 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "add<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		  (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_add<q>")]
)

(define_insn "sub<mode>3"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		  (match_operand:VDQ_I 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_sub<q>")]
)

(define_insn "mul<mode>3"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
		    (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mul_<Vetype><q>")]
)

(define_insn "bswap<mode>2"
  [(set (match_operand:VDQHSD 0 "register_operand" "=w")
        (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_rev<q>")]
)

(define_insn "aarch64_rbit<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:VB 1 "register_operand" "w")]
		   UNSPEC_RBIT))]
  "TARGET_SIMD"
  "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_rbit")]
)

;; Count trailing zeros as: byte-swap, bit-reverse each byte (together a
;; full bit-reversal of each element), then count leading zeros.
(define_expand "ctz<mode>2"
  [(set (match_operand:VS 0 "register_operand")
        (ctz:VS (match_operand:VS 1 "register_operand")))]
  "TARGET_SIMD"
  {
     emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
     rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
					     <MODE>mode, 0);
     emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
     emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
     DONE;
  }
)

;; xorsign (x, y): x with the sign of y, computed by masking out y's
;; sign bits and XOR-ing them into x in the integer domain.
(define_expand "xorsign<mode>3"
  [(match_operand:VHSDF 0 "register_operand")
   (match_operand:VHSDF 1 "register_operand")
   (match_operand:VHSDF 2 "register_operand")]
  "TARGET_SIMD"
{

  machine_mode imode = <V_INT_EQUIV>mode;
  rtx v_bitmask = gen_reg_rtx (imode);
  rtx op1x = gen_reg_rtx (imode);
  rtx op2x = gen_reg_rtx (imode);

  rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
  rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);

  int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;

  emit_move_insn (v_bitmask,
		  aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
						     HOST_WIDE_INT_M1U << bits));

  emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
  emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
  emit_move_insn (operands[0],
		  lowpart_subreg (<MODE>mode, op1x, imode));
  DONE;
}
)

;; The fcadd and fcmla patterns are made UNSPEC explicitly because their use
;; has to guarantee that the source vectors are contiguous.  It would be
;; wrong to describe the operation without being able to describe the
;; permute that is also required, but even if that is done the permute
;; would have been created as a LOAD_LANES which means the values in the
;; registers are in the wrong order.
(define_insn "aarch64_fcadd<rot><mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
		       (match_operand:VHSDF 2 "register_operand" "w")]
		       FCADD))]
  "TARGET_COMPLEX"
  "fcadd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>, #<rot>"
  [(set_attr "type" "neon_fcadd")]
)

(define_insn "aarch64_fcmla<rot><mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
		    (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
				   (match_operand:VHSDF 3 "register_operand" "w")]
				   FCMLA)))]
  "TARGET_COMPLEX"
  "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>, #<rot>"
  [(set_attr "type" "neon_fcmla")]
)


(define_insn "aarch64_fcmla_lane<rot><mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
		    (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
				   (match_operand:VHSDF 3 "register_operand" "w")
				   (match_operand:SI 4 "const_int_operand" "n")]
				   FCMLA)))]
  "TARGET_COMPLEX"
{
  operands[4] = aarch64_endian_lane_rtx (<VHALF>mode, INTVAL (operands[4]));
  return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
}
  [(set_attr "type" "neon_fcmla")]
)

(define_insn "aarch64_fcmla_laneq<rot>v4hf"
  [(set (match_operand:V4HF 0 "register_operand" "=w")
	(plus:V4HF (match_operand:V4HF 1 "register_operand" "0")
		   (unspec:V4HF [(match_operand:V4HF 2 "register_operand" "w")
				 (match_operand:V8HF 3 "register_operand" "w")
				 (match_operand:SI 4 "const_int_operand" "n")]
				 FCMLA)))]
  "TARGET_COMPLEX"
{
  operands[4] = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
  return "fcmla\t%0.4h, %2.4h, %3.h[%4], #<rot>";
}
  [(set_attr "type" "neon_fcmla")]
)

(define_insn "aarch64_fcmlaq_lane<rot><mode>"
  [(set (match_operand:VQ_HSF 0 "register_operand" "=w")
	(plus:VQ_HSF (match_operand:VQ_HSF 1 "register_operand" "0")
		     (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "register_operand" "w")
				     (match_operand:<VHALF> 3 "register_operand" "w")
				     (match_operand:SI 4 "const_int_operand" "n")]
				     FCMLA)))]
  "TARGET_COMPLEX"
{
  int nunits = GET_MODE_NUNITS (<VHALF>mode).to_constant ();
  operands[4]
    = gen_int_mode (ENDIAN_LANE_N (nunits / 2, INTVAL (operands[4])), SImode);
  return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
}
  [(set_attr "type" "neon_fcmla")]
)

;; These instructions map to the __builtins for the Dot Product operations.
(define_insn "aarch64_<sur>dot<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
	(plus:VS (match_operand:VS 1 "register_operand" "0")
		(unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
			    (match_operand:<VSI2QI> 3 "register_operand" "w")]
		DOTPROD)))]
  "TARGET_DOTPROD"
  "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
  [(set_attr "type" "neon_dot<q>")]
)

;; These expands map to the Dot Product optab the vectorizer checks for.
;; The auto-vectorizer expects a dot product builtin that also does an
;; accumulation into the provided register.
;; Given the following pattern
;;
;; for (i=0; i<len; i++) {
;;     c = a[i] * b[i];
;;     r += c;
;; }
;; return result;
;;
;; This can be auto-vectorized to
;; r  = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
;;
;; given enough iterations.  However the vectorizer can keep unrolling the loop
;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
;; ...
;;
;; and so the vectorizer provides r, in which the result has to be accumulated.
(define_expand "<sur>dot_prod<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand")
	(plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
			    (match_operand:<VSI2QI> 2 "register_operand")]
		 DOTPROD)
		(match_operand:VS 3 "register_operand")))]
  "TARGET_DOTPROD"
{
  emit_insn (
    gen_aarch64_<sur>dot<vsi2qi> (operands[3], operands[3], operands[1],
				    operands[2]));
  emit_insn (gen_rtx_SET (operands[0], operands[3]));
  DONE;
})

;; These instructions map to the __builtins for the Dot Product
;; indexed operations.
(define_insn "aarch64_<sur>dot_lane<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
	(plus:VS (match_operand:VS 1 "register_operand" "0")
		(unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
			    (match_operand:V8QI 3 "register_operand" "<h_con>")
			    (match_operand:SI 4 "immediate_operand" "i")]
		DOTPROD)))]
  "TARGET_DOTPROD"
  {
    operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
    return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
  }
  [(set_attr "type" "neon_dot<q>")]
)

(define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
	(plus:VS (match_operand:VS 1 "register_operand" "0")
		(unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
			    (match_operand:V16QI 3 "register_operand" "<h_con>")
			    (match_operand:SI 4 "immediate_operand" "i")]
		DOTPROD)))]
  "TARGET_DOTPROD"
  {
    operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
    return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
  }
  [(set_attr "type" "neon_dot<q>")]
)

;; copysign (x, y): select the sign bit from y and the rest from x using
;; a BSL with a sign-bit mask.
(define_expand "copysign<mode>3"
  [(match_operand:VHSDF 0 "register_operand")
   (match_operand:VHSDF 1 "register_operand")
   (match_operand:VHSDF 2 "register_operand")]
  "TARGET_FLOAT && TARGET_SIMD"
{
  rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
  int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;

  emit_move_insn (v_bitmask,
		  aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
						     HOST_WIDE_INT_M1U << bits));
  emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
					 operands[2], operands[1]));
  DONE;
}
)

;; Multiply a vector by a single (duplicated) lane of another vector.
(define_insn "*aarch64_mul3_elt<mode>"
 [(set (match_operand:VMUL 0 "register_operand" "=w")
    (mult:VMUL
      (vec_duplicate:VMUL
	  (vec_select:<VEL>
	    (match_operand:VMUL 1 "register_operand" "<h_con>")
	    (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VMUL 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)

(define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
  [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
     (mult:VMUL_CHANGE_NLANES
       (vec_duplicate:VMUL_CHANGE_NLANES
	  (vec_select:<VEL>
	    (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
	    (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_mul3_elt_from_dup<mode>"
 [(set (match_operand:VMUL 0 "register_operand" "=w")
    (mult:VMUL
      (vec_duplicate:VMUL
	    (match_operand:<VEL> 1 "register_operand" "<h_con>"))
      (match_operand:VMUL 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)

;; Reciprocal square-root estimate/step, used by the approximate-sqrt
;; expansion below.
(define_insn "@aarch64_rsqrte<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
	(unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
		     UNSPEC_RSQRTE))]
  "TARGET_SIMD"
  "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])

(define_insn "@aarch64_rsqrts<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
	(unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
			    (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
	 UNSPEC_RSQRTS))]
  "TARGET_SIMD"
  "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])

(define_expand "rsqrt<mode>2"
  [(set (match_operand:VALLF 0 "register_operand" "=w")
	(unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
		     UNSPEC_RSQRT))]
  "TARGET_SIMD"
{
  aarch64_emit_approx_sqrt (operands[0], operands[1], true);
  DONE;
})

(define_insn "*aarch64_mul3_elt_to_64v2df"
  [(set (match_operand:DF 0 "register_operand" "=w")
     (mult:DF
       (vec_select:DF
	 (match_operand:V2DF 1 "register_operand" "w")
	 (parallel [(match_operand:SI 2 "immediate_operand")]))
       (match_operand:DF 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
    return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
  }
  [(set_attr "type" "neon_fp_mul_d_scalar_q")]
)

(define_insn "neg<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
	(neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "neg\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_neg<q>")]
)

(define_insn "abs<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "abs\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_abs<q>")]
)

;; The intrinsic version of integer ABS must not be allowed to
;; combine with any operation with an integrated ABS step, such
;; as SABD.
(define_insn "aarch64_abs<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
	  (unspec:VSDQ_I_DI
	    [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
	   UNSPEC_ABS))]
  "TARGET_SIMD"
  "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_abs<q>")]
)

;; It's tempting to represent SABD as ABS (MINUS op1 op2).
;; This isn't accurate as ABS always treats its input as a signed value.
;; So (ABS:QI (minus:QI 64 -128)) == (ABS:QI (192 or -64 signed)) == 64.
;; Whereas SABD would return 192 (-64 signed) on the above example.
;; Use MINUS ([us]max (op1, op2), [us]min (op1, op2)) instead.
(define_insn "*aarch64_<su>abd<mode>_3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(minus:VDQ_BHSI
	  (USMAX:VDQ_BHSI
	    (match_operand:VDQ_BHSI 1 "register_operand" "w")
	    (match_operand:VDQ_BHSI 2 "register_operand" "w"))
	  (match_operator 3 "aarch64_<max_opp>"
	    [(match_dup 1)
	     (match_dup 2)])))]
  "TARGET_SIMD"
  "<su>abd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_abd<q>")]
)

;; Widening absolute difference of the high halves (ABDL2).
(define_insn "aarch64_<sur>abdl2<mode>_3"
  [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
	(unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
			  (match_operand:VDQV_S 2 "register_operand" "w")]
	ABDL2))]
  "TARGET_SIMD"
  "<sur>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_abd<q>")]
)

;; Widening absolute difference and accumulate (ABAL).
(define_insn "aarch64_<sur>abal<mode>_4"
  [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
	(unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
			  (match_operand:VDQV_S 2 "register_operand" "w")
			  (match_operand:<VDBLW> 3 "register_operand" "0")]
	ABAL))]
  "TARGET_SIMD"
  "<sur>abal\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_arith_acc<q>")]
)

;; Pairwise widening add and accumulate (ADALP).
(define_insn "aarch64_<sur>adalp<mode>_3"
  [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
	(unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
			  (match_operand:<VDBLW> 2 "register_operand" "0")]
	ADALP))]
  "TARGET_SIMD"
  "<sur>adalp\t%0.<Vwtype>, %1.<Vtype>"
  [(set_attr "type" "neon_reduc_add<q>")]
)

;; Emit a sequence to produce a sum-of-absolute-differences of the V16QI
;; inputs in operands 1 and 2.  The sequence also has to perform a widening
;; reduction of the difference into a V4SI vector and accumulate that into
;; operand 3 before copying that into the result operand 0.
;; Perform that with a sequence of:
;; UABDL2	tmp.8h, op1.16b, op2.16b
;; UABAL	tmp.8h, op1.16b, op2.16b
;; UADALP	op3.4s, tmp.8h
;; MOV		op0, op3 // should be eliminated in later passes.
;; The signed version just uses the signed variants of the above instructions.

(define_expand "<sur>sadv16qi"
  [(use (match_operand:V4SI 0 "register_operand"))
   (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
		  (use (match_operand:V16QI 2 "register_operand"))] ABAL)
   (use (match_operand:V4SI 3 "register_operand"))]
  "TARGET_SIMD"
  {
    rtx reduc = gen_reg_rtx (V8HImode);
    emit_insn (gen_aarch64_<sur>abdl2v16qi_3 (reduc, operands[1],
					       operands[2]));
    emit_insn (gen_aarch64_<sur>abalv16qi_4 (reduc, operands[1],
					      operands[2], reduc));
    emit_insn (gen_aarch64_<sur>adalpv8hi_3 (operands[3], reduc,
					      operands[3]));
    emit_move_insn (operands[0], operands[3]);
    DONE;
  }
)

;; Signed absolute difference and accumulate (SABA).
(define_insn "aba<mode>_3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
			 (match_operand:VDQ_BHSI 1 "register_operand" "w")
			 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
		       (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_arith_acc<q>")]
)

(define_insn "fabd<mode>3"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
	(abs:VHSDF_HSDF
	  (minus:VHSDF_HSDF
	    (match_operand:VHSDF_HSDF 1 "register_operand" "w")
	    (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_abd_<stype><q>")]
)

;; For AND (vector, register) and BIC (vector, immediate)
(define_insn "and<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
	(and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
		   (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
  "TARGET_SIMD"
  {
    switch (which_alternative)
      {
      case 0:
	return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
      case 1:
	return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
						  AARCH64_CHECK_BIC);
      default:
	gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_logic<q>")]
)

;; For ORR (vector, register) and ORR (vector, immediate)
(define_insn "ior<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
	(ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
		   (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
  "TARGET_SIMD"
  {
    switch (which_alternative)
      {
      case 0:
	return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
      case 1:
	return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
						  AARCH64_CHECK_ORR);
      default:
	gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "xor<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		 (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "one_cmpl<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "not\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

;; Insert a scalar into one lane of a vector.  Operand 2 is a one-hot
;; lane mask (vec_merge form); it is converted to a lane number for the
;; assembly output.
(define_insn "aarch64_simd_vec_set<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
	(vec_merge:VALL_F16
	    (vec_duplicate:VALL_F16
		(match_operand:<VEL> 1 "aarch64_simd_general_operand" "w,?r,Utv"))
	    (match_operand:VALL_F16 3 "register_operand" "0,0,0")
	    (match_operand:SI 2 "immediate_operand" "i,i,i")))]
  "TARGET_SIMD"
  {
   int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
   operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
   switch (which_alternative)
     {
     case 0:
	return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
     case 1:
	return "ins\\t%0.<Vetype>[%p2], %<vwcore>1";
     case 2:
        return "ld1\\t{%0.<Vetype>}[%p2], %1";
     default:
	gcc_unreachable ();
     }
  }
  [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]
)

;; Copy one lane of a vector into a lane of another vector of the same mode.
(define_insn "*aarch64_simd_vec_copy_lane<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
	(vec_merge:VALL_F16
	    (vec_duplicate:VALL_F16
	      (vec_select:<VEL>
		(match_operand:VALL_F16 3 "register_operand" "w")
		(parallel
		  [(match_operand:SI 4 "immediate_operand" "i")])))
	    (match_operand:VALL_F16 1 "register_operand" "0")
	    (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
    operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
    operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));

    return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_ins<q>")]
)

;; As above, but the source vector has the opposite width from the
;; destination.
(define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
  [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
	(vec_merge:VALL_F16_NO_V2Q
	    (vec_duplicate:VALL_F16_NO_V2Q
	      (vec_select:<VEL>
		(match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
		(parallel
		  [(match_operand:SI 4 "immediate_operand" "i")])))
	    (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
	    (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
    operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
    operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
					   INTVAL (operands[4]));

    return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_ins<q>")]
)

;; Immediate-count vector shifts.
(define_insn "aarch64_simd_lshr<mode>"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		     (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
 "TARGET_SIMD"
 "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)

(define_insn "aarch64_simd_ashr<mode>"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		     (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
 "TARGET_SIMD"
 "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)

(define_insn "aarch64_simd_imm_shl<mode>"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		   (match_operand:VDQ_I  2 "aarch64_simd_lshift_imm" "Dl")))]
 "TARGET_SIMD"
  "shl\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)

;; Register-count vector shifts (SSHL/USHL shift left by the signed
;; per-element count; a negative count shifts right).
(define_insn "aarch64_simd_reg_sshl<mode>"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		   (match_operand:VDQ_I 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)

(define_insn "aarch64_simd_reg_shl<mode>_unsigned"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
		    (match_operand:VDQ_I 2 "register_operand" "w")]
		   UNSPEC_ASHIFT_UNSIGNED))]
 "TARGET_SIMD"
 "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)

(define_insn "aarch64_simd_reg_shl<mode>_signed"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
		    (match_operand:VDQ_I 2 "register_operand" "w")]
		   UNSPEC_ASHIFT_SIGNED))]
 "TARGET_SIMD"
 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)

;; Left shift by a scalar amount: use the immediate form when the count is
;; a suitable constant, otherwise broadcast the count and use SSHL.
(define_expand "ashl<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:SI  2 "general_operand" "")]
 "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      if (shift_amount >= 0 && shift_amount < bit_width)
	{
	  rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
						       shift_amount);
	  emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
						     operands[1],
						     tmp));
	  DONE;
	}
      else
	{
	  operands[2] = force_reg (SImode, operands[2]);
	}
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      rtx tmp = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_aarch64_simd_dup<mode> (tmp,
					     convert_to_mode (<VEL>mode,
							      operands[2],
							      0)));
      emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
						  tmp));
      DONE;
    }
  else
    FAIL;
}
)

;; NOTE(review): the following expand is truncated at the end of this chunk;
;; the remainder of its body lies outside the visible source.
(define_expand "lshr<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:SI  2 "general_operand" "")]
 "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      if (shift_amount > 0 && shift_amount <= bit_width)
	{
	  rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
shift_amount); 1057 emit_insn (gen_aarch64_simd_lshr<mode> (operands[0], 1058 operands[1], 1059 tmp)); 1060 DONE; 1061 } 1062 else 1063 operands[2] = force_reg (SImode, operands[2]); 1064 } 1065 else if (MEM_P (operands[2])) 1066 { 1067 operands[2] = force_reg (SImode, operands[2]); 1068 } 1069 1070 if (REG_P (operands[2])) 1071 { 1072 rtx tmp = gen_reg_rtx (SImode); 1073 rtx tmp1 = gen_reg_rtx (<MODE>mode); 1074 emit_insn (gen_negsi2 (tmp, operands[2])); 1075 emit_insn (gen_aarch64_simd_dup<mode> (tmp1, 1076 convert_to_mode (<VEL>mode, 1077 tmp, 0))); 1078 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], 1079 operands[1], 1080 tmp1)); 1081 DONE; 1082 } 1083 else 1084 FAIL; 1085} 1086) 1087 1088(define_expand "ashr<mode>3" 1089 [(match_operand:VDQ_I 0 "register_operand" "") 1090 (match_operand:VDQ_I 1 "register_operand" "") 1091 (match_operand:SI 2 "general_operand" "")] 1092 "TARGET_SIMD" 1093{ 1094 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT; 1095 int shift_amount; 1096 1097 if (CONST_INT_P (operands[2])) 1098 { 1099 shift_amount = INTVAL (operands[2]); 1100 if (shift_amount > 0 && shift_amount <= bit_width) 1101 { 1102 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode, 1103 shift_amount); 1104 emit_insn (gen_aarch64_simd_ashr<mode> (operands[0], 1105 operands[1], 1106 tmp)); 1107 DONE; 1108 } 1109 else 1110 operands[2] = force_reg (SImode, operands[2]); 1111 } 1112 else if (MEM_P (operands[2])) 1113 { 1114 operands[2] = force_reg (SImode, operands[2]); 1115 } 1116 1117 if (REG_P (operands[2])) 1118 { 1119 rtx tmp = gen_reg_rtx (SImode); 1120 rtx tmp1 = gen_reg_rtx (<MODE>mode); 1121 emit_insn (gen_negsi2 (tmp, operands[2])); 1122 emit_insn (gen_aarch64_simd_dup<mode> (tmp1, 1123 convert_to_mode (<VEL>mode, 1124 tmp, 0))); 1125 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], 1126 operands[1], 1127 tmp1)); 1128 DONE; 1129 } 1130 else 1131 FAIL; 1132} 1133) 1134 1135(define_expand "vashl<mode>3" 1136 
[(match_operand:VDQ_I 0 "register_operand" "") 1137 (match_operand:VDQ_I 1 "register_operand" "") 1138 (match_operand:VDQ_I 2 "register_operand" "")] 1139 "TARGET_SIMD" 1140{ 1141 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1], 1142 operands[2])); 1143 DONE; 1144}) 1145 1146;; Using mode VDQ_BHSI as there is no V2DImode neg! 1147;; Negating individual lanes most certainly offsets the 1148;; gain from vectorization. 1149(define_expand "vashr<mode>3" 1150 [(match_operand:VDQ_BHSI 0 "register_operand" "") 1151 (match_operand:VDQ_BHSI 1 "register_operand" "") 1152 (match_operand:VDQ_BHSI 2 "register_operand" "")] 1153 "TARGET_SIMD" 1154{ 1155 rtx neg = gen_reg_rtx (<MODE>mode); 1156 emit (gen_neg<mode>2 (neg, operands[2])); 1157 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1], 1158 neg)); 1159 DONE; 1160}) 1161 1162;; DI vector shift 1163(define_expand "aarch64_ashr_simddi" 1164 [(match_operand:DI 0 "register_operand" "=w") 1165 (match_operand:DI 1 "register_operand" "w") 1166 (match_operand:SI 2 "aarch64_shift_imm64_di" "")] 1167 "TARGET_SIMD" 1168 { 1169 /* An arithmetic shift right by 64 fills the result with copies of the sign 1170 bit, just like asr by 63 - however the standard pattern does not handle 1171 a shift by 64. 
*/ 1172 if (INTVAL (operands[2]) == 64) 1173 operands[2] = GEN_INT (63); 1174 emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2])); 1175 DONE; 1176 } 1177) 1178 1179(define_expand "vlshr<mode>3" 1180 [(match_operand:VDQ_BHSI 0 "register_operand" "") 1181 (match_operand:VDQ_BHSI 1 "register_operand" "") 1182 (match_operand:VDQ_BHSI 2 "register_operand" "")] 1183 "TARGET_SIMD" 1184{ 1185 rtx neg = gen_reg_rtx (<MODE>mode); 1186 emit (gen_neg<mode>2 (neg, operands[2])); 1187 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1], 1188 neg)); 1189 DONE; 1190}) 1191 1192(define_expand "aarch64_lshr_simddi" 1193 [(match_operand:DI 0 "register_operand" "=w") 1194 (match_operand:DI 1 "register_operand" "w") 1195 (match_operand:SI 2 "aarch64_shift_imm64_di" "")] 1196 "TARGET_SIMD" 1197 { 1198 if (INTVAL (operands[2]) == 64) 1199 emit_move_insn (operands[0], const0_rtx); 1200 else 1201 emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2])); 1202 DONE; 1203 } 1204) 1205 1206;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero. 
(define_insn "vec_shr_<mode>"
  [(set (match_operand:VD 0 "register_operand" "=w")
	(unspec:VD [(match_operand:VD 1 "register_operand" "w")
		    (match_operand:SI 2 "immediate_operand" "i")]
		   UNSPEC_VEC_SHR))]
  "TARGET_SIMD"
  {
    /* vec_shr shifts towards element 0 in memory order; on big-endian
       that direction is a left shift of the 64-bit register.  */
    if (BYTES_BIG_ENDIAN)
      return "shl %d0, %d1, %2";
    else
      return "ushr %d0, %d1, %2";
  }
  [(set_attr "type" "neon_shift_imm")]
)

;; Standard vec_set pattern: convert the lane number in operand 2 into
;; the (1 << lane) vec_merge mask expected by aarch64_simd_vec_set.
(define_expand "vec_set<mode>"
  [(match_operand:VALL_F16 0 "register_operand" "+w")
   (match_operand:<VEL> 1 "register_operand" "w")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_SIMD"
  {
    HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
    emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
					       GEN_INT (elem), operands[0]));
    DONE;
  }
)


;; Multiply-accumulate: operand 0 = operand 1 + operand 2 * operand 3.
(define_insn "aarch64_mla<mode>"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (plus:VDQ_BHSI (mult:VDQ_BHSI
			(match_operand:VDQ_BHSI 2 "register_operand" "w")
			(match_operand:VDQ_BHSI 3 "register_operand" "w"))
		      (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
 "TARGET_SIMD"
 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype><q>")]
)

;; MLA by a selected lane of operand 1 (same vector width).
(define_insn "*aarch64_mla_elt<mode>"
 [(set (match_operand:VDQHS 0 "register_operand" "=w")
       (plus:VDQHS
	 (mult:VDQHS
	   (vec_duplicate:VDQHS
	      (vec_select:<VEL>
		(match_operand:VDQHS 1 "register_operand" "<h_con>")
		  (parallel [(match_operand:SI 2 "immediate_operand")])))
	   (match_operand:VDQHS 3 "register_operand" "w"))
	 (match_operand:VDQHS 4 "register_operand" "0")))]
 "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

;; MLA by a lane of a vector with the other (64/128-bit) width.
(define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
 [(set (match_operand:VDQHS 0 "register_operand" "=w")
       (plus:VDQHS
	 (mult:VDQHS
	   (vec_duplicate:VDQHS
	      (vec_select:<VEL>
		(match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
		  (parallel [(match_operand:SI 2 "immediate_operand")])))
	   (match_operand:VDQHS 3 "register_operand" "w"))
	 (match_operand:VDQHS 4 "register_operand" "0")))]
 "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

;; MLA where the multiplier is a duplicated scalar: use lane 0 directly.
(define_insn "*aarch64_mla_elt_merge<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
	(plus:VDQHS
	  (mult:VDQHS (vec_duplicate:VDQHS
		  (match_operand:<VEL> 1 "register_operand" "<h_con>"))
		(match_operand:VDQHS 2 "register_operand" "w"))
	  (match_operand:VDQHS 3 "register_operand" "0")))]
 "TARGET_SIMD"
 "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

;; Multiply-subtract: operand 0 = operand 1 - operand 2 * operand 3.
(define_insn "aarch64_mls<mode>"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
		   (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
			      (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
 "TARGET_SIMD"
 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype><q>")]
)

;; MLS by a selected lane of operand 1 (same vector width).
(define_insn "*aarch64_mls_elt<mode>"
 [(set (match_operand:VDQHS 0 "register_operand" "=w")
       (minus:VDQHS
	 (match_operand:VDQHS 4 "register_operand" "0")
	 (mult:VDQHS
	   (vec_duplicate:VDQHS
	      (vec_select:<VEL>
		(match_operand:VDQHS 1 "register_operand" "<h_con>")
		  (parallel [(match_operand:SI 2 "immediate_operand")])))
	   (match_operand:VDQHS 3 "register_operand" "w"))))]
 "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

;; MLS by a lane of a vector with the other (64/128-bit) width.
(define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
 [(set (match_operand:VDQHS 0 "register_operand" "=w")
       (minus:VDQHS
	 (match_operand:VDQHS 4 "register_operand" "0")
	 (mult:VDQHS
	   (vec_duplicate:VDQHS
	      (vec_select:<VEL>
		(match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
		  (parallel [(match_operand:SI 2 "immediate_operand")])))
	   (match_operand:VDQHS 3 "register_operand" "w"))))]
 "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

;; MLS where the multiplier is a duplicated scalar: use lane 0 directly.
(define_insn "*aarch64_mls_elt_merge<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
	(minus:VDQHS
	  (match_operand:VDQHS 1 "register_operand" "0")
	  (mult:VDQHS (vec_duplicate:VDQHS
		  (match_operand:<VEL> 2 "register_operand" "<h_con>"))
		(match_operand:VDQHS 3 "register_operand" "w"))))]
  "TARGET_SIMD"
  "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

;; Max/Min operations.
;; Signed/unsigned SMAX/SMIN/UMAX/UMIN for byte/half/word element modes.
(define_insn "<su><maxmin><mode>3"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
		    (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)

;; There is no V2DI max/min instruction, so synthesise it from a
;; compare-and-select (vcond).
(define_expand "<su><maxmin>v2di3"
 [(set (match_operand:V2DI 0 "register_operand" "")
       (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
		    (match_operand:V2DI 2 "register_operand" "")))]
 "TARGET_SIMD"
{
  enum rtx_code cmp_operator;
  rtx cmp_fmt;

  /* Pick the comparison whose "true" case selects operand 1.  */
  switch (<CODE>)
    {
    case UMIN:
      cmp_operator = LTU;
      break;
    case SMIN:
      cmp_operator = LT;
      break;
    case UMAX:
      cmp_operator = GTU;
      break;
    case SMAX:
      cmp_operator = GT;
      break;
    default:
      gcc_unreachable ();
    }

  cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
  emit_insn (gen_vcondv2div2di (operands[0], operands[1],
				operands[2], cmp_fmt, operands[1], operands[2]));
  DONE;
})

;; Pairwise Integer Max/Min operations.
(define_insn "aarch64_<maxmin_uns>p<mode>"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
			 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
			MAXMINV))]
 "TARGET_SIMD"
 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)

;; Pairwise FP Max/Min operations.
;; FMAXP/FMINP/FMAXNMP/FMINNMP on half/single/double element vectors.
(define_insn "aarch64_<maxmin_uns>p<mode>"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
		      (match_operand:VHSDF 2 "register_operand" "w")]
		      FMAXMINV))]
 "TARGET_SIMD"
 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)

;; vec_concat gives a new vector with the low elements from operand 1, and
;; the high elements from operand 2.  That is to say, given op1 = { a, b }
;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
;; What that means, is that the RTL descriptions of the below patterns
;; need to change depending on endianness.

;; Move to the low architectural bits of the register.
;; On little-endian this is { operand, zeroes }
;; On big-endian this is { zeroes, operand }

(define_insn "move_lo_quad_internal_<mode>"
  [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
	(vec_concat:VQ_NO2E
	  (match_operand:<VHALF> 1 "register_operand" "w,r,r")
	  (vec_duplicate:<VHALF> (const_int 0))))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "length" "4")
   (set_attr "arch" "simd,fp,simd")]
)

;; As above for the two-element modes, whose zero half is a plain
;; (const_int 0) rather than a vec_duplicate.
(define_insn "move_lo_quad_internal_<mode>"
  [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
	(vec_concat:VQ_2E
	  (match_operand:<VHALF> 1 "register_operand" "w,r,r")
	  (const_int 0)))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "length" "4")
   (set_attr "arch" "simd,fp,simd")]
)

;; Big-endian counterpart: the zero half comes first in the concat.
(define_insn "move_lo_quad_internal_be_<mode>"
  [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
	(vec_concat:VQ_NO2E
	  (vec_duplicate:<VHALF> (const_int 0))
	  (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "length" "4")
   (set_attr "arch" "simd,fp,simd")]
)

;; Big-endian counterpart for the two-element modes.
(define_insn "move_lo_quad_internal_be_<mode>"
  [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
	(vec_concat:VQ_2E
	  (const_int 0)
	  (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "length" "4")
   (set_attr "arch" "simd,fp,simd")]
)

;; Dispatch to the endianness-appropriate internal pattern.
(define_expand "move_lo_quad_<mode>"
  [(match_operand:VQ 0 "register_operand")
   (match_operand:VQ 1 "register_operand")]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
  else
    emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
  DONE;
}
)

;; Move operand1 to the high architectural bits of the register, keeping
;; the low architectural bits of operand2.
;; For little-endian this is { operand2, operand1 }
;; For big-endian this is { operand1, operand2 }

;; Insert operand 1 into the high half of operand 0, preserving the low
;; half (selected by the lo-half parallel in operand 2).
(define_insn "aarch64_simd_move_hi_quad_<mode>"
  [(set (match_operand:VQ 0 "register_operand" "+w,w")
        (vec_concat:VQ
          (vec_select:<VHALF>
                (match_dup 0)
                (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
	  (match_operand:<VHALF> 1 "register_operand" "w,r")))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   ins\\t%0.d[1], %1.d[0]
   ins\\t%0.d[1], %1"
  [(set_attr "type" "neon_ins")]
)

;; Big-endian counterpart: operand 1 is the first half of the concat.
(define_insn "aarch64_simd_move_hi_quad_be_<mode>"
  [(set (match_operand:VQ 0 "register_operand" "+w,w")
        (vec_concat:VQ
	  (match_operand:<VHALF> 1 "register_operand" "w,r")
          (vec_select:<VHALF>
                (match_dup 0)
                (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   ins\\t%0.d[1], %1.d[0]
   ins\\t%0.d[1], %1"
  [(set_attr "type" "neon_ins")]
)

;; Dispatch to the endianness-appropriate internal pattern, building the
;; lo-half selector parallel here.
(define_expand "move_hi_quad_<mode>"
 [(match_operand:VQ 0 "register_operand" "")
  (match_operand:<VHALF> 1 "register_operand" "")]
 "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
		    operands[1], p));
  else
    emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
		    operands[1], p));
  DONE;
})

;; Narrowing operations.

;; For doubles.
;; Narrow each element of a 128-bit vector to half width (XTN).
(define_insn "aarch64_simd_vec_pack_trunc_<mode>"
 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
       (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
 "TARGET_SIMD"
 "xtn\\t%0.<Vntype>, %1.<Vtype>"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

;; Pack two 64-bit vectors into one 128-bit temporary (endianness decides
;; which input goes to which half), then narrow with XTN.
(define_expand "vec_pack_trunc_<mode>"
 [(match_operand:<VNARROWD> 0 "register_operand" "")
  (match_operand:VDN 1 "register_operand" "")
  (match_operand:VDN 2 "register_operand" "")]
 "TARGET_SIMD"
{
  rtx tempreg = gen_reg_rtx (<VDBL>mode);
  int lo = BYTES_BIG_ENDIAN ? 2 : 1;
  int hi = BYTES_BIG_ENDIAN ? 1 : 2;

  emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
  emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
  emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
  DONE;
})

;; For quads.

;; Narrow two 128-bit vectors into one (XTN then XTN2).  The output is
;; earlyclobber (=&w) because the XTN2 reads %0 after XTN wrote it.
(define_insn "vec_pack_trunc_<mode>"
 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
       (vec_concat:<VNARROWQ2>
	 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
	 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
 "TARGET_SIMD"
 {
   /* On big-endian the memory-order halves swap, so the operands swap.  */
   if (BYTES_BIG_ENDIAN)
     return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
   else
     return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
 }
  [(set_attr "type" "multiple")
   (set_attr "length" "8")]
)

;; Widening operations.

;; Sign/zero-extend the low half of a 128-bit vector (SXTL/UXTL).
(define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			       (match_operand:VQW 1 "register_operand" "w")
			       (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
			    )))]
  "TARGET_SIMD"
  "<su>xtl\t%0.<Vwtype>, %1.<Vhalftype>"
  [(set_attr "type" "neon_shift_imm_long")]
)

;; Sign/zero-extend the high half of a 128-bit vector (SXTL2/UXTL2).
(define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			       (match_operand:VQW 1 "register_operand" "w")
			       (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
			    )))]
  "TARGET_SIMD"
  "<su>xtl2\t%0.<Vwtype>, %1.<Vtype>"
  [(set_attr "type" "neon_shift_imm_long")]
)

;; Standard expander: build the hi-half selector and emit the insn above.
(define_expand "vec_unpack<su>_hi_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
    emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
							  operands[1], p));
    DONE;
  }
)

;; Standard expander: build the lo-half selector and emit the insn above.
(define_expand "vec_unpack<su>_lo_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
    emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
							  operands[1], p));
    DONE;
  }
)

;; Widening arithmetic.

;; Widening multiply-accumulate of the low halves (SMLAL/UMLAL).
(define_insn "*aarch64_<su>mlal_lo<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (plus:<VWIDE>
          (mult:<VWIDE>
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 2 "register_operand" "w")
                 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 4 "register_operand" "w")
                 (match_dup 3))))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

;; Widening multiply-accumulate of the high halves (SMLAL2/UMLAL2).
(define_insn "*aarch64_<su>mlal_hi<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (plus:<VWIDE>
          (mult:<VWIDE>
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 2 "register_operand" "w")
                 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 4 "register_operand" "w")
                 (match_dup 3))))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

;; Widening multiply-subtract of the low halves (SMLSL/UMLSL).
(define_insn "*aarch64_<su>mlsl_lo<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (mult:<VWIDE>
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 2 "register_operand" "w")
                 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 4 "register_operand" "w")
                 (match_dup 3))))))]
  "TARGET_SIMD"
  "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

;; Widening multiply-subtract of the high halves (SMLSL2/UMLSL2).
(define_insn "*aarch64_<su>mlsl_hi<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (mult:<VWIDE>
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 2 "register_operand" "w")
                 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 4 "register_operand" "w")
                 (match_dup 3))))))]
  "TARGET_SIMD"
  "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

;; Widening multiply-accumulate of whole 64-bit vectors.
(define_insn "*aarch64_<su>mlal<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (plus:<VWIDE>
          (mult:<VWIDE>
            (ANY_EXTEND:<VWIDE>
              (match_operand:VD_BHSI 1 "register_operand" "w"))
            (ANY_EXTEND:<VWIDE>
              (match_operand:VD_BHSI 2 "register_operand" "w")))
          (match_operand:<VWIDE> 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

;; Widening multiply-subtract of whole 64-bit vectors.
(define_insn "*aarch64_<su>mlsl<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (mult:<VWIDE>
            (ANY_EXTEND:<VWIDE>
              (match_operand:VD_BHSI 2 "register_operand" "w"))
            (ANY_EXTEND:<VWIDE>
              (match_operand:VD_BHSI 3 "register_operand" "w")))))]
  "TARGET_SIMD"
  "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

;; Widening multiply of the low halves (SMULL/UMULL).
(define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
       (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			   (match_operand:VQW 1 "register_operand" "w")
                           (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
		     (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                           (match_operand:VQW 2 "register_operand" "w")
                           (match_dup 3)))))]
  "TARGET_SIMD"
  "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_mul_<Vetype>_long")]
)

;; Standard widening-multiply (low) expander.
(define_expand "vec_widen_<su>mult_lo_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
 "TARGET_SIMD"
 {
   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
   emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
						       operands[1],
						       operands[2], p));
   DONE;
 }
)

;; Widening multiply of the high halves (SMULL2/UMULL2).
(define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
      (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			    (match_operand:VQW 1 "register_operand" "w")
			    (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
		    (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			    (match_operand:VQW 2 "register_operand" "w")
			    (match_dup 3)))))]
  "TARGET_SIMD"
  "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mul_<Vetype>_long")]
)

;; Standard widening-multiply (high) expander.
(define_expand "vec_widen_<su>mult_hi_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
 "TARGET_SIMD"
 {
   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
   emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
						       operands[1],
						       operands[2], p));
   DONE;

 }
)

;; FP vector operations.
;; AArch64 AdvSIMD supports single-precision (32-bit) and
;; double-precision (64-bit) floating-point data types and arithmetic as
;; defined by the IEEE 754-2008 standard.  This makes them vectorizable
;; without the need for -ffast-math or -funsafe-math-optimizations.
;;
;; Floating-point operations can raise an exception.  Vectorizing such
;; operations are safe because of reasons explained below.
;;
;; ARMv8 permits an extension to enable trapped floating-point
;; exception handling, however this is an optional feature.  In the
;; event of a floating-point exception being raised by vectorised
;; code then:
;; 1.  If trapped floating-point exceptions are available, then a trap
;;     will be taken when any lane raises an enabled exception.  A trap
;;     handler may determine which lane raised the exception.
;; 2.  Alternatively a sticky exception flag is set in the
;;     floating-point status register (FPSR).  Software may explicitly
;;     test the exception flags, in which case the tests will either
;;     prevent vectorisation, allowing precise identification of the
;;     failing operation, or if tested outside of vectorisable regions
;;     then the specific operation and lane are not of interest.

;; FP arithmetic operations.

(define_insn "add<mode>3"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		   (match_operand:VHSDF 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_addsub_<stype><q>")]
)

(define_insn "sub<mode>3"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		    (match_operand:VHSDF 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_addsub_<stype><q>")]
)

(define_insn "mul<mode>3"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		   (match_operand:VHSDF 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_mul_<stype><q>")]
)

;; Try an approximate reciprocal division sequence first; otherwise
;; fall through to the *div<mode>3 FDIV insn below.
(define_expand "div<mode>3"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		  (match_operand:VHSDF 2 "register_operand" "w")))]
 "TARGET_SIMD"
{
  if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
    DONE;

  operands[1] = force_reg (<MODE>mode, operands[1]);
})

(define_insn "*div<mode>3"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		  (match_operand:VHSDF 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_div_<stype><q>")]
)

(define_insn "neg<mode>2"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
 "TARGET_SIMD"
 "fneg\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_neg_<stype><q>")]
)

(define_insn "abs<mode>2"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
 "TARGET_SIMD"
 "fabs\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_abs_<stype><q>")]
)

;; Fused multiply-add: operand 0 = operand 1 * operand 2 + operand 3.
(define_insn "fma<mode>4"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		  (match_operand:VHSDF 2 "register_operand" "w")
		  (match_operand:VHSDF 3 "register_operand" "0")))]
  "TARGET_SIMD"
 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_mla_<stype><q>")]
)

;; FMLA by a selected lane of operand 1 (same vector width).
(define_insn "*aarch64_fma4_elt<mode>"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
    (fma:VDQF
      (vec_duplicate:VDQF
	(vec_select:<VEL>
	  (match_operand:VDQF 1 "register_operand" "<h_con>")
	  (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VDQF 3 "register_operand" "w")
      (match_operand:VDQF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)

;; FMLA by a lane of a vector with the other (64/128-bit) width.
(define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
    (fma:VDQSF
      (vec_duplicate:VDQSF
	(vec_select:<VEL>
	  (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
	  (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VDQSF 3 "register_operand" "w")
      (match_operand:VDQSF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)

;; FMLA where the multiplier is a duplicated scalar: use lane 0.
(define_insn "*aarch64_fma4_elt_from_dup<mode>"
  [(set (match_operand:VMUL 0 "register_operand" "=w")
    (fma:VMUL
      (vec_duplicate:VMUL
	  (match_operand:<VEL> 1 "register_operand" "<h_con>"))
      (match_operand:VMUL 2 "register_operand" "w")
      (match_operand:VMUL 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
)

;; Scalar DF FMA taking one multiplicand from a V2DF lane.
(define_insn "*aarch64_fma4_elt_to_64v2df"
  [(set (match_operand:DF 0 "register_operand" "=w")
    (fma:DF
	(vec_select:DF
	  (match_operand:V2DF 1 "register_operand" "w")
	  (parallel [(match_operand:SI 2 "immediate_operand")]))
      (match_operand:DF 3 "register_operand" "w")
      (match_operand:DF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
    return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
  }
  [(set_attr "type" "neon_fp_mla_d_scalar_q")]
)

;; Fused negate-multiply-add (FMLS): operand 0 = operand 3 - op1 * op2.
(define_insn "fnma<mode>4"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(fma:VHSDF
	  (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
	  (match_operand:VHSDF 2 "register_operand" "w")
	  (match_operand:VHSDF 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_mla_<stype><q>")]
)

;; FMLS by a selected lane of operand 1 (same vector width).
(define_insn "*aarch64_fnma4_elt<mode>"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
    (fma:VDQF
      (neg:VDQF
        (match_operand:VDQF 3 "register_operand" "w"))
      (vec_duplicate:VDQF
	(vec_select:<VEL>
	  (match_operand:VDQF 1 "register_operand" "<h_con>")
	  (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VDQF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)

;; FMLS by a lane of a vector with the other (64/128-bit) width.
(define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
    (fma:VDQSF
      (neg:VDQSF
        (match_operand:VDQSF 3 "register_operand" "w"))
      (vec_duplicate:VDQSF
	(vec_select:<VEL>
	  (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
	  (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VDQSF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)

;; FMLS where the multiplier is a duplicated scalar: use lane 0.
;; (Continues beyond this chunk; output template follows.)
(define_insn "*aarch64_fnma4_elt_from_dup<mode>"
  [(set (match_operand:VMUL 0 "register_operand" "=w")
    (fma:VMUL
      (neg:VMUL
        (match_operand:VMUL 2 "register_operand" "w"))
      (vec_duplicate:VMUL
	(match_operand:<VEL> 1 "register_operand" "<h_con>"))
      (match_operand:VMUL 3 "register_operand" "0")))]
  "TARGET_SIMD"
"fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]" 2009 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")] 2010) 2011 2012(define_insn "*aarch64_fnma4_elt_to_64v2df" 2013 [(set (match_operand:DF 0 "register_operand" "=w") 2014 (fma:DF 2015 (vec_select:DF 2016 (match_operand:V2DF 1 "register_operand" "w") 2017 (parallel [(match_operand:SI 2 "immediate_operand")])) 2018 (neg:DF 2019 (match_operand:DF 3 "register_operand" "w")) 2020 (match_operand:DF 4 "register_operand" "0")))] 2021 "TARGET_SIMD" 2022 { 2023 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2])); 2024 return "fmls\\t%0.2d, %3.2d, %1.2d[%2]"; 2025 } 2026 [(set_attr "type" "neon_fp_mla_d_scalar_q")] 2027) 2028 2029;; Vector versions of the floating-point frint patterns. 2030;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn. 2031(define_insn "<frint_pattern><mode>2" 2032 [(set (match_operand:VHSDF 0 "register_operand" "=w") 2033 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")] 2034 FRINT))] 2035 "TARGET_SIMD" 2036 "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>" 2037 [(set_attr "type" "neon_fp_round_<stype><q>")] 2038) 2039 2040;; Vector versions of the fcvt standard patterns. 2041;; Expands to lbtrunc, lround, lceil, lfloor 2042(define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2" 2043 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w") 2044 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET> 2045 [(match_operand:VHSDF 1 "register_operand" "w")] 2046 FCVT)))] 2047 "TARGET_SIMD" 2048 "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>" 2049 [(set_attr "type" "neon_fp_to_int_<stype><q>")] 2050) 2051 2052;; HF Scalar variants of related SIMD instructions. 
(define_insn "l<fcvt_pattern><su_optab>hfhi2"
  [(set (match_operand:HI 0 "register_operand" "=w")
	(FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
		      FCVT)))]
  "TARGET_SIMD_F16INST"
  "fcvt<frint_suffix><su>\t%h0, %h1"
  [(set_attr "type" "neon_fp_to_int_s")]
)

(define_insn "<optab>_trunchfhi2"
  [(set (match_operand:HI 0 "register_operand" "=w")
	(FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
  "TARGET_SIMD_F16INST"
  "fcvtz<su>\t%h0, %h1"
  [(set_attr "type" "neon_fp_to_int_s")]
)

(define_insn "<optab>hihf2"
  [(set (match_operand:HF 0 "register_operand" "=w")
	(FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
  "TARGET_SIMD_F16INST"
  "<su_optab>cvtf\t%h0, %h1"
  [(set_attr "type" "neon_int_to_fp_s")]
)

;; Combine a multiply by a power of two with a float->fixed conversion,
;; using the #fbits immediate form of FCVTZ[SU].
(define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
  [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
	(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
			       [(mult:VDQF
	 (match_operand:VDQF 1 "register_operand" "w")
	 (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
			       UNSPEC_FRINTZ)))]
  "TARGET_SIMD
   && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
		GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
  {
    int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
    char buf[64];
    snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
    output_asm_insn (buf, operands);
    return "";
  }
  [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
)

(define_expand "<optab><VHSDF:mode><fcvt_target>2"
  [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
	(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
			       [(match_operand:VHSDF 1 "register_operand")]
				UNSPEC_FRINTZ)))]
  "TARGET_SIMD"
  {})

(define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
  [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
	(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
			       [(match_operand:VHSDF 1 "register_operand")]
				UNSPEC_FRINTZ)))]
  "TARGET_SIMD"
  {})

(define_expand "ftrunc<VHSDF:mode>2"
  [(set (match_operand:VHSDF 0 "register_operand")
	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
		      UNSPEC_FRINTZ))]
  "TARGET_SIMD"
  {})

(define_insn "<optab><fcvt_target><VHSDF:mode>2"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(FLOATUORS:VHSDF
	  (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_int_to_fp_<stype><q>")]
)

;; Conversions between vectors of floats and doubles.
;; Contains a mix of patterns to match standard pattern names
;; and those for intrinsics.

;; Float widening operations.

(define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(float_extend:<VWIDE> (vec_select:<VHALF>
			       (match_operand:VQ_HSF 1 "register_operand" "w")
			       (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
			      )))]
  "TARGET_SIMD"
  "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
  [(set_attr "type" "neon_fp_cvt_widen_s")]
)

;; Convert between fixed-point and floating-point (vector modes)

(define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
  [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
	(unspec:<VHSDF:FCVT_TARGET>
	  [(match_operand:VHSDF 1 "register_operand" "w")
	   (match_operand:SI 2 "immediate_operand" "i")]
	 FCVT_F2FIXED))]
  "TARGET_SIMD"
  "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
  [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
)

(define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
  [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
	(unspec:<VDQ_HSDI:FCVT_TARGET>
	  [(match_operand:VDQ_HSDI 1 "register_operand" "w")
	   (match_operand:SI 2 "immediate_operand" "i")]
	 FCVT_FIXED2F))]
  "TARGET_SIMD"
  "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
  [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
)

;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
;; is inconsistent with vector ordering elsewhere in the compiler, in that
;; the meaning of HI and LO changes depending on the target endianness.
;; While elsewhere we map the higher numbered elements of a vector to
;; the lower architectural lanes of the vector, for these patterns we want
;; to always treat "hi" as referring to the higher architectural lanes.
;; Consequently, while the patterns below look inconsistent with our
;; other big-endian patterns their behavior is as required.

(define_expand "vec_unpacks_lo_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (match_operand:VQ_HSF 1 "register_operand" "")]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
    emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
						       operands[1], p));
    DONE;
  }
)

(define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(float_extend:<VWIDE> (vec_select:<VHALF>
			       (match_operand:VQ_HSF 1 "register_operand" "w")
			       (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
			      )))]
  "TARGET_SIMD"
  "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_cvt_widen_s")]
)

(define_expand "vec_unpacks_hi_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (match_operand:VQ_HSF 1 "register_operand" "")]
  "TARGET_SIMD"
  {
    /* P selects the high half (built with highpart == true), so it only
       satisfies vect_par_cnst_hi_half.  We must therefore emit the _hi_
       unpack insn here; the previous code wrongly called the _lo_
       generator, whose vect_par_cnst_lo_half predicate cannot match P.  */
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
    emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
						       operands[1], p));
    DONE;
  }
)

(define_insn "aarch64_float_extend_lo_<Vwide>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(float_extend:<VWIDE>
	  (match_operand:VDF 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
  [(set_attr "type" "neon_fp_cvt_widen_s")]
)

;; Float narrowing operations.

(define_insn "aarch64_float_truncate_lo_<mode>"
  [(set (match_operand:VDF 0 "register_operand" "=w")
	(float_truncate:VDF
	  (match_operand:<VWIDE> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)

(define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w")
	(vec_concat:<VDBL>
	  (match_operand:VDF 1 "register_operand" "0")
	  (float_truncate:VDF
	    (match_operand:<VWIDE> 2 "register_operand" "w"))))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)

(define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w")
	(vec_concat:<VDBL>
	  (float_truncate:VDF
	    (match_operand:<VWIDE> 2 "register_operand" "w"))
	  (match_operand:VDF 1 "register_operand" "0")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)

(define_expand "aarch64_float_truncate_hi_<Vdbl>"
  [(match_operand:<VDBL> 0 "register_operand" "=w")
   (match_operand:VDF 1 "register_operand" "0")
   (match_operand:<VWIDE> 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
			     ? gen_aarch64_float_truncate_hi_<Vdbl>_be
			     : gen_aarch64_float_truncate_hi_<Vdbl>_le;
  emit_insn (gen (operands[0], operands[1], operands[2]));
  DONE;
}
)

(define_expand "vec_pack_trunc_v2df"
  [(set (match_operand:V4SF 0 "register_operand")
      (vec_concat:V4SF
	(float_truncate:V2SF
	    (match_operand:V2DF 1 "register_operand"))
	(float_truncate:V2SF
	    (match_operand:V2DF 2 "register_operand"))
	  ))]
  "TARGET_SIMD"
  {
    rtx tmp = gen_reg_rtx (V2SFmode);
    int lo = BYTES_BIG_ENDIAN ? 2 : 1;
    int hi = BYTES_BIG_ENDIAN ? 1 : 2;

    emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
    emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
						   tmp, operands[hi]));
    DONE;
  }
)

(define_expand "vec_pack_trunc_df"
  [(set (match_operand:V2SF 0 "register_operand")
      (vec_concat:V2SF
	(float_truncate:SF
	    (match_operand:DF 1 "register_operand"))
	(float_truncate:SF
	    (match_operand:DF 2 "register_operand"))
	  ))]
  "TARGET_SIMD"
  {
    /* TMP holds the two DF inputs concatenated: it is the destination of
       the move_{lo,hi}_quad_v2df insns and the wide (<VWIDE> == V2DF)
       source of float_truncate_lo_v2sf, so it must be allocated in
       V2DFmode.  The previous code wrongly used V2SFmode here.  */
    rtx tmp = gen_reg_rtx (V2DFmode);
    int lo = BYTES_BIG_ENDIAN ? 2 : 1;
    int hi = BYTES_BIG_ENDIAN ? 1 : 2;

    emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
    emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
    emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
    DONE;
  }
)

;; FP Max/Min
;; Max/Min are introduced by idiom recognition by GCC's mid-end.  An
;; expression like:
;;      a = (b < c) ? b : c;
;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only and
;; -fno-signed-zeros are enabled either explicitly or indirectly via
;; -ffast-math.
;;
;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
;; The 'smax' and 'smin' RTL standard pattern names do not specify which
;; operand will be returned when both operands are zero (i.e. they may not
;; honour signed zeroes), or when either operand is NaN.  Therefore GCC
;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
;; NaNs.

(define_insn "<su><maxmin><mode>3"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		       (match_operand:VHSDF 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_minmax_<stype><q>")]
)

;; Vector forms for fmax, fmin, fmaxnm, fminnm.
;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
;; which implement the IEEE fmax ()/fmin () functions.
(define_insn "<maxmin_uns><mode>3"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
		       (match_operand:VHSDF 2 "register_operand" "w")]
		      FMAXMIN_UNS))]
  "TARGET_SIMD"
  "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_minmax_<stype><q>")]
)

;; 'across lanes' add.
;; Integer sum-across-lanes: reduce into a scratch vector, then extract
;; architectural lane 0 as the scalar result.
(define_expand "reduc_plus_scal_<mode>"
  [(match_operand:<VEL> 0 "register_operand" "=w")
   (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
		 UNSPEC_ADDV)]
  "TARGET_SIMD"
  {
    rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
    emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
    DONE;
  }
)

(define_insn "aarch64_faddp<mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
		       (match_operand:VHSDF 2 "register_operand" "w")]
		      UNSPEC_FADDV))]
  "TARGET_SIMD"
  "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
)

(define_insn "aarch64_reduc_plus_internal<mode>"
  [(set (match_operand:VDQV 0 "register_operand" "=w")
	(unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
		     UNSPEC_ADDV))]
  "TARGET_SIMD"
  "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_reduc_add<q>")]
)

;; V2SI has no ADDV form; a single pairwise add does the reduction.
(define_insn "aarch64_reduc_plus_internalv2si"
  [(set (match_operand:V2SI 0 "register_operand" "=w")
	(unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
		     UNSPEC_ADDV))]
  "TARGET_SIMD"
  "addp\\t%0.2s, %1.2s, %1.2s"
  [(set_attr "type" "neon_reduc_add")]
)

;; Two-element FP vectors reduce with a single scalar FADDP.
(define_insn "reduc_plus_scal_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand" "=w")
	(unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
		      UNSPEC_FADDV))]
  "TARGET_SIMD"
  "faddp\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
)

;; V4SF needs two pairwise additions before extracting lane 0.
(define_expand "reduc_plus_scal_v4sf"
  [(set (match_operand:SF 0 "register_operand")
	(unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
		     UNSPEC_FADDV))]
  "TARGET_SIMD"
{
  rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
  rtx scratch = gen_reg_rtx (V4SFmode);
  emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
  emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
  emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
  DONE;
})

(define_insn "clrsb<mode>2"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "cls\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_cls<q>")]
)

(define_insn "clz<mode>2"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "clz\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_cls<q>")]
)

(define_insn "popcount<mode>2"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(popcount:VB (match_operand:VB 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_cnt<q>")]
)

;; 'across lanes' max and min ops.

;; Template for outputting a scalar, so we can create __builtins which can be
;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function.  (This is FP smax/smin).
(define_expand "reduc_<maxmin_uns>_scal_<mode>"
  [(match_operand:<VEL> 0 "register_operand")
   (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
		 FMAXMINV)]
  "TARGET_SIMD"
  {
    rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
							      operands[1]));
    emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
    DONE;
  }
)

;; Likewise for integer cases, signed and unsigned.
(define_expand "reduc_<maxmin_uns>_scal_<mode>"
  [(match_operand:<VEL> 0 "register_operand")
   (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
		    MAXMINV)]
  "TARGET_SIMD"
  {
    rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
							      operands[1]));
    emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
    DONE;
  }
)

(define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
  [(set (match_operand:VDQV_S 0 "register_operand" "=w")
	(unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
		       MAXMINV))]
  "TARGET_SIMD"
  "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_reduc_minmax<q>")]
)

;; V2SI has no across-lanes form; one pairwise op reduces two lanes.
(define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
  [(set (match_operand:V2SI 0 "register_operand" "=w")
	(unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
		     MAXMINV))]
  "TARGET_SIMD"
  "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
  [(set_attr "type" "neon_reduc_minmax")]
)

(define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
		      FMAXMINV))]
  "TARGET_SIMD"
  "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
)

;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
;; allocation.
;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
;; to select.
;;
;; Thus our BSL is of the form:
;;   op0 = bsl (mask, op2, op3)
;; We can use any of:
;;
;;   if (op0 = mask)
;;     bsl mask, op1, op2
;;   if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
;;     bit op0, op2, mask
;;   if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
;;     bif op0, op1, mask
;;
;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
;; Some forms of straight-line code may generate the equivalent form
;; in *aarch64_simd_bsl<mode>_alt.

(define_insn "aarch64_simd_bsl<mode>_internal"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
	(xor:VDQ_I
	   (and:VDQ_I
	     (xor:VDQ_I
	       (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
	       (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
	     (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
	  (match_dup:<V_INT_EQUIV> 3)
	))]
  "TARGET_SIMD"
  "@
  bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
  bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
  bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_bsl<q>")]
)

;; We need this form in addition to the above pattern to match the case
;; when combine tries merging three insns such that the second operand of
;; the outer XOR matches the second operand of the inner XOR rather than
;; the first.  The two are equivalent but since recog doesn't try all
;; permutations of commutative operations, we have to have a separate pattern.
(define_insn "*aarch64_simd_bsl<mode>_alt"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
	(xor:VDQ_I
	   (and:VDQ_I
	     (xor:VDQ_I
	       (match_operand:VDQ_I 3 "register_operand" "w,w,0")
	       (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
	     (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
	  (match_dup:<V_INT_EQUIV> 2)))]
  "TARGET_SIMD"
  "@
  bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
  bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
  bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_bsl<q>")]
)

;; DImode is special, we want to avoid computing operations which are
;; more naturally computed in general purpose registers in the vector
;; registers.  If we do that, we need to move all three operands from general
;; purpose registers to vector registers, then back again.  However, we
;; don't want to make this pattern an UNSPEC as we'd lose scope for
;; optimizations based on the component operations of a BSL.
;;
;; That means we need a splitter back to the individual operations, if they
;; would be better calculated on the integer side.

(define_insn_and_split "aarch64_simd_bsldi_internal"
  [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
	(xor:DI
	   (and:DI
	     (xor:DI
	       (match_operand:DI 3 "register_operand" "w,0,w,r")
	       (match_operand:DI 2 "register_operand" "w,w,0,r"))
	     (match_operand:DI 1 "register_operand" "0,w,w,r"))
	  (match_dup:DI 3)
	))]
  "TARGET_SIMD"
  "@
  bsl\\t%0.8b, %2.8b, %3.8b
  bit\\t%0.8b, %2.8b, %1.8b
  bif\\t%0.8b, %3.8b, %1.8b
  #"
  "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
  [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
{
  /* Split back to individual operations.  If we're before reload, and
     able to create a temporary register, do so.  If we're after reload,
     we've got an early-clobber destination register, so use that.
     Otherwise, we can't create pseudos and we can't yet guarantee that
     operands[0] is safe to write, so FAIL to split.  */

  rtx scratch;
  if (reload_completed)
    scratch = operands[0];
  else if (can_create_pseudo_p ())
    scratch = gen_reg_rtx (DImode);
  else
    FAIL;

  emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
  emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
  emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
  DONE;
}
  [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
   (set_attr "length" "4,4,4,12")]
)

(define_insn_and_split "aarch64_simd_bsldi_alt"
  [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
	(xor:DI
	   (and:DI
	     (xor:DI
	       (match_operand:DI 3 "register_operand" "w,w,0,r")
	       (match_operand:DI 2 "register_operand" "w,0,w,r"))
	     (match_operand:DI 1 "register_operand" "0,w,w,r"))
	  (match_dup:DI 2)
	))]
  "TARGET_SIMD"
  "@
  bsl\\t%0.8b, %3.8b, %2.8b
  bit\\t%0.8b, %3.8b, %1.8b
  bif\\t%0.8b, %2.8b, %1.8b
  #"
  "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
  [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
{
  /* Split back to individual operations.  If we're before reload, and
     able to create a temporary register, do so.  If we're after reload,
     we've got an early-clobber destination register, so use that.
     Otherwise, we can't create pseudos and we can't yet guarantee that
     operands[0] is safe to write, so FAIL to split.  */

  rtx scratch;
  if (reload_completed)
    scratch = operands[0];
  else if (can_create_pseudo_p ())
    scratch = gen_reg_rtx (DImode);
  else
    FAIL;

  emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
  emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
  emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
  DONE;
}
  [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
   (set_attr "length" "4,4,4,12")]
)

(define_expand "aarch64_simd_bsl<mode>"
  [(match_operand:VALLDIF 0 "register_operand")
   (match_operand:<V_INT_EQUIV> 1 "register_operand")
   (match_operand:VALLDIF 2 "register_operand")
   (match_operand:VALLDIF 3 "register_operand")]
  "TARGET_SIMD"
{
  /* We can't alias operands together if they have different modes.  */
  rtx tmp = operands[0];
  if (FLOAT_MODE_P (<MODE>mode))
    {
      operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
      operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
      tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
    }
  operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
  emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
							 operands[1],
							 operands[2],
							 operands[3]));
  if (tmp != operands[0])
    emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));

  DONE;
})

(define_expand "vcond_mask_<mode><v_int_equiv>"
  [(match_operand:VALLDI 0 "register_operand")
   (match_operand:VALLDI 1 "nonmemory_operand")
   (match_operand:VALLDI 2 "nonmemory_operand")
   (match_operand:<V_INT_EQUIV> 3 "register_operand")]
  "TARGET_SIMD"
{
  /* If we have (a = (P) ? -1 : 0);
     Then we can simply move the generated mask (result must be int).  */
  if (operands[1] == CONSTM1_RTX (<MODE>mode)
      && operands[2] == CONST0_RTX (<MODE>mode))
    emit_move_insn (operands[0], operands[3]);
  /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask.  */
  else if (operands[1] == CONST0_RTX (<MODE>mode)
	   && operands[2] == CONSTM1_RTX (<MODE>mode))
    emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
  else
    {
      if (!REG_P (operands[1]))
	operands[1] = force_reg (<MODE>mode, operands[1]);
      if (!REG_P (operands[2]))
	operands[2] = force_reg (<MODE>mode, operands[2]);
      emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
					     operands[1], operands[2]));
    }

  DONE;
})

;; Patterns comparing two vectors to produce a mask.

(define_expand "vec_cmp<mode><mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand")
	(match_operator 1 "comparison_operator"
	  [(match_operand:VSDQ_I_DI 2 "register_operand")
	   (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
  "TARGET_SIMD"
{
  rtx mask = operands[0];
  enum rtx_code code = GET_CODE (operands[1]);

  /* Comparisons against zero have dedicated instruction forms; anything
     else needs the second operand in a register.  */
  switch (code)
    {
    case NE:
    case LE:
    case LT:
    case GE:
    case GT:
    case EQ:
      if (operands[3] == CONST0_RTX (<MODE>mode))
	break;

      /* Fall through.  */
    default:
      if (!REG_P (operands[3]))
	operands[3] = force_reg (<MODE>mode, operands[3]);

      break;
    }

  switch (code)
    {
    case LT:
      emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
      break;

    case GE:
      emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
      break;

    case LE:
      emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
      break;

    case GT:
      emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
      break;

    case LTU:
      emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
      break;

    case GEU:
      emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
      break;

    case LEU:
      emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
      break;

    case GTU:
      emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
      break;

    case NE:
      /* Handle NE as !EQ.  */
      emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
      emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
      break;

    case EQ:
      emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
      break;

    default:
      gcc_unreachable ();
    }

  DONE;
})

(define_expand "vec_cmp<mode><v_int_equiv>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
	(match_operator 1 "comparison_operator"
	  [(match_operand:VDQF 2 "register_operand")
	   (match_operand:VDQF 3 "nonmemory_operand")]))]
  "TARGET_SIMD"
{
  int use_zero_form = 0;
  enum rtx_code code = GET_CODE (operands[1]);
  rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);

  rtx (*comparison) (rtx, rtx, rtx) = NULL;

  switch (code)
    {
    case LE:
    case LT:
    case GE:
    case GT:
    case EQ:
      if (operands[3] == CONST0_RTX (<MODE>mode))
	{
	  use_zero_form = 1;
	  break;
	}
      /* Fall through.  */
    default:
      if (!REG_P (operands[3]))
	operands[3] = force_reg (<MODE>mode, operands[3]);

      break;
    }

  switch (code)
    {
    case LT:
      if (use_zero_form)
	{
	  comparison = gen_aarch64_cmlt<mode>;
	  break;
	}
      /* Fall through.  */
    case UNLT:
      std::swap (operands[2], operands[3]);
      /* Fall through.  */
    case UNGT:
    case GT:
      comparison = gen_aarch64_cmgt<mode>;
      break;
    case LE:
      if (use_zero_form)
	{
	  comparison = gen_aarch64_cmle<mode>;
	  break;
	}
      /* Fall through.  */
    case UNLE:
      std::swap (operands[2], operands[3]);
      /* Fall through.  */
    case UNGE:
    case GE:
      comparison = gen_aarch64_cmge<mode>;
      break;
    case NE:
    case EQ:
      comparison = gen_aarch64_cmeq<mode>;
      break;
    case UNEQ:
    case ORDERED:
    case UNORDERED:
    case LTGT:
      break;
    default:
      gcc_unreachable ();
    }

  switch (code)
    {
    case UNGE:
    case UNGT:
    case UNLE:
    case UNLT:
      {
	/* All of the above must not raise any FP exceptions.  Thus we first
	   check each operand for NaNs and force any elements containing NaN to
	   zero before using them in the compare.
	   Example: UN<cc> (a, b) -> UNORDERED (a, b) |
				     (cm<cc> (isnan (a) ? 0.0 : a,
					      isnan (b) ? 0.0 : b))
	   We use the following transformations for doing the comparisons:
	   a UNGE b -> a GE b
	   a UNGT b -> a GT b
	   a UNLE b -> b GE a
	   a UNLT b -> b GT a.  */

	rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
	rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
	rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
	emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2]));
	emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3]));
	emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1));
	emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0,
					  lowpart_subreg (<V_INT_EQUIV>mode,
							  operands[2],
							  <MODE>mode)));
	emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1,
					  lowpart_subreg (<V_INT_EQUIV>mode,
							  operands[3],
							  <MODE>mode)));
	gcc_assert (comparison != NULL);
	emit_insn (comparison (operands[0],
			       lowpart_subreg (<MODE>mode,
					       tmp0, <V_INT_EQUIV>mode),
			       lowpart_subreg (<MODE>mode,
					       tmp1, <V_INT_EQUIV>mode)));
	emit_insn (gen_orn<v_int_equiv>3 (operands[0], tmp2, operands[0]));
      }
      break;

    case LT:
    case LE:
    case GT:
    case GE:
    case EQ:
    case NE:
      /* The easy case.  Here we emit one of FCMGE, FCMGT or FCMEQ.
	 As a LT b <=> b GE a && a LE b <=> b GT a.  Our transformations are:
	 a GE b -> a GE b
	 a GT b -> a GT b
	 a LE b -> b GE a
	 a LT b -> b GT a
	 a EQ b -> a EQ b
	 a NE b -> ~(a EQ b)  */
      gcc_assert (comparison != NULL);
      emit_insn (comparison (operands[0], operands[2], operands[3]));
      if (code == NE)
	emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
      break;

    case LTGT:
      /* LTGT is not guaranteed to not generate a FP exception.  So let's
	 go the faster way : ((a > b) || (b > a)).  */
      emit_insn (gen_aarch64_cmgt<mode> (operands[0],
					 operands[2], operands[3]));
      emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
      emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
      break;

    case ORDERED:
    case UNORDERED:
    case UNEQ:
      /* cmeq (a, a) & cmeq (b, b).  */
      emit_insn (gen_aarch64_cmeq<mode> (operands[0],
					 operands[2], operands[2]));
      emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3]));
      emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp));

      if (code == UNORDERED)
	emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
      else if (code == UNEQ)
	{
	  emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3]));
	  emit_insn (gen_orn<v_int_equiv>3 (operands[0], operands[0], tmp));
	}
      break;

    default:
      gcc_unreachable ();
    }

  DONE;
})

;; Unsigned comparisons delegate to the signed expander, which already
;; dispatches on the (unsigned) comparison code in operand 1.
(define_expand "vec_cmpu<mode><mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand")
	(match_operator 1 "comparison_operator"
	  [(match_operand:VSDQ_I_DI 2 "register_operand")
	   (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
  "TARGET_SIMD"
{
  emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
				      operands[2], operands[3]));
  DONE;
})

(define_expand "vcond<mode><mode>"
  [(set (match_operand:VALLDI 0 "register_operand")
	(if_then_else:VALLDI
2982 (match_operator 3 "comparison_operator" 2983 [(match_operand:VALLDI 4 "register_operand") 2984 (match_operand:VALLDI 5 "nonmemory_operand")]) 2985 (match_operand:VALLDI 1 "nonmemory_operand") 2986 (match_operand:VALLDI 2 "nonmemory_operand")))] 2987 "TARGET_SIMD" 2988{ 2989 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode); 2990 enum rtx_code code = GET_CODE (operands[3]); 2991 2992 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert 2993 it as well as switch operands 1/2 in order to avoid the additional 2994 NOT instruction. */ 2995 if (code == NE) 2996 { 2997 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]), 2998 operands[4], operands[5]); 2999 std::swap (operands[1], operands[2]); 3000 } 3001 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3], 3002 operands[4], operands[5])); 3003 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1], 3004 operands[2], mask)); 3005 3006 DONE; 3007}) 3008 3009(define_expand "vcond<v_cmp_mixed><mode>" 3010 [(set (match_operand:<V_cmp_mixed> 0 "register_operand") 3011 (if_then_else:<V_cmp_mixed> 3012 (match_operator 3 "comparison_operator" 3013 [(match_operand:VDQF_COND 4 "register_operand") 3014 (match_operand:VDQF_COND 5 "nonmemory_operand")]) 3015 (match_operand:<V_cmp_mixed> 1 "nonmemory_operand") 3016 (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))] 3017 "TARGET_SIMD" 3018{ 3019 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode); 3020 enum rtx_code code = GET_CODE (operands[3]); 3021 3022 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert 3023 it as well as switch operands 1/2 in order to avoid the additional 3024 NOT instruction. 
*/ 3025 if (code == NE) 3026 { 3027 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]), 3028 operands[4], operands[5]); 3029 std::swap (operands[1], operands[2]); 3030 } 3031 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3], 3032 operands[4], operands[5])); 3033 emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> ( 3034 operands[0], operands[1], 3035 operands[2], mask)); 3036 3037 DONE; 3038}) 3039 3040(define_expand "vcondu<mode><mode>" 3041 [(set (match_operand:VSDQ_I_DI 0 "register_operand") 3042 (if_then_else:VSDQ_I_DI 3043 (match_operator 3 "comparison_operator" 3044 [(match_operand:VSDQ_I_DI 4 "register_operand") 3045 (match_operand:VSDQ_I_DI 5 "nonmemory_operand")]) 3046 (match_operand:VSDQ_I_DI 1 "nonmemory_operand") 3047 (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))] 3048 "TARGET_SIMD" 3049{ 3050 rtx mask = gen_reg_rtx (<MODE>mode); 3051 enum rtx_code code = GET_CODE (operands[3]); 3052 3053 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert 3054 it as well as switch operands 1/2 in order to avoid the additional 3055 NOT instruction. 
*/ 3056 if (code == NE) 3057 { 3058 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]), 3059 operands[4], operands[5]); 3060 std::swap (operands[1], operands[2]); 3061 } 3062 emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3], 3063 operands[4], operands[5])); 3064 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1], 3065 operands[2], mask)); 3066 DONE; 3067}) 3068 3069(define_expand "vcondu<mode><v_cmp_mixed>" 3070 [(set (match_operand:VDQF 0 "register_operand") 3071 (if_then_else:VDQF 3072 (match_operator 3 "comparison_operator" 3073 [(match_operand:<V_cmp_mixed> 4 "register_operand") 3074 (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")]) 3075 (match_operand:VDQF 1 "nonmemory_operand") 3076 (match_operand:VDQF 2 "nonmemory_operand")))] 3077 "TARGET_SIMD" 3078{ 3079 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode); 3080 enum rtx_code code = GET_CODE (operands[3]); 3081 3082 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert 3083 it as well as switch operands 1/2 in order to avoid the additional 3084 NOT instruction. */ 3085 if (code == NE) 3086 { 3087 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]), 3088 operands[4], operands[5]); 3089 std::swap (operands[1], operands[2]); 3090 } 3091 emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> ( 3092 mask, operands[3], 3093 operands[4], operands[5])); 3094 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1], 3095 operands[2], mask)); 3096 DONE; 3097}) 3098 3099;; Patterns for AArch64 SIMD Intrinsics. 3100 3101;; Lane extraction with sign extension to general purpose register. 
;; Lane extraction with sign extension to a general purpose register (smov).
;; Two mode iterators are live here (GPI for the scalar destination, VDQQH
;; for the vector source), so every mode attribute must be explicitly
;; prefixed: an unprefixed <MODE>/<Vetype> is ambiguous between the two
;; iterators, and resolving to GPI would make the endian lane flip use the
;; scalar mode's single lane — wrong lane on big-endian targets.
(define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
  [(set (match_operand:GPI 0 "register_operand" "=r")
	(sign_extend:GPI
	  (vec_select:<VEL>
	    (match_operand:VDQQH 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_SIMD"
  {
    /* Flip the lane number for big-endian using the *vector* mode of
       operand 1, not the GPI destination mode.  */
    operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
					   INTVAL (operands[2]));
    return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
  }
  [(set_attr "type" "neon_to_gp<q>")]
)

;; Lane extraction with zero extension to a general purpose register (umov).
;; Same two-iterator situation as above: the element-type suffix must come
;; from the vector iterator, hence the explicit VDQQH: prefix.
(define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>"
  [(set (match_operand:GPI 0 "register_operand" "=r")
	(zero_extend:GPI
	  (vec_select:<VEL>
	    (match_operand:VDQQH 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
					   INTVAL (operands[2]));
    return "umov\\t%w0, %1.<VDQQH:Vetype>[%2]";
  }
  [(set_attr "type" "neon_to_gp<q>")]
)

;; Lane extraction of a value, neither sign nor zero extension
;; is guaranteed so upper bits should be considered undefined.
;; RTL uses GCC vector extension indices throughout so flip only for assembly.
;; Extract lane 2 of vector operand 1 into operand 0: to a GP register
;; (umov), to a SIMD scalar register (dup), or directly to memory (st1).
;; The lane index follows GCC vector-extension numbering in RTL and is
;; endian-flipped only when emitting assembly.
(define_insn "aarch64_get_lane<mode>"
  [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
	(vec_select:<VEL>
	  (match_operand:VALL_F16 1 "register_operand" "w, w, w")
	  (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    switch (which_alternative)
      {
	case 0:
	  return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
	case 1:
	  return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
	case 2:
	  return "st1\\t{%1.<Vetype>}[%2], %0";
	default:
	  gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
)

;; Merge two adjacent 64-bit loads into a single 128-bit LDR.  The insn
;; condition verifies that operand 2's address is exactly operand 1's
;; address plus the mode size, i.e. the two halves are contiguous.
;; Disabled under STRICT_ALIGNMENT since the wider access needs the
;; wider alignment.
(define_insn "load_pair_lanes<mode>"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w")
	(vec_concat:<VDBL>
	   (match_operand:VDC 1 "memory_operand" "Utq")
	   (match_operand:VDC 2 "memory_operand" "m")))]
  "TARGET_SIMD && !STRICT_ALIGNMENT
   && rtx_equal_p (XEXP (operands[2], 0),
		   plus_constant (Pmode,
				  XEXP (operands[1], 0),
				  GET_MODE_SIZE (<MODE>mode)))"
  "ldr\\t%q0, %1"
  [(set_attr "type" "neon_load1_1reg_q")]
)

;; Store a concatenation of two 64-bit values as one STP, either from
;; SIMD registers or from general registers.
(define_insn "store_pair_lanes<mode>"
  [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Umn, Umn")
	(vec_concat:<VDBL>
	   (match_operand:VDC 1 "register_operand" "w, r")
	   (match_operand:VDC 2 "register_operand" "w, r")))]
  "TARGET_SIMD"
  "@
   stp\\t%d1, %d2, %y0
   stp\\t%x1, %x2, %y0"
  [(set_attr "type" "neon_stp, store_16")]
)

;; In this insn, operand 1 should be low, and operand 2 the high part of the
;; dest vector.
;; Concatenate a 64-bit value (register, GP register or memory) with a
;; zero immediate into a 128-bit vector.  Little-endian variant: the zero
;; lands in the high half, so a plain 64-bit move/load suffices.
(define_insn "*aarch64_combinez<mode>"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
	(vec_concat:<VDBL>
	  (match_operand:VDC 1 "general_operand" "w,?r,m")
	  (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   mov\\t%0.8b, %1.8b
   fmov\t%d0, %1
   ldr\\t%d0, %1"
  [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
   (set_attr "arch" "simd,fp,simd")]
)

;; Big-endian variant of the above: the vec_concat operand order is
;; swapped (zero first), but the emitted instructions are identical.
(define_insn "*aarch64_combinez_be<mode>"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
	(vec_concat:<VDBL>
	  (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
	  (match_operand:VDC 1 "general_operand" "w,?r,m")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   mov\\t%0.8b, %1.8b
   fmov\t%d0, %1
   ldr\\t%d0, %1"
  [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
   (set_attr "arch" "simd,fp,simd")]
)

;; Combine two 64-bit vectors into one 128-bit vector; all the work is
;; delegated to aarch64_split_simd_combine (handles endianness there).
(define_expand "aarch64_combine<mode>"
  [(match_operand:<VDBL> 0 "register_operand")
   (match_operand:VDC 1 "register_operand")
   (match_operand:VDC 2 "register_operand")]
  "TARGET_SIMD"
{
  aarch64_split_simd_combine (operands[0], operands[1], operands[2]);

  DONE;
}
)

;; Internal combine helper: fill the low quad from operand 1, then the
;; high quad from operand 2, via the move_lo/hi_quad patterns.
(define_expand "@aarch64_simd_combine<mode>"
  [(match_operand:<VDBL> 0 "register_operand")
   (match_operand:VDC 1 "register_operand")
   (match_operand:VDC 2 "register_operand")]
  "TARGET_SIMD"
  {
    emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
    emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
    DONE;
  }
[(set_attr "type" "multiple")]
)

;; <su><addsub>l<q>.
;; Widening add/subtract of the high halves of two Q registers
;; ([us]addl2 / [us]subl2).  Operand 3 is a parallel selecting the high
;; half; match_dup forces both inputs to use the same half selection.
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
       (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			   (match_operand:VQW 1 "register_operand" "w")
			   (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
		       (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			   (match_operand:VQW 2 "register_operand" "w")
			   (match_dup 3)))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_long")]
)

;; As above but for the low halves ([us]addl / [us]subl).
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
       (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			   (match_operand:VQW 1 "register_operand" "w")
			   (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
		       (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			   (match_operand:VQW 2 "register_operand" "w")
			   (match_dup 3)))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_long")]
)


;; saddl2: build the high-half lane selector (third argument true) and
;; hand off to the _hi_internal pattern above.
(define_expand "aarch64_saddl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQW 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
						  operands[2], p));
  DONE;
})

;; uaddl2: unsigned counterpart of aarch64_saddl2.
(define_expand "aarch64_uaddl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQW 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
						  operands[2], p));
  DONE;
})

;; ssubl2: signed widening subtract of the high halves.
(define_expand "aarch64_ssubl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQW 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
						  operands[2], p));
  DONE;
})

;; usubl2: unsigned widening subtract of the high halves.
(define_expand "aarch64_usubl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQW 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
						  operands[2], p));
  DONE;
})

;; Widening add/subtract of whole 64-bit vectors ([us]addl / [us]subl).
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
       (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
			   (match_operand:VD_BHSI 1 "register_operand" "w"))
		       (ANY_EXTEND:<VWIDE>
			   (match_operand:VD_BHSI 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_long")]
)

;; <su><addsub>w<q>.
;; Widening sum of a Q-register vector into a double-width accumulator:
;; saddw on the low half, then saddw2 on the high half, via a temporary.
(define_expand "widen_ssum<mode>3"
  [(set (match_operand:<VDBLW> 0 "register_operand" "")
	(plus:<VDBLW> (sign_extend:<VDBLW>
		        (match_operand:VQW 1 "register_operand" ""))
		      (match_operand:<VDBLW> 2 "register_operand" "")))]
  "TARGET_SIMD"
  {
    /* Low-half lane selector (third argument false).  */
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
    rtx temp = gen_reg_rtx (GET_MODE (operands[0]));

    emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
						 operands[1], p));
    emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
    DONE;
  }
)

;; 64-bit-vector form: a single saddw does the whole job.
(define_expand "widen_ssum<mode>3"
  [(set (match_operand:<VWIDE> 0 "register_operand" "")
	(plus:<VWIDE> (sign_extend:<VWIDE>
		        (match_operand:VD_BHSI 1 "register_operand" ""))
		      (match_operand:<VWIDE> 2 "register_operand" "")))]
  "TARGET_SIMD"
{
  emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
  DONE;
})

;; Unsigned counterpart of widen_ssum for Q registers: uaddw + uaddw2.
(define_expand "widen_usum<mode>3"
  [(set (match_operand:<VDBLW> 0 "register_operand" "")
	(plus:<VDBLW> (zero_extend:<VDBLW>
		        (match_operand:VQW 1 "register_operand" ""))
		      (match_operand:<VDBLW> 2 "register_operand" "")))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
    rtx temp = gen_reg_rtx (GET_MODE (operands[0]));

    emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
						 operands[1], p));
    emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
    DONE;
  }
)

;; Unsigned 64-bit-vector form: a single uaddw.
(define_expand "widen_usum<mode>3"
  [(set (match_operand:<VWIDE> 0 "register_operand" "")
	(plus:<VWIDE> (zero_extend:<VWIDE>
		        (match_operand:VD_BHSI 1 "register_operand" ""))
		      (match_operand:<VWIDE> 2 "register_operand" "")))]
  "TARGET_SIMD"
{
  emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
  DONE;
})

;; Widening subtract of a whole 64-bit vector from a wide accumulator
;; ([us]subw).
(define_insn "aarch64_<ANY_EXTEND:su>subw<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
		       (ANY_EXTEND:<VWIDE>
			 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
  [(set_attr "type" "neon_sub_widen")]
)

;; As above, subtracting the extended low half of a Q register.
(define_insn "aarch64_<ANY_EXTEND:su>subw<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
		       (ANY_EXTEND:<VWIDE>
			 (vec_select:<VHALF>
			   (match_operand:VQW 2 "register_operand" "w")
			   (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_sub_widen")]
)

;; As above, subtracting the extended high half ([us]subw2).
(define_insn "aarch64_<ANY_EXTEND:su>subw2<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
		       (ANY_EXTEND:<VWIDE>
			 (vec_select:<VHALF>
			   (match_operand:VQW 2 "register_operand" "w")
			   (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su>subw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
  [(set_attr "type" "neon_sub_widen")]
)

;; Widening add of a whole 64-bit vector to a wide accumulator ([us]addw).
;; Note the commuted RTL: the extended operand is first in the plus.
(define_insn "aarch64_<ANY_EXTEND:su>addw<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(plus:<VWIDE>
	  (ANY_EXTEND:<VWIDE> (match_operand:VD_BHSI 2 "register_operand" "w"))
	  (match_operand:<VWIDE> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
  [(set_attr "type" "neon_add_widen")]
)

;; As above, adding the extended low half of a Q register.
(define_insn "aarch64_<ANY_EXTEND:su>addw<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(plus:<VWIDE>
	  (ANY_EXTEND:<VWIDE>
	    (vec_select:<VHALF>
	      (match_operand:VQW 2 "register_operand" "w")
	      (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
	  (match_operand:<VWIDE> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_add_widen")]
)

;; As above, adding the extended high half ([us]addw2).
(define_insn "aarch64_<ANY_EXTEND:su>addw2<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(plus:<VWIDE>
	  (ANY_EXTEND:<VWIDE>
	    (vec_select:<VHALF>
	      (match_operand:VQW 2 "register_operand" "w")
	      (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
	  (match_operand:<VWIDE> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su>addw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
  [(set_attr "type" "neon_add_widen")]
)

;; saddw2: build a high-half selector and defer to the _internal insn.
(define_expand "aarch64_saddw2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
						operands[2], p));
  DONE;
})

;; uaddw2: unsigned counterpart.
(define_expand "aarch64_uaddw2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
						operands[2], p));
  DONE;
})


;; ssubw2: high-half widening subtract.
(define_expand "aarch64_ssubw2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
						operands[2], p));
  DONE;
})

;; usubw2: unsigned counterpart.
(define_expand "aarch64_usubw2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
						operands[2], p));
  DONE;
})

;; <su><r>h<addsub>.

;; Average-floor optab: matched directly by the aarch64_<sur>h<addsub>
;; insn below via the HADD unspec.
(define_expand "<u>avg<mode>3_floor"
  [(set (match_operand:VDQ_BHSI 0 "register_operand")
	(unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
			  (match_operand:VDQ_BHSI 2 "register_operand")]
			 HADD))]
  "TARGET_SIMD"
)

;; Average-ceiling optab: rounding halving add (RHADD unspec).
(define_expand "<u>avg<mode>3_ceil"
  [(set (match_operand:VDQ_BHSI 0 "register_operand")
	(unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
			  (match_operand:VDQ_BHSI 2 "register_operand")]
			 RHADD))]
  "TARGET_SIMD"
)

;; Halving add/subtract, optionally rounding: [su][r]hadd / [su]hsub.
(define_insn "aarch64_<sur>h<addsub><mode>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
			  (match_operand:VDQ_BHSI 2 "register_operand" "w")]
			 HADDSUB))]
  "TARGET_SIMD"
  "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<addsub>_halve<q>")]
)

;; <r><addsub>hn<q>.
;; Narrowing high-half add/subtract, optionally rounding:
;; [r]addhn / [r]subhn — result is the narrowed high half of the sum.
(define_insn "aarch64_<sur><addsub>hn<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
	(unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
			    (match_operand:VQN 2 "register_operand" "w")]
			   ADDSUBHN))]
  "TARGET_SIMD"
  "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
)

;; Second-half variant ([r]addhn2 / [r]subhn2): operand 1 carries the
;; already-computed low half and is tied to the output ("0" constraint).
(define_insn "aarch64_<sur><addsub>hn2<mode>"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
	(unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
			     (match_operand:VQN 2 "register_operand" "w")
			     (match_operand:VQN 3 "register_operand" "w")]
			    ADDSUBHN2))]
  "TARGET_SIMD"
  "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
)

;; pmul.

;; Polynomial multiply (byte vectors only).
(define_insn "aarch64_pmul<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:VB 1 "register_operand" "w")
		    (match_operand:VB 2 "register_operand" "w")]
		   UNSPEC_PMUL))]
  "TARGET_SIMD"
  "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mul_<Vetype><q>")]
)

;; fmulx.
;; FMULX: multiply extended (0 * inf = 2.0 with matching sign), vector
;; and scalar FP modes.
(define_insn "aarch64_fmulx<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
	(unspec:VHSDF_HSDF
	  [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
	   (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
	   UNSPEC_FMULX))]
 "TARGET_SIMD"
 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
 [(set_attr "type" "neon_fp_mul_<stype>")]
)

;; vmulxq_lane_f32, and vmulx_laneq_f32

;; FMULX by one lane of a vector of the *other* width (D-reg result with
;; Q-reg lane source or vice versa).
(define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(unspec:VDQSF
	 [(match_operand:VDQSF 1 "register_operand" "w")
	  (vec_duplicate:VDQSF
	   (vec_select:<VEL>
	    (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
	    (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
	 UNSPEC_FMULX))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
    return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
)

;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32

;; FMULX by one lane of a same-width vector.
(define_insn "*aarch64_mulx_elt<mode>"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
	(unspec:VDQF
	 [(match_operand:VDQF 1 "register_operand" "w")
	  (vec_duplicate:VDQF
	   (vec_select:<VEL>
	    (match_operand:VDQF 2 "register_operand" "w")
	    (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
	 UNSPEC_FMULX))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
    return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
)

;; vmulxq_lane

;; FMULX by a duplicated scalar, printed as lane 0 of the scalar register.
(define_insn "*aarch64_mulx_elt_from_dup<mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(unspec:VHSDF
	 [(match_operand:VHSDF 1 "register_operand" "w")
	  (vec_duplicate:VHSDF
	    (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
	 UNSPEC_FMULX))]
  "TARGET_SIMD"
  "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)

;; vmulxs_lane_f32, vmulxs_laneq_f32
;; vmulxd_lane_f64 ==  vmulx_lane_f64
;; vmulxd_laneq_f64 == vmulx_laneq_f64

;; Scalar FMULX of a scalar by one lane of a vector.
(define_insn "*aarch64_vgetfmulx<mode>"
  [(set (match_operand:<VEL> 0 "register_operand" "=w")
	(unspec:<VEL>
	 [(match_operand:<VEL> 1 "register_operand" "w")
	  (vec_select:<VEL>
	   (match_operand:VDQF 2 "register_operand" "w")
	   (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
	 UNSPEC_FMULX))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
    return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "fmul<Vetype>")]
)
;; <su>q<addsub>

;; Saturating add/subtract: sqadd/uqadd/sqsub/uqsub.
(define_insn "aarch64_<su_optab><optab><mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
	(BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
			(match_operand:VSDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_<optab><q>")]
)

;; suqadd and usqadd

;; Signed/unsigned saturating accumulate of the opposite signedness;
;; operand 1 is the accumulator, tied to the output.
(define_insn "aarch64_<sur>qadd<mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
	(unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
			(match_operand:VSDQ_I 2 "register_operand" "w")]
		       USSUQADD))]
  "TARGET_SIMD"
  "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_qadd<q>")]
)

;; sqmovun

;; Saturating extract-narrow, signed to unsigned (sqxtun).
(define_insn "aarch64_sqmovun<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
	(unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
			   UNSPEC_SQXTUN))]
   "TARGET_SIMD"
   "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)

;; sqmovn and uqmovn

;; Saturating extract-narrow, same signedness ([su]qxtn).
(define_insn "aarch64_<sur>qmovn<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
	(unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
			   SUQMOVN))]
  "TARGET_SIMD"
  "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)

;; <su>q<absneg>

;; Saturating absolute value / negate (sqabs / sqneg).
(define_insn "aarch64_s<optab><mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
	(UNQOPS:VSDQ_I
	  (match_operand:VSDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_<optab><q>")]
)

;; sq<r>dmulh.

;; Saturating doubling multiply high half, optionally rounding.
(define_insn "aarch64_sq<r>dmulh<mode>"
  [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
	(unspec:VSDQ_HSI
	  [(match_operand:VSDQ_HSI 1 "register_operand" "w")
	   (match_operand:VSDQ_HSI 2 "register_operand" "w")]
	 VQDMULH))]
  "TARGET_SIMD"
  "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
)

;; sq<r>dmulh_lane

;; Vector form, multiplying by one lane of a 64-bit-wide vector (VCOND).
(define_insn "aarch64_sq<r>dmulh_lane<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
	(unspec:VDQHS
	  [(match_operand:VDQHS 1 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCOND> 2 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
	 VQDMULH))]
  "TARGET_SIMD"
  "*
   operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
   return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)

;; As above with a 128-bit-wide lane source (VCONQ).
(define_insn "aarch64_sq<r>dmulh_laneq<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
	(unspec:VDQHS
	  [(match_operand:VDQHS 1 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
	 VQDMULH))]
  "TARGET_SIMD"
  "*
   operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
   return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)

;; Scalar form with a 64-bit-wide lane source.
(define_insn "aarch64_sq<r>dmulh_lane<mode>"
  [(set (match_operand:SD_HSI 0 "register_operand" "=w")
	(unspec:SD_HSI
	  [(match_operand:SD_HSI 1 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCOND> 2 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
	 VQDMULH))]
  "TARGET_SIMD"
  "*
   operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
   return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)

;; Scalar form with a 128-bit-wide lane source.
(define_insn "aarch64_sq<r>dmulh_laneq<mode>"
  [(set (match_operand:SD_HSI 0 "register_operand" "=w")
	(unspec:SD_HSI
	  [(match_operand:SD_HSI 1 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
	 VQDMULH))]
  "TARGET_SIMD"
  "*
   operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
   return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)

;; sqrdml[as]h.
3783 3784(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>" 3785 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w") 3786 (unspec:VSDQ_HSI 3787 [(match_operand:VSDQ_HSI 1 "register_operand" "0") 3788 (match_operand:VSDQ_HSI 2 "register_operand" "w") 3789 (match_operand:VSDQ_HSI 3 "register_operand" "w")] 3790 SQRDMLH_AS))] 3791 "TARGET_SIMD_RDMA" 3792 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>" 3793 [(set_attr "type" "neon_sat_mla_<Vetype>_long")] 3794) 3795 3796;; sqrdml[as]h_lane. 3797 3798(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>" 3799 [(set (match_operand:VDQHS 0 "register_operand" "=w") 3800 (unspec:VDQHS 3801 [(match_operand:VDQHS 1 "register_operand" "0") 3802 (match_operand:VDQHS 2 "register_operand" "w") 3803 (vec_select:<VEL> 3804 (match_operand:<VCOND> 3 "register_operand" "<vwx>") 3805 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))] 3806 SQRDMLH_AS))] 3807 "TARGET_SIMD_RDMA" 3808 { 3809 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4])); 3810 return 3811 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]"; 3812 } 3813 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] 3814) 3815 3816(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>" 3817 [(set (match_operand:SD_HSI 0 "register_operand" "=w") 3818 (unspec:SD_HSI 3819 [(match_operand:SD_HSI 1 "register_operand" "0") 3820 (match_operand:SD_HSI 2 "register_operand" "w") 3821 (vec_select:<VEL> 3822 (match_operand:<VCOND> 3 "register_operand" "<vwx>") 3823 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))] 3824 SQRDMLH_AS))] 3825 "TARGET_SIMD_RDMA" 3826 { 3827 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4])); 3828 return 3829 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]"; 3830 } 3831 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] 3832) 3833 3834;; sqrdml[as]h_laneq. 
;; 128-bit-lane (laneq) variants of sqrdml[as]h: the multiplier lane comes
;; from a 128-bit vector (<VCONQ>).
(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (unspec:VDQHS
          [(match_operand:VDQHS 1 "register_operand" "0")
           (match_operand:VDQHS 2 "register_operand" "w")
           (vec_select:<VEL>
             (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
             (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
          SQRDMLH_AS))]
  "TARGET_SIMD_RDMA"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
    return
     "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; Scalar laneq variant.
(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
  [(set (match_operand:SD_HSI 0 "register_operand" "=w")
        (unspec:SD_HSI
          [(match_operand:SD_HSI 1 "register_operand" "0")
           (match_operand:SD_HSI 2 "register_operand" "w")
           (vec_select:<VEL>
             (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
             (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
          SQRDMLH_AS))]
  "TARGET_SIMD_RDMA"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
    return
     "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; vqdml[sa]l

;; Signed saturating doubling multiply accumulate/subtract long:
;; SQDMLAL/SQDMLSL.  Modelled as acc +/- ss_ashift ((a * b) << 1), with
;; both multiplicands widened first.  Operand 1 is the accumulator.
(define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (SBINQOPS:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (match_operand:VSD_HSI 2 "register_operand" "w"))
              (sign_extend:<VWIDE>
                (match_operand:VSD_HSI 3 "register_operand" "w")))
            (const_int 1))))]
  "TARGET_SIMD"
  "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
  [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
)

;; vqdml[sa]l_lane

;; By-lane forms: the second multiplicand is a duplicated element of a
;; 64-bit (<VCOND>, _lane) or 128-bit (<VCONQ>, _laneq) vector.
(define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (SBINQOPS:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (match_operand:VD_HSI 2 "register_operand" "w"))
              (sign_extend:<VWIDE>
                (vec_duplicate:VD_HSI
                  (vec_select:<VEL>
                    (match_operand:<VCOND> 3 "register_operand" "<vwx>")
                    (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
              ))
            (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
    return
      "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (SBINQOPS:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (match_operand:VD_HSI 2 "register_operand" "w"))
              (sign_extend:<VWIDE>
                (vec_duplicate:VD_HSI
                  (vec_select:<VEL>
                    (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
                    (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
              ))
            (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
    return
      "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; Scalar (HImode/SImode) by-lane forms: no vec_duplicate is needed, the
;; selected element is the multiplicand directly.
(define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (SBINQOPS:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (match_operand:SD_HSI 2 "register_operand" "w"))
              (sign_extend:<VWIDE>
                (vec_select:<VEL>
                  (match_operand:<VCOND> 3 "register_operand" "<vwx>")
                  (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
            )
            (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
    return
      "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (SBINQOPS:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (match_operand:SD_HSI 2 "register_operand" "w"))
              (sign_extend:<VWIDE>
                (vec_select:<VEL>
                  (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
                  (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
            )
            (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
    return
      "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; vqdml[sa]l_n

;; Broadcast-scalar form: multiply every element of operand 2 by the
;; scalar in operand 3 (referenced as lane 0 of its register).
(define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (SBINQOPS:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (match_operand:VD_HSI 2 "register_operand" "w"))
              (sign_extend:<VWIDE>
                (vec_duplicate:VD_HSI
                  (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
            (const_int 1))))]
  "TARGET_SIMD"
  "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; sqdml[as]l2

;; High-half form: operates on the upper halves of both 128-bit inputs.
;; Operand 4 is the parallel selecting the high half (vect_par_cnst_hi_half),
;; supplied by the expanders below.
(define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (SBINQOPS:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (vec_select:<VHALF>
                  (match_operand:VQ_HSI 2 "register_operand" "w")
                  (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
              (sign_extend:<VWIDE>
                (vec_select:<VHALF>
                  (match_operand:VQ_HSI 3 "register_operand" "w")
                  (match_dup 4))))
            (const_int 1))))]
  "TARGET_SIMD"
  "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; Expanders that build the high-half selector and emit the _internal insn.
(define_expand "aarch64_sqdmlal2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:VQ_HSI 3 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
                                                  operands[2], operands[3], p));
  DONE;
})

(define_expand "aarch64_sqdmlsl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:VQ_HSI 3 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
                                                  operands[2], operands[3], p));
  DONE;
})

;; vqdml[sa]l2_lane

;; High-half by-lane forms; operand 5 selects the high half, operand 4 is
;; the (endian-adjusted) lane index.
(define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (SBINQOPS:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (vec_select:<VHALF>
                  (match_operand:VQ_HSI 2 "register_operand" "w")
                  (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
              (sign_extend:<VWIDE>
                (vec_duplicate:<VHALF>
                  (vec_select:<VEL>
                    (match_operand:<VCOND> 3 "register_operand" "<vwx>")
                    (parallel [(match_operand:SI 4 "immediate_operand" "i")])
                  ))))
            (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
    return
     "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (SBINQOPS:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (vec_select:<VHALF>
                  (match_operand:VQ_HSI 2 "register_operand" "w")
                  (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
              (sign_extend:<VWIDE>
                (vec_duplicate:<VHALF>
                  (vec_select:<VEL>
                    (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
                    (parallel [(match_operand:SI 4 "immediate_operand" "i")])
                  ))))
            (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
    return
     "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_expand "aarch64_sqdmlal2_lane<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:<VCOND> 3 "register_operand" "<vwx>")
   (match_operand:SI 4 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
                                                       operands[2], operands[3],
                                                       operands[4], p));
  DONE;
})

(define_expand "aarch64_sqdmlal2_laneq<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
   (match_operand:SI 4 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
                                                        operands[2], operands[3],
                                                        operands[4], p));
  DONE;
})

(define_expand "aarch64_sqdmlsl2_lane<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:<VCOND> 3 "register_operand" "<vwx>")
   (match_operand:SI 4 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
                                                       operands[2], operands[3],
                                                       operands[4], p));
  DONE;
})

(define_expand "aarch64_sqdmlsl2_laneq<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
   (match_operand:SI 4 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
                                                        operands[2], operands[3],
                                                        operands[4], p));
  DONE;
})

;; High-half broadcast-scalar form.
(define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (SBINQOPS:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (vec_select:<VHALF>
                  (match_operand:VQ_HSI 2 "register_operand" "w")
                  (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
              (sign_extend:<VWIDE>
                (vec_duplicate:<VHALF>
                  (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
            (const_int 1))))]
  "TARGET_SIMD"
  "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_expand "aarch64_sqdmlal2_n<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:<VEL> 3 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
                                                    operands[2], operands[3],
                                                    p));
  DONE;
})

(define_expand "aarch64_sqdmlsl2_n<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:<VEL> 3 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
                                                    operands[2], operands[3],
                                                    p));
  DONE;
})

;; vqdmull

;; Signed saturating doubling multiply long: SQDMULL.  Same ss_ashift
;; ((a * b) << 1) shape as the accumulate forms above, without the add.
(define_insn "aarch64_sqdmull<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
          (mult:<VWIDE>
            (sign_extend:<VWIDE>
              (match_operand:VSD_HSI 1 "register_operand" "w"))
            (sign_extend:<VWIDE>
              (match_operand:VSD_HSI 2 "register_operand" "w")))
          (const_int 1)))]
  "TARGET_SIMD"
  "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
)

;; vqdmull_lane

;; By-lane forms: _lane selects from a 64-bit vector (<VCOND>), _laneq from
;; a 128-bit vector (<VCONQ>).
(define_insn "aarch64_sqdmull_lane<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
          (mult:<VWIDE>
            (sign_extend:<VWIDE>
              (match_operand:VD_HSI 1 "register_operand" "w"))
            (sign_extend:<VWIDE>
              (vec_duplicate:VD_HSI
                (vec_select:<VEL>
                  (match_operand:<VCOND> 2 "register_operand" "<vwx>")
                  (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
            ))
          (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdmull_laneq<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
          (mult:<VWIDE>
            (sign_extend:<VWIDE>
              (match_operand:VD_HSI 1 "register_operand" "w"))
            (sign_extend:<VWIDE>
              (vec_duplicate:VD_HSI
                (vec_select:<VEL>
                  (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
                  (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
            ))
          (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

;; Scalar by-lane forms (no vec_duplicate).
(define_insn "aarch64_sqdmull_lane<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
          (mult:<VWIDE>
            (sign_extend:<VWIDE>
              (match_operand:SD_HSI 1 "register_operand" "w"))
            (sign_extend:<VWIDE>
              (vec_select:<VEL>
                (match_operand:<VCOND> 2 "register_operand" "<vwx>")
                (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
            ))
          (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdmull_laneq<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
          (mult:<VWIDE>
            (sign_extend:<VWIDE>
              (match_operand:SD_HSI 1 "register_operand" "w"))
            (sign_extend:<VWIDE>
              (vec_select:<VEL>
                (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
                (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
            ))
          (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

;; vqdmull_n

;; Broadcast-scalar form.
(define_insn "aarch64_sqdmull_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
          (mult:<VWIDE>
            (sign_extend:<VWIDE>
              (match_operand:VD_HSI 1 "register_operand" "w"))
            (sign_extend:<VWIDE>
              (vec_duplicate:VD_HSI
                (match_operand:<VEL> 2 "register_operand" "<vwx>")))
          )
          (const_int 1)))]
  "TARGET_SIMD"
  "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

;; vqdmull2

;; High-half multiply long; operand 3 selects the high halves of both inputs.
(define_insn "aarch64_sqdmull2<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
          (mult:<VWIDE>
            (sign_extend:<VWIDE>
              (vec_select:<VHALF>
                (match_operand:VQ_HSI 1 "register_operand" "w")
                (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
            (sign_extend:<VWIDE>
              (vec_select:<VHALF>
                (match_operand:VQ_HSI 2 "register_operand" "w")
                (match_dup 3)))
          )
          (const_int 1)))]
  "TARGET_SIMD"
  "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

(define_expand "aarch64_sqdmull2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQ_HSI 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
                                                  operands[2], p));
  DONE;
})

;; vqdmull2_lane

(define_insn "aarch64_sqdmull2_lane<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
          (mult:<VWIDE>
            (sign_extend:<VWIDE>
              (vec_select:<VHALF>
                (match_operand:VQ_HSI 1 "register_operand" "w")
                (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
            (sign_extend:<VWIDE>
              (vec_duplicate:<VHALF>
                (vec_select:<VEL>
                  (match_operand:<VCOND> 2 "register_operand" "<vwx>")
                  (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
            ))
          (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
    return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdmull2_laneq<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
          (mult:<VWIDE>
            (sign_extend:<VWIDE>
              (vec_select:<VHALF>
                (match_operand:VQ_HSI 1 "register_operand" "w")
                (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
            (sign_extend:<VWIDE>
              (vec_duplicate:<VHALF>
                (vec_select:<VEL>
                  (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
                  (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
            ))
          (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
    return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

(define_expand "aarch64_sqdmull2_lane<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQ_HSI 1 "register_operand" "w")
   (match_operand:<VCOND> 2 "register_operand" "<vwx>")
   (match_operand:SI 3 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
                                                       operands[2], operands[3],
                                                       p));
  DONE;
})

(define_expand "aarch64_sqdmull2_laneq<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQ_HSI 1 "register_operand" "w")
   (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
   (match_operand:SI 3 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
                                                        operands[2], operands[3],
                                                        p));
  DONE;
})

;; vqdmull2_n

;; High-half broadcast-scalar form.
(define_insn "aarch64_sqdmull2_n<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
          (mult:<VWIDE>
            (sign_extend:<VWIDE>
              (vec_select:<VHALF>
                (match_operand:VQ_HSI 1 "register_operand" "w")
                (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
            (sign_extend:<VWIDE>
              (vec_duplicate:<VHALF>
                (match_operand:<VEL> 2 "register_operand" "<vwx>")))
          )
          (const_int 1)))]
  "TARGET_SIMD"
  "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)
4463(define_expand "aarch64_sqdmull2_n<mode>" 4464 [(match_operand:<VWIDE> 0 "register_operand" "=w") 4465 (match_operand:VQ_HSI 1 "register_operand" "w") 4466 (match_operand:<VEL> 2 "register_operand" "w")] 4467 "TARGET_SIMD" 4468{ 4469 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); 4470 emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1], 4471 operands[2], p)); 4472 DONE; 4473}) 4474 4475;; vshl 4476 4477(define_insn "aarch64_<sur>shl<mode>" 4478 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w") 4479 (unspec:VSDQ_I_DI 4480 [(match_operand:VSDQ_I_DI 1 "register_operand" "w") 4481 (match_operand:VSDQ_I_DI 2 "register_operand" "w")] 4482 VSHL))] 4483 "TARGET_SIMD" 4484 "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"; 4485 [(set_attr "type" "neon_shift_reg<q>")] 4486) 4487 4488 4489;; vqshl 4490 4491(define_insn "aarch64_<sur>q<r>shl<mode>" 4492 [(set (match_operand:VSDQ_I 0 "register_operand" "=w") 4493 (unspec:VSDQ_I 4494 [(match_operand:VSDQ_I 1 "register_operand" "w") 4495 (match_operand:VSDQ_I 2 "register_operand" "w")] 4496 VQSHL))] 4497 "TARGET_SIMD" 4498 "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"; 4499 [(set_attr "type" "neon_sat_shift_reg<q>")] 4500) 4501 4502;; vshll_n 4503 4504(define_insn "aarch64_<sur>shll_n<mode>" 4505 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 4506 (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w") 4507 (match_operand:SI 2 4508 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")] 4509 VSHLL))] 4510 "TARGET_SIMD" 4511 { 4512 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode)) 4513 return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2"; 4514 else 4515 return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2"; 4516 } 4517 [(set_attr "type" "neon_shift_imm_long")] 4518) 4519 4520;; vshll_high_n 4521 4522(define_insn "aarch64_<sur>shll2_n<mode>" 4523 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 4524 (unspec:<VWIDE> [(match_operand:VQW 
1 "register_operand" "w") 4525 (match_operand:SI 2 "immediate_operand" "i")] 4526 VSHLL))] 4527 "TARGET_SIMD" 4528 { 4529 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode)) 4530 return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2"; 4531 else 4532 return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2"; 4533 } 4534 [(set_attr "type" "neon_shift_imm_long")] 4535) 4536 4537;; vrshr_n 4538 4539(define_insn "aarch64_<sur>shr_n<mode>" 4540 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w") 4541 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w") 4542 (match_operand:SI 2 4543 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")] 4544 VRSHR_N))] 4545 "TARGET_SIMD" 4546 "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2" 4547 [(set_attr "type" "neon_sat_shift_imm<q>")] 4548) 4549 4550;; v(r)sra_n 4551 4552(define_insn "aarch64_<sur>sra_n<mode>" 4553 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w") 4554 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0") 4555 (match_operand:VSDQ_I_DI 2 "register_operand" "w") 4556 (match_operand:SI 3 4557 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")] 4558 VSRA))] 4559 "TARGET_SIMD" 4560 "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3" 4561 [(set_attr "type" "neon_shift_acc<q>")] 4562) 4563 4564;; vs<lr>i_n 4565 4566(define_insn "aarch64_<sur>s<lr>i_n<mode>" 4567 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w") 4568 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0") 4569 (match_operand:VSDQ_I_DI 2 "register_operand" "w") 4570 (match_operand:SI 3 4571 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")] 4572 VSLRI))] 4573 "TARGET_SIMD" 4574 "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3" 4575 [(set_attr "type" "neon_shift_imm<q>")] 4576) 4577 4578;; vqshl(u) 4579 4580(define_insn "aarch64_<sur>qshl<u>_n<mode>" 4581 [(set (match_operand:VSDQ_I 0 "register_operand" "=w") 4582 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w") 4583 (match_operand:SI 2 4584 
"aarch64_simd_shift_imm_<ve_mode>" "i")] 4585 VQSHL_N))] 4586 "TARGET_SIMD" 4587 "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2" 4588 [(set_attr "type" "neon_sat_shift_imm<q>")] 4589) 4590 4591 4592;; vq(r)shr(u)n_n 4593 4594(define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>" 4595 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w") 4596 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w") 4597 (match_operand:SI 2 4598 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")] 4599 VQSHRN_N))] 4600 "TARGET_SIMD" 4601 "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2" 4602 [(set_attr "type" "neon_sat_shift_imm_narrow_q")] 4603) 4604 4605 4606;; cm(eq|ge|gt|lt|le) 4607;; Note, we have constraints for Dz and Z as different expanders 4608;; have different ideas of what should be passed to this pattern. 4609 4610(define_insn "aarch64_cm<optab><mode>" 4611 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w") 4612 (neg:<V_INT_EQUIV> 4613 (COMPARISONS:<V_INT_EQUIV> 4614 (match_operand:VDQ_I 1 "register_operand" "w,w") 4615 (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz") 4616 )))] 4617 "TARGET_SIMD" 4618 "@ 4619 cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype> 4620 cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0" 4621 [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")] 4622) 4623 4624(define_insn_and_split "aarch64_cm<optab>di" 4625 [(set (match_operand:DI 0 "register_operand" "=w,w,r") 4626 (neg:DI 4627 (COMPARISONS:DI 4628 (match_operand:DI 1 "register_operand" "w,w,r") 4629 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r") 4630 ))) 4631 (clobber (reg:CC CC_REGNUM))] 4632 "TARGET_SIMD" 4633 "#" 4634 "&& reload_completed" 4635 [(set (match_operand:DI 0 "register_operand") 4636 (neg:DI 4637 (COMPARISONS:DI 4638 (match_operand:DI 1 "register_operand") 4639 (match_operand:DI 2 "aarch64_simd_reg_or_zero") 4640 )))] 4641 { 4642 /* If we are in the general purpose register file, 4643 we split to a sequence of 
comparison and store. */ 4644 if (GP_REGNUM_P (REGNO (operands[0])) 4645 && GP_REGNUM_P (REGNO (operands[1]))) 4646 { 4647 machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]); 4648 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]); 4649 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]); 4650 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg)); 4651 DONE; 4652 } 4653 /* Otherwise, we expand to a similar pattern which does not 4654 clobber CC_REGNUM. */ 4655 } 4656 [(set_attr "type" "neon_compare, neon_compare_zero, multiple")] 4657) 4658 4659(define_insn "*aarch64_cm<optab>di" 4660 [(set (match_operand:DI 0 "register_operand" "=w,w") 4661 (neg:DI 4662 (COMPARISONS:DI 4663 (match_operand:DI 1 "register_operand" "w,w") 4664 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz") 4665 )))] 4666 "TARGET_SIMD && reload_completed" 4667 "@ 4668 cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2> 4669 cm<optab>\t%d0, %d1, #0" 4670 [(set_attr "type" "neon_compare, neon_compare_zero")] 4671) 4672 4673;; cm(hs|hi) 4674 4675(define_insn "aarch64_cm<optab><mode>" 4676 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w") 4677 (neg:<V_INT_EQUIV> 4678 (UCOMPARISONS:<V_INT_EQUIV> 4679 (match_operand:VDQ_I 1 "register_operand" "w") 4680 (match_operand:VDQ_I 2 "register_operand" "w") 4681 )))] 4682 "TARGET_SIMD" 4683 "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>" 4684 [(set_attr "type" "neon_compare<q>")] 4685) 4686 4687(define_insn_and_split "aarch64_cm<optab>di" 4688 [(set (match_operand:DI 0 "register_operand" "=w,r") 4689 (neg:DI 4690 (UCOMPARISONS:DI 4691 (match_operand:DI 1 "register_operand" "w,r") 4692 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r") 4693 ))) 4694 (clobber (reg:CC CC_REGNUM))] 4695 "TARGET_SIMD" 4696 "#" 4697 "&& reload_completed" 4698 [(set (match_operand:DI 0 "register_operand") 4699 (neg:DI 4700 (UCOMPARISONS:DI 4701 (match_operand:DI 1 "register_operand") 4702 
(match_operand:DI 2 "aarch64_simd_reg_or_zero") 4703 )))] 4704 { 4705 /* If we are in the general purpose register file, 4706 we split to a sequence of comparison and store. */ 4707 if (GP_REGNUM_P (REGNO (operands[0])) 4708 && GP_REGNUM_P (REGNO (operands[1]))) 4709 { 4710 machine_mode mode = CCmode; 4711 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]); 4712 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]); 4713 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg)); 4714 DONE; 4715 } 4716 /* Otherwise, we expand to a similar pattern which does not 4717 clobber CC_REGNUM. */ 4718 } 4719 [(set_attr "type" "neon_compare,multiple")] 4720) 4721 4722(define_insn "*aarch64_cm<optab>di" 4723 [(set (match_operand:DI 0 "register_operand" "=w") 4724 (neg:DI 4725 (UCOMPARISONS:DI 4726 (match_operand:DI 1 "register_operand" "w") 4727 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w") 4728 )))] 4729 "TARGET_SIMD && reload_completed" 4730 "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>" 4731 [(set_attr "type" "neon_compare")] 4732) 4733 4734;; cmtst 4735 4736;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst, 4737;; we don't have any insns using ne, and aarch64_vcond outputs 4738;; not (neg (eq (and x y) 0)) 4739;; which is rewritten by simplify_rtx as 4740;; plus (eq (and x y) 0) -1. 
;; Vector bit-test: lanes where (x & y) != 0 become all-ones.  The RTL uses
;; the plus (eq ...) -1 form produced by simplify_rtx (see comment above).
(define_insn "aarch64_cmtst<mode>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
	(plus:<V_INT_EQUIV>
	  (eq:<V_INT_EQUIV>
	    (and:VDQ_I
	      (match_operand:VDQ_I 1 "register_operand" "w")
	      (match_operand:VDQ_I 2 "register_operand" "w"))
	    (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
	  (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
  ]
  "TARGET_SIMD"
  "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_tst<q>")]
)

;; DI-mode bit-test.  As with the DI comparisons above, the register file
;; is unknown before reload, so clobber CC and split afterwards.
(define_insn_and_split "aarch64_cmtstdi"
  [(set (match_operand:DI 0 "register_operand" "=w,r")
	(neg:DI
	  (ne:DI
	    (and:DI
	      (match_operand:DI 1 "register_operand" "w,r")
	      (match_operand:DI 2 "register_operand" "w,r"))
	    (const_int 0))))
    (clobber (reg:CC CC_REGNUM))]
  "TARGET_SIMD"
  "#"
  "&& reload_completed"
  [(set (match_operand:DI 0 "register_operand")
	(neg:DI
	  (ne:DI
	    (and:DI
	      (match_operand:DI 1 "register_operand")
	      (match_operand:DI 2 "register_operand"))
	    (const_int 0))))]
  {
    /* If we are in the general purpose register file,
       we split to a sequence of comparison and store.  */
    if (GP_REGNUM_P (REGNO (operands[0]))
	&& GP_REGNUM_P (REGNO (operands[1])))
      {
	rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
	machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
	rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
	rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
	emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
	DONE;
      }
    /* Otherwise, we expand to a similar pattern which does not
       clobber CC_REGNUM.  */
  }
  [(set_attr "type" "neon_tst,multiple")]
)

;; FP/SIMD-register form of the DI bit-test, matched after the split above.
(define_insn "*aarch64_cmtstdi"
  [(set (match_operand:DI 0 "register_operand" "=w")
	(neg:DI
	  (ne:DI
	    (and:DI
	      (match_operand:DI 1 "register_operand" "w")
	      (match_operand:DI 2 "register_operand" "w"))
	    (const_int 0))))]
  "TARGET_SIMD"
  "cmtst\t%d0, %d1, %d2"
  [(set_attr "type" "neon_tst")]
)

;; fcm(eq|ge|gt|le|lt)

;; Floating-point comparison producing an all-ones integer lane mask.
;; Second alternative compares against immediate +0.0 (YDz).
(define_insn "aarch64_cm<optab><mode>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
	(neg:<V_INT_EQUIV>
	  (COMPARISONS:<V_INT_EQUIV>
	    (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
	    (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
	  )))]
  "TARGET_SIMD"
  "@
  fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
  fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
  [(set_attr "type" "neon_fp_compare_<stype><q>")]
)

;; fac(ge|gt)
;; Note we can also handle what would be fac(le|lt) by
;; generating fac(ge|gt).
;; Absolute compare: |op1| cmp |op2|, yielding an all-ones lane mask.
;; The <cmp_1>/<cmp_2> operand swap lets facle/faclt map onto facge/facgt.
(define_insn "aarch64_fac<optab><mode>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
	(neg:<V_INT_EQUIV>
	  (FAC_COMPARISONS:<V_INT_EQUIV>
	    (abs:VHSDF_HSDF
	      (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
	    (abs:VHSDF_HSDF
	      (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
	  )))]
  "TARGET_SIMD"
  "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
  [(set_attr "type" "neon_fp_compare_<stype><q>")]
)

;; addp

;; Pairwise add of two D-registers, represented as an unspec.
(define_insn "aarch64_addp<mode>"
  [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
        (unspec:VD_BHSI
          [(match_operand:VD_BHSI 1 "register_operand" "w")
	   (match_operand:VD_BHSI 2 "register_operand" "w")]
          UNSPEC_ADDP))]
  "TARGET_SIMD"
  "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_reduc_add<q>")]
)

;; Scalar form: adds the two DI lanes of a V2DI source into one D-register.
(define_insn "aarch64_addpdi"
  [(set (match_operand:DI 0 "register_operand" "=w")
        (unspec:DI
          [(match_operand:V2DI 1 "register_operand" "w")]
          UNSPEC_ADDP))]
  "TARGET_SIMD"
  "addp\t%d0, %1.2d"
  [(set_attr "type" "neon_reduc_add")]
)

;; sqrt

;; Expander first tries the Newton-series approximation path
;; (aarch64_emit_approx_sqrt); otherwise falls through to the fsqrt insn.
(define_expand "sqrt<mode>2"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
  "TARGET_SIMD"
{
  if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
    DONE;
})

(define_insn "*sqrt<mode>2"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
)

;; Patterns for vector struct loads and stores.
;; Two-register structure load: OImode holds the 2xQ register tuple; the
;; inner VQ unspec only carries the element mode for iterator expansion.
(define_insn "aarch64_simd_ld2<mode>"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD2))]
  "TARGET_SIMD"
  "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
  [(set_attr "type" "neon_load2_2reg<q>")]
)

;; ld2r: load one structure and replicate it to all lanes of both registers.
;; Memory operand is BLKmode since only two elements are actually read.
(define_insn "aarch64_simd_ld2r<mode>"
  [(set (match_operand:OI 0 "register_operand" "=w")
       (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
                   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
                  UNSPEC_LD2_DUP))]
  "TARGET_SIMD"
  "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
  [(set_attr "type" "neon_load2_all_lanes<q>")]
)

;; ld2 to a single lane; operand 2 ("0") supplies the untouched lanes,
;; operand 3 is the lane index, adjusted for endianness at output time.
(define_insn "aarch64_vec_load_lanesoi_lane<mode>"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (match_operand:OI 2 "register_operand" "0")
		    (match_operand:SI 3 "immediate_operand" "i")
		    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
		   UNSPEC_LD2_LANE))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
    return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
  }
  [(set_attr "type" "neon_load2_one_lane")]
)

;; Standard-named expander.  On big-endian the loaded register pair is
;; permuted back to GCC's lane numbering via a tbl-based reversal.
(define_expand "vec_load_lanesoi<mode>"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD2))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (OImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
      emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
      emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
    }
  else
    emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
  DONE;
})
;; Two-register structure store (interleaving st2).
(define_insn "aarch64_simd_st2<mode>"
  [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:OI [(match_operand:OI 1 "register_operand" "w")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_ST2))]
  "TARGET_SIMD"
  "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
  [(set_attr "type" "neon_store2_2reg<q>")]
)

;; RTL uses GCC vector extension indices, so flip only for assembly.
(define_insn "aarch64_vec_store_lanesoi_lane<mode>"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:OI 1 "register_operand" "w")
		     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
		     (match_operand:SI 2 "immediate_operand" "i")]
		    UNSPEC_ST2_LANE))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
  }
  [(set_attr "type" "neon_store2_one_lane<q>")]
)

;; Standard-named expander; on big-endian the register list is permuted
;; into memory order before the store (mirror of vec_load_lanesoi).
(define_expand "vec_store_lanesoi<mode>"
  [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:OI [(match_operand:OI 1 "register_operand" "w")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST2))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (OImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
      emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
      emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
    }
  else
    emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
  DONE;
})

;; Three-register structure load (CImode = 3xQ tuple).
(define_insn "aarch64_simd_ld3<mode>"
  [(set (match_operand:CI 0 "register_operand" "=w")
	(unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD3))]
  "TARGET_SIMD"
  "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
  [(set_attr "type" "neon_load3_3reg<q>")]
)

;; ld3r: load one 3-element structure, replicated to all lanes.
(define_insn "aarch64_simd_ld3r<mode>"
  [(set (match_operand:CI 0 "register_operand" "=w")
       (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
                   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
                  UNSPEC_LD3_DUP))]
  "TARGET_SIMD"
  "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
  [(set_attr "type" "neon_load3_all_lanes<q>")]
)

;; ld3 to a single lane (operand 2 provides the remaining lanes).
(define_insn "aarch64_vec_load_lanesci_lane<mode>"
  [(set (match_operand:CI 0 "register_operand" "=w")
	(unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (match_operand:CI 2 "register_operand" "0")
		    (match_operand:SI 3 "immediate_operand" "i")
		    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD3_LANE))]
  "TARGET_SIMD"
{
    operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
    return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
}
  [(set_attr "type" "neon_load3_one_lane")]
)

;; Standard-named expander with the usual big-endian reversal.
(define_expand "vec_load_lanesci<mode>"
  [(set (match_operand:CI 0 "register_operand" "=w")
	(unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD3))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (CImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
      emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
      emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
    }
  else
    emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
  DONE;
})

;; Three-register structure store.
(define_insn "aarch64_simd_st3<mode>"
  [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:CI [(match_operand:CI 1 "register_operand" "w")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST3))]
  "TARGET_SIMD"
  "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
  [(set_attr "type" "neon_store3_3reg<q>")]
)

;; RTL uses GCC vector extension indices, so flip only for assembly.
(define_insn "aarch64_vec_store_lanesci_lane<mode>"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:CI 1 "register_operand" "w")
		     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
		     (match_operand:SI 2 "immediate_operand" "i")]
		    UNSPEC_ST3_LANE))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
  }
  [(set_attr "type" "neon_store3_one_lane<q>")]
)

;; Standard-named store expander with big-endian pre-permutation.
(define_expand "vec_store_lanesci<mode>"
  [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:CI [(match_operand:CI 1 "register_operand" "w")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST3))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (CImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
      emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
      emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
    }
  else
    emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
  DONE;
})

;; Four-register structure load (XImode = 4xQ tuple).
(define_insn "aarch64_simd_ld4<mode>"
  [(set (match_operand:XI 0 "register_operand" "=w")
	(unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD4))]
  "TARGET_SIMD"
  "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
  [(set_attr "type" "neon_load4_4reg<q>")]
)

;; ld4r: load one 4-element structure, replicated to all lanes.
(define_insn "aarch64_simd_ld4r<mode>"
  [(set (match_operand:XI 0 "register_operand" "=w")
       (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
                   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
                  UNSPEC_LD4_DUP))]
  "TARGET_SIMD"
  "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
  [(set_attr "type" "neon_load4_all_lanes<q>")]
)

;; ld4 to a single lane (operand 2 provides the remaining lanes).
(define_insn "aarch64_vec_load_lanesxi_lane<mode>"
  [(set (match_operand:XI 0 "register_operand" "=w")
	(unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (match_operand:XI 2 "register_operand" "0")
		    (match_operand:SI 3 "immediate_operand" "i")
		    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD4_LANE))]
  "TARGET_SIMD"
{
    operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
    return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
}
  [(set_attr "type" "neon_load4_one_lane")]
)

;; Standard-named expander with the usual big-endian reversal.
(define_expand "vec_load_lanesxi<mode>"
  [(set (match_operand:XI 0 "register_operand" "=w")
	(unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD4))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (XImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
      emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
      emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
    }
  else
    emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
  DONE;
})

;; Four-register structure store.
(define_insn "aarch64_simd_st4<mode>"
  [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:XI [(match_operand:XI 1 "register_operand" "w")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST4))]
  "TARGET_SIMD"
  "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
  [(set_attr "type" "neon_store4_4reg<q>")]
)

;; RTL uses GCC vector extension indices, so flip only for assembly.
;; st4 to a single lane; lane index flipped for endianness at output time.
(define_insn "aarch64_vec_store_lanesxi_lane<mode>"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:XI 1 "register_operand" "w")
		     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
		     (match_operand:SI 2 "immediate_operand" "i")]
		    UNSPEC_ST4_LANE))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
  }
  [(set_attr "type" "neon_store4_one_lane<q>")]
)

;; Standard-named store expander with big-endian pre-permutation.
(define_expand "vec_store_lanesxi<mode>"
  [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:XI [(match_operand:XI 1 "register_operand" "w")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST4))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (XImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
      emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
      emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
    }
  else
    emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
  DONE;
})

;; Reverse the lanes of every vector in a register list, one tbl per
;; register, using the byte mask in operand 2.  Split after reload into
;; per-register V16QI tbl instructions.
(define_insn_and_split "aarch64_rev_reglist<mode>"
[(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
	(unspec:VSTRUCT
	           [(match_operand:VSTRUCT 1 "register_operand" "w")
		    (match_operand:V16QI 2 "register_operand" "w")]
                   UNSPEC_REV_REGLIST))]
  "TARGET_SIMD"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  int i;
  int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
  for (i = 0; i < nregs; i++)
    {
      rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
      rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
      emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
    }
  DONE;
}
  [(set_attr "type" "neon_tbl1_q")
   (set_attr "length" "<insn_count>")]
)

;; Reload patterns for AdvSIMD register list operands.

;; Generic move expander for register-list (OI/CI/XI) modes: before reload,
;; force a non-register source into a register when storing to memory.
(define_expand "mov<mode>"
  [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
	(match_operand:VSTRUCT 1 "general_operand" ""))]
  "TARGET_SIMD"
{
  if (can_create_pseudo_p ())
    {
      if (GET_CODE (operands[0]) != REG)
	operands[1] = force_reg (<MODE>mode, operands[1]);
    }
})


;; ld1 x3: load three consecutive vectors (no interleaving) into a CI tuple.
(define_expand "aarch64_ld1x3<VALLDIF:mode>"
  [(match_operand:CI 0 "register_operand" "=w")
   (match_operand:DI 1 "register_operand" "r")
   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (CImode, operands[1]);
  emit_insn (gen_aarch64_ld1_x3_<VALLDIF:mode> (operands[0], mem));
  DONE;
})

(define_insn "aarch64_ld1_x3_<mode>"
  [(set (match_operand:CI 0 "register_operand" "=w")
        (unspec:CI
	  [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
	   (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_LD1))]
  "TARGET_SIMD"
  "ld1\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
  [(set_attr "type" "neon_load1_3reg<q>")]
)

;; st1 x2: store two consecutive vectors (no interleaving) from an OI tuple.
(define_expand "aarch64_st1x2<VALLDIF:mode>"
  [(match_operand:DI 0 "register_operand" "")
   (match_operand:OI 1 "register_operand" "")
   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (OImode, operands[0]);
  emit_insn (gen_aarch64_st1_x2_<VALLDIF:mode> (mem, operands[1]));
  DONE;
})

(define_insn "aarch64_st1_x2_<mode>"
   [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
	 (unspec:OI
	  [(match_operand:OI 1 "register_operand" "w")
          (unspec:VALLDIF [(const_int 2)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
  "TARGET_SIMD"
  "st1\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
  [(set_attr "type" "neon_store1_2reg<q>")]
)

;; st1 x3: store three consecutive vectors from a CI tuple.
(define_expand "aarch64_st1x3<VALLDIF:mode>"
  [(match_operand:DI 0 "register_operand" "")
   (match_operand:CI 1 "register_operand" "")
   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (CImode, operands[0]);
  emit_insn (gen_aarch64_st1_x3_<VALLDIF:mode> (mem, operands[1]));
  DONE;
})

(define_insn "aarch64_st1_x3_<mode>"
   [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:CI
         [(match_operand:CI 1 "register_operand" "w")
	  (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
  "TARGET_SIMD"
  "st1\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
  [(set_attr "type" "neon_store1_3reg<q>")]
)

;; Little-endian register-list move: reg-reg moves are split (see the
;; define_splits below); memory transfers use st1/ld1 on the full list.
(define_insn "*aarch64_mov<mode>"
  [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
	(match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
  "@
   #
   st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
   ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
  [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
		     neon_load<nregs>_<nregs>reg_q")
   (set_attr "length" "<insn_count>,4,4")]
)

;; Element-ordering-preserving single-vector load, used on big-endian
;; where a plain ldr would produce the wrong lane order.
(define_insn "aarch64_be_ld1<mode>"
  [(set (match_operand:VALLDI_F16 0	"register_operand" "=w")
	(unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
			     "aarch64_simd_struct_operand" "Utv")]
	UNSPEC_LD1))]
  "TARGET_SIMD"
  "ld1\\t{%0<Vmtype>}, %1"
  [(set_attr "type" "neon_load1_1reg<q>")]
)

;; Counterpart store for big-endian lane ordering.
(define_insn "aarch64_be_st1<mode>"
  [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
	UNSPEC_ST1))]
  "TARGET_SIMD"
  "st1\\t{%1<Vmtype>}, %0"
  [(set_attr "type" "neon_store1_1reg<q>")]
)

;; Big-endian OI move: memory transfers use ldp/stp of the two Q halves.
(define_insn "*aarch64_be_movoi"
  [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
	(match_operand:OI 1 "general_operand"      " w,w,m"))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN
   && (register_operand (operands[0], OImode)
       || register_operand (operands[1], OImode))"
  "@
   #
   stp\\t%q1, %R1, %0
   ldp\\t%q0, %R0, %1"
  [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
   (set_attr "length" "8,4,4")]
)

;; Big-endian CI move: always split (see the CI define_split below).
(define_insn "*aarch64_be_movci"
  [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
	(match_operand:CI 1 "general_operand"      " w,w,o"))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN
   && (register_operand (operands[0], CImode)
       || register_operand (operands[1], CImode))"
  "#"
  [(set_attr "type" "multiple")
   (set_attr "length" "12,4,4")]
)

;; Big-endian XI move: always split (see the XI define_split below).
(define_insn "*aarch64_be_movxi"
  [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
	(match_operand:XI 1 "general_operand"      " w,w,o"))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN
   && (register_operand (operands[0], XImode)
       || register_operand (operands[1], XImode))"
  "#"
  [(set_attr "type" "multiple")
   (set_attr "length" "16,4,4")]
)

;; Split an OI register-to-register move into two TImode moves.
(define_split
  [(set (match_operand:OI 0 "register_operand")
	(match_operand:OI 1 "register_operand"))]
  "TARGET_SIMD && reload_completed"
  [(const_int 0)]
{
  aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
  DONE;
})

;; Split a CI move: reg-reg becomes three TImode moves; on big-endian a
;; memory transfer becomes an OImode move plus a TImode move at offset 32.
(define_split
  [(set (match_operand:CI 0 "nonimmediate_operand")
	(match_operand:CI 1 "general_operand"))]
  "TARGET_SIMD && reload_completed"
  [(const_int 0)]
{
  if (register_operand (operands[0], CImode)
      && register_operand (operands[1], CImode))
    {
      aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
      DONE;
    }
  else if (BYTES_BIG_ENDIAN)
    {
      emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
		      simplify_gen_subreg (OImode, operands[1], CImode, 0));
      emit_move_insn (gen_lowpart (V16QImode,
				   simplify_gen_subreg (TImode, operands[0],
							CImode, 32)),
		      gen_lowpart (V16QImode,
				   simplify_gen_subreg (TImode, operands[1],
							CImode, 32)));
      DONE;
    }
  else
    FAIL;
})

;; Split an XI move: reg-reg becomes four TImode moves; on big-endian a
;; memory transfer becomes two OImode moves at offsets 0 and 32.
(define_split
  [(set (match_operand:XI 0 "nonimmediate_operand")
	(match_operand:XI 1 "general_operand"))]
  "TARGET_SIMD && reload_completed"
  [(const_int 0)]
{
  if (register_operand (operands[0], XImode)
      && register_operand (operands[1], XImode))
    {
      aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
      DONE;
    }
  else if (BYTES_BIG_ENDIAN)
    {
      emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
		      simplify_gen_subreg (OImode, operands[1], XImode, 0));
      emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
		      simplify_gen_subreg (OImode, operands[1], XImode, 32));
      DONE;
    }
  else
    FAIL;
})

;; Builtin expander for ldNr: wraps the address in a BLKmode MEM sized to
;; the nregs elements actually read, then emits the simd_ldNr insn.
(define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
  [(match_operand:VSTRUCT 0 "register_operand" "=w")
   (match_operand:DI 1 "register_operand" "w")
   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
  set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
		     * <VSTRUCT:nregs>);

  emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
								mem));
  DONE;
})

;; D-register ld2 (interleaving) for 64-bit vector modes.
(define_insn "aarch64_ld2<mode>_dreg"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD2_DREG))]
  "TARGET_SIMD"
  "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
  [(set_attr "type" "neon_load2_2reg<q>")]
)

;; DX (DI/DF) variant: single-element "structures", so plain ld1 suffices.
(define_insn "aarch64_ld2<mode>_dreg"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD2_DREG))]
  "TARGET_SIMD"
  "ld1\\t{%S0.1d - %T0.1d}, %1"
  [(set_attr "type" "neon_load1_2reg<q>")]
)

;; D-register ld3 for 64-bit vector modes.
(define_insn "aarch64_ld3<mode>_dreg"
  [(set (match_operand:CI 0 "register_operand" "=w")
	(unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD3_DREG))]
  "TARGET_SIMD"
  "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
  [(set_attr "type" "neon_load3_3reg<q>")]
)

;; DX variant of ld3: plain ld1 of three D-registers.
(define_insn "aarch64_ld3<mode>_dreg"
  [(set (match_operand:CI 0 "register_operand" "=w")
	(unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD3_DREG))]
  "TARGET_SIMD"
  "ld1\\t{%S0.1d - %U0.1d}, %1"
  [(set_attr "type" "neon_load1_3reg<q>")]
)

;; D-register ld4 for 64-bit vector modes.
(define_insn "aarch64_ld4<mode>_dreg"
  [(set (match_operand:XI 0 "register_operand" "=w")
	(unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD4_DREG))]
  "TARGET_SIMD"
  "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
  [(set_attr "type" "neon_load4_4reg<q>")]
)

;; DX variant of ld4: plain ld1 of four D-registers.
(define_insn "aarch64_ld4<mode>_dreg"
  [(set (match_operand:XI 0 "register_operand" "=w")
	(unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD4_DREG))]
  "TARGET_SIMD"
  "ld1\\t{%S0.1d - %V0.1d}, %1"
  [(set_attr "type" "neon_load1_4reg<q>")]
)

;; Builtin expander for ldN on 64-bit element modes: each of the nregs
;; D-registers reads 8 bytes, hence the mem size below.
(define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
 [(match_operand:VSTRUCT 0 "register_operand" "=w")
  (match_operand:DI 1 "register_operand" "r")
  (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
  set_mem_size (mem, <VSTRUCT:nregs> * 8);

  emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
  DONE;
})

;; Builtin ld1: ordinary move on little-endian, ordering-preserving
;; aarch64_be_ld1 on big-endian.
(define_expand "aarch64_ld1<VALL_F16:mode>"
 [(match_operand:VALL_F16 0 "register_operand")
  (match_operand:DI 1 "register_operand")]
  "TARGET_SIMD"
{
  machine_mode mode = <VALL_F16:MODE>mode;
  rtx mem = gen_rtx_MEM (mode, operands[1]);

  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
  else
    emit_move_insn (operands[0], mem);
  DONE;
})

;; Builtin expander for ldN on 128-bit vector modes.
(define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
 [(match_operand:VSTRUCT 0 "register_operand" "=w")
  (match_operand:DI 1 "register_operand" "r")
  (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  machine_mode mode = <VSTRUCT:MODE>mode;
  rtx mem = gen_rtx_MEM (mode, operands[1]);

  emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
  DONE;
})

;; ld1 x2 builtin for 128-bit vector modes.
(define_expand "aarch64_ld1x2<VQ:mode>"
 [(match_operand:OI 0 "register_operand" "=w")
  (match_operand:DI 1 "register_operand" "r")
  (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  machine_mode mode = OImode;
  rtx mem = gen_rtx_MEM (mode, operands[1]);

  emit_insn (gen_aarch64_simd_ld1<VQ:mode>_x2 (operands[0], mem));
  DONE;
})

;; ld1 x2 builtin for 64-bit vector modes.
(define_expand "aarch64_ld1x2<VDC:mode>"
 [(match_operand:OI 0 "register_operand" "=w")
  (match_operand:DI 1 "register_operand" "r")
  (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  machine_mode mode = OImode;
  rtx mem = gen_rtx_MEM (mode, operands[1]);

  emit_insn (gen_aarch64_simd_ld1<VDC:mode>_x2 (operands[0], mem));
  DONE;
})


;; Builtin expander for ldN_lane: checks the lane bound, sizes the BLK
;; memory to the elements actually read, and emits the lane-load insn.
(define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
  [(match_operand:VSTRUCT 0 "register_operand" "=w")
	(match_operand:DI 1 "register_operand" "w")
	(match_operand:VSTRUCT 2 "register_operand" "0")
	(match_operand:SI 3 "immediate_operand" "i")
	(unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
  set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
		     * <VSTRUCT:nregs>);

  aarch64_simd_lane_bounds (operands[3], 0, <VALLDIF:nunits>, NULL);
  emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
	operands[0], mem, operands[2], operands[3]));
  DONE;
})

;; Expanders for builtins to extract vector registers from large
;; opaque integer modes.

;; D-register list.

;; Extract D-register <part> from a register list.  Note the subreg offset
;; is part * 16 (a full Q register per slot); the value is then narrowed
;; to the 64-bit mode via a VDBL-mode temporary.
(define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
 [(match_operand:VDC 0 "register_operand" "=w")
  (match_operand:VSTRUCT 1 "register_operand" "w")
  (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  int part = INTVAL (operands[2]);
  rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
  int offset = part * 16;

  emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
  emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
  DONE;
})

;; Q-register list.

;; Extract Q-register <part> from a register list via a 16-byte subreg.
(define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
 [(match_operand:VQ 0 "register_operand" "=w")
  (match_operand:VSTRUCT 1 "register_operand" "w")
  (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  int part = INTVAL (operands[2]);
  int offset = part * 16;

  emit_move_insn (operands[0],
		  gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
  DONE;
})

;; Permuted-store expanders for neon intrinsics.
;; Permute instructions

;; vec_perm support

;; Standard-named variable permute; the heavy lifting (mask adjustment,
;; one- vs two-register tbl) is done in aarch64_expand_vec_perm.
(define_expand "vec_perm<mode>"
  [(match_operand:VB 0 "register_operand")
   (match_operand:VB 1 "register_operand")
   (match_operand:VB 2 "register_operand")
   (match_operand:VB 3 "register_operand")]
  "TARGET_SIMD"
{
  aarch64_expand_vec_perm (operands[0], operands[1],
			   operands[2], operands[3], <nunits>);
  DONE;
})

;; Table lookup with a single 16-byte table register.
(define_insn "aarch64_tbl1<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
		    (match_operand:VB 2 "register_operand" "w")]
		   UNSPEC_TBL))]
  "TARGET_SIMD"
  "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
  [(set_attr "type" "neon_tbl1<q>")]
)

;; Two source registers.

;; Table lookup over a two-register (OImode) table.
(define_insn "aarch64_tbl2v16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=w")
	(unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
		       (match_operand:V16QI 2 "register_operand" "w")]
		      UNSPEC_TBL))]
  "TARGET_SIMD"
  "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
  [(set_attr "type" "neon_tbl2_q")]
)

;; Two-register tbl with D- or Q-sized index/result (VB iterator).
(define_insn "aarch64_tbl3<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:OI 1 "register_operand" "w")
		    (match_operand:VB 2 "register_operand" "w")]
		      UNSPEC_TBL))]
  "TARGET_SIMD"
  "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
  [(set_attr "type" "neon_tbl3")]
)

;; Two-register tbx: out-of-range indices keep the destination bytes
;; (operand 1 is tied to the output).
(define_insn "aarch64_tbx4<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:VB 1 "register_operand" "0")
		    (match_operand:OI 2 "register_operand" "w")
		    (match_operand:VB 3 "register_operand" "w")]
		   UNSPEC_TBX))]
  "TARGET_SIMD"
  "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
  [(set_attr "type" "neon_tbl4")]
)

;; Three source registers.
;; Table lookup over a three-register (CImode) table.
(define_insn "aarch64_qtbl3<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:CI 1 "register_operand" "w")
		    (match_operand:VB 2 "register_operand" "w")]
		   UNSPEC_TBL))]
  "TARGET_SIMD"
  "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
  [(set_attr "type" "neon_tbl3")]
)

;; Three-register tbx (destination preserved for out-of-range indices).
(define_insn "aarch64_qtbx3<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:VB 1 "register_operand" "0")
		    (match_operand:CI 2 "register_operand" "w")
		    (match_operand:VB 3 "register_operand" "w")]
		   UNSPEC_TBX))]
  "TARGET_SIMD"
  "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
  [(set_attr "type" "neon_tbl3")]
)

;; Four source registers.

;; Table lookup over a four-register (XImode) table.
(define_insn "aarch64_qtbl4<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:XI 1 "register_operand" "w")
		    (match_operand:VB 2 "register_operand" "w")]
		   UNSPEC_TBL))]
  "TARGET_SIMD"
  "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
  [(set_attr "type" "neon_tbl4")]
)

;; Four-register tbx (destination preserved for out-of-range indices).
(define_insn "aarch64_qtbx4<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:VB 1 "register_operand" "0")
		    (match_operand:XI 2 "register_operand" "w")
		    (match_operand:VB 3 "register_operand" "w")]
		   UNSPEC_TBX))]
  "TARGET_SIMD"
  "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
  [(set_attr "type" "neon_tbl4")]
)

;; Concatenate two V16QI registers into an OI register list; split after
;; reload into plain register moves (aarch64_split_combinev16qi).
(define_insn_and_split "aarch64_combinev16qi"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
		    (match_operand:V16QI 2 "register_operand" "w")]
		   UNSPEC_CONCAT))]
  "TARGET_SIMD"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  aarch64_split_combinev16qi (operands);
  DONE;
}
[(set_attr "type" "multiple")]
)

;; This instruction's pattern is generated directly by
;; aarch64_expand_vec_perm_const, so any changes to the pattern would
;; need corresponding changes there.
(define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
	(unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
			  (match_operand:VALL_F16 2 "register_operand" "w")]
	 PERMUTE))]
  "TARGET_SIMD"
  "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_permute<q>")]
)

;; This instruction's pattern is generated directly by
;; aarch64_expand_vec_perm_const, so any changes to the pattern would
;; need corresponding changes there.  Note that the immediate (third)
;; operand is a lane index not a byte index.
(define_insn "aarch64_ext<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
        (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
			  (match_operand:VALL_F16 2 "register_operand" "w")
			  (match_operand:SI 3 "immediate_operand" "i")]
	 UNSPEC_EXT))]
  "TARGET_SIMD"
{
  /* Convert the lane index to the byte offset the ext instruction takes.  */
  operands[3] = GEN_INT (INTVAL (operands[3])
      * GET_MODE_UNIT_SIZE (<MODE>mode));
  return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
}
  [(set_attr "type" "neon_ext<q>")]
)

;; This instruction's pattern is generated directly by
;; aarch64_expand_vec_perm_const, so any changes to the pattern would
;; need corresponding changes there.
;; Element-reversal within containers (REV16/REV32/REV64, selected by the
;; REVERSE iterator); used for constant permutes, see comment above.
(define_insn "aarch64_rev<REVERSE:rev_op><mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
	(unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
			 REVERSE))]
  "TARGET_SIMD"
  "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_rev<q>")]
)

;; ST2 of a D-register structure.  The BLK destination carries the memory
;; reference; the inner VD unspec only selects the element type.
(define_insn "aarch64_st2<mode>_dreg"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:OI 1 "register_operand" "w")
		     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		    UNSPEC_ST2))]
  "TARGET_SIMD"
  "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
  [(set_attr "type" "neon_store2_2reg")]
)

;; For 64-bit element modes (DX) the two-register store is emitted as
;; ST1 of two .1d registers.
(define_insn "aarch64_st2<mode>_dreg"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:OI 1 "register_operand" "w")
		     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		    UNSPEC_ST2))]
  "TARGET_SIMD"
  "st1\\t{%S1.1d - %T1.1d}, %0"
  [(set_attr "type" "neon_store1_2reg")]
)

;; As above for three registers (ST3 / CI tuple).
(define_insn "aarch64_st3<mode>_dreg"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:CI 1 "register_operand" "w")
		     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		    UNSPEC_ST3))]
  "TARGET_SIMD"
  "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
  [(set_attr "type" "neon_store3_3reg")]
)

(define_insn "aarch64_st3<mode>_dreg"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:CI 1 "register_operand" "w")
		     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		    UNSPEC_ST3))]
  "TARGET_SIMD"
  "st1\\t{%S1.1d - %U1.1d}, %0"
  [(set_attr "type" "neon_store1_3reg")]
)

;; As above for four registers (ST4 / XI tuple).
(define_insn "aarch64_st4<mode>_dreg"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:XI 1 "register_operand" "w")
		     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		    UNSPEC_ST4))]
  "TARGET_SIMD"
  "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
  [(set_attr "type" "neon_store4_4reg")]
)

(define_insn "aarch64_st4<mode>_dreg"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:XI 1 "register_operand" "w")
		     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		    UNSPEC_ST4))]
  "TARGET_SIMD"
  "st1\\t{%S1.1d - %V1.1d}, %0"
  [(set_attr "type" "neon_store1_4reg")]
)

;; Builtin expander for the D-register struct stores above: wrap the
;; address in a BLK MEM of nregs * 8 bytes and emit the _dreg insn.
(define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
  [(match_operand:DI 0 "register_operand" "r")
   (match_operand:VSTRUCT 1 "register_operand" "w")
   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
  set_mem_size (mem, <VSTRUCT:nregs> * 8);

  emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
  DONE;
})

;; Q-register variant: the MEM keeps the struct mode and the store is
;; emitted via the aarch64_simd_st<nregs> pattern.
(define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
  [(match_operand:DI 0 "register_operand" "r")
   (match_operand:VSTRUCT 1 "register_operand" "w")
   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  machine_mode mode = <VSTRUCT:MODE>mode;
  rtx mem = gen_rtx_MEM (mode, operands[0]);

  emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
  DONE;
})

;; Lane store: operand 2 is the lane index; the MEM covers one element
;; per register of the tuple.
(define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
  [(match_operand:DI 0 "register_operand" "r")
   (match_operand:VSTRUCT 1 "register_operand" "w")
   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
  set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
		     * <VSTRUCT:nregs>);

  emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
	mem, operands[1], operands[2]));
  DONE;
})

;; ST1 of a single vector: a plain move on little-endian, a dedicated
;; big-endian pattern otherwise.
(define_expand "aarch64_st1<VALL_F16:mode>"
  [(match_operand:DI 0 "register_operand")
   (match_operand:VALL_F16 1 "register_operand")]
  "TARGET_SIMD"
{
  machine_mode mode = <VALL_F16:MODE>mode;
  rtx mem = gen_rtx_MEM (mode, operands[0]);

  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
  else
    emit_move_insn (mem, operands[1]);
  DONE;
})

;; Expander for builtins to insert vector registers into large
;; opaque integer modes.

;; Q-register list.  We don't need a D-reg inserter as we zero
;; extend them in arm_neon.h and insert the resulting Q-regs.

;; Copy the whole tuple, then overwrite the 16-byte slot selected by the
;; immediate in operand 3 via a SUBREG at byte offset part * 16.
(define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
  [(match_operand:VSTRUCT 0 "register_operand" "+w")
   (match_operand:VSTRUCT 1 "register_operand" "0")
   (match_operand:VQ 2 "register_operand" "w")
   (match_operand:SI 3 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  int part = INTVAL (operands[3]);
  int offset = part * 16;

  emit_move_insn (operands[0], operands[1]);
  emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
		  operands[2]);
  DONE;
})

;; Standard pattern name vec_init<mode><Vel>.
;; Standard vector-initialization expander; all strategy selection is in
;; aarch64_expand_vector_init.
(define_expand "vec_init<mode><Vel>"
  [(match_operand:VALL_F16 0 "register_operand" "")
   (match_operand 1 "" "")]
  "TARGET_SIMD"
{
  aarch64_expand_vector_init (operands[0], operands[1]);
  DONE;
})

;; Load one element from memory and replicate it to all lanes (LD1R).
(define_insn "*aarch64_simd_ld1r<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
	(vec_duplicate:VALL_F16
	  (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
  "TARGET_SIMD"
  "ld1r\\t{%0.<Vtype>}, %1"
  [(set_attr "type" "neon_load1_all_lanes")]
)

;; LD1 into a two-register tuple, Q-register element modes.
(define_insn "aarch64_simd_ld1<mode>_x2"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD1))]
  "TARGET_SIMD"
  "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
  [(set_attr "type" "neon_load1_2reg<q>")]
)

;; As above for D-register element modes (VDC).
(define_insn "aarch64_simd_ld1<mode>_x2"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD1))]
  "TARGET_SIMD"
  "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
  [(set_attr "type" "neon_load1_2reg<q>")]
)


;; Floating-point reciprocal estimate, vector and scalar forms.
(define_insn "@aarch64_frecpe<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
	(unspec:VHSDF_HSDF
	 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
	 UNSPEC_FRECPE))]
  "TARGET_SIMD"
  "frecpe\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_fp_recpe_<stype><q>")]
)

;; Scalar-only FRECPX (reciprocal exponent).
(define_insn "aarch64_frecpx<mode>"
  [(set (match_operand:GPF_F16 0 "register_operand" "=w")
	(unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
	 UNSPEC_FRECPX))]
  "TARGET_SIMD"
  "frecpx\t%<s>0, %<s>1"
  [(set_attr "type" "neon_fp_recpx_<GPF_F16:stype>")]
)

;; Reciprocal step (Newton-Raphson refinement partner of FRECPE).
(define_insn "@aarch64_frecps<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
	(unspec:VHSDF_HSDF
	  [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
	   (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
	  UNSPEC_FRECPS))]
  "TARGET_SIMD"
  "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_recps_<stype><q>")]
)

;; Unsigned integer reciprocal estimate.
(define_insn "aarch64_urecpe<mode>"
  [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
	(unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
		UNSPEC_URECPE))]
 "TARGET_SIMD"
 "urecpe\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])

;; Standard pattern name vec_extract<mode><Vel>.

(define_expand "vec_extract<mode><Vel>"
  [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
   (match_operand:VALL_F16 1 "register_operand" "")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_SIMD"
{
    emit_insn
      (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
    DONE;
})

;; aes

;; AESE/AESD round (selected by CRYPTO_AES); operand 1 is commutatively
;; ("%0") tied to the destination.
(define_insn "aarch64_crypto_aes<aes_op>v16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=w")
	(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "%0")
		       (match_operand:V16QI 2 "register_operand" "w")]
	 CRYPTO_AES))]
  "TARGET_SIMD && TARGET_AES"
  "aes<aes_op>\\t%0.16b, %2.16b"
  [(set_attr "type" "crypto_aese")]
)

;; Combiner patterns: an AES round applied to (x ^ y) together with a
;; zero input is just the AES round on x and y directly, since the
;; instruction XORs its two inputs anyway.  Zero may appear as either
;; unspec argument, hence the two orderings.
(define_insn "*aarch64_crypto_aes<aes_op>v16qi_xor_combine"
  [(set (match_operand:V16QI 0 "register_operand" "=w")
	(unspec:V16QI [(xor:V16QI
			(match_operand:V16QI 1 "register_operand" "%0")
			(match_operand:V16QI 2 "register_operand" "w"))
		       (match_operand:V16QI 3 "aarch64_simd_imm_zero" "")]
	 CRYPTO_AES))]
  "TARGET_SIMD && TARGET_AES"
  "aes<aes_op>\\t%0.16b, %2.16b"
  [(set_attr "type" "crypto_aese")]
)

(define_insn "*aarch64_crypto_aes<aes_op>v16qi_xor_combine"
  [(set (match_operand:V16QI 0 "register_operand" "=w")
	(unspec:V16QI [(match_operand:V16QI 3 "aarch64_simd_imm_zero" "")
		       (xor:V16QI (match_operand:V16QI 1 "register_operand" "%0")
				  (match_operand:V16QI 2 "register_operand" "w"))]
	CRYPTO_AES))]
  "TARGET_SIMD && TARGET_AES"
  "aes<aes_op>\\t%0.16b, %2.16b"
  [(set_attr "type" "crypto_aese")]
)

;; When AES/AESMC fusion is enabled we want the register allocation to
;; look like:
;;    AESE Vn, _
;;    AESMC Vn, Vn
;; So prefer to tie operand 1 to operand 0 when fusing.

;; The tied alternative is only enabled when fusion is on, otherwise the
;; untied alternative is used.
(define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=w,w")
	(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
	 CRYPTO_AESMC))]
  "TARGET_SIMD && TARGET_AES"
  "aes<aesmc_op>\\t%0.16b, %1.16b"
  [(set_attr "type" "crypto_aesmc")
   (set_attr_alternative "enabled"
     [(if_then_else (match_test
		       "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
		     (const_string "yes" )
		     (const_string "no"))
      (const_string "yes")])]
)

;; When AESE/AESMC fusion is enabled we really want to keep the two together
;; and enforce the register dependency without scheduling or register
;; allocation messing up the order or introducing moves inbetween.
;;  Mash the two together during combine.
;; Fused AESE+AESMC as one insn (length 8); the earlyclobber "=&w" plus
;; the "0" tie keep the AESMC input in the AESE result register.
(define_insn "*aarch64_crypto_aese_fused"
  [(set (match_operand:V16QI 0 "register_operand" "=&w")
	(unspec:V16QI
	  [(unspec:V16QI
	    [(match_operand:V16QI 1 "register_operand" "0")
	     (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESE)
	  ] UNSPEC_AESMC))]
  "TARGET_SIMD && TARGET_AES
   && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
  "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
  [(set_attr "type" "crypto_aese")
   (set_attr "length" "8")]
)

;; When AESD/AESIMC fusion is enabled we really want to keep the two together
;; and enforce the register dependency without scheduling or register
;; allocation messing up the order or introducing moves inbetween.
;;  Mash the two together during combine.

;; Decrypt-direction twin of the pattern above (AESD+AESIMC).
(define_insn "*aarch64_crypto_aesd_fused"
  [(set (match_operand:V16QI 0 "register_operand" "=&w")
	(unspec:V16QI
	  [(unspec:V16QI
	    [(match_operand:V16QI 1 "register_operand" "0")
	     (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESD)
	  ] UNSPEC_AESIMC))]
  "TARGET_SIMD && TARGET_AES
   && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
  "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
  [(set_attr "type" "crypto_aese")
   (set_attr "length" "8")]
)

;; sha1

;; SHA1H on a scalar SI value held in a vector register.
(define_insn "aarch64_crypto_sha1hsi"
  [(set (match_operand:SI 0 "register_operand" "=w")
	(unspec:SI [(match_operand:SI 1
		       "register_operand" "w")]
	 UNSPEC_SHA1H))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha1h\\t%s0, %s1"
  [(set_attr "type" "crypto_sha1_fast")]
)

;; SHA1H on lane 0 of a V4SI; little-endian only (see the big-endian
;; variant below, which selects lane 3).
(define_insn "aarch64_crypto_sha1hv4si"
  [(set (match_operand:SI 0 "register_operand" "=w")
	(unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
		     (parallel [(const_int 0)]))]
	 UNSPEC_SHA1H))]
  "TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN"
  "sha1h\\t%s0, %s1"
  [(set_attr "type" "crypto_sha1_fast")]
)
;; Big-endian counterpart of aarch64_crypto_sha1hv4si: the same data sits
;; in lane 3 under big-endian lane numbering.
(define_insn "aarch64_be_crypto_sha1hv4si"
  [(set (match_operand:SI 0 "register_operand" "=w")
	(unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
		     (parallel [(const_int 3)]))]
	 UNSPEC_SHA1H))]
  "TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN"
  "sha1h\\t%s0, %s1"
  [(set_attr "type" "crypto_sha1_fast")]
)

;; SHA1 schedule update 1; operand 1 is tied to the destination.
(define_insn "aarch64_crypto_sha1su1v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
		      (match_operand:V4SI 2 "register_operand" "w")]
	 UNSPEC_SHA1SU1))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha1su1\\t%0.4s, %2.4s"
  [(set_attr "type" "crypto_sha1_fast")]
)

;; SHA1C/SHA1M/SHA1P hash update (selected by CRYPTO_SHA1).
(define_insn "aarch64_crypto_sha1<sha1_op>v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
		      (match_operand:SI 2 "register_operand" "w")
		      (match_operand:V4SI 3 "register_operand" "w")]
	 CRYPTO_SHA1))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha1<sha1_op>\\t%q0, %s2, %3.4s"
  [(set_attr "type" "crypto_sha1_slow")]
)

;; SHA1 schedule update 0.
(define_insn "aarch64_crypto_sha1su0v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
		      (match_operand:V4SI 2 "register_operand" "w")
		      (match_operand:V4SI 3 "register_operand" "w")]
	 UNSPEC_SHA1SU0))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha1su0\\t%0.4s, %2.4s, %3.4s"
  [(set_attr "type" "crypto_sha1_xor")]
)

;; sha256

;; SHA256H/SHA256H2 hash update (selected by CRYPTO_SHA256).
(define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
		      (match_operand:V4SI 2 "register_operand" "w")
		      (match_operand:V4SI 3 "register_operand" "w")]
	 CRYPTO_SHA256))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
  [(set_attr "type" "crypto_sha256_slow")]
)

(define_insn "aarch64_crypto_sha256su0v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
		      (match_operand:V4SI 2 "register_operand" "w")]
	 UNSPEC_SHA256SU0))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha256su0\\t%0.4s, %2.4s"
  [(set_attr "type" "crypto_sha256_fast")]
)

(define_insn "aarch64_crypto_sha256su1v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
		      (match_operand:V4SI 2 "register_operand" "w")
		      (match_operand:V4SI 3 "register_operand" "w")]
	 UNSPEC_SHA256SU1))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha256su1\\t%0.4s, %2.4s, %3.4s"
  [(set_attr "type" "crypto_sha256_slow")]
)

;; sha512

;; SHA512H/SHA512H2 hash update (selected by CRYPTO_SHA512).
(define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w")
	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
		      (match_operand:V2DI 2 "register_operand" "w")
		      (match_operand:V2DI 3 "register_operand" "w")]
	 CRYPTO_SHA512))]
  "TARGET_SIMD && TARGET_SHA3"
  "sha512h<sha512_op>\\t%q0, %q2, %3.2d"
  [(set_attr "type" "crypto_sha512")]
)

(define_insn "aarch64_crypto_sha512su0qv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w")
	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
		      (match_operand:V2DI 2 "register_operand" "w")]
	 UNSPEC_SHA512SU0))]
  "TARGET_SIMD && TARGET_SHA3"
  "sha512su0\\t%0.2d, %2.2d"
  [(set_attr "type" "crypto_sha512")]
)

(define_insn "aarch64_crypto_sha512su1qv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w")
	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
		      (match_operand:V2DI 2 "register_operand" "w")
		      (match_operand:V2DI 3 "register_operand" "w")]
	 UNSPEC_SHA512SU1))]
  "TARGET_SIMD && TARGET_SHA3"
  "sha512su1\\t%0.2d, %2.2d, %3.2d"
  [(set_attr "type" "crypto_sha512")]
)

;; sha3

;; Three-way XOR, expressed with genuine XOR rtl so the combiner can
;; form it from generic code.
(define_insn "eor3q<mode>4"
  [(set (match_operand:VQ_I 0 "register_operand" "=w")
	(xor:VQ_I
	 (xor:VQ_I
	  (match_operand:VQ_I 2 "register_operand" "w")
	  (match_operand:VQ_I 3 "register_operand" "w"))
	 (match_operand:VQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD && TARGET_SHA3"
  "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
  [(set_attr "type" "crypto_sha3")]
)

;; Rotate-left-by-one of operand 2, XORed with operand 1.
(define_insn "aarch64_rax1qv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w")
	(xor:V2DI
	 (rotate:V2DI
	  (match_operand:V2DI 2 "register_operand" "w")
	  (const_int 1))
	 (match_operand:V2DI 1 "register_operand" "w")))]
  "TARGET_SIMD && TARGET_SHA3"
  "rax1\\t%0.2d, %1.2d, %2.2d"
  [(set_attr "type" "crypto_sha3")]
)

;; XOR then rotate right by an immediate; the XOR is commutative ("%w").
(define_insn "aarch64_xarqv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w")
	(rotatert:V2DI
	 (xor:V2DI
	  (match_operand:V2DI 1 "register_operand" "%w")
	  (match_operand:V2DI 2 "register_operand" "w"))
	 (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
  "TARGET_SIMD && TARGET_SHA3"
  "xar\\t%0.2d, %1.2d, %2.2d, %3"
  [(set_attr "type" "crypto_sha3")]
)

;; Bit-clear and XOR: op1 ^ (op2 & ~op3).
(define_insn "bcaxq<mode>4"
  [(set (match_operand:VQ_I 0 "register_operand" "=w")
	(xor:VQ_I
	 (and:VQ_I
	  (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
	  (match_operand:VQ_I 2 "register_operand" "w"))
	 (match_operand:VQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD && TARGET_SHA3"
  "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
  [(set_attr "type" "crypto_sha3")]
)

;; SM3

(define_insn "aarch64_sm3ss1qv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
		      (match_operand:V4SI 2 "register_operand" "w")
		      (match_operand:V4SI 3 "register_operand" "w")]
	 UNSPEC_SM3SS1))]
  "TARGET_SIMD && TARGET_SM4"
  "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
  [(set_attr "type" "crypto_sm3")]
)


;; SM3TT1A/1B/2A/2B (selected by CRYPTO_SM3TT); operand 4 is the 2-bit
;; lane immediate.
(define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
		      (match_operand:V4SI 2 "register_operand" "w")
		      (match_operand:V4SI 3 "register_operand" "w")
		      (match_operand:SI 4 "aarch64_imm2" "Ui2")]
	 CRYPTO_SM3TT))]
  "TARGET_SIMD && TARGET_SM4"
  "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
  [(set_attr "type" "crypto_sm3")]
)

(define_insn "aarch64_sm3partw<sm3part_op>qv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
		      (match_operand:V4SI 2 "register_operand" "w")
		      (match_operand:V4SI 3 "register_operand" "w")]
	 CRYPTO_SM3PART))]
  "TARGET_SIMD && TARGET_SM4"
  "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
  [(set_attr "type" "crypto_sm3")]
)

;; SM4

(define_insn "aarch64_sm4eqv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
		      (match_operand:V4SI 2 "register_operand" "w")]
	 UNSPEC_SM4E))]
  "TARGET_SIMD && TARGET_SM4"
  "sm4e\\t%0.4s, %2.4s"
  [(set_attr "type" "crypto_sm4")]
)

(define_insn "aarch64_sm4ekeyqv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
		      (match_operand:V4SI 2 "register_operand" "w")]
	 UNSPEC_SM4EKEY))]
  "TARGET_SIMD && TARGET_SM4"
  "sm4ekey\\t%0.4s, %1.4s, %2.4s"
  [(set_attr "type" "crypto_sm4")]
)

;; fp16fml

;; FMLAL/FMLSL builtin expanders: build the low-half lane selectors p1/p2
;; and hand off to the concrete aarch64_simd_fml* insn below.
(define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(unspec:VDQSF
	 [(match_operand:VDQSF 1 "register_operand" "0")
	  (match_operand:<VFMLA_W> 2 "register_operand" "w")
	  (match_operand:<VFMLA_W> 3 "register_operand" "w")]
	 VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
					    <nunits> * 2, false);
  rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
					    <nunits> * 2, false);

  emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],
								operands[1],
								operands[2],
								operands[3],
								p1, p2));
  DONE;

})

;; As above but selecting the high halves of the inputs.
(define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(unspec:VDQSF
	 [(match_operand:VDQSF 1 "register_operand" "0")
	  (match_operand:<VFMLA_W> 2 "register_operand" "w")
	  (match_operand:<VFMLA_W> 3 "register_operand" "w")]
	 VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
  rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);

  emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],
								 operands[1],
								 operands[2],
								 operands[3],
								 p1, p2));
  DONE;
})

;; Widening HF->SF multiply-add on the low halves, expressed as an fma
;; over float_extends of lo-half vec_selects.
(define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(fma:VDQSF
	 (float_extend:VDQSF
	  (vec_select:<VFMLA_SEL_W>
	   (match_operand:<VFMLA_W> 2 "register_operand" "w")
	   (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
	 (float_extend:VDQSF
	  (vec_select:<VFMLA_SEL_W>
	   (match_operand:<VFMLA_W> 3 "register_operand" "w")
	   (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
	 (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; Subtracting form: the first multiplicand is negated before extension.
(define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(fma:VDQSF
	 (float_extend:VDQSF
	  (neg:<VFMLA_SEL_W>
	   (vec_select:<VFMLA_SEL_W>
	    (match_operand:<VFMLA_W> 2 "register_operand" "w")
	    (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
	 (float_extend:VDQSF
	  (vec_select:<VFMLA_SEL_W>
	   (match_operand:<VFMLA_W> 3 "register_operand" "w")
	   (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
	 (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; High-half variants (FMLAL2/FMLSL2).
(define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(fma:VDQSF
	 (float_extend:VDQSF
	  (vec_select:<VFMLA_SEL_W>
	   (match_operand:<VFMLA_W> 2 "register_operand" "w")
	   (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
	 (float_extend:VDQSF
	  (vec_select:<VFMLA_SEL_W>
	   (match_operand:<VFMLA_W> 3 "register_operand" "w")
	   (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
	 (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(fma:VDQSF
	 (float_extend:VDQSF
	  (neg:<VFMLA_SEL_W>
	   (vec_select:<VFMLA_SEL_W>
	    (match_operand:<VFMLA_W> 2 "register_operand" "w")
	    (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
	 (float_extend:VDQSF
	  (vec_select:<VFMLA_SEL_W>
	   (match_operand:<VFMLA_W> 3 "register_operand" "w")
	   (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
	 (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; Lane forms: operand 4 is the lane index, converted to the endian-
;; adjusted rtx before emitting the concrete insn.
(define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "")
	(unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
			   (match_operand:V4HF 2 "register_operand" "")
			   (match_operand:V4HF 3 "register_operand" "")
			   (match_operand:SI 4 "aarch64_imm2" "")]
	 VFMLA16_LOW))]
  "TARGET_F16FML"
{
    rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
    rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

    emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],
							    operands[1],
							    operands[2],
							    operands[3],
							    p1, lane));
    DONE;
}
)

(define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "")
	(unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
			   (match_operand:V4HF 2 "register_operand" "")
			   (match_operand:V4HF 3 "register_operand" "")
			   (match_operand:SI 4 "aarch64_imm2" "")]
	 VFMLA16_HIGH))]
  "TARGET_F16FML"
{
    rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
    rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

    emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],
							     operands[1],
							     operands[2],
							     operands[3],
							     p1, lane));
    DONE;
})

;; By-lane multiplicand: operand 3 is restricted to "x" (V0-V15) as the
;; constraint shows; the selected lane is broadcast via vec_duplicate.
(define_insn "aarch64_simd_fmlal_lane_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	   (vec_select:V2HF
	    (match_operand:V4HF 2 "register_operand" "w")
	    (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
	 (float_extend:V2SF
	   (vec_duplicate:V2HF
	    (vec_select:HF
	     (match_operand:V4HF 3 "register_operand" "x")
	     (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	  (neg:V2HF
	   (vec_select:V2HF
	    (match_operand:V4HF 2 "register_operand" "w")
	    (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlal_lane_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	   (vec_select:V2HF
	    (match_operand:V4HF 2 "register_operand" "w")
	    (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
	 (float_extend:V2SF
	   (vec_duplicate:V2HF
	    (vec_select:HF
	     (match_operand:V4HF 3 "register_operand" "x")
	     (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlsl_lane_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	  (neg:V2HF
	   (vec_select:V2HF
	    (match_operand:V4HF 2 "register_operand" "w")
	    (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; Q-register by-lane forms: V8HF inputs, 3-bit lane immediate.
(define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "")
	(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
		      (match_operand:V8HF 2 "register_operand" "")
		      (match_operand:V8HF 3 "register_operand" "")
		      (match_operand:SI 4 "aarch64_lane_imm3" "")]
	 VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],
							    operands[1],
							    operands[2],
							    operands[3],
							    p1, lane));
  DONE;
})

(define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "")
	(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
		      (match_operand:V8HF 2 "register_operand" "")
		      (match_operand:V8HF 3 "register_operand" "")
		      (match_operand:SI 4 "aarch64_lane_imm3" "")]
	 VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],
							     operands[1],
							     operands[2],
							     operands[3],
							     p1, lane));
  DONE;
})

(define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (vec_select:V4HF
	   (match_operand:V8HF 2 "register_operand" "w")
	   (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
6635 (fma:V4SF 6636 (float_extend:V4SF 6637 (neg:V4HF 6638 (vec_select:V4HF 6639 (match_operand:V8HF 2 "register_operand" "w") 6640 (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))) 6641 (float_extend:V4SF 6642 (vec_duplicate:V4HF 6643 (vec_select:HF 6644 (match_operand:V8HF 3 "register_operand" "x") 6645 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")])))) 6646 (match_operand:V4SF 1 "register_operand" "0")))] 6647 "TARGET_F16FML" 6648 "fmlsl\\t%0.4s, %2.4h, %3.h[%5]" 6649 [(set_attr "type" "neon_fp_mul_s")] 6650) 6651 6652(define_insn "aarch64_simd_fmlalq_laneq_highv4sf" 6653 [(set (match_operand:V4SF 0 "register_operand" "=w") 6654 (fma:V4SF 6655 (float_extend:V4SF 6656 (vec_select:V4HF 6657 (match_operand:V8HF 2 "register_operand" "w") 6658 (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))) 6659 (float_extend:V4SF 6660 (vec_duplicate:V4HF 6661 (vec_select:HF 6662 (match_operand:V8HF 3 "register_operand" "x") 6663 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")])))) 6664 (match_operand:V4SF 1 "register_operand" "0")))] 6665 "TARGET_F16FML" 6666 "fmlal2\\t%0.4s, %2.4h, %3.h[%5]" 6667 [(set_attr "type" "neon_fp_mul_s")] 6668) 6669 6670(define_insn "aarch64_simd_fmlslq_laneq_highv4sf" 6671 [(set (match_operand:V4SF 0 "register_operand" "=w") 6672 (fma:V4SF 6673 (float_extend:V4SF 6674 (neg:V4HF 6675 (vec_select:V4HF 6676 (match_operand:V8HF 2 "register_operand" "w") 6677 (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))) 6678 (float_extend:V4SF 6679 (vec_duplicate:V4HF 6680 (vec_select:HF 6681 (match_operand:V8HF 3 "register_operand" "x") 6682 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")])))) 6683 (match_operand:V4SF 1 "register_operand" "0")))] 6684 "TARGET_F16FML" 6685 "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]" 6686 [(set_attr "type" "neon_fp_mul_s")] 6687) 6688 6689(define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf" 6690 [(set (match_operand:V2SF 0 "register_operand" "") 6691 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" 
"") 6692 (match_operand:V4HF 2 "register_operand" "") 6693 (match_operand:V8HF 3 "register_operand" "") 6694 (match_operand:SI 4 "aarch64_lane_imm3" "")] 6695 VFMLA16_LOW))] 6696 "TARGET_F16FML" 6697{ 6698 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false); 6699 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4])); 6700 6701 emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0], 6702 operands[1], 6703 operands[2], 6704 operands[3], 6705 p1, lane)); 6706 DONE; 6707 6708}) 6709 6710(define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf" 6711 [(set (match_operand:V2SF 0 "register_operand" "") 6712 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "") 6713 (match_operand:V4HF 2 "register_operand" "") 6714 (match_operand:V8HF 3 "register_operand" "") 6715 (match_operand:SI 4 "aarch64_lane_imm3" "")] 6716 VFMLA16_HIGH))] 6717 "TARGET_F16FML" 6718{ 6719 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true); 6720 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4])); 6721 6722 emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0], 6723 operands[1], 6724 operands[2], 6725 operands[3], 6726 p1, lane)); 6727 DONE; 6728 6729}) 6730 6731(define_insn "aarch64_simd_fmlal_laneq_lowv2sf" 6732 [(set (match_operand:V2SF 0 "register_operand" "=w") 6733 (fma:V2SF 6734 (float_extend:V2SF 6735 (vec_select:V2HF 6736 (match_operand:V4HF 2 "register_operand" "w") 6737 (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))) 6738 (float_extend:V2SF 6739 (vec_duplicate:V2HF 6740 (vec_select:HF 6741 (match_operand:V8HF 3 "register_operand" "x") 6742 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")])))) 6743 (match_operand:V2SF 1 "register_operand" "0")))] 6744 "TARGET_F16FML" 6745 "fmlal\\t%0.2s, %2.2h, %3.h[%5]" 6746 [(set_attr "type" "neon_fp_mul_s")] 6747) 6748 6749(define_insn "aarch64_simd_fmlsl_laneq_lowv2sf" 6750 [(set (match_operand:V2SF 0 "register_operand" "=w") 6751 (fma:V2SF 6752 (float_extend:V2SF 
6753 (neg:V2HF 6754 (vec_select:V2HF 6755 (match_operand:V4HF 2 "register_operand" "w") 6756 (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))) 6757 (float_extend:V2SF 6758 (vec_duplicate:V2HF 6759 (vec_select:HF 6760 (match_operand:V8HF 3 "register_operand" "x") 6761 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")])))) 6762 (match_operand:V2SF 1 "register_operand" "0")))] 6763 "TARGET_F16FML" 6764 "fmlsl\\t%0.2s, %2.2h, %3.h[%5]" 6765 [(set_attr "type" "neon_fp_mul_s")] 6766) 6767 6768(define_insn "aarch64_simd_fmlal_laneq_highv2sf" 6769 [(set (match_operand:V2SF 0 "register_operand" "=w") 6770 (fma:V2SF 6771 (float_extend:V2SF 6772 (vec_select:V2HF 6773 (match_operand:V4HF 2 "register_operand" "w") 6774 (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))) 6775 (float_extend:V2SF 6776 (vec_duplicate:V2HF 6777 (vec_select:HF 6778 (match_operand:V8HF 3 "register_operand" "x") 6779 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")])))) 6780 (match_operand:V2SF 1 "register_operand" "0")))] 6781 "TARGET_F16FML" 6782 "fmlal2\\t%0.2s, %2.2h, %3.h[%5]" 6783 [(set_attr "type" "neon_fp_mul_s")] 6784) 6785 6786(define_insn "aarch64_simd_fmlsl_laneq_highv2sf" 6787 [(set (match_operand:V2SF 0 "register_operand" "=w") 6788 (fma:V2SF 6789 (float_extend:V2SF 6790 (neg:V2HF 6791 (vec_select:V2HF 6792 (match_operand:V4HF 2 "register_operand" "w") 6793 (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))) 6794 (float_extend:V2SF 6795 (vec_duplicate:V2HF 6796 (vec_select:HF 6797 (match_operand:V8HF 3 "register_operand" "x") 6798 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")])))) 6799 (match_operand:V2SF 1 "register_operand" "0")))] 6800 "TARGET_F16FML" 6801 "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]" 6802 [(set_attr "type" "neon_fp_mul_s")] 6803) 6804 6805(define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf" 6806 [(set (match_operand:V4SF 0 "register_operand" "") 6807 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "") 6808 (match_operand:V8HF 2 
"register_operand" "") 6809 (match_operand:V4HF 3 "register_operand" "") 6810 (match_operand:SI 4 "aarch64_imm2" "")] 6811 VFMLA16_LOW))] 6812 "TARGET_F16FML" 6813{ 6814 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false); 6815 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4])); 6816 6817 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0], 6818 operands[1], 6819 operands[2], 6820 operands[3], 6821 p1, lane)); 6822 DONE; 6823}) 6824 6825(define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf" 6826 [(set (match_operand:V4SF 0 "register_operand" "") 6827 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "") 6828 (match_operand:V8HF 2 "register_operand" "") 6829 (match_operand:V4HF 3 "register_operand" "") 6830 (match_operand:SI 4 "aarch64_imm2" "")] 6831 VFMLA16_HIGH))] 6832 "TARGET_F16FML" 6833{ 6834 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true); 6835 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4])); 6836 6837 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0], 6838 operands[1], 6839 operands[2], 6840 operands[3], 6841 p1, lane)); 6842 DONE; 6843}) 6844 6845(define_insn "aarch64_simd_fmlalq_lane_lowv4sf" 6846 [(set (match_operand:V4SF 0 "register_operand" "=w") 6847 (fma:V4SF 6848 (float_extend:V4SF 6849 (vec_select:V4HF 6850 (match_operand:V8HF 2 "register_operand" "w") 6851 (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))) 6852 (float_extend:V4SF 6853 (vec_duplicate:V4HF 6854 (vec_select:HF 6855 (match_operand:V4HF 3 "register_operand" "x") 6856 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")])))) 6857 (match_operand:V4SF 1 "register_operand" "0")))] 6858 "TARGET_F16FML" 6859 "fmlal\\t%0.4s, %2.4h, %3.h[%5]" 6860 [(set_attr "type" "neon_fp_mul_s")] 6861) 6862 6863(define_insn "aarch64_simd_fmlslq_lane_lowv4sf" 6864 [(set (match_operand:V4SF 0 "register_operand" "=w") 6865 (fma:V4SF 6866 (float_extend:V4SF 6867 (neg:V4HF 6868 (vec_select:V4HF 6869 
(match_operand:V8HF 2 "register_operand" "w") 6870 (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))) 6871 (float_extend:V4SF 6872 (vec_duplicate:V4HF 6873 (vec_select:HF 6874 (match_operand:V4HF 3 "register_operand" "x") 6875 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")])))) 6876 (match_operand:V4SF 1 "register_operand" "0")))] 6877 "TARGET_F16FML" 6878 "fmlsl\\t%0.4s, %2.4h, %3.h[%5]" 6879 [(set_attr "type" "neon_fp_mul_s")] 6880) 6881 6882(define_insn "aarch64_simd_fmlalq_lane_highv4sf" 6883 [(set (match_operand:V4SF 0 "register_operand" "=w") 6884 (fma:V4SF 6885 (float_extend:V4SF 6886 (vec_select:V4HF 6887 (match_operand:V8HF 2 "register_operand" "w") 6888 (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))) 6889 (float_extend:V4SF 6890 (vec_duplicate:V4HF 6891 (vec_select:HF 6892 (match_operand:V4HF 3 "register_operand" "x") 6893 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")])))) 6894 (match_operand:V4SF 1 "register_operand" "0")))] 6895 "TARGET_F16FML" 6896 "fmlal2\\t%0.4s, %2.4h, %3.h[%5]" 6897 [(set_attr "type" "neon_fp_mul_s")] 6898) 6899 6900(define_insn "aarch64_simd_fmlslq_lane_highv4sf" 6901 [(set (match_operand:V4SF 0 "register_operand" "=w") 6902 (fma:V4SF 6903 (float_extend:V4SF 6904 (neg:V4HF 6905 (vec_select:V4HF 6906 (match_operand:V8HF 2 "register_operand" "w") 6907 (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))) 6908 (float_extend:V4SF 6909 (vec_duplicate:V4HF 6910 (vec_select:HF 6911 (match_operand:V4HF 3 "register_operand" "x") 6912 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")])))) 6913 (match_operand:V4SF 1 "register_operand" "0")))] 6914 "TARGET_F16FML" 6915 "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]" 6916 [(set_attr "type" "neon_fp_mul_s")] 6917) 6918 6919;; pmull 6920 6921(define_insn "aarch64_crypto_pmulldi" 6922 [(set (match_operand:TI 0 "register_operand" "=w") 6923 (unspec:TI [(match_operand:DI 1 "register_operand" "w") 6924 (match_operand:DI 2 "register_operand" "w")] 6925 UNSPEC_PMULL))] 6926 "TARGET_SIMD && 
TARGET_AES" 6927 "pmull\\t%0.1q, %1.1d, %2.1d" 6928 [(set_attr "type" "crypto_pmull")] 6929) 6930 6931(define_insn "aarch64_crypto_pmullv2di" 6932 [(set (match_operand:TI 0 "register_operand" "=w") 6933 (unspec:TI [(match_operand:V2DI 1 "register_operand" "w") 6934 (match_operand:V2DI 2 "register_operand" "w")] 6935 UNSPEC_PMULL2))] 6936 "TARGET_SIMD && TARGET_AES" 6937 "pmull2\\t%0.1q, %1.2d, %2.2d" 6938 [(set_attr "type" "crypto_pmull")] 6939) 6940