;; Machine description for AArch64 AdvSIMD architecture.
;; Copyright (C) 2011-2017 Free Software Foundation, Inc.
;; Contributed by ARM Ltd.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

;; Standard vector move expander.  A store to memory cannot take an
;; arbitrary source operand, so force the source into a register when
;; the destination is a MEM.
(define_expand "mov<mode>"
  [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
	(match_operand:VALL_F16 1 "general_operand" ""))]
  "TARGET_SIMD"
  "
    if (GET_CODE (operands[0]) == MEM)
      operands[1] = force_reg (<MODE>mode, operands[1]);
  "
)

;; Misaligned vector move.  AdvSIMD loads/stores handle misalignment,
;; so this only has to legitimize the operands, never FAIL.
(define_expand "movmisalign<mode>"
  [(set (match_operand:VALL 0 "nonimmediate_operand" "")
	(match_operand:VALL 1 "general_operand" ""))]
  "TARGET_SIMD"
{
  /* This pattern is not permitted to fail during expansion: if both arguments
     are non-registers (e.g. memory := constant, which can be created by the
     auto-vectorizer), force operand 1 into a register.  */
  if (!register_operand (operands[0], <MODE>mode)
      && !register_operand (operands[1], <MODE>mode))
    operands[1] = force_reg (<MODE>mode, operands[1]);
})

;; Duplicate a scalar into every lane of an integer vector, from either
;; a general-purpose register or lane 0 of a SIMD register.
(define_insn "aarch64_simd_dup<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
	(vec_duplicate:VDQ_I
	  (match_operand:<VEL> 1 "register_operand" "r, w")))]
  "TARGET_SIMD"
  "@
   dup\\t%0.<Vtype>, %<vw>1
   dup\\t%0.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon_from_gp<q>, neon_dup<q>")]
)

;; Floating-point variant: the scalar always lives in a SIMD register.
(define_insn "aarch64_simd_dup<mode>"
  [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
	(vec_duplicate:VDQF_F16
	  (match_operand:<VEL> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon_dup<q>")]
)

;; Duplicate one selected lane into every lane.  The lane index is
;; remapped with ENDIAN_LANE_N so that GCC's lane numbering matches the
;; architectural numbering on big-endian.
(define_insn "aarch64_dup_lane<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
	(vec_duplicate:VALL_F16
	  (vec_select:<VEL>
	    (match_operand:VALL_F16 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")])
	  )))]
  "TARGET_SIMD"
  {
    operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
    return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_dup<q>")]
)

;; As above, but the source vector has the opposite width (64 <-> 128
;; bit), so the lane index is remapped in the source vector's mode.
(define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
  [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
	(vec_duplicate:VALL_F16_NO_V2Q
	  (vec_select:<VEL>
	    (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")])
	  )))]
  "TARGET_SIMD"
  {
    operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
					  INTVAL (operands[2])));
    return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_dup<q>")]
)

;; 64-bit vector moves: load, store, SIMD register copy (via ORR),
;; SIMD->GP (umov), GP->SIMD (fmov), GP->GP, and vector immediate.
(define_insn "*aarch64_simd_mov<mode>"
  [(set (match_operand:VD 0 "nonimmediate_operand"
		"=w, m,  w, ?r, ?w, ?r, w")
	(match_operand:VD 1 "general_operand"
		"m,  w,  w,  w,  r,  r, Dn"))]
  "TARGET_SIMD
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
   switch (which_alternative)
     {
     case 0: return "ldr\\t%d0, %1";
     case 1: return "str\\t%d1, %0";
     case 2: return "orr\t%0.<Vbtype>, %1.<Vbtype>, %1.<Vbtype>";
     case 3: return "umov\t%0, %1.d[0]";
     case 4: return "fmov\t%d0, %1";
     case 5: return "mov\t%0, %1";
     case 6:
	return aarch64_output_simd_mov_immediate (operands[1],
						  <MODE>mode, 64);
     default: gcc_unreachable ();
     }
}
  [(set_attr "type" "neon_load1_1reg<q>, neon_store1_1reg<q>,\
		     neon_logic<q>, neon_to_gp<q>, f_mcr,\
		     mov_reg, neon_move<q>")]
)

;; 128-bit vector moves.  The alternatives involving general-purpose
;; registers need two instructions and emit "#" so they are split
;; after reload (see the define_splits below); hence length 8.
(define_insn "*aarch64_simd_mov<mode>"
  [(set (match_operand:VQ 0 "nonimmediate_operand"
		"=w, m,  w, ?r, ?w, ?r, w")
	(match_operand:VQ 1 "general_operand"
		"m,  w,  w,  w,  r,  r, Dn"))]
  "TARGET_SIMD
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  switch (which_alternative)
    {
    case 0:
	return "ldr\\t%q0, %1";
    case 1:
	return "str\\t%q1, %0";
    case 2:
	return "orr\t%0.<Vbtype>, %1.<Vbtype>, %1.<Vbtype>";
    case 3:
    case 4:
    case 5:
	return "#";
    case 6:
	return aarch64_output_simd_mov_immediate (operands[1], <MODE>mode, 128);
    default:
	gcc_unreachable ();
    }
}
  [(set_attr "type" "neon_load1_1reg<q>, neon_store1_1reg<q>,\
		     neon_logic<q>, multiple, multiple, multiple,\
		     neon_move<q>")
   (set_attr "length" "4,4,4,8,8,8,4")]
)

;; Load a pair of adjacent 64-bit vectors with a single LDP.  The
;; condition requires the second address to be exactly one vector's
;; size past the first.
(define_insn "load_pair<mode>"
  [(set (match_operand:VD 0 "register_operand" "=w")
	(match_operand:VD 1 "aarch64_mem_pair_operand" "Ump"))
   (set (match_operand:VD 2 "register_operand" "=w")
	(match_operand:VD 3 "memory_operand" "m"))]
  "TARGET_SIMD
   && rtx_equal_p (XEXP (operands[3], 0),
		   plus_constant (Pmode,
				  XEXP (operands[1], 0),
				  GET_MODE_SIZE (<MODE>mode)))"
  "ldp\\t%d0, %d2, %1"
  [(set_attr "type" "neon_ldp")]
)
;; Store a pair of adjacent 64-bit vectors with a single STP.  The
;; condition mirrors load_pair: the second address must be exactly one
;; vector's size past the first.
(define_insn "store_pair<mode>"
  [(set (match_operand:VD 0 "aarch64_mem_pair_operand" "=Ump")
	(match_operand:VD 1 "register_operand" "w"))
   (set (match_operand:VD 2 "memory_operand" "=m")
	(match_operand:VD 3 "register_operand" "w"))]
  "TARGET_SIMD
   && rtx_equal_p (XEXP (operands[2], 0),
		   plus_constant (Pmode,
				  XEXP (operands[0], 0),
				  GET_MODE_SIZE (<MODE>mode)))"
  "stp\\t%d1, %d3, %0"
  [(set_attr "type" "neon_stp")]
)

;; After reload, split a 128-bit GP-to-GP vector copy into two DImode
;; register moves.
(define_split
  [(set (match_operand:VQ 0 "register_operand" "")
      (match_operand:VQ 1 "register_operand" ""))]
  "TARGET_SIMD && reload_completed
   && GP_REGNUM_P (REGNO (operands[0]))
   && GP_REGNUM_P (REGNO (operands[1]))"
  [(const_int 0)]
{
  aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
  DONE;
})

;; After reload, split a 128-bit move that crosses between the FP and
;; GP register files.
(define_split
  [(set (match_operand:VQ 0 "register_operand" "")
        (match_operand:VQ 1 "register_operand" ""))]
  "TARGET_SIMD && reload_completed
   && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
       || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
  [(const_int 0)]
{
  aarch64_split_simd_move (operands[0], operands[1]);
  DONE;
})

;; Expand a 128-bit cross-file move as two 64-bit halves:
;; GP source -> move lo then hi quad into the vector register;
;; otherwise -> extract the lo and hi halves into GP registers.
(define_expand "aarch64_split_simd_mov<mode>"
  [(set (match_operand:VQ 0)
	(match_operand:VQ 1))]
  "TARGET_SIMD"
  {
    rtx dst = operands[0];
    rtx src = operands[1];

    if (GP_REGNUM_P (REGNO (src)))
      {
	rtx src_low_part = gen_lowpart (<VHALF>mode, src);
	rtx src_high_part = gen_highpart (<VHALF>mode, src);

	emit_insn
	  (gen_move_lo_quad_<mode> (dst, src_low_part));
	emit_insn
	  (gen_move_hi_quad_<mode> (dst, src_high_part));
      }

    else
      {
	rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
	rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
	rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
	rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);

	emit_insn
	  (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
	emit_insn
	  (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
      }
    DONE;
  }
)

;; Move the low 64 bits of a 128-bit vector to a GP register.
(define_insn "aarch64_simd_mov_from_<mode>low"
  [(set (match_operand:<VHALF> 0 "register_operand" "=r")
        (vec_select:<VHALF>
          (match_operand:VQ 1 "register_operand" "w")
          (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
  "TARGET_SIMD && reload_completed"
  "umov\t%0, %1.d[0]"
  [(set_attr "type" "neon_to_gp<q>")
   (set_attr "length" "4")
  ])

;; Move the high 64 bits of a 128-bit vector to a GP register.
(define_insn "aarch64_simd_mov_from_<mode>high"
  [(set (match_operand:<VHALF> 0 "register_operand" "=r")
        (vec_select:<VHALF>
          (match_operand:VQ 1 "register_operand" "w")
          (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
  "TARGET_SIMD && reload_completed"
  "umov\t%0, %1.d[1]"
  [(set_attr "type" "neon_to_gp<q>")
   (set_attr "length" "4")
  ])

;; ORN computes %2 | ~%1; note the operand swap in the template to
;; match the RTL (ior (not op1) op2).
(define_insn "orn<mode>3"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
		(match_operand:VDQ_I 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

;; BIC computes %2 & ~%1; operands swapped as in orn above.
(define_insn "bic<mode>3"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
		(match_operand:VDQ_I 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "add<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		  (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_add<q>")]
)

(define_insn "sub<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		   (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_sub<q>")]
)

(define_insn "mul<mode>3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
		   (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mul_<Vetype><q>")]
)

;; Byte-swap each element using the REV family of instructions.
(define_insn "bswap<mode>2"
  [(set (match_operand:VDQHSD 0 "register_operand" "=w")
        (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_rev<q>")]
)

;; Reverse the bits within each byte.
(define_insn "aarch64_rbit<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:VB 1 "register_operand" "w")]
		   UNSPEC_RBIT))]
  "TARGET_SIMD"
  "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_rbit")]
)

;; ctz = clz of the bit-reversed element: bswap the element, rbit the
;; bytes (the subreg reinterprets the vector as bytes), then clz.
(define_expand "ctz<mode>2"
  [(set (match_operand:VS 0 "register_operand")
	(ctz:VS (match_operand:VS 1 "register_operand")))]
  "TARGET_SIMD"
  {
     emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
     rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
					     <MODE>mode, 0);
     emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
     emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
     DONE;
  }
)

;; copysign (op1, op2) via BSL with a sign-bit mask: select the sign
;; bit from op2 and the remaining bits from op1.
(define_expand "copysign<mode>3"
  [(match_operand:VHSDF 0 "register_operand")
   (match_operand:VHSDF 1 "register_operand")
   (match_operand:VHSDF 2 "register_operand")]
  "TARGET_FLOAT && TARGET_SIMD"
{
  rtx v_bitmask = gen_reg_rtx (<V_cmp_result>mode);
  int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;

  emit_move_insn (v_bitmask,
		  aarch64_simd_gen_const_vector_dup (<V_cmp_result>mode,
						     HOST_WIDE_INT_M1U << bits));
  emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
					 operands[2], operands[1]));
  DONE;
}
)

;; Multiply by one lane of a vector (lane index endian-remapped).
(define_insn "*aarch64_mul3_elt<mode>"
 [(set (match_operand:VMUL 0 "register_operand" "=w")
    (mult:VMUL
      (vec_duplicate:VMUL
	  (vec_select:<VEL>
	    (match_operand:VMUL 1 "register_operand" "<h_con>")
	    (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VMUL 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
    return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)

;; As above, with the lane taken from a vector of the opposite width.
(define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
  [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
     (mult:VMUL_CHANGE_NLANES
       (vec_duplicate:VMUL_CHANGE_NLANES
	  (vec_select:<VEL>
	    (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
	    (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
					  INTVAL (operands[2])));
    return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
)

;; Multiply by a duplicated scalar; the scalar is already lane 0 of a
;; SIMD register so no endian remapping is needed.
(define_insn "*aarch64_mul3_elt_from_dup<mode>"
 [(set (match_operand:VMUL 0 "register_operand" "=w")
    (mult:VMUL
      (vec_duplicate:VMUL
	    (match_operand:<VEL> 1 "register_operand" "<h_con>"))
      (match_operand:VMUL 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)

;; Reciprocal square-root estimate.
(define_insn "aarch64_rsqrte<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
	(unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
		     UNSPEC_RSQRTE))]
  "TARGET_SIMD"
  "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])

;; Reciprocal square-root Newton-Raphson step.
(define_insn "aarch64_rsqrts<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
	(unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
			    (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
	 UNSPEC_RSQRTS))]
  "TARGET_SIMD"
  "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])

;; Approximate reciprocal square root, expanded out of line.
(define_expand "rsqrt<mode>2"
  [(set (match_operand:VALLF 0 "register_operand" "=w")
	(unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
		     UNSPEC_RSQRT))]
  "TARGET_SIMD"
{
  aarch64_emit_approx_sqrt (operands[0], operands[1], true);
  DONE;
})

;; Scalar DF multiply by a selected lane of a V2DF register.
(define_insn "*aarch64_mul3_elt_to_64v2df"
  [(set (match_operand:DF 0 "register_operand" "=w")
     (mult:DF
       (vec_select:DF
	 (match_operand:V2DF 1 "register_operand" "w")
	 (parallel [(match_operand:SI 2 "immediate_operand")]))
       (match_operand:DF 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
    return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
  }
  [(set_attr "type" "neon_fp_mul_d_scalar_q")]
)

(define_insn "neg<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
	(neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "neg\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_neg<q>")]
)

(define_insn "abs<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
	(abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "abs\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_abs<q>")]
)

;; The intrinsic version of integer ABS must not be allowed to
;; combine with any operation with an integrated ABS step, such
;; as SABD.
;; Intrinsic ABS kept as an unspec so combine cannot fold it into
;; ABS-absorbing instructions (see comment above).
(define_insn "aarch64_abs<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
	  (unspec:VSDQ_I_DI
	    [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
	   UNSPEC_ABS))]
  "TARGET_SIMD"
  "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_abs<q>")]
)

;; Signed absolute difference: abs (op1 - op2).
(define_insn "abd<mode>_3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(abs:VDQ_BHSI (minus:VDQ_BHSI
		       (match_operand:VDQ_BHSI 1 "register_operand" "w")
		       (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_abd<q>")]
)

;; Signed absolute difference and accumulate: op3 += abs (op1 - op2).
;; Operand 3 is tied to the destination ("0").
(define_insn "aba<mode>_3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
			 (match_operand:VDQ_BHSI 1 "register_operand" "w")
			 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
		       (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_arith_acc<q>")]
)

;; Floating-point absolute difference.
(define_insn "fabd<mode>3"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
	(abs:VHSDF_HSDF
	  (minus:VHSDF_HSDF
	    (match_operand:VHSDF_HSDF 1 "register_operand" "w")
	    (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_abd_<stype><q>")]
)

(define_insn "and<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		 (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "ior<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		 (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "xor<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		 (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "one_cmpl<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "not\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

;; Insert one element into a vector.  Operand 2 is a one-hot mask for
;; the vec_merge; the code converts it to a lane number (endian
;; remapped) and rewrites operand 2 for the %p2 output modifier.
(define_insn "aarch64_simd_vec_set<mode>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w,w,w")
        (vec_merge:VDQ_BHSI
	    (vec_duplicate:VDQ_BHSI
		(match_operand:<VEL> 1 "aarch64_simd_general_operand" "r,w,Utv"))
	    (match_operand:VDQ_BHSI 3 "register_operand" "0,0,0")
	    (match_operand:SI 2 "immediate_operand" "i,i,i")))]
  "TARGET_SIMD"
  {
   int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
   operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
   switch (which_alternative)
     {
     case 0:
	return "ins\\t%0.<Vetype>[%p2], %w1";
     case 1:
	return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
     case 2:
        return "ld1\\t{%0.<Vetype>}[%p2], %1";
     default:
	gcc_unreachable ();
     }
  }
  [(set_attr "type" "neon_from_gp<q>, neon_ins<q>, neon_load1_1reg<q>")]
)

;; Copy lane 4 of operand 3 into the lane of operand 0 selected by the
;; one-hot mask in operand 2; other lanes come from operand 1 ("0").
(define_insn "*aarch64_simd_vec_copy_lane<mode>"
  [(set (match_operand:VALL 0 "register_operand" "=w")
	(vec_merge:VALL
	    (vec_duplicate:VALL
	      (vec_select:<VEL>
		(match_operand:VALL 3 "register_operand" "w")
		(parallel
		  [(match_operand:SI 4 "immediate_operand" "i")])))
	    (match_operand:VALL 1 "register_operand" "0")
	    (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
    operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
    operands[4] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[4])));

    return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_ins<q>")]
)

;; As above, but the source lane comes from a vector of the opposite
;; width, so its lane index is remapped in that vector's mode.
(define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
  [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
	(vec_merge:VALL_F16_NO_V2Q
	    (vec_duplicate:VALL_F16_NO_V2Q
	      (vec_select:<VEL>
		(match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
		(parallel
		  [(match_operand:SI 4 "immediate_operand" "i")])))
	    (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
	    (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
    operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
    operands[4] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
					  INTVAL (operands[4])));

    return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_ins<q>")]
)

;; Logical shift right by a vector of identical immediates.
(define_insn "aarch64_simd_lshr<mode>"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		     (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
 "TARGET_SIMD"
 "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)

;; Arithmetic shift right by a vector of identical immediates.
(define_insn "aarch64_simd_ashr<mode>"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		     (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
 "TARGET_SIMD"
 "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)

;; Shift left by a vector of identical immediates.
(define_insn "aarch64_simd_imm_shl<mode>"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		   (match_operand:VDQ_I  2 "aarch64_simd_lshift_imm" "Dl")))]
 "TARGET_SIMD"
 "shl\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)

;; Shift left by a per-lane register amount (SSHL).
(define_insn "aarch64_simd_reg_sshl<mode>"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		   (match_operand:VDQ_I 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)

;; USHL with a register amount; negative amounts shift right, hence
;; the unspec rather than a shift RTL code.
(define_insn "aarch64_simd_reg_shl<mode>_unsigned"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
		    (match_operand:VDQ_I 2 "register_operand" "w")]
		   UNSPEC_ASHIFT_UNSIGNED))]
 "TARGET_SIMD"
 "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)

;; SSHL with a register amount; negative amounts shift right.
(define_insn "aarch64_simd_reg_shl<mode>_signed"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
		    (match_operand:VDQ_I 2 "register_operand" "w")]
		   UNSPEC_ASHIFT_SIGNED))]
 "TARGET_SIMD"
 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)

;; Vector shift left by a scalar amount.  Immediate amounts in
;; [0, width) use SHL; anything else is broadcast and done with SSHL.
(define_expand "ashl<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:SI  2 "general_operand" "")]
 "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      if (shift_amount >= 0 && shift_amount < bit_width)
	{
	  rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
						       shift_amount);
	  emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
						     operands[1],
						     tmp));
	  DONE;
	}
      else
	{
	  operands[2] = force_reg (SImode, operands[2]);
	}
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      rtx tmp = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_aarch64_simd_dup<mode> (tmp,
					     convert_to_mode (<VEL>mode,
							      operands[2],
							      0)));
      emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
						  tmp));
      DONE;
    }
  else
    FAIL;
}
)

;; Vector logical shift right by a scalar.  Note the immediate range
;; is (0, width] — right-shift immediates of exactly the element width
;; are encodable.  Register amounts are negated and fed to USHL.
(define_expand "lshr<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:SI  2 "general_operand" "")]
 "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      if (shift_amount > 0 && shift_amount <= bit_width)
	{
	  rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
						       shift_amount);
	  emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
						  operands[1],
						  tmp));
	  DONE;
	}
      else
	operands[2] = force_reg (SImode, operands[2]);
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      rtx tmp = gen_reg_rtx (SImode);
      rtx tmp1 = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_negsi2 (tmp, operands[2]));
      emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
					     convert_to_mode (<VEL>mode,
							      tmp, 0)));
      emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
							  operands[1],
							  tmp1));
      DONE;
    }
  else
    FAIL;
}
)

;; Vector arithmetic shift right by a scalar; structure mirrors
;; lshr<mode>3 but uses SSHR / signed SSHL.
(define_expand "ashr<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:SI  2 "general_operand" "")]
 "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      if (shift_amount > 0 && shift_amount <= bit_width)
	{
	  rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
						       shift_amount);
	  emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
						  operands[1],
						  tmp));
	  DONE;
	}
      else
	operands[2] = force_reg (SImode, operands[2]);
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      rtx tmp = gen_reg_rtx (SImode);
      rtx tmp1 = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_negsi2 (tmp, operands[2]));
      emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
					     convert_to_mode (<VEL>mode,
							      tmp, 0)));
      emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
							operands[1],
							tmp1));
      DONE;
    }
  else
    FAIL;
}
)

;; Per-lane variable shift left maps directly onto SSHL.
(define_expand "vashl<mode>3"
 [(match_operand:VDQ_I 0 "register_operand" "")
  (match_operand:VDQ_I 1 "register_operand" "")
  (match_operand:VDQ_I 2 "register_operand" "")]
 "TARGET_SIMD"
{
  emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
					      operands[2]));
  DONE;
})

;; Using mode VDQ_BHSI as there is no V2DImode neg!
;; Negating individual lanes most certainly offsets the
;; gain from vectorization.
(define_expand "vashr<mode>3"
 [(match_operand:VDQ_BHSI 0 "register_operand" "")
  (match_operand:VDQ_BHSI 1 "register_operand" "")
  (match_operand:VDQ_BHSI 2 "register_operand" "")]
 "TARGET_SIMD"
{
  rtx neg = gen_reg_rtx (<MODE>mode);
  emit (gen_neg<mode>2 (neg, operands[2]));
  emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
						    neg));
  DONE;
})

;; DI vector shift
(define_expand "aarch64_ashr_simddi"
  [(match_operand:DI 0 "register_operand" "=w")
   (match_operand:DI 1 "register_operand" "w")
   (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
  "TARGET_SIMD"
  {
    /* An arithmetic shift right by 64 fills the result with copies of the sign
       bit, just like asr by 63 - however the standard pattern does not handle
       a shift by 64.  */
    if (INTVAL (operands[2]) == 64)
      operands[2] = GEN_INT (63);
    emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
    DONE;
  }
)

;; Per-lane variable logical shift right: negate the amounts, then USHL.
(define_expand "vlshr<mode>3"
 [(match_operand:VDQ_BHSI 0 "register_operand" "")
  (match_operand:VDQ_BHSI 1 "register_operand" "")
  (match_operand:VDQ_BHSI 2 "register_operand" "")]
 "TARGET_SIMD"
{
  rtx neg = gen_reg_rtx (<MODE>mode);
  emit (gen_neg<mode>2 (neg, operands[2]));
  emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
						      neg));
  DONE;
})

;; Logical shift right by 64 always yields zero; the standard pattern
;; cannot encode that amount, so emit a constant-zero move instead.
(define_expand "aarch64_lshr_simddi"
  [(match_operand:DI 0 "register_operand" "=w")
   (match_operand:DI 1 "register_operand" "w")
   (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
  "TARGET_SIMD"
  {
    if (INTVAL (operands[2]) == 64)
      emit_move_insn (operands[0], const0_rtx);
    else
      emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
    DONE;
  }
)

;; Standard vec_set expander: convert the lane number to the one-hot
;; vec_merge mask that aarch64_simd_vec_set<mode> expects.
(define_expand "vec_set<mode>"
  [(match_operand:VDQ_BHSI 0 "register_operand")
   (match_operand:<VEL> 1 "register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_SIMD"
  {
    HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
    emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
					       GEN_INT (elem), operands[0]));
    DONE;
  }
)

;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
;; Whole-vector shift right (element shuffle).  Expressed as a 64-bit
;; scalar shift of the D register; the direction flips on big-endian
;; because lane order is reversed there.
(define_insn "vec_shr_<mode>"
  [(set (match_operand:VD 0 "register_operand" "=w")
        (unspec:VD [(match_operand:VD 1 "register_operand" "w")
		    (match_operand:SI 2 "immediate_operand" "i")]
		   UNSPEC_VEC_SHR))]
  "TARGET_SIMD"
  {
    if (BYTES_BIG_ENDIAN)
      return "shl %d0, %d1, %2";
    else
      return "ushr %d0, %d1, %2";
  }
  [(set_attr "type" "neon_shift_imm")]
)

;; Insert a DI element into a V2DI vector, from a GP or SIMD register.
;; Operand 2 is a one-hot vec_merge mask, converted to a lane number
;; here (endian-remapped) for the %p2 output modifier.
(define_insn "aarch64_simd_vec_setv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w,w")
        (vec_merge:V2DI
	    (vec_duplicate:V2DI
		(match_operand:DI 1 "register_operand" "r,w"))
	    (match_operand:V2DI 3 "register_operand" "0,0")
	    (match_operand:SI 2 "immediate_operand" "i,i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (V2DImode, exact_log2 (INTVAL (operands[2])));
    operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
    switch (which_alternative)
      {
      case 0:
	return "ins\\t%0.d[%p2], %1";
      case 1:
	return "ins\\t%0.d[%p2], %1.d[0]";
      default:
	gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_from_gp, neon_ins_q")]
)

;; vec_set expander for V2DI: build the one-hot mask from the lane.
(define_expand "vec_setv2di"
  [(match_operand:V2DI 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_SIMD"
  {
    HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
    emit_insn (gen_aarch64_simd_vec_setv2di (operands[0], operands[1],
					     GEN_INT (elem), operands[0]));
    DONE;
  }
)

;; Insert a floating-point element (always from a SIMD register).
(define_insn "aarch64_simd_vec_set<mode>"
  [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
	(vec_merge:VDQF_F16
	    (vec_duplicate:VDQF_F16
		(match_operand:<VEL> 1 "register_operand" "w"))
	    (match_operand:VDQF_F16 3 "register_operand" "0")
	    (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));

    operands[2] = GEN_INT ((HOST_WIDE_INT)1 << elt);
    return "ins\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
  }
  [(set_attr "type" "neon_ins<q>")]
)

;; vec_set expander for floating-point vectors.
(define_expand "vec_set<mode>"
  [(match_operand:VDQF_F16 0 "register_operand" "+w")
   (match_operand:<VEL> 1 "register_operand" "w")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_SIMD"
  {
    HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
    emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
					       GEN_INT (elem), operands[0]));
    DONE;
  }
)


;; Multiply-accumulate: op0 = op1 + op2 * op3, accumulator tied ("0").
(define_insn "aarch64_mla<mode>"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (plus:VDQ_BHSI (mult:VDQ_BHSI
			(match_operand:VDQ_BHSI 2 "register_operand" "w")
			(match_operand:VDQ_BHSI 3 "register_operand" "w"))
		      (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
 "TARGET_SIMD"
 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype><q>")]
)

;; Multiply-accumulate by one lane (lane index endian-remapped).
(define_insn "*aarch64_mla_elt<mode>"
 [(set (match_operand:VDQHS 0 "register_operand" "=w")
       (plus:VDQHS
	 (mult:VDQHS
	   (vec_duplicate:VDQHS
	      (vec_select:<VEL>
		(match_operand:VDQHS 1 "register_operand" "<h_con>")
		  (parallel [(match_operand:SI 2 "immediate_operand")])))
	   (match_operand:VDQHS 3 "register_operand" "w"))
	 (match_operand:VDQHS 4 "register_operand" "0")))]
 "TARGET_SIMD"
  {
    operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
    return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

;; As above, with the lane taken from a vector of the opposite width.
(define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
 [(set (match_operand:VDQHS 0 "register_operand" "=w")
       (plus:VDQHS
	 (mult:VDQHS
	   (vec_duplicate:VDQHS
	      (vec_select:<VEL>
		(match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
		  (parallel [(match_operand:SI 2 "immediate_operand")])))
	   (match_operand:VDQHS 3 "register_operand" "w"))
	 (match_operand:VDQHS 4 "register_operand" "0")))]
 "TARGET_SIMD"
  {
    operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
					  INTVAL (operands[2])));
    return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

;; Multiply-subtract: op0 = op1 - op2 * op3, accumulator tied ("0").
(define_insn "aarch64_mls<mode>"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
		   (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
			      (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
 "TARGET_SIMD"
 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype><q>")]
)

;; Multiply-subtract by one lane (lane index endian-remapped).
(define_insn "*aarch64_mls_elt<mode>"
 [(set (match_operand:VDQHS 0 "register_operand" "=w")
       (minus:VDQHS
	 (match_operand:VDQHS 4 "register_operand" "0")
	 (mult:VDQHS
	   (vec_duplicate:VDQHS
	      (vec_select:<VEL>
		(match_operand:VDQHS 1 "register_operand" "<h_con>")
		  (parallel [(match_operand:SI 2 "immediate_operand")])))
	   (match_operand:VDQHS 3 "register_operand" "w"))))]
 "TARGET_SIMD"
  {
    operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
    return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

;; As above, with the lane taken from a vector of the opposite width.
(define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
 [(set (match_operand:VDQHS 0 "register_operand" "=w")
       (minus:VDQHS
	 (match_operand:VDQHS 4 "register_operand" "0")
	 (mult:VDQHS
	   (vec_duplicate:VDQHS
	      (vec_select:<VEL>
		(match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
		  (parallel [(match_operand:SI 2 "immediate_operand")])))
	   (match_operand:VDQHS 3 "register_operand" "w"))))]
 "TARGET_SIMD"
  {
    operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
					  INTVAL (operands[2])));
    return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

;; Max/Min operations.
(define_insn "<su><maxmin><mode>3"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
		    (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)

;; There is no V2DI max/min instruction; synthesize it as a compare
;; followed by a vcond select.
(define_expand "<su><maxmin>v2di3"
 [(set (match_operand:V2DI 0 "register_operand" "")
       (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
		    (match_operand:V2DI 2 "register_operand" "")))]
 "TARGET_SIMD"
{
  enum rtx_code cmp_operator;
  rtx cmp_fmt;

  switch (<CODE>)
    {
    case UMIN:
      cmp_operator = LTU;
      break;
    case SMIN:
      cmp_operator = LT;
      break;
    case UMAX:
      cmp_operator = GTU;
      break;
    case SMAX:
      cmp_operator = GT;
      break;
    default:
      gcc_unreachable ();
    }

  cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
  emit_insn (gen_vcondv2div2di (operands[0], operands[1],
	      operands[2], cmp_fmt, operands[1], operands[2]));
  DONE;
})

;; Pairwise Integer Max/Min operations.
(define_insn "aarch64_<maxmin_uns>p<mode>"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
			 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
			MAXMINV))]
 "TARGET_SIMD"
 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)

;; Pairwise FP Max/Min operations.
(define_insn "aarch64_<maxmin_uns>p<mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
                       (match_operand:VHSDF 2 "register_operand" "w")]
                      FMAXMINV))]
  "TARGET_SIMD"
  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)

;; vec_concat gives a new vector with the low elements from operand 1, and
;; the high elements from operand 2.  That is to say, given op1 = { a, b }
;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
;; What that means, is that the RTL descriptions of the below patterns
;; need to change depending on endianness.

;; Move to the low architectural bits of the register.
;; On little-endian this is { operand, zeroes }
;; On big-endian this is { zeroes, operand }

(define_insn "move_lo_quad_internal_<mode>"
  [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
        (vec_concat:VQ_NO2E
          (match_operand:<VHALF> 1 "register_operand" "w,r,r")
          (vec_duplicate:<VHALF> (const_int 0))))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")
   (set_attr "length" "4")]
)

;; As above, for the two-element quad modes where the zero high half is
;; a plain (const_int 0) rather than a vec_duplicate of zero.
(define_insn "move_lo_quad_internal_<mode>"
  [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
        (vec_concat:VQ_2E
          (match_operand:<VHALF> 1 "register_operand" "w,r,r")
          (const_int 0)))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")
   (set_attr "length" "4")]
)

;; Big-endian counterparts: the operand occupies the high element
;; numbers, which are the low architectural bits on big-endian.
(define_insn "move_lo_quad_internal_be_<mode>"
  [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
        (vec_concat:VQ_NO2E
          (vec_duplicate:<VHALF> (const_int 0))
          (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")
   (set_attr "length" "4")]
)

(define_insn "move_lo_quad_internal_be_<mode>"
  [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
        (vec_concat:VQ_2E
          (const_int 0)
          (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")
   (set_attr "length" "4")]
)

;; Dispatch to the endian-appropriate internal pattern above.
(define_expand "move_lo_quad_<mode>"
  [(match_operand:VQ 0 "register_operand")
   (match_operand:VQ 1 "register_operand")]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
  else
    emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
  DONE;
}
)

;; Move operand1 to the high architectural bits of the register, keeping
;; the low architectural bits of operand2.
1224;; For little-endian this is { operand2, operand1 } 1225;; For big-endian this is { operand1, operand2 } 1226 1227(define_insn "aarch64_simd_move_hi_quad_<mode>" 1228 [(set (match_operand:VQ 0 "register_operand" "+w,w") 1229 (vec_concat:VQ 1230 (vec_select:<VHALF> 1231 (match_dup 0) 1232 (match_operand:VQ 2 "vect_par_cnst_lo_half" "")) 1233 (match_operand:<VHALF> 1 "register_operand" "w,r")))] 1234 "TARGET_SIMD && !BYTES_BIG_ENDIAN" 1235 "@ 1236 ins\\t%0.d[1], %1.d[0] 1237 ins\\t%0.d[1], %1" 1238 [(set_attr "type" "neon_ins")] 1239) 1240 1241(define_insn "aarch64_simd_move_hi_quad_be_<mode>" 1242 [(set (match_operand:VQ 0 "register_operand" "+w,w") 1243 (vec_concat:VQ 1244 (match_operand:<VHALF> 1 "register_operand" "w,r") 1245 (vec_select:<VHALF> 1246 (match_dup 0) 1247 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))] 1248 "TARGET_SIMD && BYTES_BIG_ENDIAN" 1249 "@ 1250 ins\\t%0.d[1], %1.d[0] 1251 ins\\t%0.d[1], %1" 1252 [(set_attr "type" "neon_ins")] 1253) 1254 1255(define_expand "move_hi_quad_<mode>" 1256 [(match_operand:VQ 0 "register_operand" "") 1257 (match_operand:<VHALF> 1 "register_operand" "")] 1258 "TARGET_SIMD" 1259{ 1260 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false); 1261 if (BYTES_BIG_ENDIAN) 1262 emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0], 1263 operands[1], p)); 1264 else 1265 emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0], 1266 operands[1], p)); 1267 DONE; 1268}) 1269 1270;; Narrowing operations. 1271 1272;; For doubles. 
1273(define_insn "aarch64_simd_vec_pack_trunc_<mode>" 1274 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w") 1275 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))] 1276 "TARGET_SIMD" 1277 "xtn\\t%0.<Vntype>, %1.<Vtype>" 1278 [(set_attr "type" "neon_shift_imm_narrow_q")] 1279) 1280 1281(define_expand "vec_pack_trunc_<mode>" 1282 [(match_operand:<VNARROWD> 0 "register_operand" "") 1283 (match_operand:VDN 1 "register_operand" "") 1284 (match_operand:VDN 2 "register_operand" "")] 1285 "TARGET_SIMD" 1286{ 1287 rtx tempreg = gen_reg_rtx (<VDBL>mode); 1288 int lo = BYTES_BIG_ENDIAN ? 2 : 1; 1289 int hi = BYTES_BIG_ENDIAN ? 1 : 2; 1290 1291 emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo])); 1292 emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi])); 1293 emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg)); 1294 DONE; 1295}) 1296 1297;; For quads. 1298 1299(define_insn "vec_pack_trunc_<mode>" 1300 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w") 1301 (vec_concat:<VNARROWQ2> 1302 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")) 1303 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))] 1304 "TARGET_SIMD" 1305 { 1306 if (BYTES_BIG_ENDIAN) 1307 return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>"; 1308 else 1309 return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>"; 1310 } 1311 [(set_attr "type" "multiple") 1312 (set_attr "length" "8")] 1313) 1314 1315;; Widening operations. 
1316 1317(define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>" 1318 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 1319 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> 1320 (match_operand:VQW 1 "register_operand" "w") 1321 (match_operand:VQW 2 "vect_par_cnst_lo_half" "") 1322 )))] 1323 "TARGET_SIMD" 1324 "<su>shll\t%0.<Vwtype>, %1.<Vhalftype>, 0" 1325 [(set_attr "type" "neon_shift_imm_long")] 1326) 1327 1328(define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>" 1329 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 1330 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> 1331 (match_operand:VQW 1 "register_operand" "w") 1332 (match_operand:VQW 2 "vect_par_cnst_hi_half" "") 1333 )))] 1334 "TARGET_SIMD" 1335 "<su>shll2\t%0.<Vwtype>, %1.<Vtype>, 0" 1336 [(set_attr "type" "neon_shift_imm_long")] 1337) 1338 1339(define_expand "vec_unpack<su>_hi_<mode>" 1340 [(match_operand:<VWIDE> 0 "register_operand" "") 1341 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))] 1342 "TARGET_SIMD" 1343 { 1344 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); 1345 emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0], 1346 operands[1], p)); 1347 DONE; 1348 } 1349) 1350 1351(define_expand "vec_unpack<su>_lo_<mode>" 1352 [(match_operand:<VWIDE> 0 "register_operand" "") 1353 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))] 1354 "TARGET_SIMD" 1355 { 1356 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false); 1357 emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0], 1358 operands[1], p)); 1359 DONE; 1360 } 1361) 1362 1363;; Widening arithmetic. 
;; Widening multiply-accumulate from the low input halves:
;; op0 = op1 + extend (lo (op2)) * extend (lo (op4)).
(define_insn "*aarch64_<su>mlal_lo<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (plus:<VWIDE>
          (mult:<VWIDE>
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 2 "register_operand" "w")
                 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 4 "register_operand" "w")
                 (match_dup 3))))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

;; As above from the high input halves (SMLAL2/UMLAL2).
(define_insn "*aarch64_<su>mlal_hi<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (plus:<VWIDE>
          (mult:<VWIDE>
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 2 "register_operand" "w")
                 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 4 "register_operand" "w")
                 (match_dup 3))))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

;; Widening multiply-subtract from the low input halves.
(define_insn "*aarch64_<su>mlsl_lo<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (mult:<VWIDE>
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 2 "register_operand" "w")
                 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 4 "register_operand" "w")
                 (match_dup 3))))))]
  "TARGET_SIMD"
  "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

;; Widening multiply-subtract from the high input halves.
(define_insn "*aarch64_<su>mlsl_hi<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (mult:<VWIDE>
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 2 "register_operand" "w")
                 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 4 "register_operand" "w")
                 (match_dup 3))))))]
  "TARGET_SIMD"
  "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

;; Widening multiply-accumulate of whole 64-bit vectors.
(define_insn "*aarch64_<su>mlal<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (plus:<VWIDE>
          (mult:<VWIDE>
            (ANY_EXTEND:<VWIDE>
              (match_operand:VD_BHSI 1 "register_operand" "w"))
            (ANY_EXTEND:<VWIDE>
              (match_operand:VD_BHSI 2 "register_operand" "w")))
          (match_operand:<VWIDE> 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

;; Widening multiply-subtract of whole 64-bit vectors.
(define_insn "*aarch64_<su>mlsl<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (mult:<VWIDE>
            (ANY_EXTEND:<VWIDE>
              (match_operand:VD_BHSI 2 "register_operand" "w"))
            (ANY_EXTEND:<VWIDE>
              (match_operand:VD_BHSI 3 "register_operand" "w")))))]
  "TARGET_SIMD"
  "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

;; Widening multiply of the low input halves (SMULL/UMULL).
(define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                          (match_operand:VQW 1 "register_operand" "w")
                          (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
                      (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                          (match_operand:VQW 2 "register_operand" "w")
                          (match_dup 3)))))]
  "TARGET_SIMD"
  "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_mul_<Vetype>_long")]
)

(define_expand "vec_widen_<su>mult_lo_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
    emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
                                                        operands[1],
                                                        operands[2], p));
    DONE;
  }
)

;; Widening multiply of the high input halves (SMULL2/UMULL2).
(define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                          (match_operand:VQW 1 "register_operand" "w")
                          (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
                      (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                          (match_operand:VQW 2 "register_operand" "w")
                          (match_dup 3)))))]
  "TARGET_SIMD"
  "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mul_<Vetype>_long")]
)

(define_expand "vec_widen_<su>mult_hi_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
    emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
                                                        operands[1],
                                                        operands[2], p));
    DONE;

  }
)

;; FP vector operations.
;; AArch64 AdvSIMD supports single-precision (32-bit) and
;; double-precision (64-bit) floating-point data types and arithmetic as
;; defined by the IEEE 754-2008 standard.  This makes them vectorizable
;; without the need for -ffast-math or -funsafe-math-optimizations.
;;
;; Floating-point operations can raise an exception.  Vectorizing such
;; operations are safe because of reasons explained below.
;;
;; ARMv8 permits an extension to enable trapped floating-point
;; exception handling, however this is an optional feature.  In the
;; event of a floating-point exception being raised by vectorised
;; code then:
;; 1.  If trapped floating-point exceptions are available, then a trap
;;     will be taken when any lane raises an enabled exception.  A trap
;;     handler may determine which lane raised the exception.
;; 2.  Alternatively a sticky exception flag is set in the
;;     floating-point status register (FPSR).  Software may explicitly
;;     test the exception flags, in which case the tests will either
;;     prevent vectorisation, allowing precise identification of the
;;     failing operation, or if tested outside of vectorisable regions
;;     then the specific operation and lane are not of interest.

;; FP arithmetic operations.

(define_insn "add<mode>3"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
                    (match_operand:VHSDF 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_addsub_<stype><q>")]
)

(define_insn "sub<mode>3"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
                     (match_operand:VHSDF 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_addsub_<stype><q>")]
)

(define_insn "mul<mode>3"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
                    (match_operand:VHSDF 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_mul_<stype><q>")]
)

;; Let aarch64_emit_approx_div try to emit an approximate-division
;; sequence first; otherwise fall through to the FDIV insn below.
(define_expand "div<mode>3"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
                   (match_operand:VHSDF 2 "register_operand" "w")))]
  "TARGET_SIMD"
{
  if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
    DONE;

  operands[1] = force_reg (<MODE>mode, operands[1]);
})

(define_insn "*div<mode>3"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
                   (match_operand:VHSDF 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_div_<stype><q>")]
)

(define_insn "neg<mode>2"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "fneg\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_neg_<stype><q>")]
)

(define_insn "abs<mode>2"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "fabs\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_abs_<stype><q>")]
)

;; Fused multiply-add: op0 = op1 * op2 + op3 (accumulator tied to op0).
(define_insn "fma<mode>4"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
                   (match_operand:VHSDF 2 "register_operand" "w")
                   (match_operand:VHSDF 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_mla_<stype><q>")]
)

;; FMLA with one multiplicand broadcast from a lane of operand 1.
(define_insn "*aarch64_fma4_elt<mode>"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
        (fma:VDQF
          (vec_duplicate:VDQF
            (vec_select:<VEL>
              (match_operand:VDQF 1 "register_operand" "<h_con>")
              (parallel [(match_operand:SI 2 "immediate_operand")])))
          (match_operand:VDQF 3 "register_operand" "w")
          (match_operand:VDQF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
    return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)

;; As above, with the lane taken from a vector of the other width.
(define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
        (fma:VDQSF
          (vec_duplicate:VDQSF
            (vec_select:<VEL>
              (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
              (parallel [(match_operand:SI 2 "immediate_operand")])))
          (match_operand:VDQSF 3 "register_operand" "w")
          (match_operand:VDQSF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
                                          INTVAL (operands[2])));
    return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)

;; FMLA with one multiplicand broadcast from a scalar (lane 0).
(define_insn "*aarch64_fma4_elt_from_dup<mode>"
  [(set (match_operand:VMUL 0 "register_operand" "=w")
        (fma:VMUL
          (vec_duplicate:VMUL
            (match_operand:<VEL> 1 "register_operand" "<h_con>"))
          (match_operand:VMUL 2 "register_operand" "w")
          (match_operand:VMUL 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
)

;; Scalar DF fma taking one input from a V2DF lane.
(define_insn "*aarch64_fma4_elt_to_64v2df"
  [(set (match_operand:DF 0 "register_operand" "=w")
        (fma:DF
          (vec_select:DF
            (match_operand:V2DF 1 "register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand")]))
          (match_operand:DF 3 "register_operand" "w")
          (match_operand:DF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
    return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
  }
  [(set_attr "type" "neon_fp_mla_d_scalar_q")]
)

;; Fused multiply-subtract: op0 = op3 - op1 * op2 (FMLS).
(define_insn "fnma<mode>4"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (fma:VHSDF
          (match_operand:VHSDF 1 "register_operand" "w")
          (neg:VHSDF
            (match_operand:VHSDF 2 "register_operand" "w"))
          (match_operand:VHSDF 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_mla_<stype><q>")]
)

;; FMLS with one multiplicand broadcast from a lane of operand 1.
(define_insn "*aarch64_fnma4_elt<mode>"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
        (fma:VDQF
          (neg:VDQF
            (match_operand:VDQF 3 "register_operand" "w"))
          (vec_duplicate:VDQF
            (vec_select:<VEL>
              (match_operand:VDQF 1 "register_operand" "<h_con>")
              (parallel [(match_operand:SI 2 "immediate_operand")])))
          (match_operand:VDQF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
    return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)

;; As above, with the lane taken from a vector of the other width.
(define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
        (fma:VDQSF
          (neg:VDQSF
            (match_operand:VDQSF 3 "register_operand" "w"))
          (vec_duplicate:VDQSF
            (vec_select:<VEL>
              (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
              (parallel [(match_operand:SI 2 "immediate_operand")])))
          (match_operand:VDQSF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
                                          INTVAL (operands[2])));
    return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)

;; FMLS with one multiplicand broadcast from a scalar (lane 0).
(define_insn "*aarch64_fnma4_elt_from_dup<mode>"
  [(set (match_operand:VMUL 0 "register_operand" "=w")
        (fma:VMUL
          (neg:VMUL
            (match_operand:VMUL 2 "register_operand" "w"))
          (vec_duplicate:VMUL
            (match_operand:<VEL> 1 "register_operand" "<h_con>"))
          (match_operand:VMUL 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
)

;; Scalar DF fnma taking one input from a V2DF lane.
(define_insn "*aarch64_fnma4_elt_to_64v2df"
  [(set (match_operand:DF 0 "register_operand" "=w")
        (fma:DF
          (vec_select:DF
            (match_operand:V2DF 1 "register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand")]))
          (neg:DF
            (match_operand:DF 3 "register_operand" "w"))
          (match_operand:DF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
    return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
  }
  [(set_attr "type" "neon_fp_mla_d_scalar_q")]
)

;; Vector versions of the floating-point frint patterns.
;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
(define_insn "<frint_pattern><mode>2"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
                      FRINT))]
  "TARGET_SIMD"
  "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_round_<stype><q>")]
)

;; Vector versions of the fcvt standard patterns.
;; Expands to lbtrunc, lround, lceil, lfloor
(define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
  [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
        (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
                                 [(match_operand:VHSDF 1 "register_operand" "w")]
                                 FCVT)))]
  "TARGET_SIMD"
  "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_to_int_<stype><q>")]
)

;; HF Scalar variants of related SIMD instructions.
;; Scalar HF rounding conversions to HI, available with the ARMv8.2-A
;; half-precision instructions (TARGET_SIMD_F16INST).
(define_insn "l<fcvt_pattern><su_optab>hfhi2"
  [(set (match_operand:HI 0 "register_operand" "=w")
        (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
                      FCVT)))]
  "TARGET_SIMD_F16INST"
  "fcvt<frint_suffix><su>\t%h0, %h1"
  [(set_attr "type" "neon_fp_to_int_s")]
)

;; Scalar HF truncating conversion to HI.
(define_insn "<optab>_trunchfhi2"
  [(set (match_operand:HI 0 "register_operand" "=w")
        (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
  "TARGET_SIMD_F16INST"
  "fcvtz<su>\t%h0, %h1"
  [(set_attr "type" "neon_fp_to_int_s")]
)

;; Scalar HI to HF conversion.
(define_insn "<optab>hihf2"
  [(set (match_operand:HF 0 "register_operand" "=w")
        (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
  "TARGET_SIMD_F16INST"
  "<su_optab>cvtf\t%h0, %h1"
  [(set_attr "type" "neon_int_to_fp_s")]
)

;; Combine a multiply by a power of two with a float->int truncation into
;; a single fixed-point FCVTZ; operand 2's log2 becomes the #fbits field.
(define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
  [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
        (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
                                 [(mult:VDQF
                                    (match_operand:VDQF 1 "register_operand" "w")
                                    (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
                                 UNSPEC_FRINTZ)))]
  "TARGET_SIMD
   && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
                GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
  {
    int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
    char buf[64];
    snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
    output_asm_insn (buf, operands);
    return "";
  }
  [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
)

;; Standard float->int conversion expanders; the RTL already matches the
;; insns above, so no preparation code is needed.
(define_expand "<optab><VHSDF:mode><fcvt_target>2"
  [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
        (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
                                 [(match_operand:VHSDF 1 "register_operand")]
                                 UNSPEC_FRINTZ)))]
  "TARGET_SIMD"
  {})

(define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
  [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
        (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
                                 [(match_operand:VHSDF 1 "register_operand")]
                                 UNSPEC_FRINTZ)))]
  "TARGET_SIMD"
  {})

(define_expand "ftrunc<VHSDF:mode>2"
  [(set (match_operand:VHSDF 0 "register_operand")
        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
                      UNSPEC_FRINTZ))]
  "TARGET_SIMD"
  {})

;; Vector int->float conversion.
(define_insn "<optab><fcvt_target><VHSDF:mode>2"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (FLOATUORS:VHSDF
          (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_int_to_fp_<stype><q>")]
)

;; Conversions between vectors of floats and doubles.
;; Contains a mix of patterns to match standard pattern names
;; and those for intrinsics.

;; Float widening operations.

;; Widen the low half of operand 1 to double element width (FCVTL).
(define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (float_extend:<VWIDE> (vec_select:<VHALF>
                                (match_operand:VQ_HSF 1 "register_operand" "w")
                                (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
                             )))]
  "TARGET_SIMD"
  "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
  [(set_attr "type" "neon_fp_cvt_widen_s")]
)

;; Convert between fixed-point and floating-point (vector modes)

(define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
  [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
        (unspec:<VHSDF:FCVT_TARGET>
          [(match_operand:VHSDF 1 "register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          FCVT_F2FIXED))]
  "TARGET_SIMD"
  "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
  [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
)

(define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
  [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
        (unspec:<VDQ_HSDI:FCVT_TARGET>
          [(match_operand:VDQ_HSDI 1 "register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          FCVT_FIXED2F))]
  "TARGET_SIMD"
  "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
  [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
)

;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
;; is inconsistent with vector ordering elsewhere in the compiler, in that
;; the meaning of HI and LO changes depending on the target endianness.
;; While elsewhere we map the higher numbered elements of a vector to
;; the lower architectural lanes of the vector, for these patterns we want
;; to always treat "hi" as referring to the higher architectural lanes.
;; Consequently, while the patterns below look inconsistent with our
;; other big-endian patterns their behavior is as required.

(define_expand "vec_unpacks_lo_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (match_operand:VQ_HSF 1 "register_operand" "")]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
    emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
                                                       operands[1], p));
    DONE;
  }
)

;; Widen the high half of operand 1 to double element width (FCVTL2).
(define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (float_extend:<VWIDE> (vec_select:<VHALF>
                                (match_operand:VQ_HSF 1 "register_operand" "w")
                                (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
                             )))]
  "TARGET_SIMD"
  "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_cvt_widen_s")]
)

(define_expand "vec_unpacks_hi_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (match_operand:VQ_HSF 1 "register_operand" "")]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
    /* The _lo_ and _hi_ insn patterns above build structurally identical
       RTL; recognition selects the _hi_ insn via P's hi-half predicate.
       Using the _lo_ generator here therefore works, but call the _hi_
       generator for clarity and for consistency with
       vec_unpack<su>_hi_<mode>.  */
    emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
                                                       operands[1], p));
    DONE;
  }
)
;; Widen a whole 64-bit FP vector (intrinsic pattern).
(define_insn "aarch64_float_extend_lo_<Vwide>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (float_extend:<VWIDE>
          (match_operand:VDF 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
  [(set_attr "type" "neon_fp_cvt_widen_s")]
)

;; Float narrowing operations.

;; Narrow each element of operand 1 into a 64-bit vector (FCVTN).
(define_insn "aarch64_float_truncate_lo_<mode>"
  [(set (match_operand:VDF 0 "register_operand" "=w")
        (float_truncate:VDF
          (match_operand:<VWIDE> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)

;; Narrow operand 2 into the high half of operand 0 while keeping
;; operand 1 in the low half (FCVTN2); little-endian RTL ordering.
(define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w")
        (vec_concat:<VDBL>
          (match_operand:VDF 1 "register_operand" "0")
          (float_truncate:VDF
            (match_operand:<VWIDE> 2 "register_operand" "w"))))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)

;; Big-endian counterpart of the above.
(define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w")
        (vec_concat:<VDBL>
          (float_truncate:VDF
            (match_operand:<VWIDE> 2 "register_operand" "w"))
          (match_operand:VDF 1 "register_operand" "0")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)

;; Dispatch to the endian-appropriate FCVTN2 pattern.
(define_expand "aarch64_float_truncate_hi_<Vdbl>"
  [(match_operand:<VDBL> 0 "register_operand" "=w")
   (match_operand:VDF 1 "register_operand" "0")
   (match_operand:<VWIDE> 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
                             ? gen_aarch64_float_truncate_hi_<Vdbl>_be
                             : gen_aarch64_float_truncate_hi_<Vdbl>_le;
  emit_insn (gen (operands[0], operands[1], operands[2]));
  DONE;
}
)

;; Narrow two V2DF vectors into one V4SF via FCVTN + FCVTN2; the operand
;; acting as the low half depends on endianness.
(define_expand "vec_pack_trunc_v2df"
  [(set (match_operand:V4SF 0 "register_operand")
        (vec_concat:V4SF
          (float_truncate:V2SF
            (match_operand:V2DF 1 "register_operand"))
          (float_truncate:V2SF
            (match_operand:V2DF 2 "register_operand"))
          ))]
  "TARGET_SIMD"
  {
    rtx tmp = gen_reg_rtx (V2SFmode);
    int lo = BYTES_BIG_ENDIAN ? 2 : 1;
    int hi = BYTES_BIG_ENDIAN ? 1 : 2;

    emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
    emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
                                                   tmp, operands[hi]));
    DONE;
  }
)

;; Pack two DF scalars into a V2DF temporary, then narrow to V2SF.
(define_expand "vec_pack_trunc_df"
  [(set (match_operand:V2SF 0 "register_operand")
        (vec_concat:V2SF
          (float_truncate:SF
            (match_operand:DF 1 "register_operand"))
          (float_truncate:SF
            (match_operand:DF 2 "register_operand"))
          ))]
  "TARGET_SIMD"
  {
    /* TMP holds the two DF inputs before narrowing, so it must be
       V2DFmode: it is the destination of move_lo/hi_quad_v2df and the
       wide input of float_truncate_lo_v2sf.  (It was previously
       allocated in V2SFmode, which mismatches all three uses.)  */
    rtx tmp = gen_reg_rtx (V2DFmode);
    int lo = BYTES_BIG_ENDIAN ? 2 : 1;
    int hi = BYTES_BIG_ENDIAN ? 1 : 2;

    emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
    emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
    emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
    DONE;
  }
)

;; FP Max/Min
;; Max/Min are introduced by idiom recognition by GCC's mid-end.  An
;; expression like:
;;      a = (b < c) ? b : c;
;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only is enabled
;; either explicitly or indirectly via -ffast-math.
;;
;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
;; The 'smax' and 'smin' RTL standard pattern names do not specify which
;; operand will be returned when both operands are zero (i.e.
they may not 2046;; honour signed zeroes), or when either operand is NaN. Therefore GCC 2047;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring 2048;; NaNs. 2049 2050(define_insn "<su><maxmin><mode>3" 2051 [(set (match_operand:VHSDF 0 "register_operand" "=w") 2052 (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w") 2053 (match_operand:VHSDF 2 "register_operand" "w")))] 2054 "TARGET_SIMD" 2055 "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" 2056 [(set_attr "type" "neon_fp_minmax_<stype><q>")] 2057) 2058 2059;; Vector forms for fmax, fmin, fmaxnm, fminnm. 2060;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names, 2061;; which implement the IEEE fmax ()/fmin () functions. 2062(define_insn "<maxmin_uns><mode>3" 2063 [(set (match_operand:VHSDF 0 "register_operand" "=w") 2064 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w") 2065 (match_operand:VHSDF 2 "register_operand" "w")] 2066 FMAXMIN_UNS))] 2067 "TARGET_SIMD" 2068 "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" 2069 [(set_attr "type" "neon_fp_minmax_<stype><q>")] 2070) 2071 2072;; 'across lanes' add. 
2073 2074(define_expand "reduc_plus_scal_<mode>" 2075 [(match_operand:<VEL> 0 "register_operand" "=w") 2076 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")] 2077 UNSPEC_ADDV)] 2078 "TARGET_SIMD" 2079 { 2080 rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0)); 2081 rtx scratch = gen_reg_rtx (<MODE>mode); 2082 emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1])); 2083 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt)); 2084 DONE; 2085 } 2086) 2087 2088(define_insn "aarch64_faddp<mode>" 2089 [(set (match_operand:VHSDF 0 "register_operand" "=w") 2090 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w") 2091 (match_operand:VHSDF 2 "register_operand" "w")] 2092 UNSPEC_FADDV))] 2093 "TARGET_SIMD" 2094 "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" 2095 [(set_attr "type" "neon_fp_reduc_add_<stype><q>")] 2096) 2097 2098(define_insn "aarch64_reduc_plus_internal<mode>" 2099 [(set (match_operand:VDQV 0 "register_operand" "=w") 2100 (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")] 2101 UNSPEC_ADDV))] 2102 "TARGET_SIMD" 2103 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>" 2104 [(set_attr "type" "neon_reduc_add<q>")] 2105) 2106 2107(define_insn "aarch64_reduc_plus_internalv2si" 2108 [(set (match_operand:V2SI 0 "register_operand" "=w") 2109 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")] 2110 UNSPEC_ADDV))] 2111 "TARGET_SIMD" 2112 "addp\\t%0.2s, %1.2s, %1.2s" 2113 [(set_attr "type" "neon_reduc_add")] 2114) 2115 2116(define_insn "reduc_plus_scal_<mode>" 2117 [(set (match_operand:<VEL> 0 "register_operand" "=w") 2118 (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")] 2119 UNSPEC_FADDV))] 2120 "TARGET_SIMD" 2121 "faddp\\t%<Vetype>0, %1.<Vtype>" 2122 [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")] 2123) 2124 2125(define_expand "reduc_plus_scal_v4sf" 2126 [(set (match_operand:SF 0 "register_operand") 2127 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")] 2128 UNSPEC_FADDV))] 2129 "TARGET_SIMD" 
2130{ 2131 rtx elt = GEN_INT (ENDIAN_LANE_N (V4SFmode, 0)); 2132 rtx scratch = gen_reg_rtx (V4SFmode); 2133 emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1])); 2134 emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch)); 2135 emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt)); 2136 DONE; 2137}) 2138 2139(define_insn "clrsb<mode>2" 2140 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") 2141 (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))] 2142 "TARGET_SIMD" 2143 "cls\\t%0.<Vtype>, %1.<Vtype>" 2144 [(set_attr "type" "neon_cls<q>")] 2145) 2146 2147(define_insn "clz<mode>2" 2148 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") 2149 (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))] 2150 "TARGET_SIMD" 2151 "clz\\t%0.<Vtype>, %1.<Vtype>" 2152 [(set_attr "type" "neon_cls<q>")] 2153) 2154 2155(define_insn "popcount<mode>2" 2156 [(set (match_operand:VB 0 "register_operand" "=w") 2157 (popcount:VB (match_operand:VB 1 "register_operand" "w")))] 2158 "TARGET_SIMD" 2159 "cnt\\t%0.<Vbtype>, %1.<Vbtype>" 2160 [(set_attr "type" "neon_cnt<q>")] 2161) 2162 2163;; 'across lanes' max and min ops. 2164 2165;; Template for outputting a scalar, so we can create __builtins which can be 2166;; gimple_fold'd to the REDUC_(MAX|MIN)_EXPR tree code. (This is FP smax/smin). 2167(define_expand "reduc_<maxmin_uns>_scal_<mode>" 2168 [(match_operand:<VEL> 0 "register_operand") 2169 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")] 2170 FMAXMINV)] 2171 "TARGET_SIMD" 2172 { 2173 rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0)); 2174 rtx scratch = gen_reg_rtx (<MODE>mode); 2175 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch, 2176 operands[1])); 2177 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt)); 2178 DONE; 2179 } 2180) 2181 2182;; Likewise for integer cases, signed and unsigned. 
2183(define_expand "reduc_<maxmin_uns>_scal_<mode>" 2184 [(match_operand:<VEL> 0 "register_operand") 2185 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")] 2186 MAXMINV)] 2187 "TARGET_SIMD" 2188 { 2189 rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0)); 2190 rtx scratch = gen_reg_rtx (<MODE>mode); 2191 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch, 2192 operands[1])); 2193 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt)); 2194 DONE; 2195 } 2196) 2197 2198(define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>" 2199 [(set (match_operand:VDQV_S 0 "register_operand" "=w") 2200 (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")] 2201 MAXMINV))] 2202 "TARGET_SIMD" 2203 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>" 2204 [(set_attr "type" "neon_reduc_minmax<q>")] 2205) 2206 2207(define_insn "aarch64_reduc_<maxmin_uns>_internalv2si" 2208 [(set (match_operand:V2SI 0 "register_operand" "=w") 2209 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")] 2210 MAXMINV))] 2211 "TARGET_SIMD" 2212 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s" 2213 [(set_attr "type" "neon_reduc_minmax")] 2214) 2215 2216(define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>" 2217 [(set (match_operand:VHSDF 0 "register_operand" "=w") 2218 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")] 2219 FMAXMINV))] 2220 "TARGET_SIMD" 2221 "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>" 2222 [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")] 2223) 2224 2225;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register 2226;; allocation. 2227;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which 2228;; to select. 
2229;; 2230;; Thus our BSL is of the form: 2231;; op0 = bsl (mask, op2, op3) 2232;; We can use any of: 2233;; 2234;; if (op0 = mask) 2235;; bsl mask, op1, op2 2236;; if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0) 2237;; bit op0, op2, mask 2238;; if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0) 2239;; bif op0, op1, mask 2240;; 2241;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander. 2242;; Some forms of straight-line code may generate the equivalent form 2243;; in *aarch64_simd_bsl<mode>_alt. 2244 2245(define_insn "aarch64_simd_bsl<mode>_internal" 2246 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w,w,w") 2247 (xor:VSDQ_I_DI 2248 (and:VSDQ_I_DI 2249 (xor:VSDQ_I_DI 2250 (match_operand:<V_cmp_result> 3 "register_operand" "w,0,w") 2251 (match_operand:VSDQ_I_DI 2 "register_operand" "w,w,0")) 2252 (match_operand:VSDQ_I_DI 1 "register_operand" "0,w,w")) 2253 (match_dup:<V_cmp_result> 3) 2254 ))] 2255 "TARGET_SIMD" 2256 "@ 2257 bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype> 2258 bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype> 2259 bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>" 2260 [(set_attr "type" "neon_bsl<q>")] 2261) 2262 2263;; We need this form in addition to the above pattern to match the case 2264;; when combine tries merging three insns such that the second operand of 2265;; the outer XOR matches the second operand of the inner XOR rather than 2266;; the first. The two are equivalent but since recog doesn't try all 2267;; permutations of commutative operations, we have to have a separate pattern. 
2268 2269(define_insn "*aarch64_simd_bsl<mode>_alt" 2270 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w,w,w") 2271 (xor:VSDQ_I_DI 2272 (and:VSDQ_I_DI 2273 (xor:VSDQ_I_DI 2274 (match_operand:VSDQ_I_DI 3 "register_operand" "w,w,0") 2275 (match_operand:VSDQ_I_DI 2 "register_operand" "w,0,w")) 2276 (match_operand:VSDQ_I_DI 1 "register_operand" "0,w,w")) 2277 (match_dup:VSDQ_I_DI 2)))] 2278 "TARGET_SIMD" 2279 "@ 2280 bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype> 2281 bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype> 2282 bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>" 2283 [(set_attr "type" "neon_bsl<q>")] 2284) 2285 2286(define_expand "aarch64_simd_bsl<mode>" 2287 [(match_operand:VALLDIF 0 "register_operand") 2288 (match_operand:<V_cmp_result> 1 "register_operand") 2289 (match_operand:VALLDIF 2 "register_operand") 2290 (match_operand:VALLDIF 3 "register_operand")] 2291 "TARGET_SIMD" 2292{ 2293 /* We can't alias operands together if they have different modes. */ 2294 rtx tmp = operands[0]; 2295 if (FLOAT_MODE_P (<MODE>mode)) 2296 { 2297 operands[2] = gen_lowpart (<V_cmp_result>mode, operands[2]); 2298 operands[3] = gen_lowpart (<V_cmp_result>mode, operands[3]); 2299 tmp = gen_reg_rtx (<V_cmp_result>mode); 2300 } 2301 operands[1] = gen_lowpart (<V_cmp_result>mode, operands[1]); 2302 emit_insn (gen_aarch64_simd_bsl<v_cmp_result>_internal (tmp, 2303 operands[1], 2304 operands[2], 2305 operands[3])); 2306 if (tmp != operands[0]) 2307 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp)); 2308 2309 DONE; 2310}) 2311 2312(define_expand "vcond_mask_<mode><v_cmp_result>" 2313 [(match_operand:VALLDI 0 "register_operand") 2314 (match_operand:VALLDI 1 "nonmemory_operand") 2315 (match_operand:VALLDI 2 "nonmemory_operand") 2316 (match_operand:<V_cmp_result> 3 "register_operand")] 2317 "TARGET_SIMD" 2318{ 2319 /* If we have (a = (P) ? -1 : 0); 2320 Then we can simply move the generated mask (result must be int). 
*/ 2321 if (operands[1] == CONSTM1_RTX (<MODE>mode) 2322 && operands[2] == CONST0_RTX (<MODE>mode)) 2323 emit_move_insn (operands[0], operands[3]); 2324 /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask. */ 2325 else if (operands[1] == CONST0_RTX (<MODE>mode) 2326 && operands[2] == CONSTM1_RTX (<MODE>mode)) 2327 emit_insn (gen_one_cmpl<v_cmp_result>2 (operands[0], operands[3])); 2328 else 2329 { 2330 if (!REG_P (operands[1])) 2331 operands[1] = force_reg (<MODE>mode, operands[1]); 2332 if (!REG_P (operands[2])) 2333 operands[2] = force_reg (<MODE>mode, operands[2]); 2334 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3], 2335 operands[1], operands[2])); 2336 } 2337 2338 DONE; 2339}) 2340 2341;; Patterns comparing two vectors to produce a mask. 2342 2343(define_expand "vec_cmp<mode><mode>" 2344 [(set (match_operand:VSDQ_I_DI 0 "register_operand") 2345 (match_operator 1 "comparison_operator" 2346 [(match_operand:VSDQ_I_DI 2 "register_operand") 2347 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))] 2348 "TARGET_SIMD" 2349{ 2350 rtx mask = operands[0]; 2351 enum rtx_code code = GET_CODE (operands[1]); 2352 2353 switch (code) 2354 { 2355 case NE: 2356 case LE: 2357 case LT: 2358 case GE: 2359 case GT: 2360 case EQ: 2361 if (operands[3] == CONST0_RTX (<MODE>mode)) 2362 break; 2363 2364 /* Fall through. 
*/ 2365 default: 2366 if (!REG_P (operands[3])) 2367 operands[3] = force_reg (<MODE>mode, operands[3]); 2368 2369 break; 2370 } 2371 2372 switch (code) 2373 { 2374 case LT: 2375 emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3])); 2376 break; 2377 2378 case GE: 2379 emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3])); 2380 break; 2381 2382 case LE: 2383 emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3])); 2384 break; 2385 2386 case GT: 2387 emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3])); 2388 break; 2389 2390 case LTU: 2391 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2])); 2392 break; 2393 2394 case GEU: 2395 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3])); 2396 break; 2397 2398 case LEU: 2399 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2])); 2400 break; 2401 2402 case GTU: 2403 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3])); 2404 break; 2405 2406 case NE: 2407 /* Handle NE as !EQ. 
*/ 2408 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3])); 2409 emit_insn (gen_one_cmpl<v_cmp_result>2 (mask, mask)); 2410 break; 2411 2412 case EQ: 2413 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3])); 2414 break; 2415 2416 default: 2417 gcc_unreachable (); 2418 } 2419 2420 DONE; 2421}) 2422 2423(define_expand "vec_cmp<mode><v_cmp_result>" 2424 [(set (match_operand:<V_cmp_result> 0 "register_operand") 2425 (match_operator 1 "comparison_operator" 2426 [(match_operand:VDQF 2 "register_operand") 2427 (match_operand:VDQF 3 "nonmemory_operand")]))] 2428 "TARGET_SIMD" 2429{ 2430 int use_zero_form = 0; 2431 enum rtx_code code = GET_CODE (operands[1]); 2432 rtx tmp = gen_reg_rtx (<V_cmp_result>mode); 2433 2434 rtx (*comparison) (rtx, rtx, rtx) = NULL; 2435 2436 switch (code) 2437 { 2438 case LE: 2439 case LT: 2440 case GE: 2441 case GT: 2442 case EQ: 2443 if (operands[3] == CONST0_RTX (<MODE>mode)) 2444 { 2445 use_zero_form = 1; 2446 break; 2447 } 2448 /* Fall through. */ 2449 default: 2450 if (!REG_P (operands[3])) 2451 operands[3] = force_reg (<MODE>mode, operands[3]); 2452 2453 break; 2454 } 2455 2456 switch (code) 2457 { 2458 case LT: 2459 if (use_zero_form) 2460 { 2461 comparison = gen_aarch64_cmlt<mode>; 2462 break; 2463 } 2464 /* Fall through. */ 2465 case UNLT: 2466 std::swap (operands[2], operands[3]); 2467 /* Fall through. */ 2468 case UNGT: 2469 case GT: 2470 comparison = gen_aarch64_cmgt<mode>; 2471 break; 2472 case LE: 2473 if (use_zero_form) 2474 { 2475 comparison = gen_aarch64_cmle<mode>; 2476 break; 2477 } 2478 /* Fall through. */ 2479 case UNLE: 2480 std::swap (operands[2], operands[3]); 2481 /* Fall through. 
*/ 2482 case UNGE: 2483 case GE: 2484 comparison = gen_aarch64_cmge<mode>; 2485 break; 2486 case NE: 2487 case EQ: 2488 comparison = gen_aarch64_cmeq<mode>; 2489 break; 2490 case UNEQ: 2491 case ORDERED: 2492 case UNORDERED: 2493 case LTGT: 2494 break; 2495 default: 2496 gcc_unreachable (); 2497 } 2498 2499 switch (code) 2500 { 2501 case UNGE: 2502 case UNGT: 2503 case UNLE: 2504 case UNLT: 2505 { 2506 /* All of the above must not raise any FP exceptions. Thus we first 2507 check each operand for NaNs and force any elements containing NaN to 2508 zero before using them in the compare. 2509 Example: UN<cc> (a, b) -> UNORDERED (a, b) | 2510 (cm<cc> (isnan (a) ? 0.0 : a, 2511 isnan (b) ? 0.0 : b)) 2512 We use the following transformations for doing the comparisions: 2513 a UNGE b -> a GE b 2514 a UNGT b -> a GT b 2515 a UNLE b -> b GE a 2516 a UNLT b -> b GT a. */ 2517 2518 rtx tmp0 = gen_reg_rtx (<V_cmp_result>mode); 2519 rtx tmp1 = gen_reg_rtx (<V_cmp_result>mode); 2520 rtx tmp2 = gen_reg_rtx (<V_cmp_result>mode); 2521 emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2])); 2522 emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3])); 2523 emit_insn (gen_and<v_cmp_result>3 (tmp2, tmp0, tmp1)); 2524 emit_insn (gen_and<v_cmp_result>3 (tmp0, tmp0, 2525 lowpart_subreg (<V_cmp_result>mode, operands[2], <MODE>mode))); 2526 emit_insn (gen_and<v_cmp_result>3 (tmp1, tmp1, 2527 lowpart_subreg (<V_cmp_result>mode, operands[3], <MODE>mode))); 2528 gcc_assert (comparison != NULL); 2529 emit_insn (comparison (operands[0], 2530 lowpart_subreg (<MODE>mode, tmp0, <V_cmp_result>mode), 2531 lowpart_subreg (<MODE>mode, tmp1, <V_cmp_result>mode))); 2532 emit_insn (gen_orn<v_cmp_result>3 (operands[0], tmp2, operands[0])); 2533 } 2534 break; 2535 2536 case LT: 2537 case LE: 2538 case GT: 2539 case GE: 2540 case EQ: 2541 case NE: 2542 /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ. 2543 As a LT b <=> b GE a && a LE b <=> b GT a. 
Our transformations are: 2544 a GE b -> a GE b 2545 a GT b -> a GT b 2546 a LE b -> b GE a 2547 a LT b -> b GT a 2548 a EQ b -> a EQ b 2549 a NE b -> ~(a EQ b) */ 2550 gcc_assert (comparison != NULL); 2551 emit_insn (comparison (operands[0], operands[2], operands[3])); 2552 if (code == NE) 2553 emit_insn (gen_one_cmpl<v_cmp_result>2 (operands[0], operands[0])); 2554 break; 2555 2556 case LTGT: 2557 /* LTGT is not guranteed to not generate a FP exception. So let's 2558 go the faster way : ((a > b) || (b > a)). */ 2559 emit_insn (gen_aarch64_cmgt<mode> (operands[0], 2560 operands[2], operands[3])); 2561 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2])); 2562 emit_insn (gen_ior<v_cmp_result>3 (operands[0], operands[0], tmp)); 2563 break; 2564 2565 case ORDERED: 2566 case UNORDERED: 2567 case UNEQ: 2568 /* cmeq (a, a) & cmeq (b, b). */ 2569 emit_insn (gen_aarch64_cmeq<mode> (operands[0], 2570 operands[2], operands[2])); 2571 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3])); 2572 emit_insn (gen_and<v_cmp_result>3 (operands[0], operands[0], tmp)); 2573 2574 if (code == UNORDERED) 2575 emit_insn (gen_one_cmpl<v_cmp_result>2 (operands[0], operands[0])); 2576 else if (code == UNEQ) 2577 { 2578 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3])); 2579 emit_insn (gen_orn<v_cmp_result>3 (operands[0], operands[0], tmp)); 2580 } 2581 break; 2582 2583 default: 2584 gcc_unreachable (); 2585 } 2586 2587 DONE; 2588}) 2589 2590(define_expand "vec_cmpu<mode><mode>" 2591 [(set (match_operand:VSDQ_I_DI 0 "register_operand") 2592 (match_operator 1 "comparison_operator" 2593 [(match_operand:VSDQ_I_DI 2 "register_operand") 2594 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))] 2595 "TARGET_SIMD" 2596{ 2597 emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1], 2598 operands[2], operands[3])); 2599 DONE; 2600}) 2601 2602(define_expand "vcond<mode><mode>" 2603 [(set (match_operand:VALLDI 0 "register_operand") 2604 
(if_then_else:VALLDI 2605 (match_operator 3 "comparison_operator" 2606 [(match_operand:VALLDI 4 "register_operand") 2607 (match_operand:VALLDI 5 "nonmemory_operand")]) 2608 (match_operand:VALLDI 1 "nonmemory_operand") 2609 (match_operand:VALLDI 2 "nonmemory_operand")))] 2610 "TARGET_SIMD" 2611{ 2612 rtx mask = gen_reg_rtx (<V_cmp_result>mode); 2613 enum rtx_code code = GET_CODE (operands[3]); 2614 2615 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert 2616 it as well as switch operands 1/2 in order to avoid the additional 2617 NOT instruction. */ 2618 if (code == NE) 2619 { 2620 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]), 2621 operands[4], operands[5]); 2622 std::swap (operands[1], operands[2]); 2623 } 2624 emit_insn (gen_vec_cmp<mode><v_cmp_result> (mask, operands[3], 2625 operands[4], operands[5])); 2626 emit_insn (gen_vcond_mask_<mode><v_cmp_result> (operands[0], operands[1], 2627 operands[2], mask)); 2628 2629 DONE; 2630}) 2631 2632(define_expand "vcond<v_cmp_mixed><mode>" 2633 [(set (match_operand:<V_cmp_mixed> 0 "register_operand") 2634 (if_then_else:<V_cmp_mixed> 2635 (match_operator 3 "comparison_operator" 2636 [(match_operand:VDQF_COND 4 "register_operand") 2637 (match_operand:VDQF_COND 5 "nonmemory_operand")]) 2638 (match_operand:<V_cmp_mixed> 1 "nonmemory_operand") 2639 (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))] 2640 "TARGET_SIMD" 2641{ 2642 rtx mask = gen_reg_rtx (<V_cmp_result>mode); 2643 enum rtx_code code = GET_CODE (operands[3]); 2644 2645 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert 2646 it as well as switch operands 1/2 in order to avoid the additional 2647 NOT instruction. 
*/ 2648 if (code == NE) 2649 { 2650 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]), 2651 operands[4], operands[5]); 2652 std::swap (operands[1], operands[2]); 2653 } 2654 emit_insn (gen_vec_cmp<mode><v_cmp_result> (mask, operands[3], 2655 operands[4], operands[5])); 2656 emit_insn (gen_vcond_mask_<v_cmp_mixed><v_cmp_result> ( 2657 operands[0], operands[1], 2658 operands[2], mask)); 2659 2660 DONE; 2661}) 2662 2663(define_expand "vcondu<mode><mode>" 2664 [(set (match_operand:VSDQ_I_DI 0 "register_operand") 2665 (if_then_else:VSDQ_I_DI 2666 (match_operator 3 "comparison_operator" 2667 [(match_operand:VSDQ_I_DI 4 "register_operand") 2668 (match_operand:VSDQ_I_DI 5 "nonmemory_operand")]) 2669 (match_operand:VSDQ_I_DI 1 "nonmemory_operand") 2670 (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))] 2671 "TARGET_SIMD" 2672{ 2673 rtx mask = gen_reg_rtx (<MODE>mode); 2674 enum rtx_code code = GET_CODE (operands[3]); 2675 2676 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert 2677 it as well as switch operands 1/2 in order to avoid the additional 2678 NOT instruction. 
*/ 2679 if (code == NE) 2680 { 2681 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]), 2682 operands[4], operands[5]); 2683 std::swap (operands[1], operands[2]); 2684 } 2685 emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3], 2686 operands[4], operands[5])); 2687 emit_insn (gen_vcond_mask_<mode><v_cmp_result> (operands[0], operands[1], 2688 operands[2], mask)); 2689 DONE; 2690}) 2691 2692(define_expand "vcondu<mode><v_cmp_mixed>" 2693 [(set (match_operand:VDQF 0 "register_operand") 2694 (if_then_else:VDQF 2695 (match_operator 3 "comparison_operator" 2696 [(match_operand:<V_cmp_mixed> 4 "register_operand") 2697 (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")]) 2698 (match_operand:VDQF 1 "nonmemory_operand") 2699 (match_operand:VDQF 2 "nonmemory_operand")))] 2700 "TARGET_SIMD" 2701{ 2702 rtx mask = gen_reg_rtx (<V_cmp_result>mode); 2703 enum rtx_code code = GET_CODE (operands[3]); 2704 2705 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert 2706 it as well as switch operands 1/2 in order to avoid the additional 2707 NOT instruction. */ 2708 if (code == NE) 2709 { 2710 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]), 2711 operands[4], operands[5]); 2712 std::swap (operands[1], operands[2]); 2713 } 2714 emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> ( 2715 mask, operands[3], 2716 operands[4], operands[5])); 2717 emit_insn (gen_vcond_mask_<mode><v_cmp_result> (operands[0], operands[1], 2718 operands[2], mask)); 2719 DONE; 2720}) 2721 2722;; Patterns for AArch64 SIMD Intrinsics. 2723 2724;; Lane extraction with sign extension to general purpose register. 
;; Extract a lane from a vector and sign-extend it into a general purpose
;; register (SMOV).
(define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
  [(set (match_operand:GPI 0 "register_operand" "=r")
	(sign_extend:GPI
	  (vec_select:<VEL>
	    (match_operand:VDQQH 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_SIMD"
  {
    /* The lane number indexes the vector operand, so flip it for
       endianness in the vector's mode.  This pattern uses two mode
       iterators (GPI and VDQQH), so the reference must be qualified
       as <VDQQH:MODE> -- an unqualified <MODE> would be ambiguous.  */
    operands[2] = GEN_INT (ENDIAN_LANE_N (<VDQQH:MODE>mode,
					  INTVAL (operands[2])));
    return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
  }
  [(set_attr "type" "neon_to_gp<q>")]
)

;; Extract a lane and zero-extend it into a 32-bit general purpose
;; register (UMOV).  Only one mode iterator here, so <MODE> is unambiguous.
(define_insn "*aarch64_get_lane_zero_extendsi<mode>"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(zero_extend:SI
	  (vec_select:<VEL>
	    (match_operand:VDQQH 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_SIMD"
  {
    operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
    return "umov\\t%w0, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_to_gp<q>")]
)

;; Lane extraction of a value, neither sign nor zero extension
;; is guaranteed so upper bits should be considered undefined.
;; RTL uses GCC vector extension indices throughout so flip only for assembly.
2756(define_insn "aarch64_get_lane<mode>" 2757 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv") 2758 (vec_select:<VEL> 2759 (match_operand:VALL_F16 1 "register_operand" "w, w, w") 2760 (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))] 2761 "TARGET_SIMD" 2762 { 2763 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]))); 2764 switch (which_alternative) 2765 { 2766 case 0: 2767 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]"; 2768 case 1: 2769 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]"; 2770 case 2: 2771 return "st1\\t{%1.<Vetype>}[%2], %0"; 2772 default: 2773 gcc_unreachable (); 2774 } 2775 } 2776 [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")] 2777) 2778 2779;; In this insn, operand 1 should be low, and operand 2 the high part of the 2780;; dest vector. 2781 2782(define_insn "*aarch64_combinez<mode>" 2783 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w") 2784 (vec_concat:<VDBL> 2785 (match_operand:VD_BHSI 1 "general_operand" "w,?r,m") 2786 (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz,Dz,Dz")))] 2787 "TARGET_SIMD && !BYTES_BIG_ENDIAN" 2788 "@ 2789 mov\\t%0.8b, %1.8b 2790 fmov\t%d0, %1 2791 ldr\\t%d0, %1" 2792 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg") 2793 (set_attr "simd" "yes,*,yes") 2794 (set_attr "fp" "*,yes,*")] 2795) 2796 2797(define_insn "*aarch64_combinez_be<mode>" 2798 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w") 2799 (vec_concat:<VDBL> 2800 (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz,Dz,Dz") 2801 (match_operand:VD_BHSI 1 "general_operand" "w,?r,m")))] 2802 "TARGET_SIMD && BYTES_BIG_ENDIAN" 2803 "@ 2804 mov\\t%0.8b, %1.8b 2805 fmov\t%d0, %1 2806 ldr\\t%d0, %1" 2807 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg") 2808 (set_attr "simd" "yes,*,yes") 2809 (set_attr "fp" "*,yes,*")] 2810) 2811 2812(define_expand "aarch64_combine<mode>" 2813 [(match_operand:<VDBL> 0 "register_operand") 2814 
(match_operand:VDC 1 "register_operand") 2815 (match_operand:VDC 2 "register_operand")] 2816 "TARGET_SIMD" 2817{ 2818 rtx op1, op2; 2819 if (BYTES_BIG_ENDIAN) 2820 { 2821 op1 = operands[2]; 2822 op2 = operands[1]; 2823 } 2824 else 2825 { 2826 op1 = operands[1]; 2827 op2 = operands[2]; 2828 } 2829 emit_insn (gen_aarch64_combine_internal<mode> (operands[0], op1, op2)); 2830 DONE; 2831} 2832) 2833 2834(define_insn_and_split "aarch64_combine_internal<mode>" 2835 [(set (match_operand:<VDBL> 0 "register_operand" "=&w") 2836 (vec_concat:<VDBL> (match_operand:VDC 1 "register_operand" "w") 2837 (match_operand:VDC 2 "register_operand" "w")))] 2838 "TARGET_SIMD" 2839 "#" 2840 "&& reload_completed" 2841 [(const_int 0)] 2842{ 2843 if (BYTES_BIG_ENDIAN) 2844 aarch64_split_simd_combine (operands[0], operands[2], operands[1]); 2845 else 2846 aarch64_split_simd_combine (operands[0], operands[1], operands[2]); 2847 DONE; 2848} 2849[(set_attr "type" "multiple")] 2850) 2851 2852(define_expand "aarch64_simd_combine<mode>" 2853 [(match_operand:<VDBL> 0 "register_operand") 2854 (match_operand:VDC 1 "register_operand") 2855 (match_operand:VDC 2 "register_operand")] 2856 "TARGET_SIMD" 2857 { 2858 emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1])); 2859 emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2])); 2860 DONE; 2861 } 2862[(set_attr "type" "multiple")] 2863) 2864 2865;; <su><addsub>l<q>. 

;; Widening add/subtract of the high halves of two quad vectors
;; (SADDL2/UADDL2/SSUBL2/USUBL2).  Operand 3 selects the hi-half lanes.
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			   (match_operand:VQW 1 "register_operand" "w")
			   (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
		       (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			   (match_operand:VQW 2 "register_operand" "w")
			   (match_dup 3)))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_long")]
)

;; As above but for the low halves (SADDL/UADDL/SSUBL/USUBL); operand 3
;; selects the lo-half lanes.
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			   (match_operand:VQW 1 "register_operand" "w")
			   (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
		       (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			   (match_operand:VQW 2 "register_operand" "w")
			   (match_dup 3)))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_long")]
)


;; Intrinsic expanders: build a hi-half lane-select parallel and hand off
;; to the corresponding *_hi_internal pattern.
(define_expand "aarch64_saddl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQW 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
						  operands[2], p));
  DONE;
})

(define_expand "aarch64_uaddl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQW 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
						  operands[2], p));
  DONE;
})

(define_expand "aarch64_ssubl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQW 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
						  operands[2], p));
  DONE;
})

(define_expand "aarch64_usubl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQW 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
						  operands[2], p));
  DONE;
})

;; Widening add/subtract of whole 64-bit vectors (SADDL/UADDL/SSUBL/USUBL):
;; both operands are extended to the double-width mode.
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
			   (match_operand:VD_BHSI 1 "register_operand" "w"))
			(ANY_EXTEND:<VWIDE>
			   (match_operand:VD_BHSI 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_long")]
)

;; <su><addsub>w<q>.
2954 2955(define_expand "widen_ssum<mode>3" 2956 [(set (match_operand:<VDBLW> 0 "register_operand" "") 2957 (plus:<VDBLW> (sign_extend:<VDBLW> 2958 (match_operand:VQW 1 "register_operand" "")) 2959 (match_operand:<VDBLW> 2 "register_operand" "")))] 2960 "TARGET_SIMD" 2961 { 2962 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false); 2963 rtx temp = gen_reg_rtx (GET_MODE (operands[0])); 2964 2965 emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2], 2966 operands[1], p)); 2967 emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1])); 2968 DONE; 2969 } 2970) 2971 2972(define_expand "widen_ssum<mode>3" 2973 [(set (match_operand:<VWIDE> 0 "register_operand" "") 2974 (plus:<VWIDE> (sign_extend:<VWIDE> 2975 (match_operand:VD_BHSI 1 "register_operand" "")) 2976 (match_operand:<VWIDE> 2 "register_operand" "")))] 2977 "TARGET_SIMD" 2978{ 2979 emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1])); 2980 DONE; 2981}) 2982 2983(define_expand "widen_usum<mode>3" 2984 [(set (match_operand:<VDBLW> 0 "register_operand" "") 2985 (plus:<VDBLW> (zero_extend:<VDBLW> 2986 (match_operand:VQW 1 "register_operand" "")) 2987 (match_operand:<VDBLW> 2 "register_operand" "")))] 2988 "TARGET_SIMD" 2989 { 2990 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false); 2991 rtx temp = gen_reg_rtx (GET_MODE (operands[0])); 2992 2993 emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2], 2994 operands[1], p)); 2995 emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1])); 2996 DONE; 2997 } 2998) 2999 3000(define_expand "widen_usum<mode>3" 3001 [(set (match_operand:<VWIDE> 0 "register_operand" "") 3002 (plus:<VWIDE> (zero_extend:<VWIDE> 3003 (match_operand:VD_BHSI 1 "register_operand" "")) 3004 (match_operand:<VWIDE> 2 "register_operand" "")))] 3005 "TARGET_SIMD" 3006{ 3007 emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1])); 3008 DONE; 3009}) 3010 3011(define_insn 
"aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>" 3012 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 3013 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w") 3014 (ANY_EXTEND:<VWIDE> 3015 (match_operand:VD_BHSI 2 "register_operand" "w"))))] 3016 "TARGET_SIMD" 3017 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>" 3018 [(set_attr "type" "neon_<ADDSUB:optab>_widen")] 3019) 3020 3021(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal" 3022 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 3023 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w") 3024 (ANY_EXTEND:<VWIDE> 3025 (vec_select:<VHALF> 3026 (match_operand:VQW 2 "register_operand" "w") 3027 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))] 3028 "TARGET_SIMD" 3029 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>" 3030 [(set_attr "type" "neon_<ADDSUB:optab>_widen")] 3031) 3032 3033(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal" 3034 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 3035 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w") 3036 (ANY_EXTEND:<VWIDE> 3037 (vec_select:<VHALF> 3038 (match_operand:VQW 2 "register_operand" "w") 3039 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))] 3040 "TARGET_SIMD" 3041 "<ANY_EXTEND:su><ADDSUB:optab>w2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>" 3042 [(set_attr "type" "neon_<ADDSUB:optab>_widen")] 3043) 3044 3045(define_expand "aarch64_saddw2<mode>" 3046 [(match_operand:<VWIDE> 0 "register_operand" "=w") 3047 (match_operand:<VWIDE> 1 "register_operand" "w") 3048 (match_operand:VQW 2 "register_operand" "w")] 3049 "TARGET_SIMD" 3050{ 3051 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); 3052 emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1], 3053 operands[2], p)); 3054 DONE; 3055}) 3056 3057(define_expand "aarch64_uaddw2<mode>" 3058 [(match_operand:<VWIDE> 0 "register_operand" "=w") 3059 
(match_operand:<VWIDE> 1 "register_operand" "w") 3060 (match_operand:VQW 2 "register_operand" "w")] 3061 "TARGET_SIMD" 3062{ 3063 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); 3064 emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1], 3065 operands[2], p)); 3066 DONE; 3067}) 3068 3069 3070(define_expand "aarch64_ssubw2<mode>" 3071 [(match_operand:<VWIDE> 0 "register_operand" "=w") 3072 (match_operand:<VWIDE> 1 "register_operand" "w") 3073 (match_operand:VQW 2 "register_operand" "w")] 3074 "TARGET_SIMD" 3075{ 3076 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); 3077 emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1], 3078 operands[2], p)); 3079 DONE; 3080}) 3081 3082(define_expand "aarch64_usubw2<mode>" 3083 [(match_operand:<VWIDE> 0 "register_operand" "=w") 3084 (match_operand:<VWIDE> 1 "register_operand" "w") 3085 (match_operand:VQW 2 "register_operand" "w")] 3086 "TARGET_SIMD" 3087{ 3088 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); 3089 emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1], 3090 operands[2], p)); 3091 DONE; 3092}) 3093 3094;; <su><r>h<addsub>. 3095 3096(define_insn "aarch64_<sur>h<addsub><mode>" 3097 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") 3098 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w") 3099 (match_operand:VDQ_BHSI 2 "register_operand" "w")] 3100 HADDSUB))] 3101 "TARGET_SIMD" 3102 "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" 3103 [(set_attr "type" "neon_<addsub>_halve<q>")] 3104) 3105 3106;; <r><addsub>hn<q>. 
3107 3108(define_insn "aarch64_<sur><addsub>hn<mode>" 3109 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w") 3110 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w") 3111 (match_operand:VQN 2 "register_operand" "w")] 3112 ADDSUBHN))] 3113 "TARGET_SIMD" 3114 "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>" 3115 [(set_attr "type" "neon_<addsub>_halve_narrow_q")] 3116) 3117 3118(define_insn "aarch64_<sur><addsub>hn2<mode>" 3119 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w") 3120 (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0") 3121 (match_operand:VQN 2 "register_operand" "w") 3122 (match_operand:VQN 3 "register_operand" "w")] 3123 ADDSUBHN2))] 3124 "TARGET_SIMD" 3125 "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>" 3126 [(set_attr "type" "neon_<addsub>_halve_narrow_q")] 3127) 3128 3129;; pmul. 3130 3131(define_insn "aarch64_pmul<mode>" 3132 [(set (match_operand:VB 0 "register_operand" "=w") 3133 (unspec:VB [(match_operand:VB 1 "register_operand" "w") 3134 (match_operand:VB 2 "register_operand" "w")] 3135 UNSPEC_PMUL))] 3136 "TARGET_SIMD" 3137 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" 3138 [(set_attr "type" "neon_mul_<Vetype><q>")] 3139) 3140 3141;; fmulx. 
3142 3143(define_insn "aarch64_fmulx<mode>" 3144 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w") 3145 (unspec:VHSDF_HSDF 3146 [(match_operand:VHSDF_HSDF 1 "register_operand" "w") 3147 (match_operand:VHSDF_HSDF 2 "register_operand" "w")] 3148 UNSPEC_FMULX))] 3149 "TARGET_SIMD" 3150 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>" 3151 [(set_attr "type" "neon_fp_mul_<stype>")] 3152) 3153 3154;; vmulxq_lane_f32, and vmulx_laneq_f32 3155 3156(define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>" 3157 [(set (match_operand:VDQSF 0 "register_operand" "=w") 3158 (unspec:VDQSF 3159 [(match_operand:VDQSF 1 "register_operand" "w") 3160 (vec_duplicate:VDQSF 3161 (vec_select:<VEL> 3162 (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w") 3163 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))] 3164 UNSPEC_FMULX))] 3165 "TARGET_SIMD" 3166 { 3167 operands[3] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode, 3168 INTVAL (operands[3]))); 3169 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]"; 3170 } 3171 [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")] 3172) 3173 3174;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32 3175 3176(define_insn "*aarch64_mulx_elt<mode>" 3177 [(set (match_operand:VDQF 0 "register_operand" "=w") 3178 (unspec:VDQF 3179 [(match_operand:VDQF 1 "register_operand" "w") 3180 (vec_duplicate:VDQF 3181 (vec_select:<VEL> 3182 (match_operand:VDQF 2 "register_operand" "w") 3183 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))] 3184 UNSPEC_FMULX))] 3185 "TARGET_SIMD" 3186 { 3187 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]))); 3188 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]"; 3189 } 3190 [(set_attr "type" "neon_fp_mul_<Vetype><q>")] 3191) 3192 3193;; vmulxq_lane 3194 3195(define_insn "*aarch64_mulx_elt_from_dup<mode>" 3196 [(set (match_operand:VHSDF 0 "register_operand" "=w") 3197 (unspec:VHSDF 3198 [(match_operand:VHSDF 1 "register_operand" "w") 3199 (vec_duplicate:VHSDF 
3200 (match_operand:<VEL> 2 "register_operand" "<h_con>"))] 3201 UNSPEC_FMULX))] 3202 "TARGET_SIMD" 3203 "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]"; 3204 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")] 3205) 3206 3207;; vmulxs_lane_f32, vmulxs_laneq_f32 3208;; vmulxd_lane_f64 == vmulx_lane_f64 3209;; vmulxd_laneq_f64 == vmulx_laneq_f64 3210 3211(define_insn "*aarch64_vgetfmulx<mode>" 3212 [(set (match_operand:<VEL> 0 "register_operand" "=w") 3213 (unspec:<VEL> 3214 [(match_operand:<VEL> 1 "register_operand" "w") 3215 (vec_select:<VEL> 3216 (match_operand:VDQF 2 "register_operand" "w") 3217 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))] 3218 UNSPEC_FMULX))] 3219 "TARGET_SIMD" 3220 { 3221 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]))); 3222 return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]"; 3223 } 3224 [(set_attr "type" "fmul<Vetype>")] 3225) 3226;; <su>q<addsub> 3227 3228(define_insn "aarch64_<su_optab><optab><mode>" 3229 [(set (match_operand:VSDQ_I 0 "register_operand" "=w") 3230 (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w") 3231 (match_operand:VSDQ_I 2 "register_operand" "w")))] 3232 "TARGET_SIMD" 3233 "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>" 3234 [(set_attr "type" "neon_<optab><q>")] 3235) 3236 3237;; suqadd and usqadd 3238 3239(define_insn "aarch64_<sur>qadd<mode>" 3240 [(set (match_operand:VSDQ_I 0 "register_operand" "=w") 3241 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0") 3242 (match_operand:VSDQ_I 2 "register_operand" "w")] 3243 USSUQADD))] 3244 "TARGET_SIMD" 3245 "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>" 3246 [(set_attr "type" "neon_qadd<q>")] 3247) 3248 3249;; sqmovun 3250 3251(define_insn "aarch64_sqmovun<mode>" 3252 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w") 3253 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")] 3254 UNSPEC_SQXTUN))] 3255 "TARGET_SIMD" 3256 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>" 
3257 [(set_attr "type" "neon_sat_shift_imm_narrow_q")] 3258) 3259 3260;; sqmovn and uqmovn 3261 3262(define_insn "aarch64_<sur>qmovn<mode>" 3263 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w") 3264 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")] 3265 SUQMOVN))] 3266 "TARGET_SIMD" 3267 "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>" 3268 [(set_attr "type" "neon_sat_shift_imm_narrow_q")] 3269) 3270 3271;; <su>q<absneg> 3272 3273(define_insn "aarch64_s<optab><mode>" 3274 [(set (match_operand:VSDQ_I 0 "register_operand" "=w") 3275 (UNQOPS:VSDQ_I 3276 (match_operand:VSDQ_I 1 "register_operand" "w")))] 3277 "TARGET_SIMD" 3278 "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>" 3279 [(set_attr "type" "neon_<optab><q>")] 3280) 3281 3282;; sq<r>dmulh. 3283 3284(define_insn "aarch64_sq<r>dmulh<mode>" 3285 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w") 3286 (unspec:VSDQ_HSI 3287 [(match_operand:VSDQ_HSI 1 "register_operand" "w") 3288 (match_operand:VSDQ_HSI 2 "register_operand" "w")] 3289 VQDMULH))] 3290 "TARGET_SIMD" 3291 "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>" 3292 [(set_attr "type" "neon_sat_mul_<Vetype><q>")] 3293) 3294 3295;; sq<r>dmulh_lane 3296 3297(define_insn "aarch64_sq<r>dmulh_lane<mode>" 3298 [(set (match_operand:VDQHS 0 "register_operand" "=w") 3299 (unspec:VDQHS 3300 [(match_operand:VDQHS 1 "register_operand" "w") 3301 (vec_select:<VEL> 3302 (match_operand:<VCOND> 2 "register_operand" "<vwx>") 3303 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))] 3304 VQDMULH))] 3305 "TARGET_SIMD" 3306 "* 3307 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3]))); 3308 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";" 3309 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")] 3310) 3311 3312(define_insn "aarch64_sq<r>dmulh_laneq<mode>" 3313 [(set (match_operand:VDQHS 0 "register_operand" "=w") 3314 (unspec:VDQHS 3315 [(match_operand:VDQHS 1 "register_operand" "w") 3316 
(vec_select:<VEL> 3317 (match_operand:<VCONQ> 2 "register_operand" "<vwx>") 3318 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))] 3319 VQDMULH))] 3320 "TARGET_SIMD" 3321 "* 3322 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3]))); 3323 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";" 3324 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")] 3325) 3326 3327(define_insn "aarch64_sq<r>dmulh_lane<mode>" 3328 [(set (match_operand:SD_HSI 0 "register_operand" "=w") 3329 (unspec:SD_HSI 3330 [(match_operand:SD_HSI 1 "register_operand" "w") 3331 (vec_select:<VEL> 3332 (match_operand:<VCOND> 2 "register_operand" "<vwx>") 3333 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))] 3334 VQDMULH))] 3335 "TARGET_SIMD" 3336 "* 3337 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3]))); 3338 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";" 3339 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")] 3340) 3341 3342(define_insn "aarch64_sq<r>dmulh_laneq<mode>" 3343 [(set (match_operand:SD_HSI 0 "register_operand" "=w") 3344 (unspec:SD_HSI 3345 [(match_operand:SD_HSI 1 "register_operand" "w") 3346 (vec_select:<VEL> 3347 (match_operand:<VCONQ> 2 "register_operand" "<vwx>") 3348 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))] 3349 VQDMULH))] 3350 "TARGET_SIMD" 3351 "* 3352 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3]))); 3353 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";" 3354 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")] 3355) 3356 3357;; sqrdml[as]h. 
3358 3359(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>" 3360 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w") 3361 (unspec:VSDQ_HSI 3362 [(match_operand:VSDQ_HSI 1 "register_operand" "0") 3363 (match_operand:VSDQ_HSI 2 "register_operand" "w") 3364 (match_operand:VSDQ_HSI 3 "register_operand" "w")] 3365 SQRDMLH_AS))] 3366 "TARGET_SIMD_RDMA" 3367 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>" 3368 [(set_attr "type" "neon_sat_mla_<Vetype>_long")] 3369) 3370 3371;; sqrdml[as]h_lane. 3372 3373(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>" 3374 [(set (match_operand:VDQHS 0 "register_operand" "=w") 3375 (unspec:VDQHS 3376 [(match_operand:VDQHS 1 "register_operand" "0") 3377 (match_operand:VDQHS 2 "register_operand" "w") 3378 (vec_select:<VEL> 3379 (match_operand:<VCOND> 3 "register_operand" "<vwx>") 3380 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))] 3381 SQRDMLH_AS))] 3382 "TARGET_SIMD_RDMA" 3383 { 3384 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4]))); 3385 return 3386 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]"; 3387 } 3388 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] 3389) 3390 3391(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>" 3392 [(set (match_operand:SD_HSI 0 "register_operand" "=w") 3393 (unspec:SD_HSI 3394 [(match_operand:SD_HSI 1 "register_operand" "0") 3395 (match_operand:SD_HSI 2 "register_operand" "w") 3396 (vec_select:<VEL> 3397 (match_operand:<VCOND> 3 "register_operand" "<vwx>") 3398 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))] 3399 SQRDMLH_AS))] 3400 "TARGET_SIMD_RDMA" 3401 { 3402 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4]))); 3403 return 3404 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]"; 3405 } 3406 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] 3407) 3408 3409;; sqrdml[as]h_laneq. 
3410 3411(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>" 3412 [(set (match_operand:VDQHS 0 "register_operand" "=w") 3413 (unspec:VDQHS 3414 [(match_operand:VDQHS 1 "register_operand" "0") 3415 (match_operand:VDQHS 2 "register_operand" "w") 3416 (vec_select:<VEL> 3417 (match_operand:<VCONQ> 3 "register_operand" "<vwx>") 3418 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))] 3419 SQRDMLH_AS))] 3420 "TARGET_SIMD_RDMA" 3421 { 3422 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4]))); 3423 return 3424 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]"; 3425 } 3426 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] 3427) 3428 3429(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>" 3430 [(set (match_operand:SD_HSI 0 "register_operand" "=w") 3431 (unspec:SD_HSI 3432 [(match_operand:SD_HSI 1 "register_operand" "0") 3433 (match_operand:SD_HSI 2 "register_operand" "w") 3434 (vec_select:<VEL> 3435 (match_operand:<VCONQ> 3 "register_operand" "<vwx>") 3436 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))] 3437 SQRDMLH_AS))] 3438 "TARGET_SIMD_RDMA" 3439 { 3440 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4]))); 3441 return 3442 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]"; 3443 } 3444 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] 3445) 3446 3447;; vqdml[sa]l 3448 3449(define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>" 3450 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 3451 (SBINQOPS:<VWIDE> 3452 (match_operand:<VWIDE> 1 "register_operand" "0") 3453 (ss_ashift:<VWIDE> 3454 (mult:<VWIDE> 3455 (sign_extend:<VWIDE> 3456 (match_operand:VSD_HSI 2 "register_operand" "w")) 3457 (sign_extend:<VWIDE> 3458 (match_operand:VSD_HSI 3 "register_operand" "w"))) 3459 (const_int 1))))] 3460 "TARGET_SIMD" 3461 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>" 3462 [(set_attr "type" "neon_sat_mla_<Vetype>_long")] 3463) 3464 3465;; 
vqdml[sa]l_lane 3466 3467(define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>" 3468 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 3469 (SBINQOPS:<VWIDE> 3470 (match_operand:<VWIDE> 1 "register_operand" "0") 3471 (ss_ashift:<VWIDE> 3472 (mult:<VWIDE> 3473 (sign_extend:<VWIDE> 3474 (match_operand:VD_HSI 2 "register_operand" "w")) 3475 (sign_extend:<VWIDE> 3476 (vec_duplicate:VD_HSI 3477 (vec_select:<VEL> 3478 (match_operand:<VCOND> 3 "register_operand" "<vwx>") 3479 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) 3480 )) 3481 (const_int 1))))] 3482 "TARGET_SIMD" 3483 { 3484 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4]))); 3485 return 3486 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; 3487 } 3488 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] 3489) 3490 3491(define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>" 3492 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 3493 (SBINQOPS:<VWIDE> 3494 (match_operand:<VWIDE> 1 "register_operand" "0") 3495 (ss_ashift:<VWIDE> 3496 (mult:<VWIDE> 3497 (sign_extend:<VWIDE> 3498 (match_operand:VD_HSI 2 "register_operand" "w")) 3499 (sign_extend:<VWIDE> 3500 (vec_duplicate:VD_HSI 3501 (vec_select:<VEL> 3502 (match_operand:<VCONQ> 3 "register_operand" "<vwx>") 3503 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) 3504 )) 3505 (const_int 1))))] 3506 "TARGET_SIMD" 3507 { 3508 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4]))); 3509 return 3510 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; 3511 } 3512 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] 3513) 3514 3515(define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>" 3516 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 3517 (SBINQOPS:<VWIDE> 3518 (match_operand:<VWIDE> 1 "register_operand" "0") 3519 (ss_ashift:<VWIDE> 3520 (mult:<VWIDE> 3521 (sign_extend:<VWIDE> 3522 (match_operand:SD_HSI 2 "register_operand" "w")) 3523 
(sign_extend:<VWIDE> 3524 (vec_select:<VEL> 3525 (match_operand:<VCOND> 3 "register_operand" "<vwx>") 3526 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) 3527 ) 3528 (const_int 1))))] 3529 "TARGET_SIMD" 3530 { 3531 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4]))); 3532 return 3533 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; 3534 } 3535 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] 3536) 3537 3538(define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>" 3539 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 3540 (SBINQOPS:<VWIDE> 3541 (match_operand:<VWIDE> 1 "register_operand" "0") 3542 (ss_ashift:<VWIDE> 3543 (mult:<VWIDE> 3544 (sign_extend:<VWIDE> 3545 (match_operand:SD_HSI 2 "register_operand" "w")) 3546 (sign_extend:<VWIDE> 3547 (vec_select:<VEL> 3548 (match_operand:<VCONQ> 3 "register_operand" "<vwx>") 3549 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) 3550 ) 3551 (const_int 1))))] 3552 "TARGET_SIMD" 3553 { 3554 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4]))); 3555 return 3556 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; 3557 } 3558 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] 3559) 3560 3561;; vqdml[sa]l_n 3562 3563(define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>" 3564 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 3565 (SBINQOPS:<VWIDE> 3566 (match_operand:<VWIDE> 1 "register_operand" "0") 3567 (ss_ashift:<VWIDE> 3568 (mult:<VWIDE> 3569 (sign_extend:<VWIDE> 3570 (match_operand:VD_HSI 2 "register_operand" "w")) 3571 (sign_extend:<VWIDE> 3572 (vec_duplicate:VD_HSI 3573 (match_operand:<VEL> 3 "register_operand" "<vwx>")))) 3574 (const_int 1))))] 3575 "TARGET_SIMD" 3576 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]" 3577 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] 3578) 3579 3580;; sqdml[as]l2 3581 3582(define_insn 
"aarch64_sqdml<SBINQOPS:as>l2<mode>_internal" 3583 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 3584 (SBINQOPS:<VWIDE> 3585 (match_operand:<VWIDE> 1 "register_operand" "0") 3586 (ss_ashift:<VWIDE> 3587 (mult:<VWIDE> 3588 (sign_extend:<VWIDE> 3589 (vec_select:<VHALF> 3590 (match_operand:VQ_HSI 2 "register_operand" "w") 3591 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" ""))) 3592 (sign_extend:<VWIDE> 3593 (vec_select:<VHALF> 3594 (match_operand:VQ_HSI 3 "register_operand" "w") 3595 (match_dup 4)))) 3596 (const_int 1))))] 3597 "TARGET_SIMD" 3598 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>" 3599 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] 3600) 3601 3602(define_expand "aarch64_sqdmlal2<mode>" 3603 [(match_operand:<VWIDE> 0 "register_operand" "=w") 3604 (match_operand:<VWIDE> 1 "register_operand" "w") 3605 (match_operand:VQ_HSI 2 "register_operand" "w") 3606 (match_operand:VQ_HSI 3 "register_operand" "w")] 3607 "TARGET_SIMD" 3608{ 3609 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); 3610 emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1], 3611 operands[2], operands[3], p)); 3612 DONE; 3613}) 3614 3615(define_expand "aarch64_sqdmlsl2<mode>" 3616 [(match_operand:<VWIDE> 0 "register_operand" "=w") 3617 (match_operand:<VWIDE> 1 "register_operand" "w") 3618 (match_operand:VQ_HSI 2 "register_operand" "w") 3619 (match_operand:VQ_HSI 3 "register_operand" "w")] 3620 "TARGET_SIMD" 3621{ 3622 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); 3623 emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1], 3624 operands[2], operands[3], p)); 3625 DONE; 3626}) 3627 3628;; vqdml[sa]l2_lane 3629 3630(define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal" 3631 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 3632 (SBINQOPS:<VWIDE> 3633 (match_operand:<VWIDE> 1 "register_operand" "0") 3634 (ss_ashift:<VWIDE> 3635 (mult:<VWIDE> 3636 (sign_extend:<VWIDE> 3637 
(vec_select:<VHALF> 3638 (match_operand:VQ_HSI 2 "register_operand" "w") 3639 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" ""))) 3640 (sign_extend:<VWIDE> 3641 (vec_duplicate:<VHALF> 3642 (vec_select:<VEL> 3643 (match_operand:<VCOND> 3 "register_operand" "<vwx>") 3644 (parallel [(match_operand:SI 4 "immediate_operand" "i")]) 3645 )))) 3646 (const_int 1))))] 3647 "TARGET_SIMD" 3648 { 3649 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4]))); 3650 return 3651 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; 3652 } 3653 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] 3654) 3655 3656(define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal" 3657 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 3658 (SBINQOPS:<VWIDE> 3659 (match_operand:<VWIDE> 1 "register_operand" "0") 3660 (ss_ashift:<VWIDE> 3661 (mult:<VWIDE> 3662 (sign_extend:<VWIDE> 3663 (vec_select:<VHALF> 3664 (match_operand:VQ_HSI 2 "register_operand" "w") 3665 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" ""))) 3666 (sign_extend:<VWIDE> 3667 (vec_duplicate:<VHALF> 3668 (vec_select:<VEL> 3669 (match_operand:<VCONQ> 3 "register_operand" "<vwx>") 3670 (parallel [(match_operand:SI 4 "immediate_operand" "i")]) 3671 )))) 3672 (const_int 1))))] 3673 "TARGET_SIMD" 3674 { 3675 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4]))); 3676 return 3677 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; 3678 } 3679 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] 3680) 3681 3682(define_expand "aarch64_sqdmlal2_lane<mode>" 3683 [(match_operand:<VWIDE> 0 "register_operand" "=w") 3684 (match_operand:<VWIDE> 1 "register_operand" "w") 3685 (match_operand:VQ_HSI 2 "register_operand" "w") 3686 (match_operand:<VCOND> 3 "register_operand" "<vwx>") 3687 (match_operand:SI 4 "immediate_operand" "i")] 3688 "TARGET_SIMD" 3689{ 3690 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); 3691 emit_insn 
(gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1], 3692 operands[2], operands[3], 3693 operands[4], p)); 3694 DONE; 3695}) 3696 3697(define_expand "aarch64_sqdmlal2_laneq<mode>" 3698 [(match_operand:<VWIDE> 0 "register_operand" "=w") 3699 (match_operand:<VWIDE> 1 "register_operand" "w") 3700 (match_operand:VQ_HSI 2 "register_operand" "w") 3701 (match_operand:<VCONQ> 3 "register_operand" "<vwx>") 3702 (match_operand:SI 4 "immediate_operand" "i")] 3703 "TARGET_SIMD" 3704{ 3705 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); 3706 emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1], 3707 operands[2], operands[3], 3708 operands[4], p)); 3709 DONE; 3710}) 3711 3712(define_expand "aarch64_sqdmlsl2_lane<mode>" 3713 [(match_operand:<VWIDE> 0 "register_operand" "=w") 3714 (match_operand:<VWIDE> 1 "register_operand" "w") 3715 (match_operand:VQ_HSI 2 "register_operand" "w") 3716 (match_operand:<VCOND> 3 "register_operand" "<vwx>") 3717 (match_operand:SI 4 "immediate_operand" "i")] 3718 "TARGET_SIMD" 3719{ 3720 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); 3721 emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1], 3722 operands[2], operands[3], 3723 operands[4], p)); 3724 DONE; 3725}) 3726 3727(define_expand "aarch64_sqdmlsl2_laneq<mode>" 3728 [(match_operand:<VWIDE> 0 "register_operand" "=w") 3729 (match_operand:<VWIDE> 1 "register_operand" "w") 3730 (match_operand:VQ_HSI 2 "register_operand" "w") 3731 (match_operand:<VCONQ> 3 "register_operand" "<vwx>") 3732 (match_operand:SI 4 "immediate_operand" "i")] 3733 "TARGET_SIMD" 3734{ 3735 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); 3736 emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1], 3737 operands[2], operands[3], 3738 operands[4], p)); 3739 DONE; 3740}) 3741 3742(define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal" 3743 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 3744 
(SBINQOPS:<VWIDE> 3745 (match_operand:<VWIDE> 1 "register_operand" "0") 3746 (ss_ashift:<VWIDE> 3747 (mult:<VWIDE> 3748 (sign_extend:<VWIDE> 3749 (vec_select:<VHALF> 3750 (match_operand:VQ_HSI 2 "register_operand" "w") 3751 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" ""))) 3752 (sign_extend:<VWIDE> 3753 (vec_duplicate:<VHALF> 3754 (match_operand:<VEL> 3 "register_operand" "<vwx>")))) 3755 (const_int 1))))] 3756 "TARGET_SIMD" 3757 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]" 3758 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] 3759) 3760 3761(define_expand "aarch64_sqdmlal2_n<mode>" 3762 [(match_operand:<VWIDE> 0 "register_operand" "=w") 3763 (match_operand:<VWIDE> 1 "register_operand" "w") 3764 (match_operand:VQ_HSI 2 "register_operand" "w") 3765 (match_operand:<VEL> 3 "register_operand" "w")] 3766 "TARGET_SIMD" 3767{ 3768 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); 3769 emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1], 3770 operands[2], operands[3], 3771 p)); 3772 DONE; 3773}) 3774 3775(define_expand "aarch64_sqdmlsl2_n<mode>" 3776 [(match_operand:<VWIDE> 0 "register_operand" "=w") 3777 (match_operand:<VWIDE> 1 "register_operand" "w") 3778 (match_operand:VQ_HSI 2 "register_operand" "w") 3779 (match_operand:<VEL> 3 "register_operand" "w")] 3780 "TARGET_SIMD" 3781{ 3782 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); 3783 emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1], 3784 operands[2], operands[3], 3785 p)); 3786 DONE; 3787}) 3788 3789;; vqdmull 3790 3791(define_insn "aarch64_sqdmull<mode>" 3792 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 3793 (ss_ashift:<VWIDE> 3794 (mult:<VWIDE> 3795 (sign_extend:<VWIDE> 3796 (match_operand:VSD_HSI 1 "register_operand" "w")) 3797 (sign_extend:<VWIDE> 3798 (match_operand:VSD_HSI 2 "register_operand" "w"))) 3799 (const_int 1)))] 3800 "TARGET_SIMD" 3801 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, 
%<v>2<Vmtype>"
  [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
)

;; vqdmull_lane

;; Signed saturating doubling multiply long, with the second operand taken
;; from a lane of a 64-bit vector.  The lane index is flipped for big-endian
;; via ENDIAN_LANE_N before printing.
(define_insn "aarch64_sqdmull_lane<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (match_operand:VD_HSI 1 "register_operand" "w"))
	       (sign_extend:<VWIDE>
		 (vec_duplicate:VD_HSI
		   (vec_select:<VEL>
		     (match_operand:<VCOND> 2 "register_operand" "<vwx>")
		     (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
	       ))
	     (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

;; As above, but the lane comes from a 128-bit (quad) vector (<VCONQ>).
(define_insn "aarch64_sqdmull_laneq<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (match_operand:VD_HSI 1 "register_operand" "w"))
	       (sign_extend:<VWIDE>
		 (vec_duplicate:VD_HSI
		   (vec_select:<VEL>
		     (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
		     (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
	       ))
	     (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

;; Scalar (SD_HSI) variant: no vec_duplicate, the selected lane is used
;; directly as the scalar multiplicand.
(define_insn "aarch64_sqdmull_lane<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (match_operand:SD_HSI 1 "register_operand" "w"))
	       (sign_extend:<VWIDE>
		 (vec_select:<VEL>
		   (match_operand:<VCOND> 2 "register_operand" "<vwx>")
		   (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
	       ))
	     (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

;; Scalar variant with the lane taken from a 128-bit vector.
(define_insn "aarch64_sqdmull_laneq<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (match_operand:SD_HSI 1 "register_operand" "w"))
	       (sign_extend:<VWIDE>
		 (vec_select:<VEL>
		   (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
		   (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
	       ))
	     (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

;; vqdmull_n

;; Multiply every element of operand 1 by the scalar in operand 2
;; (printed as lane 0 of the scalar register).
(define_insn "aarch64_sqdmull_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (match_operand:VD_HSI 1 "register_operand" "w"))
	       (sign_extend:<VWIDE>
		 (vec_duplicate:VD_HSI
		   (match_operand:<VEL> 2 "register_operand" "<vwx>")))
	       )
	     (const_int 1)))]
  "TARGET_SIMD"
  "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

;; vqdmull2

;; Operates on the high halves of the two 128-bit inputs; operand 3 is the
;; parallel selecting the high half, supplied by the expander below.
;; NOTE(review): the "type" attribute says _scalar_long although both
;; multiplicands are vectors -- verify against the scheduler descriptions.
(define_insn "aarch64_sqdmull2<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
		   (match_operand:VQ_HSI 1 "register_operand" "w")
		   (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
		   (match_operand:VQ_HSI 2 "register_operand" "w")
		   (match_dup 3)))
	       )
	     (const_int 1)))]
  "TARGET_SIMD"
  "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

(define_expand "aarch64_sqdmull2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQ_HSI 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
						  operands[2], p));
  DONE;
})

;; vqdmull2_lane

(define_insn "aarch64_sqdmull2_lane<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
		   (match_operand:VQ_HSI 1 "register_operand" "w")
		   (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
	       (sign_extend:<VWIDE>
		 (vec_duplicate:<VHALF>
		   (vec_select:<VEL>
		     (match_operand:<VCOND> 2 "register_operand" "<vwx>")
		     (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
	       ))
	     (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
    return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdmull2_laneq<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
		   (match_operand:VQ_HSI 1 "register_operand" "w")
		   (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
	       (sign_extend:<VWIDE>
		 (vec_duplicate:<VHALF>
		   (vec_select:<VEL>
		     (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
		     (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
	       ))
	     (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
    return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

(define_expand "aarch64_sqdmull2_lane<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQ_HSI 1 "register_operand" "w")
   (match_operand:<VCOND> 2 "register_operand" "<vwx>")
   (match_operand:SI 3 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
						       operands[2], operands[3],
						       p));
  DONE;
})

(define_expand "aarch64_sqdmull2_laneq<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQ_HSI 1 "register_operand" "w")
   (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
   (match_operand:SI 3 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
							operands[2], operands[3],
							p));
  DONE;
})

;; vqdmull2_n

(define_insn "aarch64_sqdmull2_n<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
		   (match_operand:VQ_HSI 1 "register_operand" "w")
		   (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
	       (sign_extend:<VWIDE>
		 (vec_duplicate:<VHALF>
		   (match_operand:<VEL> 2 "register_operand" "<vwx>")))
	       )
	     (const_int 1)))]
  "TARGET_SIMD"
  "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

(define_expand "aarch64_sqdmull2_n<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQ_HSI 1 "register_operand" "w")
   (match_operand:<VEL> 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
						    operands[2], p));
  DONE;
})

;; vshl

;; Note: the stray ';' that used to follow the output template (harmless,
;; as ';' begins an md comment) has been removed.
(define_insn "aarch64_<sur>shl<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
        (unspec:VSDQ_I_DI
	  [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
	   (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
	  VSHL))]
  "TARGET_SIMD"
  "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)


;; vqshl

;; Likewise, stray ';' after the template removed.
(define_insn "aarch64_<sur>q<r>shl<mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
        (unspec:VSDQ_I
	  [(match_operand:VSDQ_I 1 "register_operand" "w")
	   (match_operand:VSDQ_I 2 "register_operand" "w")]
	  VQSHL))]
  "TARGET_SIMD"
  "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_sat_shift_reg<q>")]
)

;; vshll_n

;; A shift by exactly the element bit-size is the plain SHLL form.
(define_insn "aarch64_<sur>shll_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
			 (match_operand:SI 2
			   "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
			 VSHLL))]
  "TARGET_SIMD"
  {
    if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
      return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
    else
      return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
  }
  [(set_attr "type" "neon_shift_imm_long")]
)

;; vshll_high_n

(define_insn "aarch64_<sur>shll2_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
			 (match_operand:SI 2 "immediate_operand" "i")]
			 VSHLL))]
  "TARGET_SIMD"
  {
    if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
      return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
    else
      return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
  }
  [(set_attr "type" "neon_shift_imm_long")]
)

;; vrshr_n

(define_insn "aarch64_<sur>shr_n<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
        (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
			   (match_operand:SI 2
			     "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
			  VRSHR_N))]
  "TARGET_SIMD"
  "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)

;; v(r)sra_n

;; Shift-and-accumulate: operand 1 is tied to the output ("0" constraint).
(define_insn "aarch64_<sur>sra_n<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
	(unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
		       (match_operand:VSDQ_I_DI 2 "register_operand" "w")
                       (match_operand:SI 3
			 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
                      VSRA))]
  "TARGET_SIMD"
  "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
  [(set_attr "type" "neon_shift_acc<q>")]
)

;; vs<lr>i_n

;; Shift-and-insert: operand 1 (the insertion background) is tied to output.
(define_insn "aarch64_<sur>s<lr>i_n<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
	(unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
		       (match_operand:VSDQ_I_DI 2 "register_operand" "w")
                       (match_operand:SI 3
			 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
                      VSLRI))]
  "TARGET_SIMD"
  "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
  [(set_attr "type" "neon_shift_imm<q>")]
)

;; vqshl(u)

(define_insn "aarch64_<sur>qshl<u>_n<mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
	(unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
		       (match_operand:SI 2
			 "aarch64_simd_shift_imm_<ve_mode>" "i")]
                      VQSHL_N))]
  "TARGET_SIMD"
  "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)


;; vq(r)shr(u)n_n

(define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
        (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
			    (match_operand:SI 2
			      "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
			   VQSHRN_N))]
  "TARGET_SIMD"
  "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)


;; cm(eq|ge|gt|lt|le)
;; Note, we have constraints for Dz and Z as different expanders
;; have different ideas of what should be passed to this pattern.

(define_insn "aarch64_cm<optab><mode>"
  [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
	(neg:<V_cmp_result>
	  (COMPARISONS:<V_cmp_result>
	    (match_operand:VDQ_I 1 "register_operand" "w,w")
	    (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
	  )))]
  "TARGET_SIMD"
  "@
  cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
  cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
  [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
)

;; DImode comparisons may live in either register file; after reload we
;; split the GP-register case to a compare + cset sequence.
(define_insn_and_split "aarch64_cm<optab>di"
  [(set (match_operand:DI 0 "register_operand" "=w,w,r")
	(neg:DI
	  (COMPARISONS:DI
	    (match_operand:DI 1 "register_operand" "w,w,r")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
	  )))
     (clobber (reg:CC CC_REGNUM))]
  "TARGET_SIMD"
  "#"
  "reload_completed"
  [(set (match_operand:DI 0 "register_operand")
	(neg:DI
	  (COMPARISONS:DI
	    (match_operand:DI 1 "register_operand")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero")
	  )))]
  {
    /* If we are in the general purpose register file,
       we split to a sequence of comparison and store.  */
    if (GP_REGNUM_P (REGNO (operands[0]))
	&& GP_REGNUM_P (REGNO (operands[1])))
      {
	machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
	rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
	rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
	emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
	DONE;
      }
    /* Otherwise, we expand to a similar pattern which does not
       clobber CC_REGNUM.  */
  }
  [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
)

;; The SIMD-register form of the above, without the CC clobber.
(define_insn "*aarch64_cm<optab>di"
  [(set (match_operand:DI 0 "register_operand" "=w,w")
	(neg:DI
	  (COMPARISONS:DI
	    (match_operand:DI 1 "register_operand" "w,w")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
	  )))]
  "TARGET_SIMD && reload_completed"
  "@
  cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
  cm<optab>\t%d0, %d1, #0"
  [(set_attr "type" "neon_compare, neon_compare_zero")]
)

;; cm(hs|hi)

(define_insn "aarch64_cm<optab><mode>"
  [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
	(neg:<V_cmp_result>
	  (UCOMPARISONS:<V_cmp_result>
	    (match_operand:VDQ_I 1 "register_operand" "w")
	    (match_operand:VDQ_I 2 "register_operand" "w")
	  )))]
  "TARGET_SIMD"
  "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
  [(set_attr "type" "neon_compare<q>")]
)

(define_insn_and_split "aarch64_cm<optab>di"
  [(set (match_operand:DI 0 "register_operand" "=w,r")
	(neg:DI
	  (UCOMPARISONS:DI
	    (match_operand:DI 1 "register_operand" "w,r")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
	  )))
    (clobber (reg:CC CC_REGNUM))]
  "TARGET_SIMD"
  "#"
  "reload_completed"
  [(set (match_operand:DI 0 "register_operand")
	(neg:DI
	  (UCOMPARISONS:DI
	    (match_operand:DI 1 "register_operand")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero")
	  )))]
  {
    /* If we are in the general purpose register file,
       we split to a sequence of comparison and store.  */
    if (GP_REGNUM_P (REGNO (operands[0]))
	&& GP_REGNUM_P (REGNO (operands[1])))
      {
	machine_mode mode = CCmode;
	rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
	rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
	emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
	DONE;
      }
    /* Otherwise, we expand to a similar pattern which does not
       clobber CC_REGNUM.  */
  }
  [(set_attr "type" "neon_compare,multiple")]
)

(define_insn "*aarch64_cm<optab>di"
  [(set (match_operand:DI 0 "register_operand" "=w")
	(neg:DI
	  (UCOMPARISONS:DI
	    (match_operand:DI 1 "register_operand" "w")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
	  )))]
  "TARGET_SIMD && reload_completed"
  "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
  [(set_attr "type" "neon_compare")]
)

;; cmtst

;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
;; we don't have any insns using ne, and aarch64_vcond outputs
;; not (neg (eq (and x y) 0))
;; which is rewritten by simplify_rtx as
;; plus (eq (and x y) 0) -1.

;; Vector cmtst, matched in the canonical plus/eq/-1 form described above.
(define_insn "aarch64_cmtst<mode>"
  [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
	(plus:<V_cmp_result>
	  (eq:<V_cmp_result>
	    (and:VDQ_I
	      (match_operand:VDQ_I 1 "register_operand" "w")
	      (match_operand:VDQ_I 2 "register_operand" "w"))
	    (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
	  (match_operand:<V_cmp_result> 4 "aarch64_simd_imm_minus_one")))
  ]
  "TARGET_SIMD"
  "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_tst<q>")]
)

;; DImode test: after reload, the GP-register alternative becomes a
;; tst-style compare of (and x y) against zero plus a negated cset.
(define_insn_and_split "aarch64_cmtstdi"
  [(set (match_operand:DI 0 "register_operand" "=w,r")
	(neg:DI
	  (ne:DI
	    (and:DI
	      (match_operand:DI 1 "register_operand" "w,r")
	      (match_operand:DI 2 "register_operand" "w,r"))
	    (const_int 0))))
    (clobber (reg:CC CC_REGNUM))]
  "TARGET_SIMD"
  "#"
  "reload_completed"
  [(set (match_operand:DI 0 "register_operand")
	(neg:DI
	  (ne:DI
	    (and:DI
	      (match_operand:DI 1 "register_operand")
	      (match_operand:DI 2 "register_operand"))
	    (const_int 0))))]
  {
    /* If we are in the general purpose register file,
       we split to a sequence of comparison and store.  */
    if (GP_REGNUM_P (REGNO (operands[0]))
	&& GP_REGNUM_P (REGNO (operands[1])))
      {
	rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
	machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
	rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
	rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
	emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
	DONE;
      }
    /* Otherwise, we expand to a similar pattern which does not
       clobber CC_REGNUM.  */
  }
  [(set_attr "type" "neon_tst,multiple")]
)

;; SIMD-register DImode cmtst, no CC clobber.
(define_insn "*aarch64_cmtstdi"
  [(set (match_operand:DI 0 "register_operand" "=w")
	(neg:DI
	  (ne:DI
	    (and:DI
	      (match_operand:DI 1 "register_operand" "w")
	      (match_operand:DI 2 "register_operand" "w"))
	    (const_int 0))))]
  "TARGET_SIMD"
  "cmtst\t%d0, %d1, %d2"
  [(set_attr "type" "neon_tst")]
)

;; fcm(eq|ge|gt|le|lt)

;; Floating-point compares; second alternative compares against +0.0 (YDz).
(define_insn "aarch64_cm<optab><mode>"
  [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
	(neg:<V_cmp_result>
	  (COMPARISONS:<V_cmp_result>
	    (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
	    (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
	  )))]
  "TARGET_SIMD"
  "@
  fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
  fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
  [(set_attr "type" "neon_fp_compare_<stype><q>")]
)

;; fac(ge|gt)
;; Note we can also handle what would be fac(le|lt) by
;; generating fac(ge|gt).

;; Absolute compare: |op1| <cmp> |op2|, negated to give an all-ones/zeros mask.
(define_insn "aarch64_fac<optab><mode>"
  [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
	(neg:<V_cmp_result>
	  (FAC_COMPARISONS:<V_cmp_result>
	    (abs:VHSDF_HSDF
	      (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
	    (abs:VHSDF_HSDF
	      (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
	  )))]
  "TARGET_SIMD"
  "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
  [(set_attr "type" "neon_fp_compare_<stype><q>")]
)

;; addp

;; Pairwise integer addition on 64-bit vectors.
(define_insn "aarch64_addp<mode>"
  [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
        (unspec:VD_BHSI
          [(match_operand:VD_BHSI 1 "register_operand" "w")
	   (match_operand:VD_BHSI 2 "register_operand" "w")]
          UNSPEC_ADDP))]
  "TARGET_SIMD"
  "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_reduc_add<q>")]
)

;; Scalar addp: sums the two DI elements of a V2DI into a D register.
(define_insn "aarch64_addpdi"
  [(set (match_operand:DI 0 "register_operand" "=w")
        (unspec:DI
          [(match_operand:V2DI 1 "register_operand" "w")]
          UNSPEC_ADDP))]
  "TARGET_SIMD"
  "addp\t%d0, %1.2d"
  [(set_attr "type" "neon_reduc_add")]
)

;; sqrt

;; The expander may emit an approximate-sqrt sequence instead of fsqrt
;; (aarch64_emit_approx_sqrt decides based on tuning/options).
(define_expand "sqrt<mode>2"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
  "TARGET_SIMD"
{
  if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
    DONE;
})

(define_insn "*sqrt<mode>2"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
)

;; Patterns for vector struct loads and stores.

(define_insn "aarch64_simd_ld2<mode>"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD2))]
  "TARGET_SIMD"
  "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
  [(set_attr "type" "neon_load2_2reg<q>")]
)

;; Load a single structure and replicate it to all lanes of both registers.
(define_insn "aarch64_simd_ld2r<mode>"
  [(set (match_operand:OI 0 "register_operand" "=w")
       (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
		  UNSPEC_LD2_DUP))]
  "TARGET_SIMD"
  "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
  [(set_attr "type" "neon_load2_all_lanes<q>")]
)

;; Load one structure into lane %3 of an existing register pair (operand 2
;; is tied to the output); lane index flipped for big-endian.
(define_insn "aarch64_vec_load_lanesoi_lane<mode>"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (match_operand:OI 2 "register_operand" "0")
		    (match_operand:SI 3 "immediate_operand" "i")
		    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
		   UNSPEC_LD2_LANE))]
  "TARGET_SIMD"
  {
    operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
    return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
  }
  [(set_attr "type" "neon_load2_one_lane")]
)

;; On big-endian targets, load and then permute the register list so lane
;; numbering matches GCC vector extension order.
(define_expand "vec_load_lanesoi<mode>"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD2))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (OImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode);
      emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
      emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
    }
  else
    emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
  DONE;
})

(define_insn "aarch64_simd_st2<mode>"
  [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:OI [(match_operand:OI 1 "register_operand" "w")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST2))]
  "TARGET_SIMD"
  "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
  [(set_attr "type" "neon_store2_2reg<q>")]
)

;; RTL uses GCC vector extension indices, so flip only for assembly.
(define_insn "aarch64_vec_store_lanesoi_lane<mode>"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:OI 1 "register_operand" "w")
		     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
		     (match_operand:SI 2 "immediate_operand" "i")]
		    UNSPEC_ST2_LANE))]
  "TARGET_SIMD"
  {
    operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
    return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
  }
  [(set_attr "type" "neon_store2_one_lane<q>")]
)

;; Mirror of vec_load_lanesoi: permute the register list before storing
;; on big-endian targets.
(define_expand "vec_store_lanesoi<mode>"
  [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:OI [(match_operand:OI 1 "register_operand" "w")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST2))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (OImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode);
      emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
      emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
    }
  else
    emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
  DONE;
})

(define_insn "aarch64_simd_ld3<mode>"
  [(set (match_operand:CI 0 "register_operand" "=w")
	(unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD3))]
  "TARGET_SIMD"
  "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
  [(set_attr "type" "neon_load3_3reg<q>")]
)

;; Load one 3-element structure and replicate to all lanes of three registers.
(define_insn "aarch64_simd_ld3r<mode>"
  [(set (match_operand:CI 0 "register_operand" "=w")
       (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
		  UNSPEC_LD3_DUP))]
  "TARGET_SIMD"
  "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
  [(set_attr "type" "neon_load3_all_lanes<q>")]
)

;; Load one structure into lane %3 of an existing register triple.
(define_insn "aarch64_vec_load_lanesci_lane<mode>"
  [(set (match_operand:CI 0 "register_operand" "=w")
	(unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (match_operand:CI 2 "register_operand" "0")
		    (match_operand:SI 3 "immediate_operand" "i")
		    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD3_LANE))]
  "TARGET_SIMD"
{
  operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
  return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
}
  [(set_attr "type" "neon_load3_one_lane")]
)

;; Big-endian: load then permute the register list to match RTL lane order.
(define_expand "vec_load_lanesci<mode>"
  [(set (match_operand:CI 0 "register_operand" "=w")
	(unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD3))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (CImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode);
      emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
      emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
    }
  else
    emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
  DONE;
})

(define_insn "aarch64_simd_st3<mode>"
  [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:CI [(match_operand:CI 1 "register_operand" "w")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST3))]
  "TARGET_SIMD"
  "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
  [(set_attr "type" "neon_store3_3reg<q>")]
)

;; RTL uses GCC vector extension indices, so flip only for assembly.
(define_insn "aarch64_vec_store_lanesci_lane<mode>"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:CI 1 "register_operand" "w")
		     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
		     (match_operand:SI 2 "immediate_operand" "i")]
		    UNSPEC_ST3_LANE))]
  "TARGET_SIMD"
  {
    operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
    return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
  }
  [(set_attr "type" "neon_store3_one_lane<q>")]
)

;; Big-endian: permute the register list before storing.
(define_expand "vec_store_lanesci<mode>"
  [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:CI [(match_operand:CI 1 "register_operand" "w")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST3))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (CImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode);
      emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
      emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
    }
  else
    emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
  DONE;
})

(define_insn "aarch64_simd_ld4<mode>"
  [(set (match_operand:XI 0 "register_operand" "=w")
	(unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD4))]
  "TARGET_SIMD"
  "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
  [(set_attr "type" "neon_load4_4reg<q>")]
)

;; Load one 4-element structure and replicate to all lanes of four registers.
(define_insn "aarch64_simd_ld4r<mode>"
  [(set (match_operand:XI 0 "register_operand" "=w")
       (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
		  UNSPEC_LD4_DUP))]
  "TARGET_SIMD"
  "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
  [(set_attr "type" "neon_load4_all_lanes<q>")]
)

;; Load one structure into lane %3 of an existing register quad.
(define_insn "aarch64_vec_load_lanesxi_lane<mode>"
  [(set (match_operand:XI 0 "register_operand" "=w")
	(unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (match_operand:XI 2 "register_operand" "0")
		    (match_operand:SI 3 "immediate_operand" "i")
		    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD4_LANE))]
  "TARGET_SIMD"
{
  operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
  return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
}
  [(set_attr "type" "neon_load4_one_lane")]
)

;; Big-endian: load then permute the register list to match RTL lane order.
(define_expand "vec_load_lanesxi<mode>"
  [(set (match_operand:XI 0 "register_operand" "=w")
	(unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD4))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (XImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode);
      emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
      emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
    }
  else
    emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
  DONE;
})

(define_insn "aarch64_simd_st4<mode>"
  [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:XI [(match_operand:XI 1 "register_operand" "w")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST4))]
  "TARGET_SIMD"
  "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
  [(set_attr "type" "neon_store4_4reg<q>")]
)

;; RTL uses GCC vector extension indices, so flip only for assembly.
(define_insn "aarch64_vec_store_lanesxi_lane<mode>"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:XI 1 "register_operand" "w")
		     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
		     (match_operand:SI 2 "immediate_operand" "i")]
		    UNSPEC_ST4_LANE))]
  "TARGET_SIMD"
  {
    operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
    return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
  }
  [(set_attr "type" "neon_store4_one_lane<q>")]
)

;; Big-endian: permute the register list before storing.
(define_expand "vec_store_lanesxi<mode>"
  [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:XI [(match_operand:XI 1 "register_operand" "w")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST4))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (XImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode);
      emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
      emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
    }
  else
    emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
  DONE;
})

;; Reverse a register list: after reload, split into one TBL per 128-bit
;; register using the byte-permute mask in operand 2.
(define_insn_and_split "aarch64_rev_reglist<mode>"
[(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
	(unspec:VSTRUCT
	           [(match_operand:VSTRUCT 1 "register_operand" "w")
		    (match_operand:V16QI 2 "register_operand" "w")]
                   UNSPEC_REV_REGLIST))]
  "TARGET_SIMD"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  int i;
  int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
  for (i = 0; i < nregs; i++)
    {
      rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
      rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
      emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
    }
  DONE;
}
  [(set_attr "type" "neon_tbl1_q")
   (set_attr "length" "<insn_count>")]
)

;; Reload patterns for AdvSIMD register list operands.

(define_expand "mov<mode>"
  [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
	(match_operand:VSTRUCT 1 "general_operand" ""))]
  "TARGET_SIMD"
{
  if (can_create_pseudo_p ())
    {
      if (GET_CODE (operands[0]) != REG)
	operands[1] = force_reg (<MODE>mode, operands[1]);
    }
})

;; Little-endian struct move: reg-reg copies are split later ("#"),
;; memory transfers use multi-register ld1/st1.
(define_insn "*aarch64_mov<mode>"
  [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
	(match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
  "@
   #
   st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
   ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
  [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
		     neon_load<nregs>_<nregs>reg_q")
   (set_attr "length" "<insn_count>,4,4")]
)

(define_insn "aarch64_be_ld1<mode>"
  [(set (match_operand:VALLDI_F16 0	"register_operand" "=w")
	(unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
			     "aarch64_simd_struct_operand" "Utv")]
	UNSPEC_LD1))]
  "TARGET_SIMD"
  "ld1\\t{%0<Vmtype>}, %1"
  [(set_attr "type" "neon_load1_1reg<q>")]
)

(define_insn "aarch64_be_st1<mode>"
  [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
	UNSPEC_ST1))]
  "TARGET_SIMD"
  "st1\\t{%1<Vmtype>}, %0"
  [(set_attr "type" "neon_store1_1reg<q>")]
)

;; Big-endian OI move: memory transfers via ldp/stp of Q registers.
(define_insn "*aarch64_be_movoi"
  [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
	(match_operand:OI 1 "general_operand"      " w,w,m"))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN
   && (register_operand (operands[0], OImode)
       || register_operand (operands[1], OImode))"
  "@
   #
   stp\\t%q1, %R1, %0
   ldp\\t%q0, %R0, %1"
  [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
   (set_attr "length" "8,4,4")]
)

;; Big-endian CI/XI moves are always split (see the define_splits below).
(define_insn "*aarch64_be_movci"
  [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
	(match_operand:CI 1 "general_operand"      " w,w,o"))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN
   && (register_operand (operands[0], CImode)
       || register_operand (operands[1], CImode))"
  "#"
  [(set_attr "type" "multiple")
   (set_attr "length" "12,4,4")]
)

(define_insn "*aarch64_be_movxi"
  [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
	(match_operand:XI 1 "general_operand"      " w,w,o"))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN
   && (register_operand (operands[0], XImode)
       || register_operand (operands[1], XImode))"
  "#"
  [(set_attr "type" "multiple")
   (set_attr "length" "16,4,4")]
)

;; Split OI reg-reg copies into two TImode register moves.
(define_split
  [(set (match_operand:OI 0 "register_operand")
	(match_operand:OI 1 "register_operand"))]
  "TARGET_SIMD && reload_completed"
  [(const_int 0)]
{
  aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
  DONE;
})

;; Split CI moves: reg-reg as three TImode copies; big-endian memory moves
;; as an OImode move plus a trailing TImode piece at offset 32.
(define_split
  [(set (match_operand:CI 0 "nonimmediate_operand")
	(match_operand:CI 1 "general_operand"))]
  "TARGET_SIMD && reload_completed"
  [(const_int 0)]
{
  if (register_operand (operands[0], CImode)
      && register_operand (operands[1], CImode))
    {
      aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
      DONE;
    }
  else if (BYTES_BIG_ENDIAN)
    {
      emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
		      simplify_gen_subreg (OImode, operands[1], CImode, 0));
      emit_move_insn (gen_lowpart (V16QImode,
				   simplify_gen_subreg (TImode, operands[0],
							CImode, 32)),
		      gen_lowpart (V16QImode,
				   simplify_gen_subreg (TImode, operands[1],
							CImode, 32)));
      DONE;
    }
  else
    FAIL;
})

(define_split
  [(set (match_operand:XI 0 "nonimmediate_operand")
	(match_operand:XI 1
"general_operand"))] 4905 "TARGET_SIMD && reload_completed" 4906 [(const_int 0)] 4907{ 4908 if (register_operand (operands[0], XImode) 4909 && register_operand (operands[1], XImode)) 4910 { 4911 aarch64_simd_emit_reg_reg_move (operands, TImode, 4); 4912 DONE; 4913 } 4914 else if (BYTES_BIG_ENDIAN) 4915 { 4916 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0), 4917 simplify_gen_subreg (OImode, operands[1], XImode, 0)); 4918 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32), 4919 simplify_gen_subreg (OImode, operands[1], XImode, 32)); 4920 DONE; 4921 } 4922 else 4923 FAIL; 4924}) 4925 4926(define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>" 4927 [(match_operand:VSTRUCT 0 "register_operand" "=w") 4928 (match_operand:DI 1 "register_operand" "w") 4929 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] 4930 "TARGET_SIMD" 4931{ 4932 rtx mem = gen_rtx_MEM (BLKmode, operands[1]); 4933 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode)) 4934 * <VSTRUCT:nregs>); 4935 4936 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0], 4937 mem)); 4938 DONE; 4939}) 4940 4941(define_insn "aarch64_ld2<mode>_dreg_le" 4942 [(set (match_operand:OI 0 "register_operand" "=w") 4943 (subreg:OI 4944 (vec_concat:<VRL2> 4945 (vec_concat:<VDBL> 4946 (unspec:VD 4947 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")] 4948 UNSPEC_LD2) 4949 (vec_duplicate:VD (const_int 0))) 4950 (vec_concat:<VDBL> 4951 (unspec:VD [(match_dup 1)] 4952 UNSPEC_LD2) 4953 (vec_duplicate:VD (const_int 0)))) 0))] 4954 "TARGET_SIMD && !BYTES_BIG_ENDIAN" 4955 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1" 4956 [(set_attr "type" "neon_load2_2reg<q>")] 4957) 4958 4959(define_insn "aarch64_ld2<mode>_dreg_be" 4960 [(set (match_operand:OI 0 "register_operand" "=w") 4961 (subreg:OI 4962 (vec_concat:<VRL2> 4963 (vec_concat:<VDBL> 4964 (vec_duplicate:VD (const_int 0)) 4965 (unspec:VD 4966 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")] 
4967 UNSPEC_LD2)) 4968 (vec_concat:<VDBL> 4969 (vec_duplicate:VD (const_int 0)) 4970 (unspec:VD [(match_dup 1)] 4971 UNSPEC_LD2))) 0))] 4972 "TARGET_SIMD && BYTES_BIG_ENDIAN" 4973 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1" 4974 [(set_attr "type" "neon_load2_2reg<q>")] 4975) 4976 4977(define_insn "aarch64_ld2<mode>_dreg_le" 4978 [(set (match_operand:OI 0 "register_operand" "=w") 4979 (subreg:OI 4980 (vec_concat:<VRL2> 4981 (vec_concat:<VDBL> 4982 (unspec:DX 4983 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")] 4984 UNSPEC_LD2) 4985 (const_int 0)) 4986 (vec_concat:<VDBL> 4987 (unspec:DX [(match_dup 1)] 4988 UNSPEC_LD2) 4989 (const_int 0))) 0))] 4990 "TARGET_SIMD && !BYTES_BIG_ENDIAN" 4991 "ld1\\t{%S0.1d - %T0.1d}, %1" 4992 [(set_attr "type" "neon_load1_2reg<q>")] 4993) 4994 4995(define_insn "aarch64_ld2<mode>_dreg_be" 4996 [(set (match_operand:OI 0 "register_operand" "=w") 4997 (subreg:OI 4998 (vec_concat:<VRL2> 4999 (vec_concat:<VDBL> 5000 (const_int 0) 5001 (unspec:DX 5002 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")] 5003 UNSPEC_LD2)) 5004 (vec_concat:<VDBL> 5005 (const_int 0) 5006 (unspec:DX [(match_dup 1)] 5007 UNSPEC_LD2))) 0))] 5008 "TARGET_SIMD && BYTES_BIG_ENDIAN" 5009 "ld1\\t{%S0.1d - %T0.1d}, %1" 5010 [(set_attr "type" "neon_load1_2reg<q>")] 5011) 5012 5013(define_insn "aarch64_ld3<mode>_dreg_le" 5014 [(set (match_operand:CI 0 "register_operand" "=w") 5015 (subreg:CI 5016 (vec_concat:<VRL3> 5017 (vec_concat:<VRL2> 5018 (vec_concat:<VDBL> 5019 (unspec:VD 5020 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")] 5021 UNSPEC_LD3) 5022 (vec_duplicate:VD (const_int 0))) 5023 (vec_concat:<VDBL> 5024 (unspec:VD [(match_dup 1)] 5025 UNSPEC_LD3) 5026 (vec_duplicate:VD (const_int 0)))) 5027 (vec_concat:<VDBL> 5028 (unspec:VD [(match_dup 1)] 5029 UNSPEC_LD3) 5030 (vec_duplicate:VD (const_int 0)))) 0))] 5031 "TARGET_SIMD && !BYTES_BIG_ENDIAN" 5032 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1" 5033 [(set_attr "type" "neon_load3_3reg<q>")] 5034) 
;; ld3 of D-register vectors, big-endian (zero-padded low halves).
(define_insn "aarch64_ld3<mode>_dreg_be"
  [(set (match_operand:CI 0 "register_operand" "=w")
	(subreg:CI
	  (vec_concat:<VRL3>
	    (vec_concat:<VRL2>
	      (vec_concat:<VDBL>
		(vec_duplicate:VD (const_int 0))
		(unspec:VD
		  [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
		  UNSPEC_LD3))
	      (vec_concat:<VDBL>
		(vec_duplicate:VD (const_int 0))
		(unspec:VD [(match_dup 1)]
			   UNSPEC_LD3)))
	    (vec_concat:<VDBL>
	      (vec_duplicate:VD (const_int 0))
	      (unspec:VD [(match_dup 1)]
			 UNSPEC_LD3))) 0))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
  [(set_attr "type" "neon_load3_3reg<q>")]
)

;; ld3 of 64-bit scalar (DX) elements, little-endian: emitted as a
;; 3-register ld1 since no interleaving applies to single elements.
(define_insn "aarch64_ld3<mode>_dreg_le"
  [(set (match_operand:CI 0 "register_operand" "=w")
	(subreg:CI
	  (vec_concat:<VRL3>
	    (vec_concat:<VRL2>
	      (vec_concat:<VDBL>
		(unspec:DX
		  [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
		  UNSPEC_LD3)
		(const_int 0))
	      (vec_concat:<VDBL>
		(unspec:DX [(match_dup 1)]
			   UNSPEC_LD3)
		(const_int 0)))
	    (vec_concat:<VDBL>
	      (unspec:DX [(match_dup 1)]
			 UNSPEC_LD3)
	      (const_int 0))) 0))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "ld1\\t{%S0.1d - %U0.1d}, %1"
  [(set_attr "type" "neon_load1_3reg<q>")]
)

;; Big-endian counterpart of the DX ld3 pattern above.
(define_insn "aarch64_ld3<mode>_dreg_be"
  [(set (match_operand:CI 0 "register_operand" "=w")
	(subreg:CI
	  (vec_concat:<VRL3>
	    (vec_concat:<VRL2>
	      (vec_concat:<VDBL>
		(const_int 0)
		(unspec:DX
		  [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
		  UNSPEC_LD3))
	      (vec_concat:<VDBL>
		(const_int 0)
		(unspec:DX [(match_dup 1)]
			   UNSPEC_LD3)))
	    (vec_concat:<VDBL>
	      (const_int 0)
	      (unspec:DX [(match_dup 1)]
			 UNSPEC_LD3))) 0))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "ld1\\t{%S0.1d - %U0.1d}, %1"
  [(set_attr "type" "neon_load1_3reg<q>")]
)

;; ld4 of D-register vectors, little-endian (zero-padded high halves).
(define_insn "aarch64_ld4<mode>_dreg_le"
  [(set (match_operand:XI 0 "register_operand" "=w")
	(subreg:XI
	  (vec_concat:<VRL4>
	    (vec_concat:<VRL2>
	      (vec_concat:<VDBL>
		(unspec:VD
		  [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
		  UNSPEC_LD4)
		(vec_duplicate:VD (const_int 0)))
	      (vec_concat:<VDBL>
		(unspec:VD [(match_dup 1)]
			   UNSPEC_LD4)
		(vec_duplicate:VD (const_int 0))))
	    (vec_concat:<VRL2>
	      (vec_concat:<VDBL>
		(unspec:VD [(match_dup 1)]
			   UNSPEC_LD4)
		(vec_duplicate:VD (const_int 0)))
	      (vec_concat:<VDBL>
		(unspec:VD [(match_dup 1)]
			   UNSPEC_LD4)
		(vec_duplicate:VD (const_int 0))))) 0))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
  [(set_attr "type" "neon_load4_4reg<q>")]
)

;; ld4 of D-register vectors, big-endian (zero-padded low halves).
(define_insn "aarch64_ld4<mode>_dreg_be"
  [(set (match_operand:XI 0 "register_operand" "=w")
	(subreg:XI
	  (vec_concat:<VRL4>
	    (vec_concat:<VRL2>
	      (vec_concat:<VDBL>
		(vec_duplicate:VD (const_int 0))
		(unspec:VD
		  [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
		  UNSPEC_LD4))
	      (vec_concat:<VDBL>
		(vec_duplicate:VD (const_int 0))
		(unspec:VD [(match_dup 1)]
			   UNSPEC_LD4)))
	    (vec_concat:<VRL2>
	      (vec_concat:<VDBL>
		(vec_duplicate:VD (const_int 0))
		(unspec:VD [(match_dup 1)]
			   UNSPEC_LD4))
	      (vec_concat:<VDBL>
		(vec_duplicate:VD (const_int 0))
		(unspec:VD [(match_dup 1)]
			   UNSPEC_LD4)))) 0))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
  [(set_attr "type" "neon_load4_4reg<q>")]
)

;; ld4 of 64-bit scalar (DX) elements, little-endian: 4-register ld1.
(define_insn "aarch64_ld4<mode>_dreg_le"
  [(set (match_operand:XI 0 "register_operand" "=w")
	(subreg:XI
	  (vec_concat:<VRL4>
	    (vec_concat:<VRL2>
	      (vec_concat:<VDBL>
		(unspec:DX
		  [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
		  UNSPEC_LD4)
		(const_int 0))
	      (vec_concat:<VDBL>
		(unspec:DX [(match_dup 1)]
			   UNSPEC_LD4)
		(const_int 0)))
	    (vec_concat:<VRL2>
	      (vec_concat:<VDBL>
		(unspec:DX [(match_dup 1)]
			   UNSPEC_LD4)
		(const_int 0))
	      (vec_concat:<VDBL>
		(unspec:DX [(match_dup 1)]
			   UNSPEC_LD4)
		(const_int 0)))) 0))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "ld1\\t{%S0.1d - %V0.1d}, %1"
  [(set_attr "type" "neon_load1_4reg<q>")]
)

;; Big-endian counterpart of the DX ld4 pattern above.
(define_insn "aarch64_ld4<mode>_dreg_be"
  [(set (match_operand:XI 0 "register_operand" "=w")
	(subreg:XI
	  (vec_concat:<VRL4>
	    (vec_concat:<VRL2>
	      (vec_concat:<VDBL>
		(const_int 0)
		(unspec:DX
		  [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
		  UNSPEC_LD4))
	      (vec_concat:<VDBL>
		(const_int 0)
		(unspec:DX [(match_dup 1)]
			   UNSPEC_LD4)))
	    (vec_concat:<VRL2>
	      (vec_concat:<VDBL>
		(const_int 0)
		(unspec:DX [(match_dup 1)]
			   UNSPEC_LD4))
	      (vec_concat:<VDBL>
		(const_int 0)
		(unspec:DX [(match_dup 1)]
			   UNSPEC_LD4)))) 0))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "ld1\\t{%S0.1d - %V0.1d}, %1"
  [(set_attr "type" "neon_load1_4reg<q>")]
)

;; Builtin expander for ldN of D-register element modes: selects the
;; _dreg_be / _dreg_le pattern and sizes the BLK MEM to nregs * 8 bytes.
(define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
  [(match_operand:VSTRUCT 0 "register_operand" "=w")
   (match_operand:DI 1 "register_operand" "r")
   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
  set_mem_size (mem, <VSTRUCT:nregs> * 8);

  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg_be (operands[0],
								mem));
  else
    emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg_le (operands[0],
								mem));
  DONE;
})

;; Builtin expander for ld1: plain move on little-endian, element-ordered
;; aarch64_be_ld1 on big-endian.
(define_expand "aarch64_ld1<VALL_F16:mode>"
 [(match_operand:VALL_F16 0 "register_operand")
  (match_operand:DI 1 "register_operand")]
  "TARGET_SIMD"
{
  machine_mode mode = <VALL_F16:MODE>mode;
  rtx mem = gen_rtx_MEM (mode, operands[1]);

  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
  else
    emit_move_insn (operands[0], mem);
  DONE;
})

;; Builtin expander for ldN of Q-register element modes.
(define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
 [(match_operand:VSTRUCT 0 "register_operand" "=w")
  (match_operand:DI 1 "register_operand" "r")
  (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  machine_mode mode = <VSTRUCT:MODE>mode;
  rtx mem = gen_rtx_MEM (mode, operands[1]);

  emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
  DONE;
})

;; Builtin expander for ldN-lane: checks the lane index is in range,
;; then loads one structure into the given lane of each register.
(define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
  [(match_operand:VSTRUCT 0 "register_operand" "=w")
	(match_operand:DI 1 "register_operand" "w")
	(match_operand:VSTRUCT 2 "register_operand" "0")
	(match_operand:SI 3 "immediate_operand" "i")
	(unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
  set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
		     * <VSTRUCT:nregs>);

  aarch64_simd_lane_bounds (operands[3], 0,
			    GET_MODE_NUNITS (<VALLDIF:MODE>mode),
			    NULL);
  emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
	operands[0], mem, operands[2], operands[3]));
  DONE;
})

;; Expanders for builtins to extract vector registers from large
;; opaque integer modes.

;; D-register list.
5287 5288(define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>" 5289 [(match_operand:VDC 0 "register_operand" "=w") 5290 (match_operand:VSTRUCT 1 "register_operand" "w") 5291 (match_operand:SI 2 "immediate_operand" "i")] 5292 "TARGET_SIMD" 5293{ 5294 int part = INTVAL (operands[2]); 5295 rtx temp = gen_reg_rtx (<VDC:VDBL>mode); 5296 int offset = part * 16; 5297 5298 emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset)); 5299 emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp)); 5300 DONE; 5301}) 5302 5303;; Q-register list. 5304 5305(define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>" 5306 [(match_operand:VQ 0 "register_operand" "=w") 5307 (match_operand:VSTRUCT 1 "register_operand" "w") 5308 (match_operand:SI 2 "immediate_operand" "i")] 5309 "TARGET_SIMD" 5310{ 5311 int part = INTVAL (operands[2]); 5312 int offset = part * 16; 5313 5314 emit_move_insn (operands[0], 5315 gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset)); 5316 DONE; 5317}) 5318 5319;; Permuted-store expanders for neon intrinsics. 
5320 5321;; Permute instructions 5322 5323;; vec_perm support 5324 5325(define_expand "vec_perm_const<mode>" 5326 [(match_operand:VALL_F16 0 "register_operand") 5327 (match_operand:VALL_F16 1 "register_operand") 5328 (match_operand:VALL_F16 2 "register_operand") 5329 (match_operand:<V_cmp_result> 3)] 5330 "TARGET_SIMD" 5331{ 5332 if (aarch64_expand_vec_perm_const (operands[0], operands[1], 5333 operands[2], operands[3])) 5334 DONE; 5335 else 5336 FAIL; 5337}) 5338 5339(define_expand "vec_perm<mode>" 5340 [(match_operand:VB 0 "register_operand") 5341 (match_operand:VB 1 "register_operand") 5342 (match_operand:VB 2 "register_operand") 5343 (match_operand:VB 3 "register_operand")] 5344 "TARGET_SIMD" 5345{ 5346 aarch64_expand_vec_perm (operands[0], operands[1], 5347 operands[2], operands[3]); 5348 DONE; 5349}) 5350 5351(define_insn "aarch64_tbl1<mode>" 5352 [(set (match_operand:VB 0 "register_operand" "=w") 5353 (unspec:VB [(match_operand:V16QI 1 "register_operand" "w") 5354 (match_operand:VB 2 "register_operand" "w")] 5355 UNSPEC_TBL))] 5356 "TARGET_SIMD" 5357 "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>" 5358 [(set_attr "type" "neon_tbl1<q>")] 5359) 5360 5361;; Two source registers. 
5362 5363(define_insn "aarch64_tbl2v16qi" 5364 [(set (match_operand:V16QI 0 "register_operand" "=w") 5365 (unspec:V16QI [(match_operand:OI 1 "register_operand" "w") 5366 (match_operand:V16QI 2 "register_operand" "w")] 5367 UNSPEC_TBL))] 5368 "TARGET_SIMD" 5369 "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b" 5370 [(set_attr "type" "neon_tbl2_q")] 5371) 5372 5373(define_insn "aarch64_tbl3<mode>" 5374 [(set (match_operand:VB 0 "register_operand" "=w") 5375 (unspec:VB [(match_operand:OI 1 "register_operand" "w") 5376 (match_operand:VB 2 "register_operand" "w")] 5377 UNSPEC_TBL))] 5378 "TARGET_SIMD" 5379 "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>" 5380 [(set_attr "type" "neon_tbl3")] 5381) 5382 5383(define_insn "aarch64_tbx4<mode>" 5384 [(set (match_operand:VB 0 "register_operand" "=w") 5385 (unspec:VB [(match_operand:VB 1 "register_operand" "0") 5386 (match_operand:OI 2 "register_operand" "w") 5387 (match_operand:VB 3 "register_operand" "w")] 5388 UNSPEC_TBX))] 5389 "TARGET_SIMD" 5390 "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>" 5391 [(set_attr "type" "neon_tbl4")] 5392) 5393 5394;; Three source registers. 5395 5396(define_insn "aarch64_qtbl3<mode>" 5397 [(set (match_operand:VB 0 "register_operand" "=w") 5398 (unspec:VB [(match_operand:CI 1 "register_operand" "w") 5399 (match_operand:VB 2 "register_operand" "w")] 5400 UNSPEC_TBL))] 5401 "TARGET_SIMD" 5402 "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>" 5403 [(set_attr "type" "neon_tbl3")] 5404) 5405 5406(define_insn "aarch64_qtbx3<mode>" 5407 [(set (match_operand:VB 0 "register_operand" "=w") 5408 (unspec:VB [(match_operand:VB 1 "register_operand" "0") 5409 (match_operand:CI 2 "register_operand" "w") 5410 (match_operand:VB 3 "register_operand" "w")] 5411 UNSPEC_TBX))] 5412 "TARGET_SIMD" 5413 "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>" 5414 [(set_attr "type" "neon_tbl3")] 5415) 5416 5417;; Four source registers. 
5418 5419(define_insn "aarch64_qtbl4<mode>" 5420 [(set (match_operand:VB 0 "register_operand" "=w") 5421 (unspec:VB [(match_operand:XI 1 "register_operand" "w") 5422 (match_operand:VB 2 "register_operand" "w")] 5423 UNSPEC_TBL))] 5424 "TARGET_SIMD" 5425 "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>" 5426 [(set_attr "type" "neon_tbl4")] 5427) 5428 5429(define_insn "aarch64_qtbx4<mode>" 5430 [(set (match_operand:VB 0 "register_operand" "=w") 5431 (unspec:VB [(match_operand:VB 1 "register_operand" "0") 5432 (match_operand:XI 2 "register_operand" "w") 5433 (match_operand:VB 3 "register_operand" "w")] 5434 UNSPEC_TBX))] 5435 "TARGET_SIMD" 5436 "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>" 5437 [(set_attr "type" "neon_tbl4")] 5438) 5439 5440(define_insn_and_split "aarch64_combinev16qi" 5441 [(set (match_operand:OI 0 "register_operand" "=w") 5442 (unspec:OI [(match_operand:V16QI 1 "register_operand" "w") 5443 (match_operand:V16QI 2 "register_operand" "w")] 5444 UNSPEC_CONCAT))] 5445 "TARGET_SIMD" 5446 "#" 5447 "&& reload_completed" 5448 [(const_int 0)] 5449{ 5450 aarch64_split_combinev16qi (operands); 5451 DONE; 5452} 5453[(set_attr "type" "multiple")] 5454) 5455 5456(define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>" 5457 [(set (match_operand:VALL_F16 0 "register_operand" "=w") 5458 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w") 5459 (match_operand:VALL_F16 2 "register_operand" "w")] 5460 PERMUTE))] 5461 "TARGET_SIMD" 5462 "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" 5463 [(set_attr "type" "neon_permute<q>")] 5464) 5465 5466;; Note immediate (third) operand is lane index not byte index. 
;; ext: extract a vector from a register pair; the lane index is scaled
;; to a byte offset for the instruction.
(define_insn "aarch64_ext<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
	(unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
			  (match_operand:VALL_F16 2 "register_operand" "w")
			  (match_operand:SI 3 "immediate_operand" "i")]
	 UNSPEC_EXT))]
  "TARGET_SIMD"
{
  operands[3] = GEN_INT (INTVAL (operands[3])
      * GET_MODE_UNIT_SIZE (<MODE>mode));
  return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
}
  [(set_attr "type" "neon_ext<q>")]
)

;; rev16/rev32/rev64 element reversal via the REVERSE iterator.
(define_insn "aarch64_rev<REVERSE:rev_op><mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
	(unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
                    REVERSE))]
  "TARGET_SIMD"
  "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_rev<q>")]
)

;; st2 of D-register vectors.
(define_insn "aarch64_st2<mode>_dreg"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:OI 1 "register_operand" "w")
                    (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_ST2))]
  "TARGET_SIMD"
  "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
  [(set_attr "type" "neon_store2_2reg")]
)

;; st2 of 64-bit scalar (DX) elements: emitted as a 2-register st1.
(define_insn "aarch64_st2<mode>_dreg"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:OI 1 "register_operand" "w")
                    (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_ST2))]
  "TARGET_SIMD"
  "st1\\t{%S1.1d - %T1.1d}, %0"
  [(set_attr "type" "neon_store1_2reg")]
)

;; st3 of D-register vectors.
(define_insn "aarch64_st3<mode>_dreg"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:CI 1 "register_operand" "w")
                    (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_ST3))]
  "TARGET_SIMD"
  "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
  [(set_attr "type" "neon_store3_3reg")]
)

;; st3 of 64-bit scalar (DX) elements: emitted as a 3-register st1.
(define_insn "aarch64_st3<mode>_dreg"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:CI 1 "register_operand" "w")
                    (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_ST3))]
  "TARGET_SIMD"
  "st1\\t{%S1.1d - %U1.1d}, %0"
  [(set_attr "type" "neon_store1_3reg")]
)

;; st4 of D-register vectors.
(define_insn "aarch64_st4<mode>_dreg"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:XI 1 "register_operand" "w")
                    (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_ST4))]
  "TARGET_SIMD"
  "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
  [(set_attr "type" "neon_store4_4reg")]
)

;; st4 of 64-bit scalar (DX) elements: emitted as a 4-register st1.
(define_insn "aarch64_st4<mode>_dreg"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:XI 1 "register_operand" "w")
                    (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_ST4))]
  "TARGET_SIMD"
  "st1\\t{%S1.1d - %V1.1d}, %0"
  [(set_attr "type" "neon_store1_4reg")]
)

;; Builtin expander for stN of D-register element modes: size the BLK MEM
;; to nregs * 8 bytes and dispatch to the _dreg pattern.
(define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
 [(match_operand:DI 0 "register_operand" "r")
  (match_operand:VSTRUCT 1 "register_operand" "w")
  (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
  set_mem_size (mem, <VSTRUCT:nregs> * 8);

  emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
  DONE;
})

;; Builtin expander for stN of Q-register element modes.
(define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
 [(match_operand:DI 0 "register_operand" "r")
  (match_operand:VSTRUCT 1 "register_operand" "w")
  (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  machine_mode mode = <VSTRUCT:MODE>mode;
  rtx mem = gen_rtx_MEM (mode, operands[0]);

  emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
  DONE;
})

;; Builtin expander for stN-lane: store one structure from the given lane
;; of each register in the list.
(define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
  [(match_operand:DI 0 "register_operand" "r")
   (match_operand:VSTRUCT 1 "register_operand" "w")
   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
  set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
		     * <VSTRUCT:nregs>);

  emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
		mem, operands[1], operands[2]));
  DONE;
})

;; Builtin expander for st1: plain move on little-endian, element-ordered
;; aarch64_be_st1 on big-endian.
(define_expand "aarch64_st1<VALL_F16:mode>"
 [(match_operand:DI 0 "register_operand")
  (match_operand:VALL_F16 1 "register_operand")]
  "TARGET_SIMD"
{
  machine_mode mode = <VALL_F16:MODE>mode;
  rtx mem = gen_rtx_MEM (mode, operands[0]);

  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
  else
    emit_move_insn (mem, operands[1]);
  DONE;
})

;; Expander for builtins to insert vector registers into large
;; opaque integer modes.

;; Q-register list.  We don't need a D-reg inserter as we zero
;; extend them in arm_neon.h and insert the resulting Q-regs.

;; Replace Q-register `part' of a structure value (byte offset part * 16),
;; copying the remaining registers from operand 1.
(define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
 [(match_operand:VSTRUCT 0 "register_operand" "+w")
  (match_operand:VSTRUCT 1 "register_operand" "0")
  (match_operand:VQ 2 "register_operand" "w")
  (match_operand:SI 3 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  int part = INTVAL (operands[3]);
  int offset = part * 16;

  emit_move_insn (operands[0], operands[1]);
  emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
		  operands[2]);
  DONE;
})

;; Standard pattern name vec_init<mode>.
5631 5632(define_expand "vec_init<mode>" 5633 [(match_operand:VALL_F16 0 "register_operand" "") 5634 (match_operand 1 "" "")] 5635 "TARGET_SIMD" 5636{ 5637 aarch64_expand_vector_init (operands[0], operands[1]); 5638 DONE; 5639}) 5640 5641(define_insn "*aarch64_simd_ld1r<mode>" 5642 [(set (match_operand:VALL_F16 0 "register_operand" "=w") 5643 (vec_duplicate:VALL_F16 5644 (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))] 5645 "TARGET_SIMD" 5646 "ld1r\\t{%0.<Vtype>}, %1" 5647 [(set_attr "type" "neon_load1_all_lanes")] 5648) 5649 5650(define_insn "aarch64_frecpe<mode>" 5651 [(set (match_operand:VHSDF 0 "register_operand" "=w") 5652 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")] 5653 UNSPEC_FRECPE))] 5654 "TARGET_SIMD" 5655 "frecpe\\t%0.<Vtype>, %1.<Vtype>" 5656 [(set_attr "type" "neon_fp_recpe_<stype><q>")] 5657) 5658 5659(define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>" 5660 [(set (match_operand:GPF_F16 0 "register_operand" "=w") 5661 (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")] 5662 FRECP))] 5663 "TARGET_SIMD" 5664 "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1" 5665 [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF_F16:stype>")] 5666) 5667 5668(define_insn "aarch64_frecps<mode>" 5669 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w") 5670 (unspec:VHSDF_HSDF 5671 [(match_operand:VHSDF_HSDF 1 "register_operand" "w") 5672 (match_operand:VHSDF_HSDF 2 "register_operand" "w")] 5673 UNSPEC_FRECPS))] 5674 "TARGET_SIMD" 5675 "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>" 5676 [(set_attr "type" "neon_fp_recps_<stype><q>")] 5677) 5678 5679(define_insn "aarch64_urecpe<mode>" 5680 [(set (match_operand:VDQ_SI 0 "register_operand" "=w") 5681 (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")] 5682 UNSPEC_URECPE))] 5683 "TARGET_SIMD" 5684 "urecpe\\t%0.<Vtype>, %1.<Vtype>" 5685 [(set_attr "type" "neon_fp_recpe_<Vetype><q>")]) 5686 5687;; Standard pattern name vec_extract<mode>. 
;; Standard-name element extraction, delegated to aarch64_get_lane.
(define_expand "vec_extract<mode>"
  [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
   (match_operand:VALL_F16 1 "register_operand" "")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_SIMD"
{
    emit_insn
      (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
    DONE;
})

;; aes

;; AES single round encryption/decryption (aese/aesd); the result also
;; depends on operand 1, which the instruction reads and overwrites.
(define_insn "aarch64_crypto_aes<aes_op>v16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=w")
        (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
		       (match_operand:V16QI 2 "register_operand" "w")]
         CRYPTO_AES))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "aes<aes_op>\\t%0.16b, %2.16b"
  [(set_attr "type" "crypto_aese")]
)

;; When AES/AESMC fusion is enabled we want the register allocation to
;; look like:
;;    AESE Vn, _
;;    AESMC Vn, Vn
;; So prefer to tie operand 1 to operand 0 when fusing.

;; AES mix-columns (aesmc/aesimc).  The tied-register alternative is only
;; enabled when AESE/AESMC fusion is active (see comment above).
(define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=w,w")
	(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
	 CRYPTO_AESMC))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "aes<aesmc_op>\\t%0.16b, %1.16b"
  [(set_attr "type" "crypto_aesmc")
   (set_attr_alternative "enabled"
     [(if_then_else (match_test
		       "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
		     (const_string "yes" )
		     (const_string "no"))
      (const_string "yes")])]
)

;; sha1

;; SHA1 fixed rotate on a scalar SI value.
(define_insn "aarch64_crypto_sha1hsi"
  [(set (match_operand:SI 0 "register_operand" "=w")
        (unspec:SI [(match_operand:SI 1
                       "register_operand" "w")]
         UNSPEC_SHA1H))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "sha1h\\t%s0, %s1"
  [(set_attr "type" "crypto_sha1_fast")]
)

;; SHA1H of lane 0 of a V4SI vector; lane 0 is the architecturally
;; lowest-addressed element only on little-endian.
(define_insn "aarch64_crypto_sha1hv4si"
  [(set (match_operand:SI 0 "register_operand" "=w")
	(unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
		     (parallel [(const_int 0)]))]
	 UNSPEC_SHA1H))]
  "TARGET_SIMD && TARGET_CRYPTO && !BYTES_BIG_ENDIAN"
  "sha1h\\t%s0, %s1"
  [(set_attr "type" "crypto_sha1_fast")]
)

;; Big-endian counterpart: GCC lane 3 corresponds to architectural lane 0.
(define_insn "aarch64_be_crypto_sha1hv4si"
  [(set (match_operand:SI 0 "register_operand" "=w")
	(unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
		     (parallel [(const_int 3)]))]
	 UNSPEC_SHA1H))]
  "TARGET_SIMD && TARGET_CRYPTO && BYTES_BIG_ENDIAN"
  "sha1h\\t%s0, %s1"
  [(set_attr "type" "crypto_sha1_fast")]
)

;; SHA1 schedule update 1 (operand 0 tied to operand 1).
(define_insn "aarch64_crypto_sha1su1v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")]
         UNSPEC_SHA1SU1))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "sha1su1\\t%0.4s, %2.4s"
  [(set_attr "type" "crypto_sha1_fast")]
)

;; SHA1 hash update: sha1c/sha1m/sha1p via the CRYPTO_SHA1 iterator.
(define_insn "aarch64_crypto_sha1<sha1_op>v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
         CRYPTO_SHA1))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "sha1<sha1_op>\\t%q0, %s2, %3.4s"
  [(set_attr "type" "crypto_sha1_slow")]
)

;; SHA1 schedule update 0.
(define_insn "aarch64_crypto_sha1su0v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
         UNSPEC_SHA1SU0))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "sha1su0\\t%0.4s, %2.4s, %3.4s"
  [(set_attr "type" "crypto_sha1_xor")]
)

;; sha256

;; SHA256 hash update: sha256h/sha256h2 via the CRYPTO_SHA256 iterator.
(define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
         CRYPTO_SHA256))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
  [(set_attr "type" "crypto_sha256_slow")]
)

;; SHA256 schedule update 0.
;; Condition spacing fixed ("&&TARGET_CRYPTO" -> "&& TARGET_CRYPTO") for
;; consistency with every other crypto pattern; semantics are unchanged.
(define_insn "aarch64_crypto_sha256su0v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")]
         UNSPEC_SHA256SU0))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "sha256su0\\t%0.4s, %2.4s"
  [(set_attr "type" "crypto_sha256_fast")]
)

;; SHA256 schedule update 1 (same spacing fix as above).
(define_insn "aarch64_crypto_sha256su1v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
         UNSPEC_SHA256SU1))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "sha256su1\\t%0.4s, %2.4s, %3.4s"
  [(set_attr "type" "crypto_sha256_slow")]
)

;; pmull

;; 64x64 -> 128 bit polynomial multiply (low halves).
(define_insn "aarch64_crypto_pmulldi"
  [(set (match_operand:TI 0 "register_operand" "=w")
        (unspec:TI  [(match_operand:DI 1 "register_operand" "w")
		     (match_operand:DI 2 "register_operand" "w")]
		    UNSPEC_PMULL))]
 "TARGET_SIMD && TARGET_CRYPTO"
 "pmull\\t%0.1q, %1.1d, %2.1d"
  [(set_attr "type" "neon_mul_d_long")]
)

;; 64x64 -> 128 bit polynomial multiply of the high halves (pmull2).
(define_insn "aarch64_crypto_pmullv2di"
 [(set (match_operand:TI 0 "register_operand" "=w")
       (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
		   (match_operand:V2DI 2 "register_operand" "w")]
		  UNSPEC_PMULL2))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "pmull2\\t%0.1q, %1.2d, %2.2d"
  [(set_attr "type" "neon_mul_d_long")]
)