;; ARM NEON coprocessor Machine Description
;; Copyright (C) 2006-2020 Free Software Foundation, Inc.
;; Written by CodeSourcery.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>.


;; Attribute used to permit string comparisons against <VQH_mnem> in
;; type attribute definitions.
(define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd"))

;; Store of a V8QI register wrapped in UNSPEC_UNALIGNED_STORE.  The
;; assembly text is produced by output_move_neon.
(define_insn "unaligned_storev8qi"
  [(set (match_operand:V8QI 0 "memory_operand" "=Un")
        (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w")]
                     UNSPEC_UNALIGNED_STORE))]
  "TARGET_NEON"
{
  return output_move_neon (operands);
}
  [(set_attr "type" "neon_store1_1reg")])

;; Move pattern for the VDXMOV modes.  At least one operand must be a
;; register.  The ten alternatives are dispatched on which_alternative in
;; the output code below.
(define_insn "*neon_mov<mode>"
  [(set (match_operand:VDXMOV 0 "nonimmediate_operand"
          "=w,Un,w, w, w, ?r,?w,?r, ?Us,*r")
        (match_operand:VDXMOV 1 "general_operand"
          " w,w, Dm,Dn,Uni, w, r, Usi,r,*r"))]
  "TARGET_NEON
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  switch (which_alternative)
    {
    case 0:
      return "vmov\t%P0, %P1 @ <mode>";

    case 1:
    case 4:
      return output_move_neon (operands);

    case 2:
    case 3:
      {
        /* Immediate source: simd_immediate_valid_for_move rewrites
           operands[1] and reports the element width; a width of zero
           selects the vmov.f32 form.  */
        static char templ[40];
        int width;
        int is_valid
          = simd_immediate_valid_for_move (operands[1], <MODE>mode,
                                           &operands[1], &width);

        gcc_assert (is_valid != 0);

        if (width == 0)
          return "vmov.f32\t%P0, %1 @ <mode>";

        sprintf (templ, "vmov.i%d\t%%P0, %%x1 @ <mode>", width);
        return templ;
      }

    case 5:
      return "vmov\t%Q0, %R0, %P1 @ <mode>";

    case 6:
      return "vmov\t%P0, %Q1, %R1 @ <mode>";

    case 9:
      /* Emitted as "#": no direct assembly for this alternative.  */
      return "#";

    default:
      return output_move_double (operands, true, NULL);
    }
}
  [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\
                     neon_move<q>,neon_load1_1reg, neon_to_gp<q>,\
                     neon_from_gp<q>,neon_load1_2reg, neon_store1_2reg,\
                     multiple")
   (set_attr "length" "4,4,4,4,4,4,4,8,8,8")
   (set_attr "arm_pool_range" "*,*,*,*,1020,*,*,1020,*,*")
   (set_attr "thumb2_pool_range" "*,*,*,*,1018,*,*,1018,*,*")
   (set_attr "neg_pool_range" "*,*,*,*,1004,*,*,1004,*,*")])

;; Move pattern for the VQXMOV modes.  Same structure as the VDXMOV
;; pattern, but using the %q/%e/%f operand modifiers and
;; output_move_quad for the remaining alternatives.
(define_insn "*neon_mov<mode>"
  [(set (match_operand:VQXMOV 0 "nonimmediate_operand"
          "=w,Un,w, w, w, ?r,?w,?r,?r, ?Us")
        (match_operand:VQXMOV 1 "general_operand"
          " w,w, Dm,DN,Uni, w, r, r, Usi, r"))]
  "TARGET_NEON
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  switch (which_alternative)
    {
    case 0:
      return "vmov\t%q0, %q1 @ <mode>";

    case 1:
    case 4:
      return output_move_neon (operands);

    case 2:
    case 3:
      {
        /* Immediate source; see simd_immediate_valid_for_move.  A width of
           zero selects the vmov.f32 form.  */
        static char templ[40];
        int width;
        int is_valid
          = simd_immediate_valid_for_move (operands[1], <MODE>mode,
                                           &operands[1], &width);

        gcc_assert (is_valid != 0);

        if (width == 0)
          return "vmov.f32\t%q0, %1 @ <mode>";

        sprintf (templ, "vmov.i%d\t%%q0, %%1 @ <mode>", width);
        return templ;
      }

    case 5:
      return "vmov\t%Q0, %R0, %e1 @ <mode>\;vmov\t%J0, %K0, %f1";

    case 6:
      return "vmov\t%e0, %Q1, %R1 @ <mode>\;vmov\t%f0, %J1, %K1";

    default:
      return output_move_quad (operands);
    }
}
  [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_move_q,\
                     neon_move_q,neon_load2_2reg_q,neon_to_gp_q,\
                     neon_from_gp_q,mov_reg,neon_load1_4reg,neon_store1_4reg")
   (set_attr "length" "4,8,4,4,8,8,8,16,8,16")
   (set_attr "arm_pool_range" "*,*,*,*,1020,*,*,*,1020,*")
   (set_attr "thumb2_pool_range" "*,*,*,*,1018,*,*,*,1018,*")
   (set_attr "neg_pool_range" "*,*,*,*,996,*,*,*,996,*")])

/* We define these mov expanders to match the standard mov$a optab to prevent
   the mid-end from trying to do a subreg for these modes which is the most
   inefficient way to expand the move.  Also big-endian subreg's aren't
   allowed for a subset of modes, See TARGET_CAN_CHANGE_MODE_CLASS.
   Without these RTL generation patterns the mid-end would attempt to take a
   sub-reg and may ICE if it can't.  */

(define_expand "movti"
  [(set (match_operand:TI 0 "nonimmediate_operand")
        (match_operand:TI 1 "general_operand"))]
  "TARGET_NEON"
{
  gcc_checking_assert (aligned_operand (operands[0], TImode));
  gcc_checking_assert (aligned_operand (operands[1], TImode));

  /* While pseudos can still be created, a non-register destination gets
     its source forced into a register.  */
  if (can_create_pseudo_p () && !REG_P (operands[0]))
    operands[1] = force_reg (TImode, operands[1]);
})

(define_expand "mov<mode>"
  [(set (match_operand:VSTRUCT 0 "nonimmediate_operand")
        (match_operand:VSTRUCT 1 "general_operand"))]
  "TARGET_NEON || TARGET_HAVE_MVE"
{
  gcc_checking_assert (aligned_operand (operands[0], <MODE>mode));
  gcc_checking_assert (aligned_operand (operands[1], <MODE>mode));

  /* While pseudos can still be created, a non-register destination gets
     its source forced into a register.  */
  if (can_create_pseudo_p () && !REG_P (operands[0]))
    operands[1] = force_reg (<MODE>mode, operands[1]);
})

;; The pattern mov<mode> where mode is v8hf, v4hf, v4bf and v8bf are split into
;; two groups. The pattern movv8hf is common for MVE and NEON, so it is moved
;; into vec-common.md file. Remaining mov expand patterns with half float and
;; bfloats are implemented below.
(define_expand "mov<mode>"
  [(set (match_operand:VHFBF_split 0 "s_register_operand")
        (match_operand:VHFBF_split 1 "s_register_operand"))]
  "TARGET_NEON"
{
  gcc_checking_assert (aligned_operand (operands[0], <MODE>mode));
  gcc_checking_assert (aligned_operand (operands[1], <MODE>mode));

  /* While pseudos can still be created, a non-register destination gets
     its source forced into a register.  */
  if (can_create_pseudo_p () && !REG_P (operands[0]))
    operands[1] = force_reg (<MODE>mode, operands[1]);
})

;; Move pattern for the VSTRUCT modes.  Register-to-register moves are
;; emitted as "#"; loads and stores go through output_move_neon.  The insn
;; length is computed by arm_attr_length_move_neon.
(define_insn "*neon_mov<mode>"
  [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w")
        (match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))]
  "(TARGET_NEON || TARGET_HAVE_MVE)
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  if (which_alternative == 0)
    return "#";

  if (which_alternative == 1 || which_alternative == 2)
    return output_move_neon (operands);

  gcc_unreachable ();
}
  [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_load2_2reg_q")
   (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))])

;; After reload, split an EI register copy into a TImode piece followed by a
;; DImode piece at register offset 4.  neon_disambiguate_copy fills in the
;; operands used by the replacement sets.
(define_split
  [(set (match_operand:EI 0 "s_register_operand" "")
        (match_operand:EI 1 "s_register_operand" ""))]
  "TARGET_NEON && reload_completed"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 2) (match_dup 3))]
{
  const int dst_regno = REGNO (operands[0]);
  const int src_regno = REGNO (operands[1]);
  rtx dest[2], src[2];

  dest[0] = gen_rtx_REG (TImode, dst_regno);
  src[0] = gen_rtx_REG (TImode, src_regno);
  dest[1] = gen_rtx_REG (DImode, dst_regno + 4);
  src[1] = gen_rtx_REG (DImode, src_regno + 4);

  neon_disambiguate_copy (operands, dest, src, 2);
})

;; After reload, split an OI register copy into two TImode pieces at
;; register offsets 0 and 4.
(define_split
  [(set (match_operand:OI 0 "s_register_operand" "")
        (match_operand:OI 1 "s_register_operand" ""))]
  "(TARGET_NEON || TARGET_HAVE_MVE)&& reload_completed"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 2) (match_dup 3))]
{
  const int dst_regno = REGNO (operands[0]);
  const int src_regno = REGNO (operands[1]);
  rtx dest[2], src[2];

  for (int piece = 0; piece < 2; piece++)
    {
      dest[piece] = gen_rtx_REG (TImode, dst_regno + 4 * piece);
      src[piece] = gen_rtx_REG (TImode, src_regno + 4 * piece);
    }

  neon_disambiguate_copy (operands, dest, src, 2);
})

;; After reload, split a CI register copy into three TImode pieces at
;; register offsets 0, 4 and 8.
(define_split
  [(set (match_operand:CI 0 "s_register_operand" "")
        (match_operand:CI 1 "s_register_operand" ""))]
  "TARGET_NEON && reload_completed"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 2) (match_dup 3))
   (set (match_dup 4) (match_dup 5))]
{
  const int dst_regno = REGNO (operands[0]);
  const int src_regno = REGNO (operands[1]);
  rtx dest[3], src[3];

  for (int piece = 0; piece < 3; piece++)
    {
      dest[piece] = gen_rtx_REG (TImode, dst_regno + 4 * piece);
      src[piece] = gen_rtx_REG (TImode, src_regno + 4 * piece);
    }

  neon_disambiguate_copy (operands, dest, src, 3);
})

;; After reload, split an XI register copy into four TImode pieces at
;; register offsets 0, 4, 8 and 12.
(define_split
  [(set (match_operand:XI 0 "s_register_operand" "")
        (match_operand:XI 1 "s_register_operand" ""))]
  "(TARGET_NEON || TARGET_HAVE_MVE) && reload_completed"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 2) (match_dup 3))
   (set (match_dup 4) (match_dup 5))
   (set (match_dup 6) (match_dup 7))]
{
  const int dst_regno = REGNO (operands[0]);
  const int src_regno = REGNO (operands[1]);
  rtx dest[4], src[4];

  for (int piece = 0; piece < 4; piece++)
    {
      dest[piece] = gen_rtx_REG (TImode, dst_regno + 4 * piece);
      src[piece] = gen_rtx_REG (TImode, src_regno + 4 * piece);
    }

  neon_disambiguate_copy (operands, dest, src, 4);
})

(define_expand "movmisalign<mode>"
  [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
        (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
                     UNSPEC_MISALIGNED_ACCESS))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
{
  /* This pattern is not permitted to fail during expansion: if both arguments
     are non-registers (e.g. memory := constant, which can be created by the
     auto-vectorizer), force operand 1 into a register.  */
  if (!s_register_operand (operands[0], <MODE>mode)
      && !s_register_operand (operands[1], <MODE>mode))
    operands[1] = force_reg (<MODE>mode, operands[1]);

  /* Whichever operand is not the register destination is the one whose
     address may need adjusting.  */
  rtx adjust_mem = (s_register_operand (operands[0], <MODE>mode)
                    ? operands[1]
                    : operands[0]);

  /* Legitimize address.  */
  if (!neon_vector_mem_operand (adjust_mem, 2, true))
    XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0));
})

;; Misaligned store of a VDX-mode register with vst1.
(define_insn "*movmisalign<mode>_neon_store"
  [(set (match_operand:VDX 0 "neon_permissive_struct_operand" "=Um")
        (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
                    UNSPEC_MISALIGNED_ACCESS))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
  "vst1.<V_sz_elem>\t{%P1}, %A0"
  [(set_attr "type" "neon_store1_1reg<q>")])

;; Misaligned load into a VDX-mode register with vld1.
(define_insn "*movmisalign<mode>_neon_load"
  [(set (match_operand:VDX 0 "s_register_operand" "=w")
        (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand"
                      " Um")]
                    UNSPEC_MISALIGNED_ACCESS))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
  "vld1.<V_sz_elem>\t{%P0}, %A1"
  [(set_attr "type" "neon_load1_1reg<q>")])

;; Misaligned store of a VQX-mode register with vst1.
(define_insn "*movmisalign<mode>_neon_store"
  [(set (match_operand:VQX 0 "neon_permissive_struct_operand" "=Um")
        (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")]
                    UNSPEC_MISALIGNED_ACCESS))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
  "vst1.<V_sz_elem>\t{%q1}, %A0"
  [(set_attr "type" "neon_store1_1reg<q>")])

;; Misaligned load into a VQX-mode register with vld1.
(define_insn "*movmisalign<mode>_neon_load"
  [(set (match_operand:VQX 0 "s_register_operand" "=w")
        (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand"
                      " Um")]
                    UNSPEC_MISALIGNED_ACCESS))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
  "vld1.<V_sz_elem>\t{%q0}, %A1"
  [(set_attr "type" "neon_load1_1reg<q>")])

;; Insert one element into a VD_LANE vector.  Operand 2 is the vec_merge
;; mask; the lane number is recovered from it with ffs and mirrored for
;; big-endian.
(define_insn "@vec_set<mode>_internal"
  [(set (match_operand:VD_LANE 0 "s_register_operand" "=w,w")
        (vec_merge:VD_LANE
          (vec_duplicate:VD_LANE
            (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
          (match_operand:VD_LANE 3 "s_register_operand" "0,0")
          (match_operand:SI 2 "immediate_operand" "i,i")))]
  "TARGET_NEON"
{
  int lane = ffs ((int) INTVAL (operands[2])) - 1;

  if (BYTES_BIG_ENDIAN)
    lane = GET_MODE_NUNITS (<MODE>mode) - 1 - lane;
  operands[2] = GEN_INT (lane);

  return (which_alternative == 0
          ? "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1"
          : "vmov.<V_sz_elem>\t%P0[%c2], %1");
}
  [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")])

;; Insert one element into a VQ2 vector.  The lane decoded from the mask in
;; operand 2 selects which <V_HALF>mode half of the destination register is
;; written and the lane within that half.
(define_insn "@vec_set<mode>_internal"
  [(set (match_operand:VQ2 0 "s_register_operand" "=w,w")
        (vec_merge:VQ2
          (vec_duplicate:VQ2
            (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
          (match_operand:VQ2 3 "s_register_operand" "0,0")
          (match_operand:SI 2 "immediate_operand" "i,i")))]
  "TARGET_NEON"
{
  HOST_WIDE_INT full_lane = ffs ((int) INTVAL (operands[2])) - 1;
  int lanes_per_half = GET_MODE_NUNITS (<MODE>mode) / 2;
  int lane = full_lane % lanes_per_half;
  int reg_offset = (full_lane / lanes_per_half) * 2;
  int base_regno = REGNO (operands[0]);

  if (BYTES_BIG_ENDIAN)
    lane = lanes_per_half - 1 - lane;

  operands[0] = gen_rtx_REG (<V_HALF>mode, base_regno + reg_offset);
  operands[2] = GEN_INT (lane);

  return (which_alternative == 0
          ? "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1"
          : "vmov.<V_sz_elem>\t%P0[%c2], %1");
}
  [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")]
)

;; Insert one DI element into a V2DI_ONLY vector by rewriting the
;; destination as the DImode register at offset 2 * lane.
(define_insn "@vec_set<mode>_internal"
  [(set (match_operand:V2DI_ONLY 0 "s_register_operand" "=w,w")
        (vec_merge:V2DI_ONLY
          (vec_duplicate:V2DI_ONLY
            (match_operand:DI 1 "nonimmediate_operand" "Um,r"))
          (match_operand:V2DI_ONLY 3 "s_register_operand" "0,0")
          (match_operand:SI 2 "immediate_operand" "i,i")))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = ffs ((int) INTVAL (operands[2])) - 1;
  int half_regno = REGNO (operands[0]) + 2 * lane;

  operands[0] = gen_rtx_REG (DImode, half_regno);

  return (which_alternative == 0
          ? "vld1.64\t%P0, %A1"
          : "vmov\t%P0, %Q1, %R1");
}
  [(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")]
)

;; Extract one element from a VD_LANE vector to memory (vst1) or to a core
;; register (vmov).  The lane index is mirrored for big-endian.
(define_insn "vec_extract<mode><V_elem_l>"
  [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
        (vec_select:<V_elem>
          (match_operand:VD_LANE 1 "s_register_operand" "w,w")
          (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      int lane = INTVAL (operands[2]);
      lane = GET_MODE_NUNITS (<MODE>mode) - 1 - lane;
      operands[2] = GEN_INT (lane);
    }

  return (which_alternative == 0
          ? "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0"
          : "vmov.<V_uf_sclr>\t%0, %P1[%c2]");
}
  [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
)

;; This pattern is renamed from "vec_extract<mode><V_elem_l>" to
;; "neon_vec_extract<mode><V_elem_l>" and this pattern is called
;; by define_expand in vec-common.md file.
;; Extract one element from a VQ2 vector.  The lane index in operand 2
;; selects which <V_HALF>mode half of the source register is read and the
;; lane within that half (mirrored for big-endian).
(define_insn "neon_vec_extract<mode><V_elem_l>"
  [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
        (vec_select:<V_elem>
          (match_operand:VQ2 1 "s_register_operand" "w,w")
          (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
  "TARGET_NEON"
{
  HOST_WIDE_INT full_lane = INTVAL (operands[2]);
  int lanes_per_half = GET_MODE_NUNITS (<MODE>mode) / 2;
  int lane = full_lane % lanes_per_half;
  int reg_offset = (full_lane / lanes_per_half) * 2;
  int base_regno = REGNO (operands[1]);

  if (BYTES_BIG_ENDIAN)
    lane = lanes_per_half - 1 - lane;

  operands[1] = gen_rtx_REG (<V_HALF>mode, base_regno + reg_offset);
  operands[2] = GEN_INT (lane);

  return (which_alternative == 0
          ? "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0"
          : "vmov.<V_uf_sclr>\t%0, %P1[%c2]");
}
  [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
)

;; This pattern is renamed from "vec_extractv2didi" to "neon_vec_extractv2didi"
;; and this pattern is called by define_expand in vec-common.md file.
(define_insn "neon_vec_extractv2didi"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r")
        (vec_select:DI
          (match_operand:V2DI 1 "s_register_operand" "w,w")
          (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
  "TARGET_NEON"
{
  /* Rewrite the source as the DImode register holding the selected lane.  */
  int half_regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]);

  operands[1] = gen_rtx_REG (DImode, half_regno);

  return (which_alternative == 0
          ? "vst1.64\t{%P1}, %A0 @ v2di"
          : "vmov\t%Q0, %R0, %P1 @ v2di");
}
  [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")]
)

;; Vector initialisation for the VDQ modes; all of the work is delegated to
;; neon_expand_vector_init.
(define_expand "vec_init<mode><V_elem_l>"
  [(match_operand:VDQ 0 "s_register_operand")
   (match_operand 1 "" "")]
  "TARGET_NEON || TARGET_HAVE_MVE"
{
  neon_expand_vector_init (operands[0], operands[1]);
  DONE;
})

;; Doubleword and quadword arithmetic.

;; NOTE: some other instructions also support 64-bit integer
;; element size, which we could potentially use for "long long" operations.

;; Vector addition (vadd) for the VDQ modes.  Floating-point modes only
;; match when flag_unsafe_math_optimizations is set.  The "type" attribute
;; picks the FP or the integer add class depending on <Is_float_mode>.
(define_insn "*add<mode>3_neon"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
        (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
                  (match_operand:VDQ 2 "s_register_operand" "w")))]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
  "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
                    (const_string "neon_fp_addsub_s<q>")
                    (const_string "neon_add<q>")))]
)

;; As with SFmode, full support for HFmode vector arithmetic is only available
;; when flag-unsafe-math-optimizations is enabled.

;; Add pattern with modes V8HF and V4HF is split into separate patterns to add
;; support for standard pattern addv8hf3 in MVE. Following pattern is called
;; from "addv8hf3" standard pattern inside vec-common.md file.

;; V8HF addition.  This pattern names a fixed mode instead of using a mode
;; iterator, so the operand modifier is spelled out (%q) rather than going
;; through the <V_reg> mode attribute, which is only substituted in
;; iterated patterns.
(define_insn "addv8hf3_neon"
 [(set
   (match_operand:V8HF 0 "s_register_operand" "=w")
   (plus:V8HF
    (match_operand:V8HF 1 "s_register_operand" "w")
    (match_operand:V8HF 2 "s_register_operand" "w")))]
 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
 "vadd.f16\t%q0, %q1, %q2"
 [(set_attr "type" "neon_fp_addsub_s_q")]
)

;; V4HF addition.  As for addv8hf3_neon the mode is fixed, so the operand
;; modifier is written explicitly (%P).
;; NOTE(review): the "type" below carries the "_q" suffix although the
;; iterated add<mode>3_fp16 pattern uses "neon_fp_addsub_s<q>"; confirm
;; which scheduling class is intended for this mode.
(define_insn "addv4hf3"
 [(set
   (match_operand:V4HF 0 "s_register_operand" "=w")
   (plus:V4HF
    (match_operand:V4HF 1 "s_register_operand" "w")
    (match_operand:V4HF 2 "s_register_operand" "w")))]
 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
 "vadd.f16\t%P0, %P1, %P2"
 [(set_attr "type" "neon_fp_addsub_s_q")]
)

;; VH-mode addition that does not depend on
;; flag_unsafe_math_optimizations.
(define_insn "add<mode>3_fp16"
 [(set
   (match_operand:VH 0 "s_register_operand" "=w")
   (plus:VH
    (match_operand:VH 1 "s_register_operand" "w")
    (match_operand:VH 2 "s_register_operand" "w")))]
 "TARGET_NEON_FP16INST"
 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 [(set (attr "type")
   (if_then_else (match_test "<Is_float_mode>")
    (const_string "neon_fp_addsub_s<q>")
    (const_string "neon_add<q>")))]
)

;; Vector subtraction (vsub) for the VDQ modes.  Floating-point modes only
;; match when flag_unsafe_math_optimizations is set.
(define_insn "*sub<mode>3_neon"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
        (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
                   (match_operand:VDQ 2 "s_register_operand" "w")))]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
  "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
                    (const_string "neon_fp_addsub_s<q>")
                    (const_string "neon_sub<q>")))]
)

;; VH-mode subtraction, only matched with flag_unsafe_math_optimizations.
(define_insn "sub<mode>3"
 [(set
   (match_operand:VH 0 "s_register_operand" "=w")
   (minus:VH
    (match_operand:VH 1 "s_register_operand" "w")
    (match_operand:VH 2 "s_register_operand" "w")))]
 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 [(set_attr "type" "neon_sub<q>")]
)

;; VH-mode subtraction that does not depend on
;; flag_unsafe_math_optimizations.
(define_insn "sub<mode>3_fp16"
 [(set
   (match_operand:VH 0 "s_register_operand" "=w")
   (minus:VH
    (match_operand:VH 1 "s_register_operand" "w")
    (match_operand:VH 2 "s_register_operand" "w")))]
 "TARGET_NEON_FP16INST"
 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 [(set_attr "type" "neon_sub<q>")]
)

;; Vector multiplication (vmul) for the VDQW modes.  Floating-point modes
;; only match when flag_unsafe_math_optimizations is set.
(define_insn "*mul<mode>3_neon"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
                   (match_operand:VDQW 2 "s_register_operand" "w")))]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
  "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
                    (const_string "neon_fp_mul_s<q>")
                    (const_string "neon_mul_<V_elem_ch><q>")))]
)

/* Perform division using multiply-by-reciprocal.
   Reciprocal is calculated using Newton-Raphson method.
   Enabled with -funsafe-math-optimizations -freciprocal-math
   and disabled for -Os since it increases code size.
   The expander condition itself tests flag_reciprocal_math and
   !optimize_size.  */

(define_expand "div<mode>3"
  [(set (match_operand:VCVTF 0 "s_register_operand")
        (div:VCVTF (match_operand:VCVTF 1 "s_register_operand")
                   (match_operand:VCVTF 2 "s_register_operand")))]
  "TARGET_NEON && !optimize_size
   && flag_reciprocal_math"
  {
    rtx rec = gen_reg_rtx (<MODE>mode);
    rtx vrecps_temp = gen_reg_rtx (<MODE>mode);

    /* Reciprocal estimate.  */
    emit_insn (gen_neon_vrecpe<mode> (rec, operands[2]));

    /* Perform 2 iterations of newton-raphson method.  Each iteration
       multiplies the current estimate by the vrecps correction term.  */
    for (int i = 0; i < 2; i++)
      {
        emit_insn (gen_neon_vrecps<mode> (vrecps_temp, rec, operands[2]));
        emit_insn (gen_mul<mode>3 (rec, rec, vrecps_temp));
      }

    /* We now have reciprocal in rec, perform operands[0] = operands[1] * rec.  */
    emit_insn (gen_mul<mode>3 (operands[0], operands[1], rec));
    DONE;
  }
)


;; Multiply-accumulate (vmla) for the VDQW modes.  The addend, operand 1, is
;; tied to the destination.
(define_insn "mul<mode>3add<mode>_neon"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
                              (match_operand:VDQW 3 "s_register_operand" "w"))
                   (match_operand:VDQW 1 "s_register_operand" "0")))]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
  "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
                    (const_string "neon_fp_mla_s<q>")
                    (const_string "neon_mla_<V_elem_ch><q>")))]
)

;; Multiply-accumulate (vmla.f16) for the VH modes.  The addend, operand 1,
;; is tied to the destination.
(define_insn "mul<mode>3add<mode>_neon"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
        (plus:VH (mult:VH (match_operand:VH 2 "s_register_operand" "w")
                          (match_operand:VH 3 "s_register_operand" "w"))
                 (match_operand:VH 1 "s_register_operand" "0")))]
  "TARGET_NEON_FP16INST && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
  "vmla.f16\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)

;; Multiply-subtract (vmls) for the VDQW modes: operand 1 minus the product
;; of operands 2 and 3, with operand 1 tied to the destination.
(define_insn "mul<mode>3neg<mode>add<mode>_neon"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0")
                    (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
                               (match_operand:VDQW 3 "s_register_operand" "w"))))]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
  "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
                    (const_string "neon_fp_mla_s<q>")
                    (const_string "neon_mla_<V_elem_ch><q>")))]
)

;; Fused multiply-accumulate
;; We define each insn twice here:
;; 1: with flag_unsafe_math_optimizations for the widening multiply phase
;; to be able to use when converting to FMA.
;; 2: without flag_unsafe_math_optimizations for the intrinsics to use.
674(define_insn "fma<VCVTF:mode>4" 675 [(set (match_operand:VCVTF 0 "register_operand" "=w") 676 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w") 677 (match_operand:VCVTF 2 "register_operand" "w") 678 (match_operand:VCVTF 3 "register_operand" "0")))] 679 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations" 680 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2" 681 [(set_attr "type" "neon_fp_mla_s<q>")] 682) 683 684(define_insn "fma<VCVTF:mode>4_intrinsic" 685 [(set (match_operand:VCVTF 0 "register_operand" "=w") 686 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w") 687 (match_operand:VCVTF 2 "register_operand" "w") 688 (match_operand:VCVTF 3 "register_operand" "0")))] 689 "TARGET_NEON && TARGET_FMA" 690 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2" 691 [(set_attr "type" "neon_fp_mla_s<q>")] 692) 693 694(define_insn "fma<VH:mode>4" 695 [(set (match_operand:VH 0 "register_operand" "=w") 696 (fma:VH 697 (match_operand:VH 1 "register_operand" "w") 698 (match_operand:VH 2 "register_operand" "w") 699 (match_operand:VH 3 "register_operand" "0")))] 700 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations" 701 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2" 702 [(set_attr "type" "neon_fp_mla_s<q>")] 703) 704 705(define_insn "fma<VH:mode>4_intrinsic" 706 [(set (match_operand:VH 0 "register_operand" "=w") 707 (fma:VH 708 (match_operand:VH 1 "register_operand" "w") 709 (match_operand:VH 2 "register_operand" "w") 710 (match_operand:VH 3 "register_operand" "0")))] 711 "TARGET_NEON_FP16INST" 712 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2" 713 [(set_attr "type" "neon_fp_mla_s<q>")] 714) 715 716(define_insn "*fmsub<VCVTF:mode>4" 717 [(set (match_operand:VCVTF 0 "register_operand" "=w") 718 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w")) 719 (match_operand:VCVTF 2 "register_operand" "w") 720 (match_operand:VCVTF 3 "register_operand" "0")))] 721 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations" 722 
"vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2" 723 [(set_attr "type" "neon_fp_mla_s<q>")] 724) 725 726(define_insn "fmsub<VCVTF:mode>4_intrinsic" 727 [(set (match_operand:VCVTF 0 "register_operand" "=w") 728 (fma:VCVTF 729 (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w")) 730 (match_operand:VCVTF 2 "register_operand" "w") 731 (match_operand:VCVTF 3 "register_operand" "0")))] 732 "TARGET_NEON && TARGET_FMA" 733 "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2" 734 [(set_attr "type" "neon_fp_mla_s<q>")] 735) 736 737(define_insn "fmsub<VH:mode>4_intrinsic" 738 [(set (match_operand:VH 0 "register_operand" "=w") 739 (fma:VH 740 (neg:VH (match_operand:VH 1 "register_operand" "w")) 741 (match_operand:VH 2 "register_operand" "w") 742 (match_operand:VH 3 "register_operand" "0")))] 743 "TARGET_NEON_FP16INST" 744 "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2" 745 [(set_attr "type" "neon_fp_mla_s<q>")] 746) 747 748(define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>" 749 [(set (match_operand:VCVTF 0 "s_register_operand" "=w") 750 (unspec:VCVTF [(match_operand:VCVTF 1 751 "s_register_operand" "w")] 752 NEON_VRINT))] 753 "TARGET_NEON && TARGET_VFP5" 754 "vrint<nvrint_variant>.f32\\t%<V_reg>0, %<V_reg>1" 755 [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")] 756) 757 758(define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>" 759 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w") 760 (FIXUORS:<V_cmp_result> (unspec:VCVTF 761 [(match_operand:VCVTF 1 "register_operand" "w")] 762 NEON_VCVT)))] 763 "TARGET_NEON && TARGET_VFP5" 764 "vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1" 765 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>") 766 (set_attr "predicable" "no")] 767) 768 769(define_insn "ior<mode>3" 770 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w") 771 (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0") 772 (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))] 773 "TARGET_NEON" 774{ 
  switch (which_alternative)
    {
    case 0: return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
    case 1: return neon_output_logic_immediate ("vorr", &operands[2],
                     <MODE>mode, 0, VALID_NEON_QREG_MODE (<MODE>mode));
    default: gcc_unreachable ();
    }
}
  [(set_attr "type" "neon_logic<q>")]
)

;; The concrete forms of the Neon immediate-logic instructions are vbic and
;; vorr.  We support the pseudo-instruction vand instead, because that
;; corresponds to the canonical form the middle-end expects to use for
;; immediate bitwise-ANDs.

;; Vector bitwise AND.  Alternative 0 is the three-register form.
;; Alternative 1 ties operand 1 to the destination ("0") and takes a "DL"
;; immediate; its text is produced by neon_output_logic_immediate, which is
;; passed 1 as its fourth argument here (the IOR pattern above passes 0).
(define_insn "and<mode>3"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
        (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
                 (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
  "TARGET_NEON"
{
  switch (which_alternative)
    {
    case 0: return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
    case 1: return neon_output_logic_immediate ("vand", &operands[2],
                     <MODE>mode, 1, VALID_NEON_QREG_MODE (<MODE>mode));
    default: gcc_unreachable ();
    }
}
  [(set_attr "type" "neon_logic<q>")]
)

;; OR-NOT: operand 0 = operand 1 | ~operand 2.  Note that the complemented
;; input is operand 2, so the RTL lists the operands in the order (2, 1).
(define_insn "orn<mode>3_neon"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
        (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
                 (match_operand:VDQ 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_logic<q>")]
)

;; Bit clear: operand 0 = operand 1 & ~operand 2.  As for vorn above, the
;; complemented input is operand 2.
(define_insn "bic<mode>3_neon"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
        (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
                 (match_operand:VDQ 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_logic<q>")]
)

;; Vector bitwise exclusive-OR (register form only).
(define_insn "xor<mode>3"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
        (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
                 (match_operand:VDQ 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_logic<q>")]
)

;; Vector bitwise complement.
(define_insn "one_cmpl<mode>2"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
        (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vmvn\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_move<q>")]
)

;; Element-wise absolute value.  The scheduling type depends on whether the
;; mode is a floating-point one.
(define_insn "abs<mode>2"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
                    (const_string "neon_fp_abs_s<q>")
                    (const_string "neon_abs<q>")))]
)

;; Element-wise negation.  The scheduling type depends on whether the mode
;; is a floating-point one.
(define_insn "neg<mode>2"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
                    (const_string "neon_fp_neg_s<q>")
                    (const_string "neon_neg<q>")))]
)

;; abs/neg for the VH modes, available only with TARGET_NEON_FP16INST.
(define_insn "<absneg_str><mode>2"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
    (ABSNEG:VH (match_operand:VH 1 "s_register_operand" "w")))]
 "TARGET_NEON_FP16INST"
 "v<absneg_str>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
 [(set_attr "type" "neon_abs<q>")]
)

;; Builtin entry point for the VH abs/neg: simply forwards to the
;; <absneg_str><mode>2 pattern above.
(define_expand "neon_v<absneg_str><mode>"
 [(set
   (match_operand:VH 0 "s_register_operand")
   (ABSNEG:VH (match_operand:VH 1 "s_register_operand")))]
 "TARGET_NEON_FP16INST"
{
  emit_insn (gen_<absneg_str><mode>2 (operands[0], operands[1]));
  DONE;
})

;; VH rounding operations, one per member of the FP16_RND unspec iterator.
(define_insn "neon_v<fp16_rnd_str><mode>"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
    (unspec:VH
     [(match_operand:VH 1 "s_register_operand" "w")]
     FP16_RND))]
 "TARGET_NEON_FP16INST"
 "<fp16_rnd_insn>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
 [(set_attr "type" "neon_fp_round_s<q>")]
)

;; vrsqrte.f16 on the VH modes, represented as an unspec.
(define_insn "neon_vrsqrte<mode>"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
    (unspec:VH
     [(match_operand:VH 1 "s_register_operand" "w")]
     UNSPEC_VRSQRTE))]
  "TARGET_NEON_FP16INST"
  "vrsqrte.f16\t%<V_reg>0, %<V_reg>1"
 [(set_attr "type" "neon_fp_rsqrte_s<q>")]
)

;; Element-wise unsigned minimum (integer modes only).
(define_insn "*umin<mode>3_neon"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
                    (match_operand:VDQIW 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_minmax<q>")]
)

;; Element-wise unsigned maximum (integer modes only).
(define_insn "*umax<mode>3_neon"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
                    (match_operand:VDQIW 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_minmax<q>")]
)

;; Element-wise signed minimum; the type attribute distinguishes the
;; floating-point modes of VDQW from the integer ones.
(define_insn "*smin<mode>3_neon"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
                   (match_operand:VDQW 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
                    (const_string "neon_fp_minmax_s<q>")
                    (const_string "neon_minmax<q>")))]
)

;; Element-wise signed maximum; the type attribute distinguishes the
;; floating-point modes of VDQW from the integer ones.
(define_insn "*smax<mode>3_neon"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
                   (match_operand:VDQW 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
                    (const_string "neon_fp_minmax_s<q>")
                    (const_string "neon_minmax<q>")))]
)

; TODO: V2DI shifts are currently disabled because there are bugs in the
; generic vectorizer code.
; It ends up creating a V2DI constructor with
; SImode elements.

;; Vector shift left.  Alternative 0 shifts by a per-element register count;
;; alternative 1 shifts by an immediate ("Dm"), whose assembly text is
;; produced by neon_output_shift_immediate.
(define_insn "vashl<mode>3"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
        (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
                      (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dm")))]
  "TARGET_NEON"
  {
    switch (which_alternative)
      {
        case 0: return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
        case 1: return neon_output_shift_immediate ("vshl", 'i', &operands[2],
                                                    <MODE>mode,
                                                    VALID_NEON_QREG_MODE (<MODE>mode),
                                                    true);
        default: gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_shift_reg<q>, neon_shift_imm<q>")]
)

;; Arithmetic shift right by an immediate (vshr with an 's' element type).
(define_insn "vashr<mode>3_imm"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
                        (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dm")))]
  "TARGET_NEON"
  {
    return neon_output_shift_immediate ("vshr", 's', &operands[2],
                                        <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
                                        false);
  }
  [(set_attr "type" "neon_shift_imm<q>")]
)

;; Logical shift right by an immediate (vshr with a 'u' element type).
(define_insn "vlshr<mode>3_imm"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
                        (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dm")))]
  "TARGET_NEON"
  {
    return neon_output_shift_immediate ("vshr", 'u', &operands[2],
                                        <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
                                        false);
  }
  [(set_attr "type" "neon_shift_imm<q>")]
)

; Used for implementing arithmetic shift-right, which is a left-shift by a
; negative amount, with signed operands.  This is essentially the same as
; vashl<mode>3 above, but using an unspec in case GCC tries anything tricky
; with negative shift amounts.

;; Register-count vshl with a signed element type.  The vashr<mode>3
;; expander below feeds it a negated count to obtain an arithmetic right
;; shift.
(define_insn "ashl<mode>3_signed"
  [(set (match_operand:VDQI 0 "s_register_operand" "=w")
        (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
                      (match_operand:VDQI 2 "s_register_operand" "w")]
                     UNSPEC_ASHIFT_SIGNED))]
  "TARGET_NEON"
  "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_shift_reg<q>")]
)

; Used for implementing logical shift-right, which is a left-shift by a negative
; amount, with unsigned operands.

(define_insn "ashl<mode>3_unsigned"
  [(set (match_operand:VDQI 0 "s_register_operand" "=w")
        (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
                      (match_operand:VDQI 2 "s_register_operand" "w")]
                     UNSPEC_ASHIFT_UNSIGNED))]
  "TARGET_NEON"
  "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_shift_reg<q>")]
)

;; Arithmetic shift right.  A register count is negated and handed to the
;; signed left-shift pattern; anything else goes to the immediate pattern.
(define_expand "vashr<mode>3"
  [(set (match_operand:VDQIW 0 "s_register_operand")
        (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand")
                        (match_operand:VDQIW 2 "imm_rshift_or_reg_neon")))]
  "TARGET_NEON"
{
  if (s_register_operand (operands[2], <MODE>mode))
    {
      rtx neg = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_neg<mode>2 (neg, operands[2]));
      emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
    }
  else
    emit_insn (gen_vashr<mode>3_imm (operands[0], operands[1], operands[2]));
  DONE;
})

;; Logical shift right.  Same scheme as vashr<mode>3, but using the
;; unsigned left-shift pattern for register counts.
(define_expand "vlshr<mode>3"
  [(set (match_operand:VDQIW 0 "s_register_operand")
        (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand")
                        (match_operand:VDQIW 2 "imm_rshift_or_reg_neon")))]
  "TARGET_NEON"
{
  if (s_register_operand (operands[2], <MODE>mode))
    {
      rtx neg = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_neg<mode>2 (neg, operands[2]));
      emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
    }
  else
    emit_insn (gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2]));
  DONE;
})

;; 64-bit shifts

;; This pattern loads a 32-bit shift count into a 64-bit NEON register,
;; leaving the upper half uninitialized.  This is OK since the shift
;; instruction only looks at the low 8 bits anyway.  To avoid confusing
;; data flow analysis however, we pretend the full register is set
;; using an unspec.
(define_insn "neon_load_count"
  [(set (match_operand:DI 0 "s_register_operand" "=w,w")
        (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")]
                   UNSPEC_LOAD_COUNT))]
  "TARGET_NEON"
  "@
   vld1.32\t{%P0[0]}, %A1
   vmov.32\t%P0[0], %1"
  [(set_attr "type" "neon_load1_1reg,neon_from_gp")]
)

;; Widening operations

;; Signed widening sum of a quad-word vector into an accumulator of the
;; double-width mode.  The accumulator (operand 2) is first copied into the
;; result, then the low and the high half of operand 1 are each added to it
;; with a vaddw.
(define_expand "widen_ssum<mode>3"
  [(set (match_operand:<V_double_width> 0 "s_register_operand")
        (plus:<V_double_width>
         (sign_extend:<V_double_width>
          (match_operand:VQI 1 "s_register_operand"))
         (match_operand:<V_double_width> 2 "s_register_operand")))]
  "TARGET_NEON"
  {
    machine_mode mode = GET_MODE (operands[1]);
    rtx p1, p2;

    p1  = arm_simd_vect_par_cnst_half (mode, false);
    p2  = arm_simd_vect_par_cnst_half (mode, true);

    if (operands[0] != operands[2])
      emit_move_insn (operands[0], operands[2]);

    emit_insn (gen_vec_sel_widen_ssum_lo<mode><V_half>3 (operands[0],
                                                         operands[1],
                                                         p1,
                                                         operands[0]));
    emit_insn (gen_vec_sel_widen_ssum_hi<mode><V_half>3 (operands[0],
                                                         operands[1],
                                                         p2,
                                                         operands[0]));
    DONE;
  }
)

;; Add the sign-extended low half of operand 1 (as selected by operand 2) to
;; the accumulator in operand 3, which is tied to the destination.  Which D
;; register of the pair holds that half depends on BYTES_BIG_ENDIAN.
;; Operand 1 carries no '%' modifier: the operand after it is the lane
;; selector, not an interchangeable addend.
(define_insn "vec_sel_widen_ssum_lo<mode><V_half>3"
  [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
        (plus:<V_double_width>
         (sign_extend:<V_double_width>
          (vec_select:<V_HALF>
           (match_operand:VQI 1 "s_register_operand" "w")
           (match_operand:VQI 2 "vect_par_constant_low" "")))
         (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
  "TARGET_NEON"
{
  return BYTES_BIG_ENDIAN ?  "vaddw.<V_s_elem>\t%q0, %q3, %f1" :
    "vaddw.<V_s_elem>\t%q0, %q3, %e1";
}
  [(set_attr "type" "neon_add_widen")])

;; As above, for the high half of operand 1.
(define_insn "vec_sel_widen_ssum_hi<mode><V_half>3"
  [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
        (plus:<V_double_width>
         (sign_extend:<V_double_width>
          (vec_select:<V_HALF>
           (match_operand:VQI 1 "s_register_operand" "w")
           (match_operand:VQI 2 "vect_par_constant_high" "")))
         (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
  "TARGET_NEON"
{
  return BYTES_BIG_ENDIAN ?  "vaddw.<V_s_elem>\t%q0, %q3, %e1" :
    "vaddw.<V_s_elem>\t%q0, %q3, %f1";
}
  [(set_attr "type" "neon_add_widen")])

;; Signed widening sum of a double-word vector: a single vaddw.  Operand 1
;; has mode VW and operand 2 the widened mode, so the two are not
;; interchangeable and operand 1 must not be marked commutative.
(define_insn "widen_ssum<mode>3"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (plus:<V_widen>
         (sign_extend:<V_widen>
          (match_operand:VW 1 "s_register_operand" "w"))
         (match_operand:<V_widen> 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vaddw.<V_s_elem>\t%q0, %q2, %P1"
  [(set_attr "type" "neon_add_widen")]
)

;; Unsigned counterpart of the quad-word widen_ssum<mode>3 expander.
(define_expand "widen_usum<mode>3"
  [(set (match_operand:<V_double_width> 0 "s_register_operand")
        (plus:<V_double_width>
         (zero_extend:<V_double_width>
          (match_operand:VQI 1 "s_register_operand"))
         (match_operand:<V_double_width> 2 "s_register_operand")))]
  "TARGET_NEON"
  {
    machine_mode mode = GET_MODE (operands[1]);
    rtx p1, p2;

    p1  = arm_simd_vect_par_cnst_half (mode, false);
    p2  = arm_simd_vect_par_cnst_half (mode, true);

    if (operands[0] != operands[2])
      emit_move_insn (operands[0], operands[2]);

    emit_insn (gen_vec_sel_widen_usum_lo<mode><V_half>3 (operands[0],
                                                         operands[1],
                                                         p1,
                                                         operands[0]));
    emit_insn (gen_vec_sel_widen_usum_hi<mode><V_half>3 (operands[0],
                                                         operands[1],
                                                         p2,
                                                         operands[0]));
    DONE;
  }
)

;; Unsigned counterpart of vec_sel_widen_ssum_lo<mode><V_half>3.
(define_insn "vec_sel_widen_usum_lo<mode><V_half>3"
  [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
        (plus:<V_double_width>
         (zero_extend:<V_double_width>
          (vec_select:<V_HALF>
           (match_operand:VQI 1 "s_register_operand" "w")
           (match_operand:VQI 2 "vect_par_constant_low" "")))
         (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
  "TARGET_NEON"
{
  return BYTES_BIG_ENDIAN ?  "vaddw.<V_u_elem>\t%q0, %q3, %f1" :
    "vaddw.<V_u_elem>\t%q0, %q3, %e1";
}
  [(set_attr "type" "neon_add_widen")])

;; Unsigned counterpart of vec_sel_widen_ssum_hi<mode><V_half>3.
(define_insn "vec_sel_widen_usum_hi<mode><V_half>3"
  [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
        (plus:<V_double_width>
         (zero_extend:<V_double_width>
          (vec_select:<V_HALF>
           (match_operand:VQI 1 "s_register_operand" "w")
           (match_operand:VQI 2 "vect_par_constant_high" "")))
         (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
  "TARGET_NEON"
{
 return BYTES_BIG_ENDIAN ?  "vaddw.<V_u_elem>\t%q0, %q3, %e1" :
    "vaddw.<V_u_elem>\t%q0, %q3, %f1";
}
  [(set_attr "type" "neon_add_widen")])

;; Unsigned counterpart of the double-word widen_ssum<mode>3 insn.
(define_insn "widen_usum<mode>3"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (plus:<V_widen> (zero_extend:<V_widen>
                          (match_operand:VW 1 "s_register_operand" "w"))
                        (match_operand:<V_widen> 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vaddw.<V_u_elem>\t%q0, %q2, %P1"
  [(set_attr "type" "neon_add_widen")]
)

;; Helpers for quad-word reduction operations

; Add (or smin, smax...) the low N/2 elements of the N-element vector
; operand[1] to the high N/2 elements of same. Put the result in operand[0], an
; N/2-element vector.

;; Combine the two halves of a V4SI register pair with one of the VQH_OPS
;; operations, producing a V2SI result.
(define_insn "quad_halves_<code>v4si"
  [(set (match_operand:V2SI 0 "s_register_operand" "=w")
        (VQH_OPS:V2SI
          (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
                           (parallel [(const_int 0) (const_int 1)]))
          (vec_select:V2SI (match_dup 1)
                           (parallel [(const_int 2) (const_int 3)]))))]
  "TARGET_NEON"
  "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1"
  [(set_attr "vqh_mnem" "<VQH_mnem>")
   (set_attr "type" "neon_reduc_<VQH_type>_q")]
)

;; Floating-point variant; only enabled under
;; flag_unsafe_math_optimizations.
(define_insn "quad_halves_<code>v4sf"
  [(set (match_operand:V2SF 0 "s_register_operand" "=w")
        (VQHS_OPS:V2SF
          (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
                           (parallel [(const_int 0) (const_int 1)]))
          (vec_select:V2SF (match_dup 1)
                           (parallel [(const_int 2) (const_int 3)]))))]
  "TARGET_NEON && flag_unsafe_math_optimizations"
  "<VQH_mnem>.f32\t%P0, %e1, %f1"
  [(set_attr "vqh_mnem" "<VQH_mnem>")
   (set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")]
)

;; V8HI variant.  Operand 0 is written but never read, so it uses the "="
;; modifier like the V4SI and V4SF variants above (it used to be "+").
(define_insn "quad_halves_<code>v8hi"
  [(set (match_operand:V4HI 0 "s_register_operand" "=w")
        (VQH_OPS:V4HI
          (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
                           (parallel [(const_int 0) (const_int 1)
                                      (const_int 2) (const_int 3)]))
          (vec_select:V4HI (match_dup 1)
                           (parallel [(const_int 4) (const_int 5)
                                      (const_int 6) (const_int 7)]))))]
  "TARGET_NEON"
  "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1"
  [(set_attr "vqh_mnem" "<VQH_mnem>")
   (set_attr "type" "neon_reduc_<VQH_type>_q")]
)

;; V16QI variant.  As for V8HI, operand 0 is a pure output ("=").
(define_insn "quad_halves_<code>v16qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (VQH_OPS:V8QI
          (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
                           (parallel [(const_int 0) (const_int 1)
                                      (const_int 2) (const_int 3)
                                      (const_int 4) (const_int 5)
                                      (const_int 6) (const_int 7)]))
          (vec_select:V8QI (match_dup 1)
                           (parallel [(const_int 8) (const_int 9)
                                      (const_int 10) (const_int 11)
                                      (const_int 12) (const_int 13)
                                      (const_int 14) (const_int 15)]))))]
  "TARGET_NEON"
  "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1"
  [(set_attr "vqh_mnem" "<VQH_mnem>")
   (set_attr "type" "neon_reduc_<VQH_type>_q")]
)

;; Store operand 1 into the half of operand 0 that lives at byte offset
;; GET_MODE_SIZE (<V_HALF>mode), via a subreg move.
(define_expand "move_hi_quad_<mode>"
 [(match_operand:ANY128 0 "s_register_operand")
  (match_operand:<V_HALF> 1 "s_register_operand")]
 "TARGET_NEON"
{
  emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
                                       GET_MODE_SIZE (<V_HALF>mode)),
                  operands[1]);
  DONE;
})

;; Store operand 1 into the half of operand 0 that lives at byte offset 0.
(define_expand "move_lo_quad_<mode>"
 [(match_operand:ANY128 0 "s_register_operand")
  (match_operand:<V_HALF> 1 "s_register_operand")]
 "TARGET_NEON"
{
  emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0],
                                       <MODE>mode, 0),
                  operands[1]);
  DONE;
})

;; Reduction operations

;; Sum reduction of a double-word vector to a scalar: pairwise-reduce with
;; vpadd, then extract element 0.
(define_expand "reduc_plus_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand")
   (match_operand:VD 1 "s_register_operand")]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
{
  rtx vec = gen_reg_rtx (<MODE>mode);
  neon_pairwise_reduce (vec, operands[1], <MODE>mode,
                        &gen_neon_vpadd_internal<mode>);
  /* The same result is actually computed into every element.  */
  emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
  DONE;
})

;; Sum reduction of a quad-word vector: fold the two halves together, then
;; reuse the double-word reduction.  Little-endian only.
(define_expand "reduc_plus_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand")
   (match_operand:VQ 1 "s_register_operand")]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
   && !BYTES_BIG_ENDIAN"
{
  rtx step1 = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_quad_halves_plus<mode> (step1, operands[1]));
  emit_insn (gen_reduc_plus_scal_<V_half> (operands[0], step1));

  DONE;
})

;; Sum reduction of V2DI: add the two D halves, then extract element 0.
(define_expand "reduc_plus_scal_v2di"
  [(match_operand:DI 0 "nonimmediate_operand")
   (match_operand:V2DI 1 "s_register_operand")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  rtx vec = gen_reg_rtx (V2DImode);

  emit_insn (gen_arm_reduc_plus_internal_v2di (vec, operands[1]));
  emit_insn (gen_vec_extractv2didi (operands[0], vec, const0_rtx));

  DONE;
})

;; Helper for reduc_plus_scal_v2di.  Only the low D register of operand 0
;; (%e0) is written by the instruction; the RTL models the whole V2DI
;; register as set through the unspec.
(define_insn "arm_reduc_plus_internal_v2di"
  [(set (match_operand:V2DI 0 "s_register_operand" "=w")
        (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")]
                     UNSPEC_VPADD))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vadd.i64\t%e0, %e1, %f1"
  [(set_attr "type" "neon_add_q")]
)

;; Signed-minimum reduction of a double-word vector.
(define_expand "reduc_smin_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand")
   (match_operand:VD 1 "s_register_operand")]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
{
  rtx vec = gen_reg_rtx (<MODE>mode);

  neon_pairwise_reduce (vec, operands[1], <MODE>mode,
                        &gen_neon_vpsmin<mode>);
  /* The result is computed into every element of the vector.  */
  emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
  DONE;
})

;; Signed-minimum reduction of a quad-word vector (little-endian only).
(define_expand "reduc_smin_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand")
   (match_operand:VQ 1 "s_register_operand")]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
   && !BYTES_BIG_ENDIAN"
{
  rtx step1 = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_quad_halves_smin<mode> (step1, operands[1]));
  emit_insn (gen_reduc_smin_scal_<V_half> (operands[0], step1));

  DONE;
})

;; Signed-maximum reduction of a double-word vector.
(define_expand "reduc_smax_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand")
   (match_operand:VD 1 "s_register_operand")]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
{
  rtx vec = gen_reg_rtx (<MODE>mode);
  neon_pairwise_reduce (vec, operands[1], <MODE>mode,
                        &gen_neon_vpsmax<mode>);
  /* The result is computed into every element of the vector.  */
  emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
  DONE;
})

;; Signed-maximum reduction of a quad-word vector (little-endian only).
(define_expand "reduc_smax_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand")
   (match_operand:VQ 1 "s_register_operand")]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
   && !BYTES_BIG_ENDIAN"
{
  rtx step1 = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_quad_halves_smax<mode> (step1, operands[1]));
  emit_insn (gen_reduc_smax_scal_<V_half> (operands[0], step1));

  DONE;
})

;; Unsigned-minimum reduction of a double-word integer vector.
(define_expand "reduc_umin_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand")
   (match_operand:VDI 1 "s_register_operand")]
  "TARGET_NEON"
{
  rtx vec = gen_reg_rtx (<MODE>mode);
  neon_pairwise_reduce (vec, operands[1], <MODE>mode,
                        &gen_neon_vpumin<mode>);
  /* The result is computed into every element of the vector.  */
  emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
  DONE;
})

;; Unsigned-minimum reduction of a quad-word integer vector (little-endian
;; only).
(define_expand "reduc_umin_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand")
   (match_operand:VQI 1 "s_register_operand")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  rtx step1 = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_quad_halves_umin<mode> (step1, operands[1]));
  emit_insn (gen_reduc_umin_scal_<V_half> (operands[0], step1));

  DONE;
})

;; Unsigned-maximum reduction of a double-word integer vector.
(define_expand "reduc_umax_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand")
   (match_operand:VDI 1 "s_register_operand")]
  "TARGET_NEON"
{
  rtx vec = gen_reg_rtx (<MODE>mode);
  neon_pairwise_reduce (vec, operands[1], <MODE>mode,
                        &gen_neon_vpumax<mode>);
  /* The result is computed into every element of the vector.  */
  emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
  DONE;
})

;; Unsigned-maximum reduction of a quad-word integer vector (little-endian
;; only).
(define_expand "reduc_umax_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand")
   (match_operand:VQI 1 "s_register_operand")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  rtx step1 = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_quad_halves_umax<mode> (step1, operands[1]));
  emit_insn (gen_reduc_umax_scal_<V_half> (operands[0], step1));

  DONE;
})

;; Pairwise add of two double-word vectors, used by the sum reductions.
(define_insn "neon_vpadd_internal<mode>"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
        (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
                    (match_operand:VD 2 "s_register_operand" "w")]
                   UNSPEC_VPADD))]
  "TARGET_NEON"
  "vpadd.<V_if_elem>\t%P0, %P1, %P2"
  ;; Assume this schedules like vadd.
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
                    (const_string "neon_fp_reduc_add_s<q>")
                    (const_string "neon_reduc_add<q>")))]
)

;; Pairwise add for V4HF, available only with TARGET_NEON_FP16INST.
(define_insn "neon_vpaddv4hf"
 [(set
   (match_operand:V4HF 0 "s_register_operand" "=w")
   (unspec:V4HF [(match_operand:V4HF 1 "s_register_operand" "w")
                 (match_operand:V4HF 2 "s_register_operand" "w")]
    UNSPEC_VPADD))]
 "TARGET_NEON_FP16INST"
 "vpadd.f16\t%P0, %P1, %P2"
 [(set_attr "type" "neon_reduc_add")]
)

;; Pairwise signed minimum.
(define_insn "neon_vpsmin<mode>"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
        (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
                    (match_operand:VD 2 "s_register_operand" "w")]
                   UNSPEC_VPSMIN))]
  "TARGET_NEON"
  "vpmin.<V_s_elem>\t%P0, %P1, %P2"
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
                    (const_string "neon_fp_reduc_minmax_s<q>")
                    (const_string "neon_reduc_minmax<q>")))]
)

;; Pairwise signed maximum.
(define_insn "neon_vpsmax<mode>"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
        (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
                    (match_operand:VD 2 "s_register_operand" "w")]
                   UNSPEC_VPSMAX))]
  "TARGET_NEON"
  "vpmax.<V_s_elem>\t%P0, %P1, %P2"
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
                    (const_string "neon_fp_reduc_minmax_s<q>")
                    (const_string "neon_reduc_minmax<q>")))]
)

;; Pairwise unsigned minimum (integer modes only).
(define_insn "neon_vpumin<mode>"
  [(set (match_operand:VDI 0 "s_register_operand" "=w")
        (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
                     (match_operand:VDI 2 "s_register_operand" "w")]
                   UNSPEC_VPUMIN))]
  "TARGET_NEON"
  "vpmin.<V_u_elem>\t%P0, %P1, %P2"
  [(set_attr "type" "neon_reduc_minmax<q>")]
)

;; Pairwise unsigned maximum (integer modes only).
(define_insn "neon_vpumax<mode>"
  [(set (match_operand:VDI 0 "s_register_operand" "=w")
        (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
                     (match_operand:VDI 2 "s_register_operand" "w")]
                   UNSPEC_VPUMAX))]
  "TARGET_NEON"
  "vpmax.<V_u_elem>\t%P0, %P1, %P2"
  [(set_attr "type" "neon_reduc_minmax<q>")]
)

;; Saturating arithmetic

; NOTE: Neon supports many more saturating variants of instructions than the
; following, but these are all GCC currently understands.
; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself
; yet either, although these patterns may be used by intrinsics when they're
; added.

(define_insn "*ss_add<mode>_neon"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
       (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w")
                   (match_operand:VD 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vqadd.<V_s_elem>\t%P0, %P1, %P2"
  [(set_attr "type" "neon_qadd<q>")]
)

(define_insn "*us_add<mode>_neon"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
       (us_plus:VD (match_operand:VD 1 "s_register_operand" "w")
                   (match_operand:VD 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vqadd.<V_u_elem>\t%P0, %P1, %P2"
  [(set_attr "type" "neon_qadd<q>")]
)

(define_insn "*ss_sub<mode>_neon"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
       (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w")
                    (match_operand:VD 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vqsub.<V_s_elem>\t%P0, %P1, %P2"
  [(set_attr "type" "neon_qsub<q>")]
)

(define_insn "*us_sub<mode>_neon"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
       (us_minus:VD (match_operand:VD 1 "s_register_operand" "w")
                    (match_operand:VD 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vqsub.<V_u_elem>\t%P0, %P1, %P2"
  [(set_attr "type" "neon_qsub<q>")]
)

;; Conditional instructions.  These are comparisons with conditional moves for
;; vectors.
;; They perform the assignment:
;;
;;     Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2;
;;
;; where op3 is <, <=, ==, !=, >= or >.  Operations are performed
;; element-wise.

;; Signed / floating-point vector conditional select.  A lane mask is
;; computed from the comparison in operand 3 and then used by vbsl to pick
;; between operands 1 and 2.
(define_expand "vcond<mode><mode>"
  [(set (match_operand:VDQW 0 "s_register_operand")
        (if_then_else:VDQW
          (match_operator 3 "comparison_operator"
            [(match_operand:VDQW 4 "s_register_operand")
             (match_operand:VDQW 5 "nonmemory_operand")])
          (match_operand:VDQW 1 "s_register_operand")
          (match_operand:VDQW 2 "s_register_operand")))]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
{
  int inverse = 0;
  int use_zero_form = 0;
  int swap_bsl_operands = 0;
  rtx mask = gen_reg_rtx (<V_cmp_result>mode);
  rtx tmp = gen_reg_rtx (<V_cmp_result>mode);

  rtx (*base_comparison) (rtx, rtx, rtx);
  rtx (*complimentary_comparison) (rtx, rtx, rtx);

  /* Step 1: decide whether the compare-against-zero form can be used;
     otherwise make sure operand 5 is in a register.  */
  switch (GET_CODE (operands[3]))
    {
    case GE:
    case GT:
    case LE:
    case LT:
    case EQ:
      if (operands[5] == CONST0_RTX (<MODE>mode))
        {
          use_zero_form = 1;
          break;
        }
      /* Fall through.  */
    default:
      if (!REG_P (operands[5]))
        operands[5] = force_reg (<MODE>mode, operands[5]);
    }

  /* Step 2: pick the comparison generators.  INVERSE records that the
     requested code is the operand-swapped form of the base comparison.  */
  switch (GET_CODE (operands[3]))
    {
    case LT:
    case UNLT:
      inverse = 1;
      /* Fall through.  */
    case GE:
    case UNGE:
    case ORDERED:
    case UNORDERED:
      base_comparison = gen_neon_vcge<mode>;
      complimentary_comparison = gen_neon_vcgt<mode>;
      break;
    case LE:
    case UNLE:
      inverse = 1;
      /* Fall through.  */
    case GT:
    case UNGT:
      base_comparison = gen_neon_vcgt<mode>;
      complimentary_comparison = gen_neon_vcge<mode>;
      break;
    case EQ:
    case NE:
    case UNEQ:
      base_comparison = gen_neon_vceq<mode>;
      complimentary_comparison = gen_neon_vceq<mode>;
      break;
    default:
      gcc_unreachable ();
    }

  /* Step 3: emit the comparison(s) that compute MASK.  */
  switch (GET_CODE (operands[3]))
    {
    case LT:
    case LE:
    case GT:
    case GE:
    case EQ:
      /* The easy case.  Here we emit one of vcge, vcgt or vceq.
         As a LT b <=> b GT a && a LE b <=> b GE a.  Our transformations are:
         a GE b -> a GE b
         a GT b -> a GT b
         a LE b -> b GE a
         a LT b -> b GT a
         a EQ b -> a EQ b
         Note that there also exist direct comparison against 0 forms,
         so catch those as a special case.  */
      if (use_zero_form)
        {
          inverse = 0;
          switch (GET_CODE (operands[3]))
            {
            case LT:
              base_comparison = gen_neon_vclt<mode>;
              break;
            case LE:
              base_comparison = gen_neon_vcle<mode>;
              break;
            default:
              /* Do nothing, other zero form cases already have the correct
                 base_comparison.  */
              break;
            }
        }

      if (!inverse)
        emit_insn (base_comparison (mask, operands[4], operands[5]));
      else
        emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
      break;
    case UNLT:
    case UNLE:
    case UNGT:
    case UNGE:
    case NE:
      /* Vector compare returns false for lanes which are unordered, so if we use
         the inverse of the comparison we actually want to emit, then
         swap the operands to BSL, we will end up with the correct result.
         Note that a NE NaN and NaN NE b are true for all a, b.

         Our transformations are:
         a GE b -> !(b GT a)
         a GT b -> !(b GE a)
         a LE b -> !(a GT b)
         a LT b -> !(a GE b)
         a NE b -> !(a EQ b)  */

      if (inverse)
        emit_insn (base_comparison (mask, operands[4], operands[5]));
      else
        emit_insn (complimentary_comparison (mask, operands[5], operands[4]));

      swap_bsl_operands = 1;
      break;
    case UNEQ:
      /* We check (a > b || b > a).  Combining these comparisons gives us
         true iff (a != b && a ORDERED b); swapping the operands to BSL
         will then give us (a == b || a UNORDERED b) as intended.  */

      emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5]));
      emit_insn (gen_neon_vcgt<mode> (tmp, operands[5], operands[4]));
      emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
      swap_bsl_operands = 1;
      break;
    case UNORDERED:
      /* Operands are ORDERED iff (a > b || b >= a).
         Swapping the operands to BSL will give the UNORDERED case.  */
      swap_bsl_operands = 1;
      /* Fall through.  */
    case ORDERED:
      emit_insn (gen_neon_vcgt<mode> (tmp, operands[4], operands[5]));
      emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4]));
      emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
      break;
    default:
      gcc_unreachable ();
    }

  /* Step 4: select between operands 1 and 2 under MASK.  */
  if (swap_bsl_operands)
    emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
                                    operands[1]));
  else
    emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
                                    operands[2]));
  DONE;
})

;; Unsigned vector conditional select.  GEU and GTU map directly onto
;; vcgeu/vcgtu, LEU and LTU onto the same instructions with the comparison
;; operands swapped, and NE onto vceq with the vbsl inputs swapped.
(define_expand "vcondu<mode><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand")
        (if_then_else:VDQIW
          (match_operator 3 "arm_comparison_operator"
            [(match_operand:VDQIW 4 "s_register_operand")
             (match_operand:VDQIW 5 "s_register_operand")])
          (match_operand:VDQIW 1 "s_register_operand")
          (match_operand:VDQIW 2 "s_register_operand")))]
  "TARGET_NEON"
{
  rtx mask;
  int inverse = 0;

  mask = gen_reg_rtx (<V_cmp_result>mode);

  /* Always compare register against register.  The compare-with-zero
     forms vcle/vclt are signed comparisons, so they must not be used to
     implement the unsigned LEU/LTU codes.  */
  if (!REG_P (operands[5]))
    operands[5] = force_reg (<MODE>mode, operands[5]);

  switch (GET_CODE (operands[3]))
    {
    case GEU:
      emit_insn (gen_neon_vcgeu<mode> (mask, operands[4], operands[5]));
      break;

    case GTU:
      emit_insn (gen_neon_vcgtu<mode> (mask, operands[4], operands[5]));
      break;

    case EQ:
      emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
      break;

    case LEU:
      /* a LEU b <=> b GEU a.  */
      emit_insn (gen_neon_vcgeu<mode> (mask, operands[5], operands[4]));
      break;

    case LTU:
      /* a LTU b <=> b GTU a.  */
      emit_insn (gen_neon_vcgtu<mode> (mask, operands[5], operands[4]));
      break;

    case NE:
      /* Compute the EQ mask and swap the vbsl inputs below.  */
      emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
      inverse = 1;
      break;

    default:
      gcc_unreachable ();
    }

  if (inverse)
    emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
                                    operands[1]));
  else
    emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
                                    operands[2]));

  DONE;
})

;; Patterns for builtins.

; good for plain vadd, vaddq.

;; Uses the canonical add<mode>3 pattern for integer modes, or for float
;; modes under flag_unsafe_math_optimizations; otherwise uses the unspec
;; form.
(define_expand "neon_vadd<mode>"
  [(match_operand:VCVTF 0 "s_register_operand")
   (match_operand:VCVTF 1 "s_register_operand")
   (match_operand:VCVTF 2 "s_register_operand")]
  "TARGET_NEON"
{
  if (!<Is_float_mode> || flag_unsafe_math_optimizations)
    emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
  else
    emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1],
                                           operands[2]));
  DONE;
})

;; VH add for builtins; forwards to add<mode>3_fp16.
(define_expand "neon_vadd<mode>"
  [(match_operand:VH 0 "s_register_operand")
   (match_operand:VH 1 "s_register_operand")
   (match_operand:VH 2 "s_register_operand")]
  "TARGET_NEON_FP16INST"
{
  emit_insn (gen_add<mode>3_fp16 (operands[0], operands[1], operands[2]));
  DONE;
})

;; VH subtract for builtins; forwards to sub<mode>3_fp16.
(define_expand "neon_vsub<mode>"
  [(match_operand:VH 0 "s_register_operand")
   (match_operand:VH 1 "s_register_operand")
   (match_operand:VH 2 "s_register_operand")]
  "TARGET_NEON_FP16INST"
{
  emit_insn (gen_sub<mode>3_fp16 (operands[0], operands[1], operands[2]));
  DONE;
})

; Note that NEON operations don't support the full IEEE 754 standard: in
; particular, denormal values are flushed to zero.  This means that GCC cannot
; use those instructions for autovectorization, etc. unless
; -funsafe-math-optimizations is in effect (in which case flush-to-zero
; behavior is permissible).
Intrinsic operations (provided by the arm_neon.h 1859; header) must work in either case: if -funsafe-math-optimizations is given, 1860; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics 1861; expand to unspecs (which may potentially limit the extent to which they might 1862; be optimized by generic code). 1863 1864; Used for intrinsics when flag_unsafe_math_optimizations is false. 1865 1866(define_insn "neon_vadd<mode>_unspec" 1867 [(set (match_operand:VCVTF 0 "s_register_operand" "=w") 1868 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w") 1869 (match_operand:VCVTF 2 "s_register_operand" "w")] 1870 UNSPEC_VADD))] 1871 "TARGET_NEON" 1872 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" 1873 [(set (attr "type") 1874 (if_then_else (match_test "<Is_float_mode>") 1875 (const_string "neon_fp_addsub_s<q>") 1876 (const_string "neon_add<q>")))] 1877) 1878 1879(define_insn "neon_vaddl<sup><mode>" 1880 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") 1881 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w") 1882 (match_operand:VDI 2 "s_register_operand" "w")] 1883 VADDL))] 1884 "TARGET_NEON" 1885 "vaddl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2" 1886 [(set_attr "type" "neon_add_long")] 1887) 1888 1889(define_insn "neon_vaddw<sup><mode>" 1890 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") 1891 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w") 1892 (match_operand:VDI 2 "s_register_operand" "w")] 1893 VADDW))] 1894 "TARGET_NEON" 1895 "vaddw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2" 1896 [(set_attr "type" "neon_add_widen")] 1897) 1898 1899; vhadd and vrhadd. 

;; Halving / rounding-halving add.  The scheduling type follows the
;; register width through <q>, in the same way neon_vhsub uses
;; neon_sub_halve<q>, instead of always claiming the Q-register variant.
(define_insn "neon_v<r>hadd<sup><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
                       (match_operand:VDQIW 2 "s_register_operand" "w")]
                      VHADD))]
  "TARGET_NEON"
  "v<r>hadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_add_halve<q>")]
)

;; Saturating add.
(define_insn "neon_vqadd<sup><mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
                       (match_operand:VDQIX 2 "s_register_operand" "w")]
                      VQADD))]
  "TARGET_NEON"
  "vqadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_qadd<q>")]
)

;; Add and narrow (high half), optionally rounding: VN inputs printed as
;; Q registers, <V_narrow> result printed as a D register.
(define_insn "neon_v<r>addhn<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
                            (match_operand:VN 2 "s_register_operand" "w")]
                           VADDHN))]
  "TARGET_NEON"
  "v<r>addhn.<V_if_elem>\t%P0, %q1, %q2"
  [(set_attr "type" "neon_add_halve_narrow_q")]
)

;; Polynomial and Float multiplication.
;; Multiply kept as an unspec (UNSPEC_VMUL) over the VPF modes; the
;; scheduling type depends on whether the mode is floating-point.
(define_insn "neon_vmul<pf><mode>"
  [(set (match_operand:VPF 0 "s_register_operand" "=w")
        (unspec:VPF
          [(match_operand:VPF 1 "s_register_operand" "w")
           (match_operand:VPF 2 "s_register_operand" "w")]
          UNSPEC_VMUL))]
  "TARGET_NEON"
  "vmul.<pf>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
          (const_string "neon_fp_mul_s<q>")
          (const_string "neon_mul_<V_elem_ch><q>")))]
)

;; Canonical half-precision vector multiply; only available under
;; -funsafe-math-optimizations.
(define_insn "mul<mode>3"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
        (mult:VH
          (match_operand:VH 1 "s_register_operand" "w")
          (match_operand:VH 2 "s_register_operand" "w")))]
  "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
  "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
)

;; Same RTL and output as mul<mode>3 for VH, but without the
;; flag_unsafe_math_optimizations requirement in its condition.
(define_insn "neon_vmulf<mode>"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
        (mult:VH
          (match_operand:VH 1 "s_register_operand" "w")
          (match_operand:VH 2 "s_register_operand" "w")))]
  "TARGET_NEON_FP16INST"
  "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
)

;; Multiply-accumulate builtin.  Float modes without
;; -funsafe-math-optimizations go to the unspec pattern; everything else
;; uses the mul<mode>3add<mode>_neon pattern.
(define_expand "neon_vmla<mode>"
  [(match_operand:VDQW 0 "s_register_operand")
   (match_operand:VDQW 1 "s_register_operand")
   (match_operand:VDQW 2 "s_register_operand")
   (match_operand:VDQW 3 "s_register_operand")]
  "TARGET_NEON"
{
  if (<Is_float_mode> && !flag_unsafe_math_optimizations)
    emit_insn (gen_neon_vmla<mode>_unspec (operands[0], operands[1],
                                           operands[2], operands[3]));
  else
    emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1],
                                             operands[2], operands[3]));
  DONE;
})

;; Fused multiply-add builtin.  Builtin operand 1 is handed to
;; fma<mode>4_intrinsic as its last argument, after operands 2 and 3.
(define_expand "neon_vfma<VCVTF:mode>"
  [(match_operand:VCVTF 0 "s_register_operand")
   (match_operand:VCVTF 1 "s_register_operand")
   (match_operand:VCVTF 2 "s_register_operand")
   (match_operand:VCVTF 3 "s_register_operand")]
  "TARGET_NEON && TARGET_FMA"
{
  rtx dest = operands[0];
  rtx last = operands[1];

  emit_insn (gen_fma<mode>4_intrinsic (dest, operands[2], operands[3], last));
  DONE;
})

;; Half-precision variant of the fused multiply-add builtin.
(define_expand "neon_vfma<VH:mode>"
  [(match_operand:VH 0 "s_register_operand")
   (match_operand:VH 1 "s_register_operand")
   (match_operand:VH 2 "s_register_operand")
   (match_operand:VH 3 "s_register_operand")]
  "TARGET_NEON_FP16INST"
{
  rtx dest = operands[0];
  rtx last = operands[1];

  emit_insn (gen_fma<mode>4_intrinsic (dest, operands[2], operands[3], last));
  DONE;
})

;; Fused multiply-subtract builtin.  Same operand reordering as the vfma
;; expander, but targeting fmsub<mode>4_intrinsic.
(define_expand "neon_vfms<VCVTF:mode>"
  [(match_operand:VCVTF 0 "s_register_operand")
   (match_operand:VCVTF 1 "s_register_operand")
   (match_operand:VCVTF 2 "s_register_operand")
   (match_operand:VCVTF 3 "s_register_operand")]
  "TARGET_NEON && TARGET_FMA"
{
  rtx dest = operands[0];
  rtx last = operands[1];

  emit_insn (gen_fmsub<mode>4_intrinsic (dest, operands[2], operands[3],
                                         last));
  DONE;
})

;; Half-precision variant of the fused multiply-subtract builtin.
(define_expand "neon_vfms<VH:mode>"
  [(match_operand:VH 0 "s_register_operand")
   (match_operand:VH 1 "s_register_operand")
   (match_operand:VH 2 "s_register_operand")
   (match_operand:VH 3 "s_register_operand")]
  "TARGET_NEON_FP16INST"
{
  rtx dest = operands[0];
  rtx last = operands[1];

  emit_insn (gen_fmsub<mode>4_intrinsic (dest, operands[2], operands[3],
                                         last));
  DONE;
})

;; The expand RTL structure here is not important.
;; We use the gen_* functions anyway.
;; We just need something to wrap the iterators around.

;; Builtin entry point for the non-lane vfmal/vfmsl low/high forms.  Builds
;; the half-selecting PARALLEL once and passes it for both multiplicands.
(define_expand "neon_vfm<vfml_op>l_<vfml_half><mode>"
  [(set (match_operand:VCVTF 0 "s_register_operand")
        (unspec:VCVTF
          [(match_operand:VCVTF 1 "s_register_operand")
           (PLUSMINUS:<VFML>
             (match_operand:<VFML> 2 "s_register_operand")
             (match_operand:<VFML> 3 "s_register_operand"))]
          VFMLHALVES))]
  "TARGET_FP16FML"
{
  rtx half_sel
    = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);

  emit_insn (gen_vfm<vfml_op>l_<vfml_half><mode>_intrinsic
             (operands[0], operands[1], operands[2], operands[3],
              half_sel, half_sel));
  DONE;
})

;; fma of the float-extended low halves of operands 2 and 3 with operand 1,
;; which is tied to the destination.
(define_insn "vfmal_low<mode>_intrinsic"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (fma:VCVTF
          (float_extend:VCVTF
            (vec_select:<VFMLSEL>
              (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
              (match_operand:<VFML> 4 "vect_par_constant_low" "")))
          (float_extend:VCVTF
            (vec_select:<VFMLSEL>
              (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
              (match_operand:<VFML> 5 "vect_par_constant_low" "")))
          (match_operand:VCVTF 1 "s_register_operand" "0")))]
  "TARGET_FP16FML"
  "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)

;; As vfmal_high below, but the selected half of operand 2 is negated.
(define_insn "vfmsl_high<mode>_intrinsic"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (fma:VCVTF
          (float_extend:VCVTF
            (neg:<VFMLSEL>
              (vec_select:<VFMLSEL>
                (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
                (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
          (float_extend:VCVTF
            (vec_select:<VFMLSEL>
              (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
              (match_operand:<VFML> 5 "vect_par_constant_high" "")))
          (match_operand:VCVTF 1 "s_register_operand" "0")))]
  "TARGET_FP16FML"
  "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)

;; fma of the float-extended high halves of operands 2 and 3 with operand 1.
(define_insn "vfmal_high<mode>_intrinsic"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (fma:VCVTF
          (float_extend:VCVTF
            (vec_select:<VFMLSEL>
              (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
              (match_operand:<VFML> 4 "vect_par_constant_high" "")))
          (float_extend:VCVTF
            (vec_select:<VFMLSEL>
              (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
              (match_operand:<VFML> 5 "vect_par_constant_high" "")))
          (match_operand:VCVTF 1 "s_register_operand" "0")))]
  "TARGET_FP16FML"
  "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)

;; As vfmal_low above, but the selected half of operand 2 is negated.
(define_insn "vfmsl_low<mode>_intrinsic"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (fma:VCVTF
          (float_extend:VCVTF
            (neg:<VFMLSEL>
              (vec_select:<VFMLSEL>
                (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
                (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
          (float_extend:VCVTF
            (vec_select:<VFMLSEL>
              (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
              (match_operand:<VFML> 5 "vect_par_constant_low" "")))
          (match_operand:VCVTF 1 "s_register_operand" "0")))]
  "TARGET_FP16FML"
  "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)

;; Builtin entry point for the lane forms where both half-precision inputs
;; have the same mode.  The lane number goes through NEON_ENDIAN_LANE_N
;; before being passed on together with the half selector.
(define_expand "neon_vfm<vfml_op>l_lane_<vfml_half><VCVTF:mode>"
  [(set:VCVTF (match_operand:VCVTF 0 "s_register_operand")
     (unspec:VCVTF
       [(match_operand:VCVTF 1 "s_register_operand")
        (PLUSMINUS:<VFML>
          (match_operand:<VFML> 2 "s_register_operand")
          (match_operand:<VFML> 3 "s_register_operand"))
        (match_operand:SI 4 "const_int_operand")]
       VFMLHALVES))]
  "TARGET_FP16FML"
{
  rtx lane_rtx
    = GEN_INT (NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[4])));
  rtx half_sel
    = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);

  emit_insn (gen_vfm<vfml_op>l_lane_<vfml_half><mode>_intrinsic
             (operands[0], operands[1], operands[2], operands[3],
              half_sel, lane_rtx));
  DONE;
})

;; Lane form, low half of operand 2.  The lane of operand 3 is rewritten
;; relative to whichever half-register it falls in, and that half is
;; named in the output template.
(define_insn "vfmal_lane_low<mode>_intrinsic"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (fma:VCVTF
          (float_extend:VCVTF
            (vec_select:<VFMLSEL>
              (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
              (match_operand:<VFML> 4 "vect_par_constant_low" "")))
          (float_extend:VCVTF
            (vec_duplicate:<VFMLSEL>
              (vec_select:HF
                (match_operand:<VFML> 3 "s_register_operand" "x")
                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
          (match_operand:VCVTF 1 "s_register_operand" "0")))]
  "TARGET_FP16FML"
  {
    int idx = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));

    /* Lane lives in the upper half: rebase it and print the high half.  */
    if (idx > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
      {
        operands[5] = GEN_INT (idx - GET_MODE_NUNITS (<VFMLSEL>mode));
        return "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_hi>3[%c5]";
      }

    operands[5] = GEN_INT (idx);
    return "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3[%c5]";
  }
  [(set_attr "type" "neon_fp_mla_s<q>")]
)

;; Builtin entry point for the lane forms where the lane source (operand 3)
;; has mode <VFMLSEL2> rather than <VFML>.
(define_expand "neon_vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>"
  [(set:VCVTF (match_operand:VCVTF 0 "s_register_operand")
     (unspec:VCVTF
       [(match_operand:VCVTF 1 "s_register_operand")
        (PLUSMINUS:<VFML>
          (match_operand:<VFML> 2 "s_register_operand")
          (match_operand:<VFMLSEL2> 3 "s_register_operand"))
        (match_operand:SI 4 "const_int_operand")]
       VFMLHALVES))]
  "TARGET_FP16FML"
{
  rtx lane_rtx
    = GEN_INT (NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[4])));
  rtx half_sel
    = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);

  emit_insn (gen_vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>_intrinsic
             (operands[0], operands[1], operands[2], operands[3],
              half_sel, lane_rtx));
  DONE;
})

;; Used to implement the intrinsics:
;; float32x4_t vfmlalq_lane_low_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
;; float32x2_t vfmlal_laneq_low_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
;; Needs a bit of care to get the modes of the different sub-expressions right
;; due to 'a' and 'b' having different sizes and make sure we use the right
;; S or D subregister to select the appropriate lane from.

(define_insn "vfmal_lane_low<vfmlsel2><mode>_intrinsic"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (fma:VCVTF
          (float_extend:VCVTF
            (vec_select:<VFMLSEL>
              (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
              (match_operand:<VFML> 4 "vect_par_constant_low" "")))
          (float_extend:VCVTF
            (vec_duplicate:<VFMLSEL>
              (vec_select:HF
                (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
          (match_operand:VCVTF 1 "s_register_operand" "0")))]
  "TARGET_FP16FML"
  {
    int abs_lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
    int lanes_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
    unsigned int lane_regno = REGNO (operands[3]) + abs_lane / lanes_per_reg;

    operands[5] = GEN_INT (abs_lane % lanes_per_reg);
    /* Operands 2 and 3 are rebuilt in the halved VFMLSEL mode so that
       print_operand emits the matching S or D register name.  */
    operands[3] = gen_rtx_REG (<VFMLSEL>mode, lane_regno);
    operands[2] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[2]));
    return "vfmal.f16\\t%<V_reg>0, %<V_lane_reg>2, %<V_lane_reg>3[%c5]";
  }
  [(set_attr "type" "neon_fp_mla_s<q>")]
)

;; Used to implement the intrinsics:
;; float32x4_t vfmlalq_lane_high_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
;; float32x2_t vfmlal_laneq_high_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
;; Needs a bit of care to get the modes of the different sub-expressions right
;; due to 'a' and 'b' having different sizes and make sure we use the right
;; S or D subregister to select the appropriate lane from.

(define_insn "vfmal_lane_high<vfmlsel2><mode>_intrinsic"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (fma:VCVTF
          (float_extend:VCVTF
            (vec_select:<VFMLSEL>
              (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
              (match_operand:<VFML> 4 "vect_par_constant_high" "")))
          (float_extend:VCVTF
            (vec_duplicate:<VFMLSEL>
              (vec_select:HF
                (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
          (match_operand:VCVTF 1 "s_register_operand" "0")))]
  "TARGET_FP16FML"
  {
    int abs_lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
    int lanes_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
    unsigned int lane_regno = REGNO (operands[3]) + abs_lane / lanes_per_reg;

    operands[5] = GEN_INT (abs_lane % lanes_per_reg);
    /* Operand 3 is rebuilt in the halved VFMLSEL mode: the half-width
       subregister holding the lane has been computed above and that is
       the register that must be printed.  */
    operands[3] = gen_rtx_REG (<VFMLSEL>mode, lane_regno);
    return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_lane_reg>3[%c5]";
  }
  [(set_attr "type" "neon_fp_mla_s<q>")]
)

;; Lane form, high half of operand 2; lane handling as in
;; vfmal_lane_low<mode>_intrinsic.
(define_insn "vfmal_lane_high<mode>_intrinsic"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (fma:VCVTF
          (float_extend:VCVTF
            (vec_select:<VFMLSEL>
              (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
              (match_operand:<VFML> 4 "vect_par_constant_high" "")))
          (float_extend:VCVTF
            (vec_duplicate:<VFMLSEL>
              (vec_select:HF
                (match_operand:<VFML> 3 "s_register_operand" "x")
                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
          (match_operand:VCVTF 1 "s_register_operand" "0")))]
  "TARGET_FP16FML"
  {
    int idx = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));

    /* Lane lives in the upper half: rebase it and print the high half.  */
    if (idx > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
      {
        operands[5] = GEN_INT (idx - GET_MODE_NUNITS (<VFMLSEL>mode));
        return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3[%c5]";
      }

    operands[5] = GEN_INT (idx);
    return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_lo>3[%c5]";
  }
  [(set_attr "type" "neon_fp_mla_s<q>")]
)

;; Subtracting lane form, low half of operand 2 (negated in the RTL).
(define_insn "vfmsl_lane_low<mode>_intrinsic"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (fma:VCVTF
          (float_extend:VCVTF
            (neg:<VFMLSEL>
              (vec_select:<VFMLSEL>
                (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
                (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
          (float_extend:VCVTF
            (vec_duplicate:<VFMLSEL>
              (vec_select:HF
                (match_operand:<VFML> 3 "s_register_operand" "x")
                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
          (match_operand:VCVTF 1 "s_register_operand" "0")))]
  "TARGET_FP16FML"
  {
    int idx = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));

    /* Lane lives in the upper half: rebase it and print the high half.  */
    if (idx > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
      {
        operands[5] = GEN_INT (idx - GET_MODE_NUNITS (<VFMLSEL>mode));
        return "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_hi>3[%c5]";
      }

    operands[5] = GEN_INT (idx);
    return "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3[%c5]";
  }
  [(set_attr "type" "neon_fp_mla_s<q>")]
)

;; Used to implement the intrinsics:
;; float32x4_t vfmlslq_lane_low_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
;; float32x2_t vfmlsl_laneq_low_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
;; Needs a bit of care to get the modes of the different sub-expressions right
;; due to 'a' and 'b' having different sizes and make sure we use the right
;; S or D subregister to select the appropriate lane from.

(define_insn "vfmsl_lane_low<vfmlsel2><mode>_intrinsic"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (fma:VCVTF
          (float_extend:VCVTF
            (neg:<VFMLSEL>
              (vec_select:<VFMLSEL>
                (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
                (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
          (float_extend:VCVTF
            (vec_duplicate:<VFMLSEL>
              (vec_select:HF
                (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
          (match_operand:VCVTF 1 "s_register_operand" "0")))]
  "TARGET_FP16FML"
  {
    int abs_lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
    int lanes_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
    unsigned int lane_regno = REGNO (operands[3]) + abs_lane / lanes_per_reg;

    operands[5] = GEN_INT (abs_lane % lanes_per_reg);
    /* Operands 2 and 3 are rebuilt in the halved VFMLSEL mode so that
       print_operand emits the matching S or D register name.  */
    operands[3] = gen_rtx_REG (<VFMLSEL>mode, lane_regno);
    operands[2] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[2]));
    return "vfmsl.f16\\t%<V_reg>0, %<V_lane_reg>2, %<V_lane_reg>3[%c5]";
  }
  [(set_attr "type" "neon_fp_mla_s<q>")]
)

;; Used to implement the intrinsics:
;; float32x4_t vfmlslq_lane_high_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
;; float32x2_t vfmlsl_laneq_high_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
;; Needs a bit of care to get the modes of the different sub-expressions right
;; due to 'a' and 'b' having different sizes and make sure we use the right
;; S or D subregister to select the appropriate lane from.

(define_insn "vfmsl_lane_high<vfmlsel2><mode>_intrinsic"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (fma:VCVTF
          (float_extend:VCVTF
            (neg:<VFMLSEL>
              (vec_select:<VFMLSEL>
                (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
                (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
          (float_extend:VCVTF
            (vec_duplicate:<VFMLSEL>
              (vec_select:HF
                (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
          (match_operand:VCVTF 1 "s_register_operand" "0")))]
  "TARGET_FP16FML"
  {
    int abs_lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
    int lanes_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
    unsigned int lane_regno = REGNO (operands[3]) + abs_lane / lanes_per_reg;

    operands[5] = GEN_INT (abs_lane % lanes_per_reg);
    /* Operand 3 is rebuilt in the halved VFMLSEL mode: the half-width
       subregister holding the lane has been computed above and that is
       the register that must be printed.  */
    operands[3] = gen_rtx_REG (<VFMLSEL>mode, lane_regno);
    return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_lane_reg>3[%c5]";
  }
  [(set_attr "type" "neon_fp_mla_s<q>")]
)

;; Subtracting lane form, high half of operand 2 (negated in the RTL).
(define_insn "vfmsl_lane_high<mode>_intrinsic"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (fma:VCVTF
          (float_extend:VCVTF
            (neg:<VFMLSEL>
              (vec_select:<VFMLSEL>
                (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
                (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
          (float_extend:VCVTF
            (vec_duplicate:<VFMLSEL>
              (vec_select:HF
                (match_operand:<VFML> 3 "s_register_operand" "x")
                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
          (match_operand:VCVTF 1 "s_register_operand" "0")))]
  "TARGET_FP16FML"
  {
    int idx = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));

    /* Lane lives in the upper half: rebase it and print the high half.  */
    if (idx > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
      {
        operands[5] = GEN_INT (idx - GET_MODE_NUNITS (<VFMLSEL>mode));
        return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3[%c5]";
      }

    operands[5] = GEN_INT (idx);
    return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_lo>3[%c5]";
  }
  [(set_attr "type" "neon_fp_mla_s<q>")]
)

; Used for intrinsics when flag_unsafe_math_optimizations is false.

;; Multiply-accumulate kept as an unspec; operand 1 is tied to the
;; destination and only operands 2 and 3 appear in the assembly.
(define_insn "neon_vmla<mode>_unspec"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (unspec:VDQW
          [(match_operand:VDQW 1 "s_register_operand" "0")
           (match_operand:VDQW 2 "s_register_operand" "w")
           (match_operand:VDQW 3 "s_register_operand" "w")]
          UNSPEC_VMLA))]
  "TARGET_NEON"
  "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
          (const_string "neon_fp_mla_s<q>")
          (const_string "neon_mla_<V_elem_ch><q>")))]
)

;; Long multiply-accumulate: VW multiplicands, <V_widen> accumulator tied
;; to the destination.
(define_insn "neon_vmlal<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "0")
           (match_operand:VW 2 "s_register_operand" "w")
           (match_operand:VW 3 "s_register_operand" "w")]
          VMLAL))]
  "TARGET_NEON"
  "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
  [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
)

;; Multiply-subtract builtin.  Float modes without
;; -funsafe-math-optimizations go to the unspec pattern; everything else
;; uses the mul<mode>3neg<mode>add<mode>_neon pattern.
(define_expand "neon_vmls<mode>"
  [(match_operand:VDQW 0 "s_register_operand")
   (match_operand:VDQW 1 "s_register_operand")
   (match_operand:VDQW 2 "s_register_operand")
   (match_operand:VDQW 3 "s_register_operand")]
  "TARGET_NEON"
{
  if (<Is_float_mode> && !flag_unsafe_math_optimizations)
    emit_insn (gen_neon_vmls<mode>_unspec (operands[0], operands[1],
                                           operands[2], operands[3]));
  else
    emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0],
                 operands[1], operands[2], operands[3]));
  DONE;
})

; Used for intrinsics when flag_unsafe_math_optimizations is false.

;; Multiply-subtract kept as an unspec; operand 1 is tied to the
;; destination and only operands 2 and 3 appear in the assembly.
(define_insn "neon_vmls<mode>_unspec"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (unspec:VDQW
          [(match_operand:VDQW 1 "s_register_operand" "0")
           (match_operand:VDQW 2 "s_register_operand" "w")
           (match_operand:VDQW 3 "s_register_operand" "w")]
          UNSPEC_VMLS))]
  "TARGET_NEON"
  "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
          (const_string "neon_fp_mla_s<q>")
          (const_string "neon_mla_<V_elem_ch><q>")))]
)

;; Long multiply-subtract.
(define_insn "neon_vmlsl<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "0")
           (match_operand:VW 2 "s_register_operand" "w")
           (match_operand:VW 3 "s_register_operand" "w")]
          VMLSL))]
  "TARGET_NEON"
  "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
  [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
)

;; vqdmulh, vqrdmulh
(define_insn "neon_vq<r>dmulh<mode>"
  [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
        (unspec:VMDQI
          [(match_operand:VMDQI 1 "s_register_operand" "w")
           (match_operand:VMDQI 2 "s_register_operand" "w")]
          VQDMULH))]
  "TARGET_NEON"
  "vq<r>dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_sat_mul_<V_elem_ch><q>")]
)

;; vqrdmlah, vqrdmlsh
(define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h<mode>"
  [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
        (unspec:VMDQI
          [(match_operand:VMDQI 1 "s_register_operand" "0")
           (match_operand:VMDQI 2 "s_register_operand" "w")
           (match_operand:VMDQI 3 "s_register_operand" "w")]
          VQRDMLH_AS))]
  "TARGET_NEON_RDMA"
  "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
)

;; Saturating doubling long multiply-accumulate.
(define_insn "neon_vqdmlal<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "0")
           (match_operand:VMDI 2 "s_register_operand" "w")
           (match_operand:VMDI 3 "s_register_operand" "w")]
          UNSPEC_VQDMLAL))]
  "TARGET_NEON"
  "vqdmlal.<V_s_elem>\t%q0, %P2, %P3"
  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
)

;; Saturating doubling long multiply-subtract.
(define_insn "neon_vqdmlsl<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "0")
           (match_operand:VMDI 2 "s_register_operand" "w")
           (match_operand:VMDI 3 "s_register_operand" "w")]
          UNSPEC_VQDMLSL))]
  "TARGET_NEON"
  "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3"
  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
)

;; Long multiply.
(define_insn "neon_vmull<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:VW 1 "s_register_operand" "w")
           (match_operand:VW 2 "s_register_operand" "w")]
          VMULL))]
  "TARGET_NEON"
  "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
  [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
)

;; Saturating doubling long multiply.
(define_insn "neon_vqdmull<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:VMDI 1 "s_register_operand" "w")
           (match_operand:VMDI 2 "s_register_operand" "w")]
          UNSPEC_VQDMULL))]
  "TARGET_NEON"
  "vqdmull.<V_s_elem>\t%q0, %P1, %P2"
  [(set_attr "type" "neon_sat_mul_<V_elem_ch>_long")]
)

;; Subtract builtin.  Float modes without -funsafe-math-optimizations go
;; to the unspec pattern; everything else uses sub<mode>3.
(define_expand "neon_vsub<mode>"
  [(match_operand:VCVTF 0 "s_register_operand")
   (match_operand:VCVTF 1 "s_register_operand")
   (match_operand:VCVTF 2 "s_register_operand")]
  "TARGET_NEON"
{
  if (<Is_float_mode> && !flag_unsafe_math_optimizations)
    emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1],
                                           operands[2]));
  else
    emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
  DONE;
})

; Used for intrinsics when flag_unsafe_math_optimizations is false.

(define_insn "neon_vsub<mode>_unspec"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF
          [(match_operand:VCVTF 1 "s_register_operand" "w")
           (match_operand:VCVTF 2 "s_register_operand" "w")]
          UNSPEC_VSUB))]
  "TARGET_NEON"
  "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
          (const_string "neon_fp_addsub_s<q>")
          (const_string "neon_sub<q>")))]
)

;; Long subtract.
(define_insn "neon_vsubl<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:VDI 1 "s_register_operand" "w")
           (match_operand:VDI 2 "s_register_operand" "w")]
          VSUBL))]
  "TARGET_NEON"
  "vsubl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
  [(set_attr "type" "neon_sub_long")]
)

;; Wide subtract.
(define_insn "neon_vsubw<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "w")
           (match_operand:VDI 2 "s_register_operand" "w")]
          VSUBW))]
  "TARGET_NEON"
  "vsubw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
  [(set_attr "type" "neon_sub_widen")]
)

;; Saturating subtract.
(define_insn "neon_vqsub<sup><mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "w")
           (match_operand:VDQIX 2 "s_register_operand" "w")]
          VQSUB))]
  "TARGET_NEON"
  "vqsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_qsub<q>")]
)

;; Halving subtract.
(define_insn "neon_vhsub<sup><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW
          [(match_operand:VDQIW 1 "s_register_operand" "w")
           (match_operand:VDQIW 2 "s_register_operand" "w")]
          VHSUB))]
  "TARGET_NEON"
  "vhsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_sub_halve<q>")]
)

;; Subtract and narrow (high half), optionally rounding.
(define_insn "neon_v<r>subhn<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow>
          [(match_operand:VN 1 "s_register_operand" "w")
           (match_operand:VN 2 "s_register_operand" "w")]
          VSUBHN))]
  "TARGET_NEON"
  "v<r>subhn.<V_if_elem>\t%P0, %q1, %q2"
  [(set_attr "type" "neon_sub_halve_narrow_q")]
)

;; These may expand to an UNSPEC pattern when a floating point mode is used
;; without unsafe math optimizations.
(define_expand "neon_vc<cmp_op><mode>"
  [(match_operand:<V_cmp_result> 0 "s_register_operand")
     (neg:<V_cmp_result>
       (COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand")
                         (match_operand:VDQW 2 "reg_or_zero_operand")))]
  "TARGET_NEON"
  {
    /* Integer comparisons, and FP comparisons when
       -funsafe-math-optimizations is enabled, use the plain insn.  */
    if (GET_MODE_CLASS (<MODE>mode) != MODE_VECTOR_FLOAT
        || flag_unsafe_math_optimizations)
      {
        emit_insn (gen_neon_vc<cmp_op><mode>_insn (operands[0],
                                                   operands[1],
                                                   operands[2]));
        DONE;
      }

    /* Otherwise use the UNSPEC form.  We cannot simply emit
       gen_neon_vc<cmp_op><mode>_insn_unspec: the unspec pattern exists
       only for float modes, while this expander also iterates over the
       integer modes (for which this point is never reached).  */
    switch (<MODE>mode)
      {
        case E_V2SFmode:
          emit_insn (gen_neon_vc<cmp_op>v2sf_insn_unspec (operands[0],
                                                          operands[1],
                                                          operands[2]));
          break;
        case E_V4SFmode:
          emit_insn (gen_neon_vc<cmp_op>v4sf_insn_unspec (operands[0],
                                                          operands[1],
                                                          operands[2]));
          break;
        default:
          gcc_unreachable ();
      }
    DONE;
  }
)

;; Vector compare in canonical RTL.  Alternative 0 compares two registers,
;; alternative 1 compares against an immediate zero.
(define_insn "neon_vc<cmp_op><mode>_insn"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
        (neg:<V_cmp_result>
          (COMPARISONS:<V_cmp_result>
            (match_operand:VDQW 1 "s_register_operand" "w,w")
            (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz"))))]
  "TARGET_NEON && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
                    && !flag_unsafe_math_optimizations)"
  {
    char pattern[100];
    /* Element-type letter for the mnemonic suffix.  */
    const char *elt_kind
      = (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
         ? "f" : "<cmp_type>");
    /* Second source: a register, or the literal zero.  */
    const char *rhs = which_alternative == 0 ? "%<V_reg>2" : "#0";

    sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
                      " %%<V_reg>1, %s",
             elt_kind, rhs);
    output_asm_insn (pattern, operands);
    return "";
  }
  [(set (attr "type")
        (if_then_else (match_operand 2 "zero_operand")
          (const_string "neon_compare_zero<q>")
          (const_string "neon_compare<q>")))]
)

;; Float vector compare kept as an unspec (NEON_VCMP).
(define_insn "neon_vc<cmp_op_unsp><mode>_insn_unspec"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
        (unspec:<V_cmp_result>
          [(match_operand:VCVTF 1 "s_register_operand" "w,w")
           (match_operand:VCVTF 2 "reg_or_zero_operand" "w,Dz")]
          NEON_VCMP))]
  "TARGET_NEON"
  {
    char pattern[100];
    const char *rhs = which_alternative == 0 ? "%<V_reg>2" : "#0";

    sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
                      " %%<V_reg>1, %s",
             rhs);
    output_asm_insn (pattern, operands);
    return "";
}
  [(set_attr "type" "neon_fp_compare_s<q>")]
)

;; Half-precision vector compare builtin: unspec form unless
;; -funsafe-math-optimizations is enabled.
(define_expand "neon_vc<cmp_op><mode>"
  [(match_operand:<V_cmp_result> 0 "s_register_operand")
   (neg:<V_cmp_result>
     (COMPARISONS:VH
       (match_operand:VH 1 "s_register_operand")
       (match_operand:VH 2 "reg_or_zero_operand")))]
  "TARGET_NEON_FP16INST"
{
  if (GET_MODE_CLASS (<MODE>mode) != MODE_VECTOR_FLOAT
      || flag_unsafe_math_optimizations)
    emit_insn
      (gen_neon_vc<cmp_op><mode>_fp16insn
        (operands[0], operands[1], operands[2]));
  else
    /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
       is enabled.  */
    emit_insn
      (gen_neon_vc<cmp_op><mode>_fp16insn_unspec
        (operands[0], operands[1], operands[2]));
  DONE;
})

;; Half-precision vector compare in canonical RTL.
(define_insn "neon_vc<cmp_op><mode>_fp16insn"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
        (neg:<V_cmp_result>
          (COMPARISONS:<V_cmp_result>
            (match_operand:VH 1 "s_register_operand" "w,w")
            (match_operand:VH 2 "reg_or_zero_operand" "w,Dz"))))]
  "TARGET_NEON_FP16INST
   && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
   && !flag_unsafe_math_optimizations)"
{
  char pattern[100];
  const char *elt_kind
    = (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
       ? "f" : "<cmp_type>");
  const char *rhs = which_alternative == 0 ? "%<V_reg>2" : "#0";

  sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
                    " %%<V_reg>1, %s",
           elt_kind, rhs);
  output_asm_insn (pattern, operands);
  return "";
}
  [(set (attr "type")
        (if_then_else (match_operand 2 "zero_operand")
          (const_string "neon_compare_zero<q>")
          (const_string "neon_compare<q>")))])

;; Half-precision vector compare kept as an unspec (NEON_VCMP).
(define_insn "neon_vc<cmp_op_unsp><mode>_fp16insn_unspec"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
        (unspec:<V_cmp_result>
          [(match_operand:VH 1 "s_register_operand" "w,w")
           (match_operand:VH 2 "reg_or_zero_operand" "w,Dz")]
          NEON_VCMP))]
  "TARGET_NEON_FP16INST"
{
  char pattern[100];
  const char *rhs = which_alternative == 0 ? "%<V_reg>2" : "#0";

  sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
                    " %%<V_reg>1, %s",
           rhs);
  output_asm_insn (pattern, operands);
  return "";
}
  [(set_attr "type" "neon_fp_compare_s<q>")])

;; Unsigned integer compare (GTU/GEU); register operands only.
(define_insn "neon_vc<cmp_op>u<mode>"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
        (neg:<V_cmp_result>
          (GTUGEU:<V_cmp_result>
            (match_operand:VDQIW 1 "s_register_operand" "w")
            (match_operand:VDQIW 2 "s_register_operand" "w"))))]
  "TARGET_NEON"
  "vc<cmp_op>.u%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_compare<q>")]
)

;; Absolute-value compare builtin: canonical RTL under
;; -funsafe-math-optimizations, unspec form otherwise.
(define_expand "neon_vca<cmp_op><mode>"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
        (neg:<V_cmp_result>
          (GTGE:<V_cmp_result>
            (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand"))
            (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand")))))]
  "TARGET_NEON"
  {
    if (!flag_unsafe_math_optimizations)
      emit_insn (gen_neon_vca<cmp_op><mode>_insn_unspec (operands[0],
                                                         operands[1],
                                                         operands[2]));
    else
      emit_insn (gen_neon_vca<cmp_op><mode>_insn (operands[0], operands[1],
                                                  operands[2]));
    DONE;
  }
)

(define_insn "neon_vca<cmp_op><mode>_insn"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand"
"=w")
        (neg:<V_cmp_result>
          (GTGE:<V_cmp_result>
            (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w"))
            (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand" "w")))))]
  "TARGET_NEON && flag_unsafe_math_optimizations"
  "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_compare_s<q>")]
)

;; UNSPEC form of the absolute compare.  The VCVTF neon_vca expander emits
;; this pattern when unsafe math optimizations are disabled.
(define_insn "neon_vca<cmp_op_unsp><mode>_insn_unspec"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
        (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
                                (match_operand:VCVTF 2 "s_register_operand" "w")]
                               NEON_VACMP))]
  "TARGET_NEON"
  "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_compare_s<q>")]
)

;; VH-mode absolute compares.  Emit the plain RTL form when
;; -funsafe-math-optimizations is enabled and the UNSPEC form otherwise.
(define_expand "neon_vca<cmp_op><mode>"
  [(set
    (match_operand:<V_cmp_result> 0 "s_register_operand")
    (neg:<V_cmp_result>
      (GLTE:<V_cmp_result>
        (abs:VH (match_operand:VH 1 "s_register_operand"))
        (abs:VH (match_operand:VH 2 "s_register_operand")))))]
 "TARGET_NEON_FP16INST"
{
  if (flag_unsafe_math_optimizations)
    emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn
               (operands[0], operands[1], operands[2]));
  else
    emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn_unspec
               (operands[0], operands[1], operands[2]));
  DONE;
})

;; RTL form of the VH absolute compare; only valid with unsafe math
;; optimizations enabled.
(define_insn "neon_vca<cmp_op><mode>_fp16insn"
  [(set
    (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
    (neg:<V_cmp_result>
      (GLTE:<V_cmp_result>
        (abs:VH (match_operand:VH 1 "s_register_operand" "w"))
        (abs:VH (match_operand:VH 2 "s_register_operand" "w")))))]
 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
 "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 [(set_attr "type" "neon_fp_compare_s<q>")]
)

;; UNSPEC form of the VH absolute compare, emitted by the VH neon_vca
;; expander when unsafe math optimizations are disabled.  It is gated on
;; TARGET_NEON_FP16INST so that its condition agrees with that expander and
;; with the non-UNSPEC form above.
(define_insn "neon_vca<cmp_op_unsp><mode>_fp16insn_unspec"
 [(set (match_operand:<V_cmp_result> 0
"s_register_operand" "=w")
   (unspec:<V_cmp_result>
    [(match_operand:VH 1 "s_register_operand" "w")
     (match_operand:VH 2 "s_register_operand" "w")]
    NEON_VAGLTE))]
 "TARGET_NEON_FP16INST"
 "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 [(set_attr "type" "neon_fp_compare_s<q>")]
)

;; Compare against zero: forwards to neon_vc<cmp_op><mode> with a zero
;; vector of the operand mode as the second source.
(define_expand "neon_vc<cmp_op>z<mode>"
 [(set
   (match_operand:<V_cmp_result> 0 "s_register_operand")
   (COMPARISONS:<V_cmp_result>
     (match_operand:VH 1 "s_register_operand")
     (const_int 0)))]
 "TARGET_NEON_FP16INST"
 {
  emit_insn (gen_neon_vc<cmp_op><mode> (operands[0], operands[1],
                                        CONST0_RTX (<MODE>mode)));
  DONE;
})

(define_insn "neon_vtst<mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
                       (match_operand:VDQIW 2 "s_register_operand" "w")]
                      UNSPEC_VTST))]
  "TARGET_NEON"
  "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_tst<q>")]
)

(define_insn "neon_vabd<sup><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
                       (match_operand:VDQIW 2 "s_register_operand" "w")]
                      VABD))]
  "TARGET_NEON"
  "vabd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_abd<q>")]
)

;; NOTE(review): this VH pattern is typed neon_abd<q>, whereas the VCVTF
;; form (neon_vabdf<mode>) uses neon_fp_abd_s<q>; confirm which scheduling
;; type is intended.
(define_insn "neon_vabd<mode>"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
    (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
                (match_operand:VH 2 "s_register_operand" "w")]
     UNSPEC_VABD_F))]
 "TARGET_NEON_FP16INST"
 "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_abd<q>")]
)

(define_insn "neon_vabdf<mode>"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
                       (match_operand:VCVTF 2
"s_register_operand" "w")] 2936 UNSPEC_VABD_F))] 2937 "TARGET_NEON" 2938 "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" 2939 [(set_attr "type" "neon_fp_abd_s<q>")] 2940) 2941 2942(define_insn "neon_vabdl<sup><mode>" 2943 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") 2944 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w") 2945 (match_operand:VW 2 "s_register_operand" "w")] 2946 VABDL))] 2947 "TARGET_NEON" 2948 "vabdl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2" 2949 [(set_attr "type" "neon_abd_long")] 2950) 2951 2952(define_insn "neon_vaba<sup><mode>" 2953 [(set (match_operand:VDQIW 0 "s_register_operand" "=w") 2954 (plus:VDQIW (unspec:VDQIW [(match_operand:VDQIW 2 "s_register_operand" "w") 2955 (match_operand:VDQIW 3 "s_register_operand" "w")] 2956 VABD) 2957 (match_operand:VDQIW 1 "s_register_operand" "0")))] 2958 "TARGET_NEON" 2959 "vaba.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3" 2960 [(set_attr "type" "neon_arith_acc<q>")] 2961) 2962 2963(define_insn "neon_vabal<sup><mode>" 2964 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") 2965 (plus:<V_widen> (unspec:<V_widen> [(match_operand:VW 2 "s_register_operand" "w") 2966 (match_operand:VW 3 "s_register_operand" "w")] 2967 VABDL) 2968 (match_operand:<V_widen> 1 "s_register_operand" "0")))] 2969 "TARGET_NEON" 2970 "vabal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3" 2971 [(set_attr "type" "neon_arith_acc<q>")] 2972) 2973 2974(define_expand "<sup>sadv16qi" 2975 [(use (match_operand:V4SI 0 "register_operand")) 2976 (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand")) 2977 (use (match_operand:V16QI 2 "register_operand"))] VABAL) 2978 (use (match_operand:V4SI 3 "register_operand"))] 2979 "TARGET_NEON" 2980 { 2981 rtx reduc = gen_reg_rtx (V8HImode); 2982 rtx op1_highpart = gen_reg_rtx (V8QImode); 2983 rtx op2_highpart = gen_reg_rtx (V8QImode); 2984 2985 emit_insn (gen_neon_vabdl<sup>v8qi (reduc, 2986 gen_lowpart (V8QImode, operands[1]), 2987 gen_lowpart (V8QImode, operands[2]))); 
2988 2989 emit_insn (gen_neon_vget_highv16qi (op1_highpart, operands[1])); 2990 emit_insn (gen_neon_vget_highv16qi (op2_highpart, operands[2])); 2991 emit_insn (gen_neon_vabal<sup>v8qi (reduc, reduc, 2992 op1_highpart, op2_highpart)); 2993 emit_insn (gen_neon_vpadal<sup>v8hi (operands[3], operands[3], reduc)); 2994 2995 emit_move_insn (operands[0], operands[3]); 2996 DONE; 2997 } 2998) 2999 3000(define_insn "neon_v<maxmin><sup><mode>" 3001 [(set (match_operand:VDQIW 0 "s_register_operand" "=w") 3002 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w") 3003 (match_operand:VDQIW 2 "s_register_operand" "w")] 3004 VMAXMIN))] 3005 "TARGET_NEON" 3006 "v<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" 3007 [(set_attr "type" "neon_minmax<q>")] 3008) 3009 3010(define_insn "neon_v<maxmin>f<mode>" 3011 [(set (match_operand:VCVTF 0 "s_register_operand" "=w") 3012 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w") 3013 (match_operand:VCVTF 2 "s_register_operand" "w")] 3014 VMAXMINF))] 3015 "TARGET_NEON" 3016 "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" 3017 [(set_attr "type" "neon_fp_minmax_s<q>")] 3018) 3019 3020(define_insn "neon_v<maxmin>f<mode>" 3021 [(set (match_operand:VH 0 "s_register_operand" "=w") 3022 (unspec:VH 3023 [(match_operand:VH 1 "s_register_operand" "w") 3024 (match_operand:VH 2 "s_register_operand" "w")] 3025 VMAXMINF))] 3026 "TARGET_NEON_FP16INST" 3027 "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" 3028 [(set_attr "type" "neon_fp_minmax_s<q>")] 3029) 3030 3031(define_insn "neon_vp<maxmin>fv4hf" 3032 [(set (match_operand:V4HF 0 "s_register_operand" "=w") 3033 (unspec:V4HF 3034 [(match_operand:V4HF 1 "s_register_operand" "w") 3035 (match_operand:V4HF 2 "s_register_operand" "w")] 3036 VPMAXMINF))] 3037 "TARGET_NEON_FP16INST" 3038 "vp<maxmin>.f16\t%P0, %P1, %P2" 3039 [(set_attr "type" "neon_reduc_minmax")] 3040) 3041 3042(define_insn "neon_<fmaxmin_op><mode>" 3043 [(set 3044 (match_operand:VH 0 
"s_register_operand" "=w") 3045 (unspec:VH 3046 [(match_operand:VH 1 "s_register_operand" "w") 3047 (match_operand:VH 2 "s_register_operand" "w")] 3048 VMAXMINFNM))] 3049 "TARGET_NEON_FP16INST" 3050 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" 3051 [(set_attr "type" "neon_fp_minmax_s<q>")] 3052) 3053 3054;; v<maxmin>nm intrinsics. 3055(define_insn "neon_<fmaxmin_op><mode>" 3056 [(set (match_operand:VCVTF 0 "s_register_operand" "=w") 3057 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w") 3058 (match_operand:VCVTF 2 "s_register_operand" "w")] 3059 VMAXMINFNM))] 3060 "TARGET_NEON && TARGET_VFP5" 3061 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" 3062 [(set_attr "type" "neon_fp_minmax_s<q>")] 3063) 3064 3065;; Vector forms for the IEEE-754 fmax()/fmin() functions 3066(define_insn "<fmaxmin><mode>3" 3067 [(set (match_operand:VCVTF 0 "s_register_operand" "=w") 3068 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w") 3069 (match_operand:VCVTF 2 "s_register_operand" "w")] 3070 VMAXMINFNM))] 3071 "TARGET_NEON && TARGET_VFP5" 3072 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" 3073 [(set_attr "type" "neon_fp_minmax_s<q>")] 3074) 3075 3076(define_expand "neon_vpadd<mode>" 3077 [(match_operand:VD 0 "s_register_operand") 3078 (match_operand:VD 1 "s_register_operand") 3079 (match_operand:VD 2 "s_register_operand")] 3080 "TARGET_NEON" 3081{ 3082 emit_insn (gen_neon_vpadd_internal<mode> (operands[0], operands[1], 3083 operands[2])); 3084 DONE; 3085}) 3086 3087(define_insn "neon_vpaddl<sup><mode>" 3088 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w") 3089 (unspec:<V_double_width> [(match_operand:VDQIW 1 "s_register_operand" "w")] 3090 VPADDL))] 3091 "TARGET_NEON" 3092 "vpaddl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1" 3093 [(set_attr "type" "neon_reduc_add_long")] 3094) 3095 3096(define_insn "neon_vpadal<sup><mode>" 3097 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w") 3098 
(unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0") 3099 (match_operand:VDQIW 2 "s_register_operand" "w")] 3100 VPADAL))] 3101 "TARGET_NEON" 3102 "vpadal.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2" 3103 [(set_attr "type" "neon_reduc_add_acc")] 3104) 3105 3106(define_insn "neon_vp<maxmin><sup><mode>" 3107 [(set (match_operand:VDI 0 "s_register_operand" "=w") 3108 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w") 3109 (match_operand:VDI 2 "s_register_operand" "w")] 3110 VPMAXMIN))] 3111 "TARGET_NEON" 3112 "vp<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" 3113 [(set_attr "type" "neon_reduc_minmax<q>")] 3114) 3115 3116(define_insn "neon_vp<maxmin>f<mode>" 3117 [(set (match_operand:VCVTF 0 "s_register_operand" "=w") 3118 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w") 3119 (match_operand:VCVTF 2 "s_register_operand" "w")] 3120 VPMAXMINF))] 3121 "TARGET_NEON" 3122 "vp<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" 3123 [(set_attr "type" "neon_fp_reduc_minmax_s<q>")] 3124) 3125 3126(define_insn "neon_vrecps<mode>" 3127 [(set (match_operand:VCVTF 0 "s_register_operand" "=w") 3128 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w") 3129 (match_operand:VCVTF 2 "s_register_operand" "w")] 3130 UNSPEC_VRECPS))] 3131 "TARGET_NEON" 3132 "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" 3133 [(set_attr "type" "neon_fp_recps_s<q>")] 3134) 3135 3136(define_insn "neon_vrecps<mode>" 3137 [(set 3138 (match_operand:VH 0 "s_register_operand" "=w") 3139 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w") 3140 (match_operand:VH 2 "s_register_operand" "w")] 3141 UNSPEC_VRECPS))] 3142 "TARGET_NEON_FP16INST" 3143 "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" 3144 [(set_attr "type" "neon_fp_recps_s<q>")] 3145) 3146 3147(define_insn "neon_vrsqrts<mode>" 3148 [(set (match_operand:VCVTF 0 "s_register_operand" "=w") 3149 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w") 3150 
                       (match_operand:VCVTF 2 "s_register_operand" "w")]
                      UNSPEC_VRSQRTS))]
  "TARGET_NEON"
  "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_rsqrts_s<q>")]
)

;; VH-mode variant of neon_vrsqrts, gated on TARGET_NEON_FP16INST.
(define_insn "neon_vrsqrts<mode>"
  [(set
    (match_operand:VH 0 "s_register_operand" "=w")
    (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
                (match_operand:VH 2 "s_register_operand" "w")]
     UNSPEC_VRSQRTS))]
 "TARGET_NEON_FP16INST"
 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 [(set_attr "type" "neon_fp_rsqrts_s<q>")]
)

;; Expands directly to the abs<mode>2 pattern.
(define_expand "neon_vabs<mode>"
  [(match_operand:VDQW 0 "s_register_operand")
   (match_operand:VDQW 1 "s_register_operand")]
  "TARGET_NEON"
{
  emit_insn (gen_abs<mode>2 (operands[0], operands[1]));
  DONE;
})

(define_insn "neon_vqabs<mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
                      UNSPEC_VQABS))]
  "TARGET_NEON"
  "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_qabs<q>")]
)

;; Per-element byte swap, emitted as vrev<element size>.8.
(define_insn "neon_bswap<mode>"
  [(set (match_operand:VDQHSD 0 "register_operand" "=w")
        (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
  "TARGET_NEON"
  "vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_rev<q>")]
)

;; Expands directly to the neg<mode>2 pattern.
(define_expand "neon_vneg<mode>"
  [(match_operand:VDQW 0 "s_register_operand")
   (match_operand:VDQW 1 "s_register_operand")]
  "TARGET_NEON"
{
  emit_insn (gen_neg<mode>2 (operands[0], operands[1]));
  DONE;
})


;; The vcadd and vcmla patterns are deliberately expressed as UNSPECs because
;; their use must guarantee that the source vectors are contiguous.  It would
;; be wrong to describe the operation without also being able to describe the
;; permute that is required; and even if that were done, the permute would
;; have been created as a LOAD_LANES, which means the values in the registers
;; would be in the wrong order.
(define_insn "neon_vcadd<rot><mode>"
  [(set (match_operand:VF 0 "register_operand" "=w")
        (unspec:VF [(match_operand:VF 1 "register_operand" "w")
                    (match_operand:VF 2 "register_operand" "w")]
                    VCADD))]
  "TARGET_COMPLEX"
  "vcadd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, #<rot>"
  [(set_attr "type" "neon_fcadd")]
)

;; Accumulating form: operand 1 is tied to the destination ("0" constraint)
;; and the UNSPEC result is added to it.
(define_insn "neon_vcmla<rot><mode>"
  [(set (match_operand:VF 0 "register_operand" "=w")
        (plus:VF (match_operand:VF 1 "register_operand" "0")
                 (unspec:VF [(match_operand:VF 2 "register_operand" "w")
                             (match_operand:VF 3 "register_operand" "w")]
                             VCMLA)))]
  "TARGET_COMPLEX"
  "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3, #<rot>"
  [(set_attr "type" "neon_fcmla")]
)

;; Lane form: operand 4 is the constant lane index.  The operand array is
;; replaced by the result of neon_vcmla_lane_prepare_operands before the
;; template prints operands 3 and 4 as d%c3[%c4].
(define_insn "neon_vcmla_lane<rot><mode>"
  [(set (match_operand:VF 0 "s_register_operand" "=w")
        (plus:VF (match_operand:VF 1 "s_register_operand" "0")
                 (unspec:VF [(match_operand:VF 2 "s_register_operand" "w")
                             (match_operand:VF 3 "s_register_operand" "<VF_constraint>")
                             (match_operand:SI 4 "const_int_operand" "n")]
                             VCMLA)))]
  "TARGET_COMPLEX"
  {
    operands = neon_vcmla_lane_prepare_operands (operands);
    return "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, d%c3[%c4], #<rot>";
  }
  [(set_attr "type" "neon_fcmla")]
)

;; As neon_vcmla_lane, but the lane source (operand 3) has mode <V_DOUBLE>.
(define_insn "neon_vcmla_laneq<rot><mode>"
  [(set (match_operand:VDF 0 "s_register_operand" "=w")
        (plus:VDF (match_operand:VDF 1 "s_register_operand" "0")
                  (unspec:VDF [(match_operand:VDF 2 "s_register_operand" "w")
                              (match_operand:<V_DOUBLE> 3 "s_register_operand" "<VF_constraint>")
                              (match_operand:SI 4 "const_int_operand" "n")]
                              VCMLA)))]
3253 "TARGET_COMPLEX" 3254 { 3255 operands = neon_vcmla_lane_prepare_operands (operands); 3256 return "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, d%c3[%c4], #<rot>"; 3257 } 3258 [(set_attr "type" "neon_fcmla")] 3259) 3260 3261(define_insn "neon_vcmlaq_lane<rot><mode>" 3262 [(set (match_operand:VQ_HSF 0 "s_register_operand" "=w") 3263 (plus:VQ_HSF (match_operand:VQ_HSF 1 "s_register_operand" "0") 3264 (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "s_register_operand" "w") 3265 (match_operand:<V_HALF> 3 "s_register_operand" "<VF_constraint>") 3266 (match_operand:SI 4 "const_int_operand" "n")] 3267 VCMLA)))] 3268 "TARGET_COMPLEX" 3269 { 3270 operands = neon_vcmla_lane_prepare_operands (operands); 3271 return "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, d%c3[%c4], #<rot>"; 3272 } 3273 [(set_attr "type" "neon_fcmla")] 3274) 3275 3276 3277;; These instructions map to the __builtins for the Dot Product operations. 3278(define_insn "neon_<sup>dot<vsi2qi>" 3279 [(set (match_operand:VCVTI 0 "register_operand" "=w") 3280 (plus:VCVTI (match_operand:VCVTI 1 "register_operand" "0") 3281 (unspec:VCVTI [(match_operand:<VSI2QI> 2 3282 "register_operand" "w") 3283 (match_operand:<VSI2QI> 3 3284 "register_operand" "w")] 3285 DOTPROD)))] 3286 "TARGET_DOTPROD" 3287 "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %<V_reg>3" 3288 [(set_attr "type" "neon_dot<q>")] 3289) 3290 3291;; These instructions map to the __builtins for the Dot Product operations. 3292(define_insn "neon_usdot<vsi2qi>" 3293 [(set (match_operand:VCVTI 0 "register_operand" "=w") 3294 (plus:VCVTI 3295 (unspec:VCVTI 3296 [(match_operand:<VSI2QI> 2 "register_operand" "w") 3297 (match_operand:<VSI2QI> 3 "register_operand" "w")] 3298 UNSPEC_DOT_US) 3299 (match_operand:VCVTI 1 "register_operand" "0")))] 3300 "TARGET_I8MM" 3301 "vusdot.s8\\t%<V_reg>0, %<V_reg>2, %<V_reg>3" 3302 [(set_attr "type" "neon_dot<q>")] 3303) 3304 3305;; These instructions map to the __builtins for the Dot Product 3306;; indexed operations. 
(define_insn "neon_<sup>dot_lane<vsi2qi>"
  [(set (match_operand:VCVTI 0 "register_operand" "=w")
        (plus:VCVTI (match_operand:VCVTI 1 "register_operand" "0")
                    (unspec:VCVTI [(match_operand:<VSI2QI> 2
                                                        "register_operand" "w")
                                   (match_operand:V8QI 3 "register_operand" "t")
                                   (match_operand:SI 4 "immediate_operand" "i")]
                DOTPROD)))]
  "TARGET_DOTPROD"
  {
    /* Rewrite the lane index through NEON_ENDIAN_LANE_N before printing it;
       presumably this maps the intrinsic lane number to the register lane
       on big-endian targets -- TODO confirm against the macro.  */
    operands[4]
      = GEN_INT (NEON_ENDIAN_LANE_N (V8QImode, INTVAL (operands[4])));
    return "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %P3[%c4]";
  }
  [(set_attr "type" "neon_dot<q>")]
)

;; These instructions map to the __builtins for the Dot Product
;; indexed operations in the v8.6 I8MM extension.
(define_insn "neon_<sup>dot_lane<vsi2qi>"
  [(set (match_operand:VCVTI 0 "register_operand" "=w")
        (plus:VCVTI
          (unspec:VCVTI
            [(match_operand:<VSI2QI> 2 "register_operand" "w")
             (match_operand:V8QI 3 "register_operand" "t")
             (match_operand:SI 4 "immediate_operand" "i")]
            DOTPROD_I8MM)
          (match_operand:VCVTI 1 "register_operand" "0")))]
  "TARGET_I8MM"
  {
    /* NOTE(review): this rebuilds the lane constant with the same value.
       Unlike the DOTPROD variant above, no NEON_ENDIAN_LANE_N adjustment
       is applied -- confirm that this is intentional.  */
    operands[4] = GEN_INT (INTVAL (operands[4]));
    return "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %P3[%c4]";
  }
  [(set_attr "type" "neon_dot<q>")]
)

;; These expanders map to the Dot Product optab the vectorizer checks for.
;; The auto-vectorizer expects a dot product builtin that also does an
;; accumulation into the provided register.
;; Given the following pattern
;;
;; for (i=0; i<len; i++) {
;;     c = a[i] * b[i];
;;     r += c;
;; }
;; return r;
;;
;; This can be auto-vectorized to
;; r  = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
;;
;; given enough iterations.  However the vectorizer can keep unrolling the loop
;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
;; ...
3361;; 3362;; and so the vectorizer provides r, in which the result has to be accumulated. 3363(define_expand "<sup>dot_prod<vsi2qi>" 3364 [(set (match_operand:VCVTI 0 "register_operand") 3365 (plus:VCVTI (unspec:VCVTI [(match_operand:<VSI2QI> 1 3366 "register_operand") 3367 (match_operand:<VSI2QI> 2 3368 "register_operand")] 3369 DOTPROD) 3370 (match_operand:VCVTI 3 "register_operand")))] 3371 "TARGET_DOTPROD" 3372{ 3373 emit_insn ( 3374 gen_neon_<sup>dot<vsi2qi> (operands[3], operands[3], operands[1], 3375 operands[2])); 3376 emit_insn (gen_rtx_SET (operands[0], operands[3])); 3377 DONE; 3378}) 3379 3380(define_expand "neon_copysignf<mode>" 3381 [(match_operand:VCVTF 0 "register_operand") 3382 (match_operand:VCVTF 1 "register_operand") 3383 (match_operand:VCVTF 2 "register_operand")] 3384 "TARGET_NEON" 3385 "{ 3386 rtx v_bitmask_cast; 3387 rtx v_bitmask = gen_reg_rtx (<VCVTF:V_cmp_result>mode); 3388 rtx c = gen_int_mode (0x80000000, SImode); 3389 3390 emit_move_insn (v_bitmask, 3391 gen_const_vec_duplicate (<VCVTF:V_cmp_result>mode, c)); 3392 emit_move_insn (operands[0], operands[2]); 3393 v_bitmask_cast = simplify_gen_subreg (<MODE>mode, v_bitmask, 3394 <VCVTF:V_cmp_result>mode, 0); 3395 emit_insn (gen_neon_vbsl<mode> (operands[0], v_bitmask_cast, operands[0], 3396 operands[1])); 3397 3398 DONE; 3399 }" 3400) 3401 3402(define_insn "neon_vqneg<mode>" 3403 [(set (match_operand:VDQIW 0 "s_register_operand" "=w") 3404 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")] 3405 UNSPEC_VQNEG))] 3406 "TARGET_NEON" 3407 "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1" 3408 [(set_attr "type" "neon_qneg<q>")] 3409) 3410 3411(define_insn "neon_vcls<mode>" 3412 [(set (match_operand:VDQIW 0 "s_register_operand" "=w") 3413 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")] 3414 UNSPEC_VCLS))] 3415 "TARGET_NEON" 3416 "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1" 3417 [(set_attr "type" "neon_cls<q>")] 3418) 3419 3420(define_insn "clz<mode>2" 3421 [(set 
(match_operand:VDQIW 0 "s_register_operand" "=w") 3422 (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))] 3423 "TARGET_NEON" 3424 "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1" 3425 [(set_attr "type" "neon_cnt<q>")] 3426) 3427 3428(define_expand "neon_vclz<mode>" 3429 [(match_operand:VDQIW 0 "s_register_operand") 3430 (match_operand:VDQIW 1 "s_register_operand")] 3431 "TARGET_NEON" 3432{ 3433 emit_insn (gen_clz<mode>2 (operands[0], operands[1])); 3434 DONE; 3435}) 3436 3437(define_insn "popcount<mode>2" 3438 [(set (match_operand:VE 0 "s_register_operand" "=w") 3439 (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))] 3440 "TARGET_NEON" 3441 "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1" 3442 [(set_attr "type" "neon_cnt<q>")] 3443) 3444 3445(define_expand "neon_vcnt<mode>" 3446 [(match_operand:VE 0 "s_register_operand") 3447 (match_operand:VE 1 "s_register_operand")] 3448 "TARGET_NEON" 3449{ 3450 emit_insn (gen_popcount<mode>2 (operands[0], operands[1])); 3451 DONE; 3452}) 3453 3454(define_insn "neon_vrecpe<mode>" 3455 [(set (match_operand:VH 0 "s_register_operand" "=w") 3456 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")] 3457 UNSPEC_VRECPE))] 3458 "TARGET_NEON_FP16INST" 3459 "vrecpe.f16\t%<V_reg>0, %<V_reg>1" 3460 [(set_attr "type" "neon_fp_recpe_s<q>")] 3461) 3462 3463(define_insn "neon_vrecpe<mode>" 3464 [(set (match_operand:V32 0 "s_register_operand" "=w") 3465 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")] 3466 UNSPEC_VRECPE))] 3467 "TARGET_NEON" 3468 "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1" 3469 [(set_attr "type" "neon_fp_recpe_s<q>")] 3470) 3471 3472(define_insn "neon_vrsqrte<mode>" 3473 [(set (match_operand:V32 0 "s_register_operand" "=w") 3474 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")] 3475 UNSPEC_VRSQRTE))] 3476 "TARGET_NEON" 3477 "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1" 3478 [(set_attr "type" "neon_fp_rsqrte_s<q>")] 3479) 3480 3481(define_expand "neon_vmvn<mode>" 3482 [(match_operand:VDQIW 0 
"s_register_operand") 3483 (match_operand:VDQIW 1 "s_register_operand")] 3484 "TARGET_NEON" 3485{ 3486 emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[1])); 3487 DONE; 3488}) 3489 3490(define_insn "neon_vget_lane<mode>_sext_internal" 3491 [(set (match_operand:SI 0 "s_register_operand" "=r") 3492 (sign_extend:SI 3493 (vec_select:<V_elem> 3494 (match_operand:VD 1 "s_register_operand" "w") 3495 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] 3496 "TARGET_NEON" 3497{ 3498 if (BYTES_BIG_ENDIAN) 3499 { 3500 int elt = INTVAL (operands[2]); 3501 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt; 3502 operands[2] = GEN_INT (elt); 3503 } 3504 return "vmov.s<V_sz_elem>\t%0, %P1[%c2]"; 3505} 3506 [(set_attr "type" "neon_to_gp")] 3507) 3508 3509(define_insn "neon_vget_lane<mode>_zext_internal" 3510 [(set (match_operand:SI 0 "s_register_operand" "=r") 3511 (zero_extend:SI 3512 (vec_select:<V_elem> 3513 (match_operand:VD 1 "s_register_operand" "w") 3514 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] 3515 "TARGET_NEON" 3516{ 3517 if (BYTES_BIG_ENDIAN) 3518 { 3519 int elt = INTVAL (operands[2]); 3520 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt; 3521 operands[2] = GEN_INT (elt); 3522 } 3523 return "vmov.u<V_sz_elem>\t%0, %P1[%c2]"; 3524} 3525 [(set_attr "type" "neon_to_gp")] 3526) 3527 3528(define_insn "neon_vget_lane<mode>_sext_internal" 3529 [(set (match_operand:SI 0 "s_register_operand" "=r") 3530 (sign_extend:SI 3531 (vec_select:<V_elem> 3532 (match_operand:VQ2 1 "s_register_operand" "w") 3533 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] 3534 "TARGET_NEON" 3535{ 3536 rtx ops[3]; 3537 int regno = REGNO (operands[1]); 3538 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2; 3539 unsigned int elt = INTVAL (operands[2]); 3540 unsigned int elt_adj = elt % halfelts; 3541 3542 if (BYTES_BIG_ENDIAN) 3543 elt_adj = halfelts - 1 - elt_adj; 3544 3545 ops[0] = operands[0]; 3546 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / 
halfelts)); 3547 ops[2] = GEN_INT (elt_adj); 3548 output_asm_insn ("vmov.s<V_sz_elem>\t%0, %P1[%c2]", ops); 3549 3550 return ""; 3551} 3552 [(set_attr "type" "neon_to_gp_q")] 3553) 3554 3555(define_insn "neon_vget_lane<mode>_zext_internal" 3556 [(set (match_operand:SI 0 "s_register_operand" "=r") 3557 (zero_extend:SI 3558 (vec_select:<V_elem> 3559 (match_operand:VQ2 1 "s_register_operand" "w") 3560 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] 3561 "TARGET_NEON" 3562{ 3563 rtx ops[3]; 3564 int regno = REGNO (operands[1]); 3565 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2; 3566 unsigned int elt = INTVAL (operands[2]); 3567 unsigned int elt_adj = elt % halfelts; 3568 3569 if (BYTES_BIG_ENDIAN) 3570 elt_adj = halfelts - 1 - elt_adj; 3571 3572 ops[0] = operands[0]; 3573 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts)); 3574 ops[2] = GEN_INT (elt_adj); 3575 output_asm_insn ("vmov.u<V_sz_elem>\t%0, %P1[%c2]", ops); 3576 3577 return ""; 3578} 3579 [(set_attr "type" "neon_to_gp_q")] 3580) 3581 3582(define_expand "neon_vget_lane<mode>" 3583 [(match_operand:<V_ext> 0 "s_register_operand") 3584 (match_operand:VDQW 1 "s_register_operand") 3585 (match_operand:SI 2 "immediate_operand")] 3586 "TARGET_NEON" 3587{ 3588 if (BYTES_BIG_ENDIAN) 3589 { 3590 /* The intrinsics are defined in terms of a model where the 3591 element ordering in memory is vldm order, whereas the generic 3592 RTL is defined in terms of a model where the element ordering 3593 in memory is array order. Convert the lane number to conform 3594 to this model. 
*/ 3595 unsigned int elt = INTVAL (operands[2]); 3596 unsigned int reg_nelts 3597 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode); 3598 elt ^= reg_nelts - 1; 3599 operands[2] = GEN_INT (elt); 3600 } 3601 3602 if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32) 3603 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1], 3604 operands[2])); 3605 else 3606 emit_insn (gen_neon_vget_lane<mode>_sext_internal (operands[0], 3607 operands[1], 3608 operands[2])); 3609 DONE; 3610}) 3611 3612(define_expand "neon_vget_laneu<mode>" 3613 [(match_operand:<V_ext> 0 "s_register_operand") 3614 (match_operand:VDQIW 1 "s_register_operand") 3615 (match_operand:SI 2 "immediate_operand")] 3616 "TARGET_NEON" 3617{ 3618 if (BYTES_BIG_ENDIAN) 3619 { 3620 /* The intrinsics are defined in terms of a model where the 3621 element ordering in memory is vldm order, whereas the generic 3622 RTL is defined in terms of a model where the element ordering 3623 in memory is array order. Convert the lane number to conform 3624 to this model. 
*/
      unsigned int elt = INTVAL (operands[2]);
      unsigned int reg_nelts
        = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
      elt ^= reg_nelts - 1;
      operands[2] = GEN_INT (elt);
    }

  /* 32-bit elements go through vec_extract; narrower elements use the
     zero-extending lane move.  */
  if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
    emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1],
                                                operands[2]));
  else
    emit_insn (gen_neon_vget_lane<mode>_zext_internal (operands[0],
                                                       operands[1],
                                                       operands[2]));
  DONE;
})

;; DImode source: the expander simply moves operand 1 to operand 0.  The
;; lane index (operand 2) is accepted but not used.
(define_expand "neon_vget_lanedi"
  [(match_operand:DI 0 "s_register_operand")
   (match_operand:DI 1 "s_register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON"
{
  emit_move_insn (operands[0], operands[1]);
  DONE;
})

;; Extract one DImode half of a V2DI register: lane 0 selects the low part
;; and lane 1 the high part, after the lane index has been flipped on
;; big-endian targets.
(define_expand "neon_vget_lanev2di"
  [(match_operand:DI 0 "s_register_operand")
   (match_operand:V2DI 1 "s_register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON"
{
  int lane;

  if (BYTES_BIG_ENDIAN)
    {
      /* The intrinsics are defined in terms of a model where the
         element ordering in memory is vldm order, whereas the generic
         RTL is defined in terms of a model where the element ordering
         in memory is array order.  Convert the lane number to conform
         to this model.  */
      unsigned int elt = INTVAL (operands[2]);
      unsigned int reg_nelts = 2;
      elt ^= reg_nelts - 1;
      operands[2] = GEN_INT (elt);
    }

  lane = INTVAL (operands[2]);
  gcc_assert ((lane ==0) || (lane == 1));
  emit_move_insn (operands[0], lane == 0
                                ?
gen_lowpart (DImode, operands[1])
                                : gen_highpart (DImode, operands[1]));
  DONE;
})

;; Insert scalar operand 1 into lane operand 3 of vector operand 2.  The
;; lane is passed to vec_set<mode>_internal as the bit mask 1 << elt, after
;; the index has been adjusted on big-endian targets.
(define_expand "neon_vset_lane<mode>"
  [(match_operand:VDQ 0 "s_register_operand")
   (match_operand:<V_elem> 1 "s_register_operand")
   (match_operand:VDQ 2 "s_register_operand")
   (match_operand:SI 3 "immediate_operand")]
  "TARGET_NEON"
{
  unsigned int elt = INTVAL (operands[3]);

  if (BYTES_BIG_ENDIAN)
    {
      unsigned int reg_nelts
        = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
      elt ^= reg_nelts - 1;
    }

  emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
                                         GEN_INT (1 << elt), operands[2]));
  DONE;
})

;; As with neon_vget_lanedi, the expander is a plain move of operand 1 to
;; operand 0; the old value (operand 2) and the lane index (operand 3) are
;; accepted but ignored.

(define_expand "neon_vset_lanedi"
  [(match_operand:DI 0 "s_register_operand")
   (match_operand:DI 1 "s_register_operand")
   (match_operand:DI 2 "s_register_operand")
   (match_operand:SI 3 "immediate_operand")]
  "TARGET_NEON"
{
  emit_move_insn (operands[0], operands[1]);
  DONE;
})

;; Reinterpret the DImode operand in <MODE> with gen_lowpart and move it
;; into the destination.
(define_expand "neon_vcreate<mode>"
  [(match_operand:VD_RE 0 "s_register_operand")
   (match_operand:DI 1 "general_operand")]
  "TARGET_NEON"
{
  rtx src = gen_lowpart (<MODE>mode, operands[1]);
  emit_move_insn (operands[0], src);
  DONE;
})

;; Duplicate a scalar held in a core register ("r") into every element.
(define_insn "neon_vdup_n<mode>"
  [(set (match_operand:VX 0 "s_register_operand" "=w")
        (vec_duplicate:VX (match_operand:<V_elem> 1 "s_register_operand" "r")))]
  "TARGET_NEON"
  "vdup.<V_sz_elem>\t%<V_reg>0, %1"
  [(set_attr "type" "neon_from_gp<q>")]
)

(define_insn "neon_vdup_nv4hf"
  [(set (match_operand:V4HF 0 "s_register_operand" "=w")
        (vec_duplicate:V4HF (match_operand:HF 1 "s_register_operand" "r")))]
  "TARGET_NEON"
  "vdup.16\t%P0, %1"
  [(set_attr "type" "neon_from_gp")]
)

(define_insn "neon_vdup_nv8hf"
  [(set (match_operand:V8HF 0
"s_register_operand" "=w") 3743 (vec_duplicate:V8HF (match_operand:HF 1 "s_register_operand" "r")))] 3744 "TARGET_NEON" 3745 "vdup.16\t%q0, %1" 3746 [(set_attr "type" "neon_from_gp_q")] 3747) 3748 3749(define_insn "neon_vdup_nv4bf" 3750 [(set (match_operand:V4BF 0 "s_register_operand" "=w") 3751 (vec_duplicate:V4BF (match_operand:BF 1 "s_register_operand" "r")))] 3752 "TARGET_NEON" 3753 "vdup.16\t%P0, %1" 3754 [(set_attr "type" "neon_from_gp")] 3755) 3756 3757(define_insn "neon_vdup_nv8bf" 3758 [(set (match_operand:V8BF 0 "s_register_operand" "=w") 3759 (vec_duplicate:V8BF (match_operand:BF 1 "s_register_operand" "r")))] 3760 "TARGET_NEON" 3761 "vdup.16\t%q0, %1" 3762 [(set_attr "type" "neon_from_gp_q")] 3763) 3764 3765(define_insn "neon_vdup_n<mode>" 3766 [(set (match_operand:V32 0 "s_register_operand" "=w,w") 3767 (vec_duplicate:V32 (match_operand:<V_elem> 1 "s_register_operand" "r,t")))] 3768 "TARGET_NEON" 3769 "@ 3770 vdup.<V_sz_elem>\t%<V_reg>0, %1 3771 vdup.<V_sz_elem>\t%<V_reg>0, %y1" 3772 [(set_attr "type" "neon_from_gp<q>,neon_dup<q>")] 3773) 3774 3775(define_expand "neon_vdup_ndi" 3776 [(match_operand:DI 0 "s_register_operand") 3777 (match_operand:DI 1 "s_register_operand")] 3778 "TARGET_NEON" 3779{ 3780 emit_move_insn (operands[0], operands[1]); 3781 DONE; 3782} 3783) 3784 3785(define_insn "neon_vdup_nv2di" 3786 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w") 3787 (vec_duplicate:V2DI (match_operand:DI 1 "s_register_operand" "r,w")))] 3788 "TARGET_NEON" 3789 "@ 3790 vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1 3791 vmov\t%e0, %P1\;vmov\t%f0, %P1" 3792 [(set_attr "length" "8") 3793 (set_attr "type" "multiple")] 3794) 3795 3796(define_insn "neon_vdup_lane<mode>_internal" 3797 [(set (match_operand:VDQW 0 "s_register_operand" "=w") 3798 (vec_duplicate:VDQW 3799 (vec_select:<V_elem> 3800 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w") 3801 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] 3802 "TARGET_NEON" 3803{ 3804 if 
(BYTES_BIG_ENDIAN) 3805 { 3806 int elt = INTVAL (operands[2]); 3807 elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt; 3808 operands[2] = GEN_INT (elt); 3809 } 3810 if (<Is_d_reg>) 3811 return "vdup.<V_sz_elem>\t%P0, %P1[%c2]"; 3812 else 3813 return "vdup.<V_sz_elem>\t%q0, %P1[%c2]"; 3814} 3815 [(set_attr "type" "neon_dup<q>")] 3816) 3817 3818(define_insn "neon_vdup_lane<mode>_internal" 3819 [(set (match_operand:VHFBF 0 "s_register_operand" "=w") 3820 (vec_duplicate:VHFBF 3821 (vec_select:<V_elem> 3822 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w") 3823 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] 3824 "TARGET_NEON && (TARGET_FP16 || TARGET_BF16_SIMD)" 3825{ 3826 if (BYTES_BIG_ENDIAN) 3827 { 3828 int elt = INTVAL (operands[2]); 3829 elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt; 3830 operands[2] = GEN_INT (elt); 3831 } 3832 if (<Is_d_reg>) 3833 return "vdup.<V_sz_elem>\t%P0, %P1[%c2]"; 3834 else 3835 return "vdup.<V_sz_elem>\t%q0, %P1[%c2]"; 3836} 3837 [(set_attr "type" "neon_dup<q>")] 3838) 3839 3840(define_expand "neon_vdup_lane<mode>" 3841 [(match_operand:VDQW 0 "s_register_operand") 3842 (match_operand:<V_double_vector_mode> 1 "s_register_operand") 3843 (match_operand:SI 2 "immediate_operand")] 3844 "TARGET_NEON" 3845{ 3846 if (BYTES_BIG_ENDIAN) 3847 { 3848 unsigned int elt = INTVAL (operands[2]); 3849 unsigned int reg_nelts 3850 = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode); 3851 elt ^= reg_nelts - 1; 3852 operands[2] = GEN_INT (elt); 3853 } 3854 emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1], 3855 operands[2])); 3856 DONE; 3857}) 3858 3859(define_expand "neon_vdup_lane<mode>" 3860 [(match_operand:VHFBF 0 "s_register_operand") 3861 (match_operand:<V_double_vector_mode> 1 "s_register_operand") 3862 (match_operand:SI 2 "immediate_operand")] 3863 "TARGET_NEON && (TARGET_FP16 || TARGET_BF16_SIMD)" 3864{ 3865 if (BYTES_BIG_ENDIAN) 3866 { 3867 unsigned int elt = INTVAL 
(operands[2]); 3868 unsigned int reg_nelts 3869 = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode); 3870 elt ^= reg_nelts - 1; 3871 operands[2] = GEN_INT (elt); 3872 } 3873 emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1], 3874 operands[2])); 3875 DONE; 3876}) 3877 3878; Scalar index is ignored, since only zero is valid here. 3879(define_expand "neon_vdup_lanedi" 3880 [(match_operand:DI 0 "s_register_operand") 3881 (match_operand:DI 1 "s_register_operand") 3882 (match_operand:SI 2 "immediate_operand")] 3883 "TARGET_NEON" 3884{ 3885 emit_move_insn (operands[0], operands[1]); 3886 DONE; 3887}) 3888 3889; Likewise for v2di, as the DImode second operand has only a single element. 3890(define_expand "neon_vdup_lanev2di" 3891 [(match_operand:V2DI 0 "s_register_operand") 3892 (match_operand:DI 1 "s_register_operand") 3893 (match_operand:SI 2 "immediate_operand")] 3894 "TARGET_NEON" 3895{ 3896 emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1])); 3897 DONE; 3898}) 3899 3900; Disabled before reload because we don't want combine doing something silly, 3901; but used by the post-reload expansion of neon_vcombine. 3902(define_insn "*neon_vswp<mode>" 3903 [(set (match_operand:VDQX 0 "s_register_operand" "+w") 3904 (match_operand:VDQX 1 "s_register_operand" "+w")) 3905 (set (match_dup 1) (match_dup 0))] 3906 "TARGET_NEON && reload_completed" 3907 "vswp\t%<V_reg>0, %<V_reg>1" 3908 [(set_attr "type" "neon_permute<q>")] 3909) 3910 3911;; In this insn, operand 1 should be low, and operand 2 the high part of the 3912;; dest vector. 3913;; FIXME: A different implementation of this builtin could make it much 3914;; more likely that we wouldn't actually need to output anything (we could make 3915;; it so that the reg allocator puts things in the right places magically 3916;; instead). Lack of subregs for vectors makes that tricky though, I think. 

;; Concatenate two 64-bit VDX vectors into one <V_DOUBLE> vector.  The insn
;; itself outputs "#" and is split after reload by neon_split_vcombine.
(define_insn_and_split "neon_vcombine<mode>"
  [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w")
        (vec_concat:<V_DOUBLE>
          (match_operand:VDX 1 "s_register_operand" "w")
          (match_operand:VDX 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  neon_split_vcombine (operands);
  DONE;
}
[(set_attr "type" "multiple")]
)

;; Move the <V_HALF> subreg of operand 1 at byte offset
;; GET_MODE_SIZE (<V_HALF>mode) into operand 0.
(define_expand "neon_vget_high<mode>"
  [(match_operand:<V_HALF> 0 "s_register_operand")
   (match_operand:VQXBF 1 "s_register_operand")]
  "TARGET_NEON"
{
  emit_move_insn (operands[0],
                  simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
                                       GET_MODE_SIZE (<V_HALF>mode)));
  DONE;
})

;; Move the <V_HALF> subreg of operand 1 at byte offset 0 into operand 0.
;; NOTE(review): neon_vget_high<mode> iterates over VQXBF while this pattern
;; uses VQX -- confirm whether the difference is intentional.
(define_expand "neon_vget_low<mode>"
  [(match_operand:<V_HALF> 0 "s_register_operand")
   (match_operand:VQX 1 "s_register_operand")]
  "TARGET_NEON"
{
  emit_move_insn (operands[0],
                  simplify_gen_subreg (<V_HALF>mode, operands[1],
                                       <MODE>mode, 0));
  DONE;
})

;; Signed integer -> float vector conversion (vcvt.f32.s32).  Only enabled
;; when !flag_rounding_math.
(define_insn "float<mode><V_cvtto>2"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
  "TARGET_NEON && !flag_rounding_math"
  "vcvt.f32.s32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
)

;; Unsigned integer -> float vector conversion (vcvt.f32.u32).  Only enabled
;; when !flag_rounding_math.
(define_insn "floatuns<mode><V_cvtto>2"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unsigned_float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
  "TARGET_NEON && !flag_rounding_math"
  "vcvt.f32.u32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
)

;; Float -> signed integer vector conversion (vcvt.s32.f32).
(define_insn "fix_trunc<mode><V_cvtto>2"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vcvt.s32.f32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
)

;; Float -> unsigned integer vector conversion (vcvt.u32.f32).
(define_insn "fixuns_trunc<mode><V_cvtto>2"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unsigned_fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vcvt.u32.f32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
)

;; Builtin (unspec) form of the f32 -> 32-bit integer conversion; <sup>
;; supplies the signedness letter in the mnemonic suffix.
(define_insn "neon_vcvt<sup><mode>"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")]
                          VCVT_US))]
  "TARGET_NEON"
  "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
)

;; Builtin (unspec) form of the 32-bit integer -> f32 conversion.
(define_insn "neon_vcvt<sup><mode>"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")]
                          VCVT_US))]
  "TARGET_NEON"
  "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
)

;; Widening V4HF -> V4SF conversion; requires TARGET_FP16.
(define_insn "neon_vcvtv4sfv4hf"
  [(set (match_operand:V4SF 0 "s_register_operand" "=w")
        (unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")]
                          UNSPEC_VCVT))]
  "TARGET_NEON && TARGET_FP16"
  "vcvt.f32.f16\t%q0, %P1"
  [(set_attr "type" "neon_fp_cvt_widen_h")]
)

;; Narrowing V4SF -> V4HF conversion; requires TARGET_FP16.
(define_insn "neon_vcvtv4hfv4sf"
  [(set (match_operand:V4HF 0 "s_register_operand" "=w")
        (unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")]
                          UNSPEC_VCVT))]
  "TARGET_NEON && TARGET_FP16"
  "vcvt.f16.f32\t%P0, %q1"
  [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
)

;; 16-bit integer -> f16 conversion (VCVTHI modes); needs TARGET_NEON_FP16INST.
(define_insn "neon_vcvt<sup><mode>"
 [(set
   (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
   (unspec:<VH_CVTTO>
    [(match_operand:VCVTHI 1 "s_register_operand" "w")]
    VCVT_US))]
 "TARGET_NEON_FP16INST"
 "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
)

;; f16 -> 16-bit integer conversion (VH modes); needs TARGET_NEON_FP16INST.
(define_insn "neon_vcvt<sup><mode>"
 [(set
   (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
   (unspec:<VH_CVTTO>
    [(match_operand:VH 1 "s_register_operand" "w")]
    VCVT_US))]
 "TARGET_NEON_FP16INST"
 "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
)

;; f32 -> 32-bit conversion with an extra immediate (operand 2), which is
;; range-checked by arm_const_bounds and printed as the third assembler
;; operand.
(define_insn "neon_vcvt<sup>_n<mode>"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
                           (match_operand:SI 2 "immediate_operand" "i")]
                          VCVT_US_N))]
  "TARGET_NEON"
{
  arm_const_bounds (operands[2], 1, 33);
  return "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
)

;; f16 variant of the immediate-carrying conversion above.
(define_insn "neon_vcvt<sup>_n<mode>"
 [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
   (unspec:<VH_CVTTO>
    [(match_operand:VH 1 "s_register_operand" "w")
     (match_operand:SI 2 "immediate_operand" "i")]
    VCVT_US_N))]
  "TARGET_NEON_FP16INST"
{
  /* NOTE(review): the lower bound here is 0, whereas the 32-bit variants
     of this pattern pass 1 -- confirm that accepting 0 is intended.  */
  arm_const_bounds (operands[2], 0, 17);
  return "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1, %2";
}
 [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
)

;; 32-bit integer -> f32 conversion with a range-checked immediate.
(define_insn "neon_vcvt<sup>_n<mode>"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")
                           (match_operand:SI 2 "immediate_operand" "i")]
                          VCVT_US_N))]
  "TARGET_NEON"
{
  arm_const_bounds (operands[2], 1, 33);
  return "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
)

;; 16-bit integer -> f16 conversion with a range-checked immediate.
(define_insn "neon_vcvt<sup>_n<mode>"
 [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
   (unspec:<VH_CVTTO>
    [(match_operand:VCVTHI 1 "s_register_operand" "w")
     (match_operand:SI 2 "immediate_operand" "i")]
    VCVT_US_N))]
  "TARGET_NEON_FP16INST"
{
  /* NOTE(review): lower bound 0 here versus 1 in the 32-bit variants --
     confirm that accepting 0 is intended.  */
  arm_const_bounds (operands[2], 0, 17);
  return "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1, %2";
}
 [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
)

;; f16 -> 16-bit integer conversions whose mnemonic carries a <vcvth_op>
;; suffix taken from the VCVT_HF_US iterator.
(define_insn "neon_vcvt<vcvth_op><sup><mode>"
 [(set
   (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
   (unspec:<VH_CVTTO>
    [(match_operand:VH 1 "s_register_operand" "w")]
    VCVT_HF_US))]
 "TARGET_NEON_FP16INST"
 "vcvt<vcvth_op>.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
)

;; Narrowing move: operand 1 is printed with %q1 and the <V_narrow> result
;; with %P0.
(define_insn "neon_vmovn<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
                           UNSPEC_VMOVN))]
  "TARGET_NEON"
  "vmovn.<V_if_elem>\t%P0, %q1"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

;; vqmovn with the signedness letter supplied by <sup>.
(define_insn "neon_vqmovn<sup><mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
                           VQMOVN))]
  "TARGET_NEON"
  "vqmovn.<sup>%#<V_sz_elem>\t%P0, %q1"
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)

;; vqmovun, same operand shapes as vqmovn above.
(define_insn "neon_vqmovun<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
                           UNSPEC_VQMOVUN))]
  "TARGET_NEON"
  "vqmovun.<V_s_elem>\t%P0, %q1"
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)

;; Widening move: operand 1 is printed with %P1 and the <V_widen> result
;; with %q0.
(define_insn "neon_vmovl<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")]
                          VMOVL))]
  "TARGET_NEON"
  "vmovl.<sup>%#<V_sz_elem>\t%q0, %P1"
  [(set_attr "type" "neon_shift_imm_long")]
)

;; Multiply operand 1 by lane %c3 of operand 2 (VMD modes).  The "type"
;; attribute is chosen by <Is_float_mode>.
(define_insn "neon_vmul_lane<mode>"
  [(set (match_operand:VMD 0 "s_register_operand" "=w")
        (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "w")
                     (match_operand:VMD 2 "s_register_operand"
                                    "<scalar_mul_constraint>")
                     (match_operand:SI 3 "immediate_operand" "i")]
                    UNSPEC_VMUL_LANE))]
  "TARGET_NEON"
{
  return "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]";
}
  [(set (attr "type")
     (if_then_else (match_test "<Is_float_mode>")
                   (const_string "neon_fp_mul_s_scalar<q>")
                   (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
)

;; VMQ variant: destination and operand 1 are printed with %q, while the
;; scalar source (operand 2) is a <V_HALF> vector printed with %P2.
(define_insn "neon_vmul_lane<mode>"
  [(set (match_operand:VMQ 0 "s_register_operand" "=w")
        (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "w")
                     (match_operand:<V_HALF> 2 "s_register_operand"
                                             "<scalar_mul_constraint>")
                     (match_operand:SI 3 "immediate_operand" "i")]
                    UNSPEC_VMUL_LANE))]
  "TARGET_NEON"
{
  return "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]";
}
  [(set (attr "type")
     (if_then_else (match_test "<Is_float_mode>")
                   (const_string "neon_fp_mul_s_scalar<q>")
                   (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
)

;; f16 variant (VH modes): the scalar source is always a V4HF vector.
(define_insn "neon_vmul_lane<mode>"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
        (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
                    (match_operand:V4HF 2 "s_register_operand"
                     "<scalar_mul_constraint>")
                     (match_operand:SI 3 "immediate_operand" "i")]
                     UNSPEC_VMUL_LANE))]
  "TARGET_NEON_FP16INST"
  "vmul.f16\t%<V_reg>0, %<V_reg>1, %P2[%c3]"
  [(set_attr "type" "neon_fp_mul_s_scalar<q>")]
)

;; Widening multiply by lane: VMDI inputs, <V_widen> result printed as %q0.
(define_insn "neon_vmull<sup>_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
                           (match_operand:VMDI 2 "s_register_operand"
                                               "<scalar_mul_constraint>")
                           (match_operand:SI 3 "immediate_operand" "i")]
                          VMULL_LANE))]
  "TARGET_NEON"
{
  return "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2[%c3]";
}
  [(set_attr "type" "neon_mul_<V_elem_ch>_scalar_long")]
)

;; vqdmull by lane; same operand shapes as neon_vmull<sup>_lane<mode>.
(define_insn "neon_vqdmull_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
                           (match_operand:VMDI 2 "s_register_operand"
                                               "<scalar_mul_constraint>")
                           (match_operand:SI 3 "immediate_operand" "i")]
                          UNSPEC_VQDMULL_LANE))]
  "TARGET_NEON"
{
  return "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]";
}
  [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")]
)

;; vqdmulh / vqrdmulh by lane, VMQI modes; <r> selects the mnemonic.
(define_insn "neon_vq<r>dmulh_lane<mode>"
  [(set (match_operand:VMQI 0 "s_register_operand" "=w")
        (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "w")
                      (match_operand:<V_HALF> 2 "s_register_operand"
                                              "<scalar_mul_constraint>")
                      (match_operand:SI 3 "immediate_operand" "i")]
                      VQDMULH_LANE))]
  "TARGET_NEON"
{
  return "vq<r>dmulh.<V_s_elem>\t%q0, %q1, %P2[%c3]";
}
  [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
)

;; vqdmulh / vqrdmulh by lane, VMDI modes (all operands printed with %P).
(define_insn "neon_vq<r>dmulh_lane<mode>"
  [(set (match_operand:VMDI 0 "s_register_operand" "=w")
        (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "w")
                      (match_operand:VMDI 2 "s_register_operand"
                                          "<scalar_mul_constraint>")
                      (match_operand:SI 3 "immediate_operand" "i")]
                      VQDMULH_LANE))]
  "TARGET_NEON"
{
  return "vq<r>dmulh.<V_s_elem>\t%P0, %P1, %P2[%c3]";
}
  [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
)

;; vqrdmlah_lane, vqrdmlsh_lane
;; VMQI modes.  Operand 1 is tied to the destination (constraint "0"), so
;; only operands 0, 2 and 3 appear in the assembler template.
(define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
  [(set (match_operand:VMQI 0 "s_register_operand" "=w")
        (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "0")
                      (match_operand:VMQI 2 "s_register_operand" "w")
                      (match_operand:<V_HALF> 3 "s_register_operand"
                                          "<scalar_mul_constraint>")
                      (match_operand:SI 4 "immediate_operand" "i")]
                     VQRDMLH_AS))]
  "TARGET_NEON_RDMA"
{
  return
   "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%q0, %q2, %P3[%c4]";
}
  [(set_attr "type" "neon_mla_<V_elem_ch>_scalar<q>")]
)

;; VMDI variant of the pattern above.
(define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
  [(set (match_operand:VMDI 0 "s_register_operand" "=w")
        (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "0")
                      (match_operand:VMDI 2 "s_register_operand" "w")
                      (match_operand:VMDI 3 "s_register_operand"
                                          "<scalar_mul_constraint>")
                      (match_operand:SI 4 "immediate_operand" "i")]
                     VQRDMLH_AS))]
  "TARGET_NEON_RDMA"
{
  return
   "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%P0, %P2, %P3[%c4]";
}
  [(set_attr "type" "neon_mla_<V_elem_ch>_scalar")]
)

;; Multiply-accumulate by lane, VMD modes.  Operand 1 (the accumulator) is
;; tied to the destination; operand 2 is multiplied by lane %c4 of operand 3.
(define_insn "neon_vmla_lane<mode>"
  [(set (match_operand:VMD 0 "s_register_operand" "=w")
        (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
                     (match_operand:VMD 2 "s_register_operand" "w")
                     (match_operand:VMD 3 "s_register_operand"
                                        "<scalar_mul_constraint>")
                     (match_operand:SI 4 "immediate_operand" "i")]
                     UNSPEC_VMLA_LANE))]
  "TARGET_NEON"
{
  return "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]";
}
  [(set (attr "type")
     (if_then_else (match_test "<Is_float_mode>")
                   (const_string "neon_fp_mla_s_scalar<q>")
                   (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
)

;; VMQ variant of vmla by lane; the scalar source is a <V_HALF> vector.
(define_insn "neon_vmla_lane<mode>"
  [(set (match_operand:VMQ 0 "s_register_operand" "=w")
        (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
                     (match_operand:VMQ 2 "s_register_operand" "w")
                     (match_operand:<V_HALF> 3 "s_register_operand"
                                             "<scalar_mul_constraint>")
                     (match_operand:SI 4 "immediate_operand" "i")]
                     UNSPEC_VMLA_LANE))]
  "TARGET_NEON"
{
  return "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]";
}
  [(set (attr "type")
     (if_then_else (match_test "<Is_float_mode>")
                   (const_string "neon_fp_mla_s_scalar<q>")
                   (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
)

;; Widening multiply-accumulate by lane: <V_widen> accumulator tied to the
;; destination, VMDI multiplicands.
(define_insn "neon_vmlal<sup>_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
                           (match_operand:VMDI 2 "s_register_operand" "w")
                           (match_operand:VMDI 3 "s_register_operand"
                                               "<scalar_mul_constraint>")
                           (match_operand:SI 4 "immediate_operand" "i")]
                          VMLAL_LANE))]
  "TARGET_NEON"
{
  return "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
}
  [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
)

;; vqdmlal by lane; same operand shapes as neon_vmlal<sup>_lane<mode>.
(define_insn "neon_vqdmlal_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
                           (match_operand:VMDI 2 "s_register_operand" "w")
                           (match_operand:VMDI 3 "s_register_operand"
                                               "<scalar_mul_constraint>")
                           (match_operand:SI 4 "immediate_operand" "i")]
                          UNSPEC_VQDMLAL_LANE))]
  "TARGET_NEON"
{
  return "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]";
}
  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
)

;; Multiply-subtract by lane, VMD modes; mirrors neon_vmla_lane<mode>.
(define_insn "neon_vmls_lane<mode>"
  [(set (match_operand:VMD 0 "s_register_operand" "=w")
        (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
                     (match_operand:VMD 2 "s_register_operand" "w")
                     (match_operand:VMD 3 "s_register_operand"
                                        "<scalar_mul_constraint>")
                     (match_operand:SI 4 "immediate_operand" "i")]
                    UNSPEC_VMLS_LANE))]
  "TARGET_NEON"
{
  return "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]";
}
  [(set (attr "type")
     (if_then_else (match_test "<Is_float_mode>")
                   (const_string "neon_fp_mla_s_scalar<q>")
                   (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
)

;; VMQ variant of vmls by lane; the scalar source is a <V_HALF> vector.
(define_insn "neon_vmls_lane<mode>"
  [(set (match_operand:VMQ 0 "s_register_operand" "=w")
        (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
                     (match_operand:VMQ 2 "s_register_operand" "w")
                     (match_operand:<V_HALF> 3 "s_register_operand"
                                             "<scalar_mul_constraint>")
                     (match_operand:SI 4 "immediate_operand" "i")]
                    UNSPEC_VMLS_LANE))]
  "TARGET_NEON"
{
  return "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]";
}
  [(set (attr "type")
     (if_then_else (match_test "<Is_float_mode>")
                   (const_string "neon_fp_mla_s_scalar<q>")
                   (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
)

;; Widening multiply-subtract by lane; mirrors neon_vmlal<sup>_lane<mode>.
(define_insn "neon_vmlsl<sup>_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
                           (match_operand:VMDI 2 "s_register_operand" "w")
                           (match_operand:VMDI 3 "s_register_operand"
                                               "<scalar_mul_constraint>")
                           (match_operand:SI 4 "immediate_operand" "i")]
                          VMLSL_LANE))]
  "TARGET_NEON"
{
  return "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
}
  [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
)

;; vqdmlsl by lane; mirrors neon_vqdmlal_lane<mode>.
(define_insn "neon_vqdmlsl_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
                           (match_operand:VMDI 2 "s_register_operand" "w")
                           (match_operand:VMDI 3 "s_register_operand"
                                               "<scalar_mul_constraint>")
                           (match_operand:SI 4 "immediate_operand" "i")]
                          UNSPEC_VQDMLSL_LANE))]
  "TARGET_NEON"
{
  return "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]";
}
  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
)

; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a
; core register into a temp register, then use a scalar taken from that. This
; isn't an optimal solution if e.g. the scalar has just been read from memory
; or extracted from another vector. In the latter case it's currently better to
; use the "_lane" variant, and the former case can probably be implemented
; using vld1_lane, but that hasn't been done yet.

;; Multiply a VMD vector by a scalar.  Allocates a fresh vector pseudo,
;; writes the scalar (operand 2) into its lane 0 with neon_vset_lane, then
;; emits neon_vmul_lane with lane index 0.
(define_expand "neon_vmul_n<mode>"
  [(match_operand:VMD 0 "s_register_operand")
   (match_operand:VMD 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON"
{
  rtx tmp = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
  emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
                                       const0_rtx));
  DONE;
})

;; VMQ variant: the temporary holding the scalar is a <V_HALF>-mode vector.
(define_expand "neon_vmul_n<mode>"
  [(match_operand:VMQ 0 "s_register_operand")
   (match_operand:VMQ 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON"
{
  rtx tmp = gen_reg_rtx (<V_HALF>mode);
  emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
  emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
                                       const0_rtx));
  DONE;
})

;; f16 variant: the temporary holding the scalar is always V4HFmode.
(define_expand "neon_vmul_n<mode>"
  [(match_operand:VH 0 "s_register_operand")
   (match_operand:VH 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON_FP16INST"
{
  rtx tmp = gen_reg_rtx (V4HFmode);
  emit_insn (gen_neon_vset_lanev4hf (tmp, operands[2], tmp, const0_rtx));
  emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
                                       const0_rtx));
  DONE;
})

;; Widening multiply by scalar, expanded through gen_neon_vmulls_lane with
;; the scalar placed in lane 0 of a temporary.
(define_expand "neon_vmulls_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:VMDI 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON"
{
  rtx tmp = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
  emit_insn (gen_neon_vmulls_lane<mode> (operands[0], operands[1], tmp,
                                         const0_rtx));
  DONE;
})

;; Widening multiply by scalar, expanded through gen_neon_vmullu_lane.  As
;; with the other "_n" expanders below, the scalar operand is first written
;; into lane 0 of a fresh vector pseudo with neon_vset_lane, and the
;; corresponding "_lane" pattern is then emitted with lane index 0.
(define_expand "neon_vmullu_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:VMDI 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON"
{
  rtx tmp = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
  emit_insn (gen_neon_vmullu_lane<mode> (operands[0], operands[1], tmp,
                                         const0_rtx));
  DONE;
})

;; vqdmull by scalar, via neon_vqdmull_lane with lane 0 of a temporary.
(define_expand "neon_vqdmull_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:VMDI 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON"
{
  rtx tmp = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
  emit_insn (gen_neon_vqdmull_lane<mode> (operands[0], operands[1], tmp,
                                          const0_rtx));
  DONE;
})

;; vqdmulh by scalar, VMDI modes, via neon_vqdmulh_lane.
(define_expand "neon_vqdmulh_n<mode>"
  [(match_operand:VMDI 0 "s_register_operand")
   (match_operand:VMDI 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON"
{
  rtx tmp = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
  emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
                                          const0_rtx));
  DONE;
})

;; vqrdmulh by scalar, VMDI modes, via neon_vqrdmulh_lane.
(define_expand "neon_vqrdmulh_n<mode>"
  [(match_operand:VMDI 0 "s_register_operand")
   (match_operand:VMDI 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON"
{
  rtx tmp = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
  emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
                                          const0_rtx));
  DONE;
})

;; vqdmulh by scalar, VMQI modes; the temporary is a <V_HALF>-mode vector.
(define_expand "neon_vqdmulh_n<mode>"
  [(match_operand:VMQI 0 "s_register_operand")
   (match_operand:VMQI 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON"
{
  rtx tmp = gen_reg_rtx (<V_HALF>mode);
  emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
  emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
                                          const0_rtx));
  DONE;
})

;; vqrdmulh by scalar, VMQI modes; the temporary is a <V_HALF>-mode vector.
(define_expand "neon_vqrdmulh_n<mode>"
  [(match_operand:VMQI 0 "s_register_operand")
   (match_operand:VMQI 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON"
{
  rtx tmp = gen_reg_rtx (<V_HALF>mode);
  emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
  emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
                                           const0_rtx));
  DONE;
})

;; Multiply-accumulate by scalar, VMD modes.  Operand 1 is the accumulator,
;; operand 2 the vector multiplicand, operand 3 the scalar.
(define_expand "neon_vmla_n<mode>"
  [(match_operand:VMD 0 "s_register_operand")
   (match_operand:VMD 1 "s_register_operand")
   (match_operand:VMD 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx tmp = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
  emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
                                       tmp, const0_rtx));
  DONE;
})

;; VMQ variant of neon_vmla_n; the temporary is a <V_HALF>-mode vector.
(define_expand "neon_vmla_n<mode>"
  [(match_operand:VMQ 0 "s_register_operand")
   (match_operand:VMQ 1 "s_register_operand")
   (match_operand:VMQ 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx tmp = gen_reg_rtx (<V_HALF>mode);
  emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
  emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
                                       tmp, const0_rtx));
  DONE;
})

;; Widening multiply-accumulate by scalar, via gen_neon_vmlals_lane.
(define_expand "neon_vmlals_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:<V_widen> 1 "s_register_operand")
   (match_operand:VMDI 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx tmp = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
  emit_insn (gen_neon_vmlals_lane<mode> (operands[0], operands[1], operands[2],
                                         tmp, const0_rtx));
  DONE;
})

;; Widening multiply-accumulate by scalar, via gen_neon_vmlalu_lane.
(define_expand "neon_vmlalu_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:<V_widen> 1 "s_register_operand")
   (match_operand:VMDI 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx tmp = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
  emit_insn (gen_neon_vmlalu_lane<mode> (operands[0], operands[1], operands[2],
                                         tmp, const0_rtx));
  DONE;
})

;; vqdmlal by scalar, via neon_vqdmlal_lane.
(define_expand "neon_vqdmlal_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:<V_widen> 1 "s_register_operand")
   (match_operand:VMDI 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx tmp = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
  emit_insn (gen_neon_vqdmlal_lane<mode> (operands[0], operands[1], operands[2],
                                          tmp, const0_rtx));
  DONE;
})

;; Multiply-subtract by scalar, VMD modes, via neon_vmls_lane.
(define_expand "neon_vmls_n<mode>"
  [(match_operand:VMD 0 "s_register_operand")
   (match_operand:VMD 1 "s_register_operand")
   (match_operand:VMD 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx tmp = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
  emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
                                       tmp, const0_rtx));
  DONE;
})

;; VMQ variant of neon_vmls_n; the temporary is a <V_HALF>-mode vector.
(define_expand "neon_vmls_n<mode>"
  [(match_operand:VMQ 0 "s_register_operand")
   (match_operand:VMQ 1 "s_register_operand")
   (match_operand:VMQ 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx tmp = gen_reg_rtx (<V_HALF>mode);
  emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
  emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
                                       tmp, const0_rtx));
  DONE;
})

;; Widening multiply-subtract by scalar, via gen_neon_vmlsls_lane.
(define_expand "neon_vmlsls_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:<V_widen> 1 "s_register_operand")
   (match_operand:VMDI 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx tmp = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
  emit_insn (gen_neon_vmlsls_lane<mode> (operands[0], operands[1], operands[2],
                                         tmp, const0_rtx));
  DONE;
})

;; Widening multiply-subtract by scalar, via gen_neon_vmlslu_lane.
(define_expand "neon_vmlslu_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:<V_widen> 1 "s_register_operand")
   (match_operand:VMDI 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx tmp = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
  emit_insn (gen_neon_vmlslu_lane<mode> (operands[0], operands[1], operands[2],
                                         tmp, const0_rtx));
  DONE;
})

;; vqdmlsl by scalar, via neon_vqdmlsl_lane.
(define_expand "neon_vqdmlsl_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:<V_widen> 1 "s_register_operand")
   (match_operand:VMDI 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx tmp = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
  emit_insn (gen_neon_vqdmlsl_lane<mode> (operands[0], operands[1], operands[2],
                                          tmp, const0_rtx));
  DONE;
})

;; vext of operands 1 and 2 with a constant third operand.  The constant is
;; range-checked by arm_const_bounds against 0 and
;; GET_MODE_NUNITS (<MODE>mode) before being printed as %3.
(define_insn "@neon_vext<mode>"
  [(set (match_operand:VDQX 0 "s_register_operand" "=w")
        (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
                      (match_operand:VDQX 2 "s_register_operand" "w")
                      (match_operand:SI 3 "immediate_operand" "i")]
                     UNSPEC_VEXT))]
  "TARGET_NEON"
{
  arm_const_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
  return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3";
}
  [(set_attr "type" "neon_ext<q>")]
)

;; vrev64 over the VDQ modes.
(define_insn "@neon_vrev64<mode>"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
        (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")]
                    UNSPEC_VREV64))]
  "TARGET_NEON"
  "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_rev<q>")]
)

;; vrev32 over the VX modes.
(define_insn "@neon_vrev32<mode>"
  [(set (match_operand:VX 0 "s_register_operand" "=w")
        (unspec:VX [(match_operand:VX 1 "s_register_operand" "w")]
                   UNSPEC_VREV32))]
  "TARGET_NEON"
  "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_rev<q>")]
)

;; vrev16 over the VE modes.
(define_insn "@neon_vrev16<mode>"
  [(set (match_operand:VE 0 "s_register_operand" "=w")
        (unspec:VE [(match_operand:VE 1 "s_register_operand" "w")]
                   UNSPEC_VREV16))]
  "TARGET_NEON"
  "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_rev<q>")]
)

; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register
; allocation.
For an intrinsic of form: 4734; rD = vbsl_* (rS, rN, rM) 4735; We can use any of: 4736; vbsl rS, rN, rM (if D = S) 4737; vbit rD, rN, rS (if D = M, so 1-bits in rS choose bits from rN, else rM) 4738; vbif rD, rM, rS (if D = N, so 0-bits in rS choose bits from rM, else rN) 4739 4740(define_insn "neon_vbsl<mode>_internal" 4741 [(set (match_operand:VDQX 0 "s_register_operand" "=w,w,w") 4742 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" " 0,w,w") 4743 (match_operand:VDQX 2 "s_register_operand" " w,w,0") 4744 (match_operand:VDQX 3 "s_register_operand" " w,0,w")] 4745 UNSPEC_VBSL))] 4746 "TARGET_NEON" 4747 "@ 4748 vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3 4749 vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1 4750 vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1" 4751 [(set_attr "type" "neon_bsl<q>")] 4752) 4753 4754(define_expand "neon_vbsl<mode>" 4755 [(set (match_operand:VDQX 0 "s_register_operand") 4756 (unspec:VDQX [(match_operand:<V_cmp_result> 1 "s_register_operand") 4757 (match_operand:VDQX 2 "s_register_operand") 4758 (match_operand:VDQX 3 "s_register_operand")] 4759 UNSPEC_VBSL))] 4760 "TARGET_NEON" 4761{ 4762 /* We can't alias operands together if they have different modes. 
*/ 4763 operands[1] = gen_lowpart (<MODE>mode, operands[1]); 4764}) 4765 4766;; vshl, vrshl 4767(define_insn "neon_v<shift_op><sup><mode>" 4768 [(set (match_operand:VDQIX 0 "s_register_operand" "=w") 4769 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w") 4770 (match_operand:VDQIX 2 "s_register_operand" "w")] 4771 VSHL))] 4772 "TARGET_NEON" 4773 "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" 4774 [(set_attr "type" "neon_shift_imm<q>")] 4775) 4776 4777;; vqshl, vqrshl 4778(define_insn "neon_v<shift_op><sup><mode>" 4779 [(set (match_operand:VDQIX 0 "s_register_operand" "=w") 4780 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w") 4781 (match_operand:VDQIX 2 "s_register_operand" "w")] 4782 VQSHL))] 4783 "TARGET_NEON" 4784 "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" 4785 [(set_attr "type" "neon_sat_shift_imm<q>")] 4786) 4787 4788;; vshr_n, vrshr_n 4789(define_insn "neon_v<shift_op><sup>_n<mode>" 4790 [(set (match_operand:VDQIX 0 "s_register_operand" "=w") 4791 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w") 4792 (match_operand:SI 2 "immediate_operand" "i")] 4793 VSHR_N))] 4794 "TARGET_NEON" 4795{ 4796 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) + 1); 4797 return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2"; 4798} 4799 [(set_attr "type" "neon_shift_imm<q>")] 4800) 4801 4802;; vshrn_n, vrshrn_n 4803(define_insn "neon_v<shift_op>_n<mode>" 4804 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w") 4805 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w") 4806 (match_operand:SI 2 "immediate_operand" "i")] 4807 VSHRN_N))] 4808 "TARGET_NEON" 4809{ 4810 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1); 4811 return "v<shift_op>.<V_if_elem>\t%P0, %q1, %2"; 4812} 4813 [(set_attr "type" "neon_shift_imm_narrow_q")] 4814) 4815 4816;; vqshrn_n, vqrshrn_n 4817(define_insn "neon_v<shift_op><sup>_n<mode>" 4818 [(set 
(match_operand:<V_narrow> 0 "s_register_operand" "=w") 4819 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w") 4820 (match_operand:SI 2 "immediate_operand" "i")] 4821 VQSHRN_N))] 4822 "TARGET_NEON" 4823{ 4824 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1); 4825 return "v<shift_op>.<sup>%#<V_sz_elem>\t%P0, %q1, %2"; 4826} 4827 [(set_attr "type" "neon_sat_shift_imm_narrow_q")] 4828) 4829 4830;; vqshrun_n, vqrshrun_n 4831(define_insn "neon_v<shift_op>_n<mode>" 4832 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w") 4833 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w") 4834 (match_operand:SI 2 "immediate_operand" "i")] 4835 VQSHRUN_N))] 4836 "TARGET_NEON" 4837{ 4838 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1); 4839 return "v<shift_op>.<V_s_elem>\t%P0, %q1, %2"; 4840} 4841 [(set_attr "type" "neon_sat_shift_imm_narrow_q")] 4842) 4843 4844(define_insn "neon_vshl_n<mode>" 4845 [(set (match_operand:VDQIX 0 "s_register_operand" "=w") 4846 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w") 4847 (match_operand:SI 2 "immediate_operand" "i")] 4848 UNSPEC_VSHL_N))] 4849 "TARGET_NEON" 4850{ 4851 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode)); 4852 return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2"; 4853} 4854 [(set_attr "type" "neon_shift_imm<q>")] 4855) 4856 4857(define_insn "neon_vqshl_<sup>_n<mode>" 4858 [(set (match_operand:VDQIX 0 "s_register_operand" "=w") 4859 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w") 4860 (match_operand:SI 2 "immediate_operand" "i")] 4861 VQSHL_N))] 4862 "TARGET_NEON" 4863{ 4864 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode)); 4865 return "vqshl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2"; 4866} 4867 [(set_attr "type" "neon_sat_shift_imm<q>")] 4868) 4869 4870(define_insn "neon_vqshlu_n<mode>" 4871 [(set (match_operand:VDQIX 0 "s_register_operand" "=w") 4872 (unspec:VDQIX 
[(match_operand:VDQIX 1 "s_register_operand" "w") 4873 (match_operand:SI 2 "immediate_operand" "i")] 4874 UNSPEC_VQSHLU_N))] 4875 "TARGET_NEON" 4876{ 4877 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode)); 4878 return "vqshlu.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %2"; 4879} 4880 [(set_attr "type" "neon_sat_shift_imm<q>")] 4881) 4882 4883(define_insn "neon_vshll<sup>_n<mode>" 4884 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") 4885 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w") 4886 (match_operand:SI 2 "immediate_operand" "i")] 4887 VSHLL_N))] 4888 "TARGET_NEON" 4889{ 4890 /* The boundaries are: 0 < imm <= size. */ 4891 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode) + 1); 4892 return "vshll.<sup>%#<V_sz_elem>\t%q0, %P1, %2"; 4893} 4894 [(set_attr "type" "neon_shift_imm_long")] 4895) 4896 4897;; vsra_n, vrsra_n 4898(define_insn "neon_v<shift_op><sup>_n<mode>" 4899 [(set (match_operand:VDQIX 0 "s_register_operand" "=w") 4900 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0") 4901 (match_operand:VDQIX 2 "s_register_operand" "w") 4902 (match_operand:SI 3 "immediate_operand" "i")] 4903 VSRA_N))] 4904 "TARGET_NEON" 4905{ 4906 arm_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1); 4907 return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3"; 4908} 4909 [(set_attr "type" "neon_shift_acc<q>")] 4910) 4911 4912(define_insn "neon_vsri_n<mode>" 4913 [(set (match_operand:VDQIX 0 "s_register_operand" "=w") 4914 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0") 4915 (match_operand:VDQIX 2 "s_register_operand" "w") 4916 (match_operand:SI 3 "immediate_operand" "i")] 4917 UNSPEC_VSRI))] 4918 "TARGET_NEON" 4919{ 4920 arm_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1); 4921 return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3"; 4922} 4923 [(set_attr "type" "neon_shift_reg<q>")] 4924) 4925 4926(define_insn "neon_vsli_n<mode>" 4927 [(set (match_operand:VDQIX 0 
"s_register_operand" "=w") 4928 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0") 4929 (match_operand:VDQIX 2 "s_register_operand" "w") 4930 (match_operand:SI 3 "immediate_operand" "i")] 4931 UNSPEC_VSLI))] 4932 "TARGET_NEON" 4933{ 4934 arm_const_bounds (operands[3], 0, neon_element_bits (<MODE>mode)); 4935 return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3"; 4936} 4937 [(set_attr "type" "neon_shift_reg<q>")] 4938) 4939 4940(define_insn "neon_vtbl1v8qi" 4941 [(set (match_operand:V8QI 0 "s_register_operand" "=w") 4942 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w") 4943 (match_operand:V8QI 2 "s_register_operand" "w")] 4944 UNSPEC_VTBL))] 4945 "TARGET_NEON" 4946 "vtbl.8\t%P0, {%P1}, %P2" 4947 [(set_attr "type" "neon_tbl1")] 4948) 4949 4950(define_insn "neon_vtbl2v8qi" 4951 [(set (match_operand:V8QI 0 "s_register_operand" "=w") 4952 (unspec:V8QI [(match_operand:TI 1 "s_register_operand" "w") 4953 (match_operand:V8QI 2 "s_register_operand" "w")] 4954 UNSPEC_VTBL))] 4955 "TARGET_NEON" 4956{ 4957 rtx ops[4]; 4958 int tabbase = REGNO (operands[1]); 4959 4960 ops[0] = operands[0]; 4961 ops[1] = gen_rtx_REG (V8QImode, tabbase); 4962 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2); 4963 ops[3] = operands[2]; 4964 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", ops); 4965 4966 return ""; 4967} 4968 [(set_attr "type" "neon_tbl2")] 4969) 4970 4971(define_insn "neon_vtbl3v8qi" 4972 [(set (match_operand:V8QI 0 "s_register_operand" "=w") 4973 (unspec:V8QI [(match_operand:EI 1 "s_register_operand" "w") 4974 (match_operand:V8QI 2 "s_register_operand" "w")] 4975 UNSPEC_VTBL))] 4976 "TARGET_NEON" 4977{ 4978 rtx ops[5]; 4979 int tabbase = REGNO (operands[1]); 4980 4981 ops[0] = operands[0]; 4982 ops[1] = gen_rtx_REG (V8QImode, tabbase); 4983 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2); 4984 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4); 4985 ops[4] = operands[2]; 4986 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", ops); 4987 4988 return ""; 4989} 4990 
[(set_attr "type" "neon_tbl3")] 4991) 4992 4993(define_insn "neon_vtbl4v8qi" 4994 [(set (match_operand:V8QI 0 "s_register_operand" "=w") 4995 (unspec:V8QI [(match_operand:OI 1 "s_register_operand" "w") 4996 (match_operand:V8QI 2 "s_register_operand" "w")] 4997 UNSPEC_VTBL))] 4998 "TARGET_NEON" 4999{ 5000 rtx ops[6]; 5001 int tabbase = REGNO (operands[1]); 5002 5003 ops[0] = operands[0]; 5004 ops[1] = gen_rtx_REG (V8QImode, tabbase); 5005 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2); 5006 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4); 5007 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6); 5008 ops[5] = operands[2]; 5009 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops); 5010 5011 return ""; 5012} 5013 [(set_attr "type" "neon_tbl4")] 5014) 5015 5016;; These three are used by the vec_perm infrastructure for V16QImode. 5017(define_insn_and_split "neon_vtbl1v16qi" 5018 [(set (match_operand:V16QI 0 "s_register_operand" "=&w") 5019 (unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w") 5020 (match_operand:V16QI 2 "s_register_operand" "w")] 5021 UNSPEC_VTBL))] 5022 "TARGET_NEON" 5023 "#" 5024 "&& reload_completed" 5025 [(const_int 0)] 5026{ 5027 rtx op0, op1, op2, part0, part2; 5028 unsigned ofs; 5029 5030 op0 = operands[0]; 5031 op1 = gen_lowpart (TImode, operands[1]); 5032 op2 = operands[2]; 5033 5034 ofs = subreg_lowpart_offset (V8QImode, V16QImode); 5035 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs); 5036 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs); 5037 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2)); 5038 5039 ofs = subreg_highpart_offset (V8QImode, V16QImode); 5040 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs); 5041 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs); 5042 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2)); 5043 DONE; 5044} 5045 [(set_attr "type" "multiple")] 5046) 5047 5048(define_insn_and_split "neon_vtbl2v16qi" 5049 [(set (match_operand:V16QI 0 "s_register_operand" "=&w") 5050 (unspec:V16QI 
[(match_operand:OI 1 "s_register_operand" "w") 5051 (match_operand:V16QI 2 "s_register_operand" "w")] 5052 UNSPEC_VTBL))] 5053 "TARGET_NEON" 5054 "#" 5055 "&& reload_completed" 5056 [(const_int 0)] 5057{ 5058 rtx op0, op1, op2, part0, part2; 5059 unsigned ofs; 5060 5061 op0 = operands[0]; 5062 op1 = operands[1]; 5063 op2 = operands[2]; 5064 5065 ofs = subreg_lowpart_offset (V8QImode, V16QImode); 5066 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs); 5067 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs); 5068 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2)); 5069 5070 ofs = subreg_highpart_offset (V8QImode, V16QImode); 5071 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs); 5072 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs); 5073 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2)); 5074 DONE; 5075} 5076 [(set_attr "type" "multiple")] 5077) 5078 5079;; ??? Logically we should extend the regular neon_vcombine pattern to 5080;; handle quad-word input modes, producing octa-word output modes. But 5081;; that requires us to add support for octa-word vector modes in moves. 5082;; That seems overkill for this one use in vec_perm. 
;; Combine two V16QI values into a single OI value (UNSPEC_VCONCAT).
;; The insn is emitted as "#" and split once reload has completed; the
;; split body hands all of the work to neon_split_vcombine.
(define_insn_and_split "neon_vcombinev16qi"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI
          [(match_operand:V16QI 1 "s_register_operand" "w")
           (match_operand:V16QI 2 "s_register_operand" "w")]
          UNSPEC_VCONCAT))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  neon_split_vcombine (operands);
  DONE;
}
  [(set_attr "type" "multiple")]
)

;; vtbx.8 with a one-register list.  Operand 1 is tied to the destination
;; by its "0" constraint, so the template only names operands 0, 2 and 3.
(define_insn "neon_vtbx1v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:V8QI 1 "s_register_operand" "0")
           (match_operand:V8QI 2 "s_register_operand" "w")
           (match_operand:V8QI 3 "s_register_operand" "w")]
          UNSPEC_VTBX))]
  "TARGET_NEON"
  "vtbx.8\t%P0, {%P2}, %P3"
  [(set_attr "type" "neon_tbl1")]
)

;; vtbx.8 with a two-register list held in a TI operand.  The list
;; registers are rebuilt as V8QI registers at hard register numbers
;; base and base + 2 (presumably one D register per two hard register
;; numbers -- confirm against the register layout in arm.h).
(define_insn "neon_vtbx2v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:V8QI 1 "s_register_operand" "0")
           (match_operand:TI 2 "s_register_operand" "w")
           (match_operand:V8QI 3 "s_register_operand" "w")]
          UNSPEC_VTBX))]
  "TARGET_NEON"
{
  rtx asm_ops[4];
  int list_base = REGNO (operands[2]);
  int i;

  /* Slot 0 is the destination, slots 1-2 the list, slot 3 the last
     source operand.  */
  asm_ops[0] = operands[0];
  for (i = 0; i < 2; i++)
    asm_ops[1 + i] = gen_rtx_REG (V8QImode, list_base + 2 * i);
  asm_ops[3] = operands[3];
  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", asm_ops);

  return "";
}
  [(set_attr "type" "neon_tbl2")]
)

;; vtbx.8 with a three-register list held in an EI operand; the list
;; registers are at hard register numbers base, base + 2 and base + 4.
(define_insn "neon_vtbx3v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:V8QI 1 "s_register_operand" "0")
           (match_operand:EI 2 "s_register_operand" "w")
           (match_operand:V8QI 3 "s_register_operand" "w")]
          UNSPEC_VTBX))]
  "TARGET_NEON"
{
  rtx asm_ops[5];
  int list_base = REGNO (operands[2]);
  int i;

  /* Slot 0 is the destination, slots 1-3 the list, slot 4 the last
     source operand.  */
  asm_ops[0] = operands[0];
  for (i = 0; i < 3; i++)
    asm_ops[1 + i] = gen_rtx_REG (V8QImode, list_base + 2 * i);
  asm_ops[4] = operands[3];
  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", asm_ops);

  return "";
}
  [(set_attr "type" "neon_tbl3")]
)

;; vtbx.8 with a four-register list held in an OI operand; the list
;; registers are at hard register numbers base, base + 2, base + 4 and
;; base + 6.
(define_insn "neon_vtbx4v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:V8QI 1 "s_register_operand" "0")
           (match_operand:OI 2 "s_register_operand" "w")
           (match_operand:V8QI 3 "s_register_operand" "w")]
          UNSPEC_VTBX))]
  "TARGET_NEON"
{
  rtx asm_ops[6];
  int list_base = REGNO (operands[2]);
  int i;

  /* Slot 0 is the destination, slots 1-4 the list, slot 5 the last
     source operand.  */
  asm_ops[0] = operands[0];
  for (i = 0; i < 4; i++)
    asm_ops[1 + i] = gen_rtx_REG (V8QImode, list_base + 2 * i);
  asm_ops[5] = operands[3];
  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", asm_ops);

  return "";
}
  [(set_attr "type" "neon_tbl4")]
)

;; Expander producing the two-set parallel for a transpose: operand 0
;; receives UNSPEC_VTRN1 of operands 1 and 2, operand 3 receives
;; UNSPEC_VTRN2 of the same two inputs.  There is no preparation code.
(define_expand "@neon_vtrn<mode>_internal"
  [(parallel
    [(set (match_operand:VDQWH 0 "s_register_operand")
          (unspec:VDQWH
            [(match_operand:VDQWH 1 "s_register_operand")
             (match_operand:VDQWH 2 "s_register_operand")]
            UNSPEC_VTRN1))
     (set (match_operand:VDQWH 3 "s_register_operand")
          (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
  "TARGET_NEON"
  ""
)

;; Note: Different operand numbering to handle tied registers correctly.
;; Matching insn for the vtrn parallel.  Input 1 is tied to output 0 and
;; input 3 to output 2 (constraints "0" and "2"); both outputs are
;; earlyclobber.  Because of the ties the template names only %0 and %2.
(define_insn "*neon_vtrn<mode>_insn"
  [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
        (unspec:VDQWH
          [(match_operand:VDQWH 1 "s_register_operand" "0")
           (match_operand:VDQWH 3 "s_register_operand" "2")]
          UNSPEC_VTRN1))
   (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
        (unspec:VDQWH
          [(match_dup 1) (match_dup 3)]
          UNSPEC_VTRN2))]
  "TARGET_NEON"
  "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
  [(set_attr "type" "neon_permute<q>")]
)

;; Expander producing the two-set parallel for a zip: operand 0 receives
;; UNSPEC_VZIP1 of operands 1 and 2, operand 3 receives UNSPEC_VZIP2 of
;; the same two inputs.  There is no preparation code.
(define_expand "@neon_vzip<mode>_internal"
  [(parallel
    [(set (match_operand:VDQWH 0 "s_register_operand")
          (unspec:VDQWH
            [(match_operand:VDQWH 1 "s_register_operand")
             (match_operand:VDQWH 2 "s_register_operand")]
            UNSPEC_VZIP1))
     (set (match_operand:VDQWH 3 "s_register_operand")
          (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
  "TARGET_NEON"
  ""
)

;; Matching insn for the vzip parallel.  The operands are numbered
;; differently from the expander so that each input can be tied to an
;; output: input 1 to output 0 and input 3 to output 2.  Both outputs are
;; earlyclobber and the template names only %0 and %2.
(define_insn "*neon_vzip<mode>_insn"
  [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
        (unspec:VDQWH
          [(match_operand:VDQWH 1 "s_register_operand" "0")
           (match_operand:VDQWH 3 "s_register_operand" "2")]
          UNSPEC_VZIP1))
   (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
        (unspec:VDQWH
          [(match_dup 1) (match_dup 3)]
          UNSPEC_VZIP2))]
  "TARGET_NEON"
  "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
  [(set_attr "type" "neon_zip<q>")]
)

;; Expander producing the two-set parallel for an unzip: operand 0
;; receives UNSPEC_VUZP1 of operands 1 and 2, operand 3 receives
;; UNSPEC_VUZP2 of the same two inputs.  There is no preparation code.
(define_expand "@neon_vuzp<mode>_internal"
  [(parallel
    [(set (match_operand:VDQWH 0 "s_register_operand")
          (unspec:VDQWH
            [(match_operand:VDQWH 1 "s_register_operand")
             (match_operand:VDQWH 2 "s_register_operand")]
            UNSPEC_VUZP1))
     (set (match_operand:VDQWH 3 "s_register_operand")
          (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
  "TARGET_NEON"
  ""
)

;; Note: Different operand numbering to handle tied registers correctly.
;; Matching insn for the vuzp parallel.  Input 1 is tied to output 0 and
;; input 3 to output 2 (constraints "0" and "2"); both outputs are
;; earlyclobber, and the template names only %0 and %2.
(define_insn "*neon_vuzp<mode>_insn"
  [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
        (unspec:VDQWH
          [(match_operand:VDQWH 1 "s_register_operand" "0")
           (match_operand:VDQWH 3 "s_register_operand" "2")]
          UNSPEC_VUZP1))
   (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
        (unspec:VDQWH
          [(match_dup 1) (match_dup 3)]
          UNSPEC_VUZP2))]
  "TARGET_NEON"
  "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
  [(set_attr "type" "neon_zip<q>")]
)

;; Expander with no preparation code.  Its pattern is a UNSPEC_VLD1 load
;; of a VDQX value from a neon_struct_operand, i.e. the same RTL shape as
;; neon_vld1<mode> below.
(define_expand "vec_load_lanes<mode><mode>"
  [(set (match_operand:VDQX 0 "s_register_operand")
        (unspec:VDQX
          [(match_operand:VDQX 1 "neon_struct_operand")]
          UNSPEC_VLD1))]
  "TARGET_NEON")

;; Whole-vector vld1 from a neon_struct_operand ("Um") into a register.
(define_insn "neon_vld1<mode>"
  [(set (match_operand:VDQX 0 "s_register_operand" "=w")
        (unspec:VDQX
          [(match_operand:VDQX 1 "neon_struct_operand" "Um")]
          UNSPEC_VLD1))]
  "TARGET_NEON"
  "vld1.<V_sz_elem>\t%h0, %A1"
  [(set_attr "type" "neon_load1_1reg<q>")]
)

;; The lane numbers in the RTL are in GCC lane order, having been flipped
;; in arm_expand_neon_args.  The lane numbers are restored to architectural
;; lane order here.
;;
;; Single-lane vld1 into a VDX register.  Operand 2 supplies the previous
;; register contents and is tied to the destination; operand 3 is the lane
;; index.  When the mode has only one element the whole register is loaded
;; and no lane suffix is printed.
(define_insn "neon_vld1_lane<mode>"
  [(set (match_operand:VDX 0 "s_register_operand" "=w")
        (unspec:VDX
          [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
           (match_operand:VDX 2 "s_register_operand" "0")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VLD1_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);

  /* Print the converted lane number, not the one stored in the RTL.  */
  operands[3] = GEN_INT (arch_lane);
  if (nunits == 1)
    return "vld1.<V_sz_elem>\t%P0, %A1";
  return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
}
  [(set_attr "type" "neon_load1_one_lane<q>")]
)

;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;; Single-lane vld1 into a VQX register.  Operand 2 supplies the previous
;; register contents and is tied to the destination; operand 3 is the lane
;; index.  The output code converts the lane with NEON_ENDIAN_LANE_N and
;; then narrows the destination to one V_HALF register: lanes in the upper
;; half select the register two hard register numbers above the base, with
;; the lane index reduced by half the element count.  When the mode has
;; only two elements a whole half register is loaded and no lane suffix is
;; printed.
(define_insn "neon_vld1_lane<mode>"
  [(set (match_operand:VQX 0 "s_register_operand" "=w")
        (unspec:VQX
          [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
           (match_operand:VQX 2 "s_register_operand" "0")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VLD1_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  HOST_WIDE_INT half = nunits / 2;
  int half_regno = REGNO (operands[0]);

  if (arch_lane >= half)
    {
      /* The lane lives in the upper half register.  */
      arch_lane -= half;
      half_regno += 2;
    }

  /* Rewrite the operands so the template prints the half register and
     the lane number relative to it.  */
  operands[3] = GEN_INT (arch_lane);
  operands[0] = gen_rtx_REG (<V_HALF>mode, half_regno);
  if (nunits == 2)
    return "vld1.<V_sz_elem>\t%P0, %A1";
  return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
}
  [(set_attr "type" "neon_load1_one_lane<q>")]
)

;; vld1 with the all-lanes register syntax: the RTL is a vec_duplicate of
;; the V_elem memory operand into a VD_LANE register.
(define_insn "neon_vld1_dup<mode>"
  [(set (match_operand:VD_LANE 0 "s_register_operand" "=w")
        (vec_duplicate:VD_LANE
          (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
  "TARGET_NEON"
  "vld1.<V_sz_elem>\t{%P0[]}, %A1"
  [(set_attr "type" "neon_load1_all_lanes<q>")]
)

;; Special case for DImode.  Treat it exactly like a simple load.
;; DImode variant of the vld1_dup expander.  The pattern is a plain
;; UNSPEC_VLD1 load of a DI value and there is no preparation code.
(define_expand "neon_vld1_dupdi"
  [(set (match_operand:DI 0 "s_register_operand")
        (unspec:DI
          [(match_operand:DI 1 "neon_struct_operand")]
          UNSPEC_VLD1))]
  "TARGET_NEON"
  ""
)

;; vld1 with the all-lanes syntax for VQ2 modes: the %e0 and %f0 forms of
;; the destination both appear in the register list.
(define_insn "neon_vld1_dup<mode>"
  [(set (match_operand:VQ2 0 "s_register_operand" "=w")
        (vec_duplicate:VQ2
          (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
  "TARGET_NEON"
  "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1"
  [(set_attr "type" "neon_load1_all_lanes<q>")]
)

;; V2DI duplicate load.  Emitted as "#" and split after reload into a
;; neon_vld1_dupdi load of the low DImode part of the destination followed
;; by a move of that part into the high DImode part.
(define_insn_and_split "neon_vld1_dupv2di"
  [(set (match_operand:V2DI 0 "s_register_operand" "=w")
        (vec_duplicate:V2DI
          (match_operand:DI 1 "neon_struct_operand" "Um")))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx low_part = gen_lowpart (DImode, operands[0]);

  emit_insn (gen_neon_vld1_dupdi (low_part, operands[1]));

  rtx high_part = gen_highpart (DImode, operands[0]);

  emit_move_insn (high_part, low_part);
  DONE;
}
  [(set_attr "length" "8")
   (set_attr "type" "neon_load1_all_lanes_q")]
)

;; Expander with no preparation code.  Its pattern is a UNSPEC_VST1 store
;; of a VDQX register to a neon_struct_operand, i.e. the same RTL shape as
;; neon_vst1<mode> below.
(define_expand "vec_store_lanes<mode><mode>"
  [(set (match_operand:VDQX 0 "neon_struct_operand")
        (unspec:VDQX
          [(match_operand:VDQX 1 "s_register_operand")]
          UNSPEC_VST1))]
  "TARGET_NEON")

;; Whole-vector vst1 of a register to a neon_struct_operand ("Um").
(define_insn "neon_vst1<mode>"
  [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um")
        (unspec:VDQX
          [(match_operand:VDQX 1 "s_register_operand" "w")]
          UNSPEC_VST1))]
  "TARGET_NEON"
  "vst1.<V_sz_elem>\t%h1, %A0"
  [(set_attr "type" "neon_store1_1reg<q>")])

;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;; Single-lane vst1 from a VDX register.  Operand 2 is the lane index,
;; converted with NEON_ENDIAN_LANE_N before printing.  When the mode has
;; only one element the whole register is stored and no lane suffix is
;; printed.
(define_insn "neon_vst1_lane<mode>"
  [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_elem>
          [(match_operand:VDX 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          UNSPEC_VST1_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);

  /* Print the converted lane number, not the one stored in the RTL.  */
  operands[2] = GEN_INT (arch_lane);
  if (nunits == 1)
    return "vst1.<V_sz_elem>\t{%P1}, %A0";
  return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
}
  [(set_attr "type" "neon_store1_one_lane<q>")]
)

;; Single-lane vst1 from a VQX register.  As for the VDX form the lane is
;; converted with NEON_ENDIAN_LANE_N; the source is then narrowed to one
;; V_HALF register, with upper-half lanes selecting the register two hard
;; register numbers above the base and the lane index reduced by half the
;; element count.  When the mode has only two elements a whole half
;; register is stored and no lane suffix is printed.
(define_insn "neon_vst1_lane<mode>"
  [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_elem>
          [(match_operand:VQX 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          UNSPEC_VST1_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  HOST_WIDE_INT half = nunits / 2;
  int half_regno = REGNO (operands[1]);

  if (arch_lane >= half)
    {
      /* The lane lives in the upper half register.  */
      arch_lane -= half;
      half_regno += 2;
    }

  operands[2] = GEN_INT (arch_lane);
  operands[1] = gen_rtx_REG (<V_HALF>mode, half_regno);
  if (nunits == 2)
    return "vst1.<V_sz_elem>\t{%P1}, %A0";
  return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
}
  [(set_attr "type" "neon_store1_one_lane<q>")]
)

;; Expander with no preparation code: a UNSPEC_VLD2 load of a TI value.
;; The inner UNSPEC_VSTRUCTDUMMY unspec carries the VDX element mode so
;; that the pattern can be iterated over it.
(define_expand "vec_load_lanesti<mode>"
  [(set (match_operand:TI 0 "s_register_operand")
        (unspec:TI
          [(match_operand:TI 1 "neon_struct_operand")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2))]
  "TARGET_NEON")

;; Two-register structure load into a TI register.  For 64-bit elements
;; the output is vld1.64 instead of vld2, and the "type" attribute
;; switches between the load1 and load2 two-register types to match.
(define_insn "neon_vld2<mode>"
  [(set (match_operand:TI 0 "s_register_operand" "=w")
        (unspec:TI
          [(match_operand:TI 1 "neon_struct_operand" "Um")
           (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2))]
  "TARGET_NEON"
{
  return (<V_sz_elem> == 64
          ? "vld1.64\t%h0, %A1"
          : "vld2.<V_sz_elem>\t%h0, %A1");
}
  [(set (attr "type")
        (if_then_else
          (eq (const_string "<V_sz_elem>") (const_string "64"))
          (const_string "neon_load1_2reg<q>")
          (const_string "neon_load2_2reg<q>")))]
)

;; Expander with no preparation code: a UNSPEC_VLD2 load of an OI value,
;; iterated over the VQ2 modes through the UNSPEC_VSTRUCTDUMMY unspec.
(define_expand "vec_load_lanesoi<mode>"
  [(set (match_operand:OI 0 "s_register_operand")
        (unspec:OI
          [(match_operand:OI 1 "neon_struct_operand")
           (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2))]
  "TARGET_NEON")

;; vld2 into an OI register, iterated over the VQ2BF modes.
(define_insn "neon_vld2<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI
          [(match_operand:OI 1 "neon_struct_operand" "Um")
           (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2))]
  "TARGET_NEON"
  "vld2.<V_sz_elem>\t%h0, %A1"
  [(set_attr "type" "neon_load2_2reg_q")])

;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;; Single-lane vld2 into a TI register.  Operand 2 supplies the previous
;; register contents and is tied to the destination; operand 3 is the lane
;; index, converted with NEON_ENDIAN_LANE_N before printing.  The two
;; registers named in the template are at hard register numbers base and
;; base + 2.
(define_insn "neon_vld2_lane<mode>"
  [(set (match_operand:TI 0 "s_register_operand" "=w")
        (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:TI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD2_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
  int regno = REGNO (operands[0]);
  rtx ops[4];
  /* ops[0] and ops[1] are the two destination registers, ops[2] the
     memory operand and ops[3] the converted lane number.  */
  ops[0] = gen_rtx_REG (DImode, regno);
  ops[1] = gen_rtx_REG (DImode, regno + 2);
  ops[2] = operands[1];
  ops[3] = GEN_INT (lane);
  output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
  return "";
}
  [(set_attr "type" "neon_load2_one_lane<q>")]
)

;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;;
;; Single-lane vld2 into an OI register.  Lanes in the upper half of the
;; vector move the base up by two hard register numbers and reduce the
;; lane index by half the element count; the second register named in the
;; template is four hard register numbers above the first.
(define_insn "neon_vld2_lane<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:OI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD2_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
  HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
  int regno = REGNO (operands[0]);
  rtx ops[4];
  if (lane >= max / 2)
    {
      /* The lane lives in the upper half registers.  */
      lane -= max / 2;
      regno += 2;
    }
  ops[0] = gen_rtx_REG (DImode, regno);
  ops[1] = gen_rtx_REG (DImode, regno + 4);
  ops[2] = operands[1];
  ops[3] = GEN_INT (lane);
  output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
  return "";
}
  [(set_attr "type" "neon_load2_one_lane<q>")]
)

;; vld2 with the all-lanes register syntax into a TI register.  Modes
;; with a single element fall back to a plain vld1 of the whole operand,
;; and the "type" attribute follows the same split.
(define_insn "neon_vld2_dup<mode>"
  [(set (match_operand:TI 0 "s_register_operand" "=w")
        (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
                    (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD2_DUP))]
  "TARGET_NEON"
{
  if (GET_MODE_NUNITS (<MODE>mode) > 1)
    return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
  else
    return "vld1.<V_sz_elem>\t%h0, %A1";
}
  [(set (attr "type")
      (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
                    (const_string "neon_load2_all_lanes<q>")
                    (const_string "neon_load1_1reg<q>")))]
)

;; V8BF form of the UNSPEC_VLD2_DUP load into an OI register, enabled by
;; TARGET_BF16_SIMD.  The four V4BF registers named in the template are at
;; hard register numbers base, base + 2, base + 4 and base + 6.
;; NOTE(review): unlike neon_vld2_dup<mode> above, the template lists four
;; plain registers without the [] all-lanes suffix -- confirm that this is
;; the intended instruction for UNSPEC_VLD2_DUP.
(define_insn "neon_vld2_dupv8bf"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:V2BF 1 "neon_struct_operand" "Um")
                    (unspec:V8BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD2_DUP))]
  "TARGET_BF16_SIMD"
  {
    rtx ops[5];
    int tabbase = REGNO (operands[0]);

    /* ops[0..3] are the destination registers, ops[4] the memory
       operand.  */
    ops[4] = operands[1];
    ops[0] = gen_rtx_REG (V4BFmode, tabbase);
    ops[1] = gen_rtx_REG (V4BFmode, tabbase + 2);
    ops[2] = gen_rtx_REG (V4BFmode, tabbase + 4);
    ops[3] = gen_rtx_REG (V4BFmode, tabbase + 6);
    output_asm_insn ("vld2.16\t{%P0, %P1, %P2, %P3}, %A4", ops);
    return "";
  }
  [(set_attr "type" "neon_load2_all_lanes_q")]
)

;; Expander with no preparation code: a UNSPEC_VST2 store of a TI
;; register, iterated over the VDX modes through the UNSPEC_VSTRUCTDUMMY
;; unspec.
(define_expand "vec_store_lanesti<mode>"
  [(set (match_operand:TI 0 "neon_struct_operand")
        (unspec:TI [(match_operand:TI 1 "s_register_operand")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST2))]
  "TARGET_NEON")

;; Two-register structure store from a TI register.  For 64-bit elements
;; the output is vst1.64 instead of vst2.
;;
;; Both arms store whole registers, so the "type" attribute uses the
;; two-register store types.  This mirrors the neon_load1_2reg /
;; neon_load2_2reg pair used by the TI neon_vld2<mode> pattern; the
;; one-lane store type belongs to neon_vst2_lane<mode> only.
(define_insn "neon_vst2<mode>"
  [(set (match_operand:TI 0 "neon_struct_operand" "=Um")
        (unspec:TI [(match_operand:TI 1 "s_register_operand" "w")
                    (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST2))]
  "TARGET_NEON"
{
  if (<V_sz_elem> == 64)
    return "vst1.64\t%h1, %A0";
  else
    return "vst2.<V_sz_elem>\t%h1, %A0";
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_store1_2reg<q>")
                    (const_string "neon_store2_2reg<q>")))]
)

;; Expander with no preparation code: a UNSPEC_VST2 store of an OI
;; register, iterated over the VQ2 modes through the UNSPEC_VSTRUCTDUMMY
;; unspec.
(define_expand "vec_store_lanesoi<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand")
        (unspec:OI [(match_operand:OI 1 "s_register_operand")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST2))]
  "TARGET_NEON")

;; vst2 from an OI register, iterated over the VQ2BF modes.
(define_insn "neon_vst2<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
                    (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST2))]
  "TARGET_NEON"
  "vst2.<V_sz_elem>\t%h1, %A0"
  [(set_attr "type" "neon_store2_4reg<q>")]
)

;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;;
;; Single-lane vst2 from a TI register.  Operand 2 is the lane index,
;; converted with NEON_ENDIAN_LANE_N before printing.  The two registers
;; named in the template are at hard register numbers base and base + 2.
(define_insn "neon_vst2_lane<mode>"
  [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_two_elem>
          [(match_operand:TI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
  int regno = REGNO (operands[1]);
  rtx ops[4];
  /* ops[0] is the memory operand, ops[1] and ops[2] the two source
     registers and ops[3] the converted lane number.  */
  ops[0] = operands[0];
  ops[1] = gen_rtx_REG (DImode, regno);
  ops[2] = gen_rtx_REG (DImode, regno + 2);
  ops[3] = GEN_INT (lane);
  output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
  return "";
}
  [(set_attr "type" "neon_store2_one_lane<q>")]
)

;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;; Q-register lane store: write lane operand 2 of the two-vector group in
;; OImode operand 1 to memory operand 0.  The two D registers named in the
;; instruction are four register numbers apart; a lane in the upper half
;; of the vector uses the registers two numbers higher and a lane index
;; reduced by half the vector length.
(define_insn "neon_vst2_lane<mode>"
  [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_two_elem>
          [(match_operand:OI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  const HOST_WIDE_INT half = nunits / 2;
  int base = REGNO (operands[1]);

  if (lane >= half)
    {
      base += 2;
      lane -= half;
    }

  rtx xops[4] = { operands[0],
                  gen_rtx_REG (DImode, base),
                  gen_rtx_REG (DImode, base + 4),
                  GEN_INT (lane) };
  output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", xops);
  return "";
}
  [(set_attr "type" "neon_store2_one_lane<q>")]
)

;; Named expander for a three-vector load-lanes operation on D-register
;; modes.  There is no preparation code; the UNSPEC_VLD3 set in the
;; template is generated as written.
(define_expand "vec_load_lanesei<mode>"
  [(set (match_operand:EI 0 "s_register_operand")
        (unspec:EI [(match_operand:EI 1 "neon_struct_operand")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3))]
  "TARGET_NEON")

;; Load three D registers (EImode operand 0) from memory operand 1.  For
;; 64-bit elements the register list is loaded with vld1.64; every other
;; element size uses vld3.
(define_insn "neon_vld3<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um")
                    (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3))]
  "TARGET_NEON"
{
  return (<V_sz_elem> == 64
          ? "vld1.64\t%h0, %A1"
          : "vld3.<V_sz_elem>\t%h0, %A1");
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_load1_3reg<q>")
                    (const_string "neon_load3_3reg<q>")))]
)

;; Three-vector load-lanes expander for Q-register modes; it forwards both
;; operands to the neon_vld3 expander for the same mode.
(define_expand "vec_load_lanesci<mode>"
  [(match_operand:CI 0 "s_register_operand")
   (match_operand:CI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_neon_vld3<mode> (dest, src));
  DONE;
})

;; Expand a Q-register vld3 into two instructions, each reading one
;; EImode-sized piece of the CImode memory operand: neon_vld3qa on the
;; first piece, then neon_vld3qb on the piece that follows it.
(define_expand "neon_vld3<mode>"
  [(match_operand:CI 0 "s_register_operand")
   (match_operand:CI 1 "neon_struct_operand")
   (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx first = adjust_address (operands[1], EImode, 0);
  emit_insn (gen_neon_vld3qa<mode> (operands[0], first));

  rtx second = adjust_address (first, EImode, GET_MODE_SIZE (EImode));
  emit_insn (gen_neon_vld3qb<mode> (operands[0], second, operands[0]));
  DONE;
})

;; First half of a Q-register vld3: loads the three D registers at register
;; numbers +0, +4 and +8 from the start of CImode operand 0.
(define_insn "neon_vld3qa<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
                    (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3A))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[0]);
  rtx xops[4] = { gen_rtx_REG (DImode, base),
                  gen_rtx_REG (DImode, base + 4),
                  gen_rtx_REG (DImode, base + 8),
                  operands[1] };

  output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", xops);
  return "";
}
  [(set_attr "type" "neon_load3_3reg<q>")]
)

;; Second half of a Q-register vld3: loads the three D registers at
;; register numbers +2, +6 and +10.  Operand 2 carries the register group
;; as written by the first half and is tied to operand 0 by the "0"
;; constraint.
(define_insn "neon_vld3qb<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
                    (match_operand:CI 2 "s_register_operand" "0")
                    (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3B))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[0]);
  rtx xops[4] = { gen_rtx_REG (DImode, base + 2),
                  gen_rtx_REG (DImode, base + 6),
                  gen_rtx_REG (DImode, base + 10),
                  operands[1] };

  output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", xops);
  return "";
}
  [(set_attr "type" "neon_load3_3reg<q>")]
)

;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;; Load one three-element structure into lane operand 3 of the three D
;; registers in EImode operand 0.  The memory operand is printed with a
;; plain %3 rather than the %A form used by the two- and four-element
;; patterns.
(define_insn "neon_vld3_lane<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:EI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  const int base = REGNO (operands[0]);
  rtx xops[5] = { gen_rtx_REG (DImode, base),
                  gen_rtx_REG (DImode, base + 2),
                  gen_rtx_REG (DImode, base + 4),
                  operands[1],
                  GEN_INT (lane) };

  output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
                   xops);
  return "";
}
  [(set_attr "type" "neon_load3_one_lane<q>")]
)

;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;; Q-register lane load of a three-element structure into CImode operand 0.
;; The three D registers named in the instruction are four register
;; numbers apart; a lane in the upper half of the vector uses the
;; registers two numbers higher and a lane index reduced by half the
;; vector length.  The memory operand is printed with a plain %3.
(define_insn "neon_vld3_lane<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:CI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
  HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
  int regno = REGNO (operands[0]);
  rtx ops[5];
  if (lane >= max / 2)
    {
      lane -= max / 2;
      regno += 2;
    }
  ops[0] = gen_rtx_REG (DImode, regno);
  ops[1] = gen_rtx_REG (DImode, regno + 4);
  ops[2] = gen_rtx_REG (DImode, regno + 8);
  ops[3] = operands[1];
  ops[4] = GEN_INT (lane);
  output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
                   ops);
  return "";
}
  [(set_attr "type" "neon_load3_one_lane<q>")]
)

;; Load one three-element structure and replicate it to all lanes of the
;; three D registers of EImode operand 0.  Modes with a single element per
;; vector are emitted as a vld1 of the register list instead, and the
;; "type" attribute follows the same split.
(define_insn "neon_vld3_dup<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
                    (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3_DUP))]
  "TARGET_NEON"
{
  if (GET_MODE_NUNITS (<MODE>mode) > 1)
    {
      int regno = REGNO (operands[0]);
      rtx ops[4];
      ops[0] = gen_rtx_REG (DImode, regno);
      ops[1] = gen_rtx_REG (DImode, regno + 2);
      ops[2] = gen_rtx_REG (DImode, regno + 4);
      ops[3] = operands[1];
      output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %3", ops);
      return "";
    }
  else
    return "vld1.<V_sz_elem>\t%h0, %A1";
}
  [(set (attr "type")
      (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
                    (const_string "neon_load3_all_lanes<q>")
                    (const_string "neon_load1_1reg<q>")))])

;; V8BF variant of the three-element dup load; operand 0 is CImode and the
;; pattern is only enabled for TARGET_BF16_SIMD.  It is tagged with
;; UNSPEC_VLD3_DUP, the same unspec as neon_vld3_dup<mode>.
;; NOTE(review): only the D registers at +0, +2 and +4 of the CImode group
;; are named in the instruction -- confirm against the intrinsic's
;; expected register layout.
(define_insn "neon_vld3_dupv8bf"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI [(match_operand:V2BF 1 "neon_struct_operand" "Um")
                    (unspec:V8BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3_DUP))]
  "TARGET_BF16_SIMD"
  {
    rtx ops[4];
    int tabbase = REGNO (operands[0]);

    ops[3] = operands[1];
    ops[0] = gen_rtx_REG (V4BFmode, tabbase);
    ops[1] = gen_rtx_REG (V4BFmode, tabbase + 2);
    ops[2] = gen_rtx_REG (V4BFmode, tabbase + 4);
    output_asm_insn ("vld3.16\t{%P0[], %P1[], %P2[]}, %A3", ops);
    return "";
  }
  [(set_attr "type" "neon_load3_all_lanes_q")]
)

;; Named expander for a three-vector store-lanes operation on D-register
;; modes; it has no preparation code and produces the UNSPEC_VST3 set in
;; its template.
(define_expand "vec_store_lanesei<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand")
        (unspec:EI [(match_operand:EI 1 "s_register_operand")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST3))]
  "TARGET_NEON")

;; Store the three D registers of EImode operand 1 to memory operand 0.
;; 64-bit element modes are emitted as vst1.64.  The non-64-bit case is a
;; store of whole registers, so it is typed as a three-register vst3, the
;; same type neon_vst3qa uses.
(define_insn "neon_vst3<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
        (unspec:EI [(match_operand:EI 1 "s_register_operand" "w")
                    (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST3))]
  "TARGET_NEON"
{
  if (<V_sz_elem> == 64)
    return "vst1.64\t%h1, %A0";
  else
    return "vst3.<V_sz_elem>\t%h1, %A0";
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_store1_3reg<q>")
                    (const_string "neon_store3_3reg<q>")))])

;; Three-vector store-lanes expander for Q-register modes; it forwards
;; both operands to the neon_vst3 expander for the same mode.
(define_expand "vec_store_lanesci<mode>"
  [(match_operand:CI 0 "neon_struct_operand")
   (match_operand:CI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  emit_insn (gen_neon_vst3<mode> (operands[0], operands[1]));
  DONE;
})

;; Expand a Q-register vst3 into two instructions, each writing one
;; EImode-sized piece of the CImode memory operand: neon_vst3qa to the
;; first piece, then neon_vst3qb to the piece that follows it.
(define_expand "neon_vst3<mode>"
  [(match_operand:CI 0 "neon_struct_operand")
   (match_operand:CI 1 "s_register_operand")
   (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx mem;

  mem = adjust_address (operands[0], EImode, 0);
  emit_insn (gen_neon_vst3qa<mode> (mem, operands[1]));
  mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
  emit_insn (gen_neon_vst3qb<mode> (mem, operands[1]));
  DONE;
})

;; First half of a Q-register vst3: stores the D registers at register
;; numbers +0, +4 and +8 from the start of CImode operand 1.
(define_insn "neon_vst3qa<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
        (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
                    (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST3A))]
  "TARGET_NEON"
{
  int regno = REGNO (operands[1]);
  rtx ops[4];
  ops[0] = operands[0];
  ops[1] = gen_rtx_REG (DImode, regno);
  ops[2] = gen_rtx_REG (DImode, regno + 4);
  ops[3] = gen_rtx_REG (DImode, regno + 8);
  output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
  return "";
}
  [(set_attr "type" "neon_store3_3reg<q>")]
)

;; Second half of a Q-register vst3: stores the D registers at register
;; numbers +2, +6 and +10.
(define_insn "neon_vst3qb<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
        (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
                    (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST3B))]
  "TARGET_NEON"
{
  int regno = REGNO (operands[1]);
  rtx ops[4];
  ops[0] = operands[0];
  ops[1] = gen_rtx_REG (DImode, regno + 2);
  ops[2] = gen_rtx_REG (DImode, regno + 6);
  ops[3] = gen_rtx_REG (DImode, regno + 10);
  output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
  return "";
}
  [(set_attr "type" "neon_store3_3reg<q>")]
)

;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;; Store lane operand 2 of the three D registers in EImode operand 1 to
;; memory operand 0 as one three-element structure.  The memory operand is
;; printed with a plain %0.
(define_insn "neon_vst3_lane<mode>"
  [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_three_elem>
          [(match_operand:EI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST3_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  const int base = REGNO (operands[1]);
  rtx xops[5] = { operands[0],
                  gen_rtx_REG (DImode, base),
                  gen_rtx_REG (DImode, base + 2),
                  gen_rtx_REG (DImode, base + 4),
                  GEN_INT (lane) };

  output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
                   xops);
  return "";
}
  [(set_attr "type" "neon_store3_one_lane<q>")]
)

;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;; Q-register form: operand 1 is CImode.  The three D registers named in
;; the instruction are four register numbers apart; a lane in the upper
;; half of the vector uses the registers two numbers higher and a lane
;; index reduced by half the vector length.
(define_insn "neon_vst3_lane<mode>"
  [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_three_elem>
          [(match_operand:CI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST3_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  const HOST_WIDE_INT half = nunits / 2;
  int base = REGNO (operands[1]);

  if (lane >= half)
    {
      base += 2;
      lane -= half;
    }

  rtx xops[5] = { operands[0],
                  gen_rtx_REG (DImode, base),
                  gen_rtx_REG (DImode, base + 4),
                  gen_rtx_REG (DImode, base + 8),
                  GEN_INT (lane) };
  output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
                   xops);
  return "";
}
  [(set_attr "type" "neon_store3_one_lane<q>")]
)

;; Named expander for a four-vector load-lanes operation on D-register
;; modes.  There is no preparation code; the UNSPEC_VLD4 set in the
;; template is generated as written.
(define_expand "vec_load_lanesoi<mode>"
  [(set (match_operand:OI 0 "s_register_operand")
        (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4))]
  "TARGET_NEON")

;; Load four D registers (OImode operand 0) from memory operand 1.  For
;; 64-bit elements the register list is loaded with vld1.64; every other
;; element size uses vld4.
(define_insn "neon_vld4<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
                    (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4))]
  "TARGET_NEON"
{
  return (<V_sz_elem> == 64
          ? "vld1.64\t%h0, %A1"
          : "vld4.<V_sz_elem>\t%h0, %A1");
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_load1_4reg<q>")
                    (const_string "neon_load4_4reg<q>")))]
)

;; Four-vector load-lanes expander for Q-register modes; it forwards both
;; operands to the neon_vld4 expander for the same mode.
(define_expand "vec_load_lanesxi<mode>"
  [(match_operand:XI 0 "s_register_operand")
   (match_operand:XI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_neon_vld4<mode> (dest, src));
  DONE;
})

;; Expand a Q-register vld4 into two instructions, each reading one
;; OImode-sized piece of the XImode memory operand: neon_vld4qa on the
;; first piece, then neon_vld4qb on the piece that follows it.
(define_expand "neon_vld4<mode>"
  [(match_operand:XI 0 "s_register_operand")
   (match_operand:XI 1 "neon_struct_operand")
   (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx first = adjust_address (operands[1], OImode, 0);
  emit_insn (gen_neon_vld4qa<mode> (operands[0], first));

  rtx second = adjust_address (first, OImode, GET_MODE_SIZE (OImode));
  emit_insn (gen_neon_vld4qb<mode> (operands[0], second, operands[0]));
  DONE;
})

;; First half of a Q-register vld4: loads the four D registers at register
;; numbers +0, +4, +8 and +12 from the start of XImode operand 0.
(define_insn "neon_vld4qa<mode>"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
        (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
                    (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4A))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[0]);
  rtx xops[5] = { gen_rtx_REG (DImode, base),
                  gen_rtx_REG (DImode, base + 4),
                  gen_rtx_REG (DImode, base + 8),
                  gen_rtx_REG (DImode, base + 12),
                  operands[1] };

  output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", xops);
  return "";
}
  [(set_attr "type" "neon_load4_4reg<q>")]
)

;; Second half of a Q-register vld4: loads the four D registers at
;; register numbers +2, +6, +10 and +14.  Operand 2 carries the register
;; group as written by the first half and is tied to operand 0 by the "0"
;; constraint.
(define_insn "neon_vld4qb<mode>"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
        (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
                    (match_operand:XI 2 "s_register_operand" "0")
                    (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4B))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[0]);
  rtx xops[5] = { gen_rtx_REG (DImode, base + 2),
                  gen_rtx_REG (DImode, base + 6),
                  gen_rtx_REG (DImode, base + 10),
                  gen_rtx_REG (DImode, base + 14),
                  operands[1] };

  output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", xops);
  return "";
}
  [(set_attr "type" "neon_load4_4reg<q>")]
)

;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;; Load one four-element structure from memory operand 1 into lane
;; operand 3 of the four D registers in OImode operand 0.  Operand 2 is
;; the previous register contents, tied to operand 0 by the "0"
;; constraint.  The lane number is remapped through NEON_ENDIAN_LANE_N
;; before it is printed.
(define_insn "neon_vld4_lane<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:OI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  const int base = REGNO (operands[0]);
  /* Consecutive D registers are two register numbers apart.  */
  rtx xops[6] = { gen_rtx_REG (DImode, base),
                  gen_rtx_REG (DImode, base + 2),
                  gen_rtx_REG (DImode, base + 4),
                  gen_rtx_REG (DImode, base + 6),
                  operands[1],
                  GEN_INT (lane) };

  output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
                   xops);
  return "";
}
  [(set_attr "type" "neon_load4_one_lane<q>")]
)

;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;; Q-register lane load of a four-element structure into XImode operand 0.
;; The four D registers named in the instruction are four register numbers
;; apart; a lane in the upper half of the vector uses the registers two
;; numbers higher and a lane index reduced by half the vector length.
(define_insn "neon_vld4_lane<mode>"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
        (unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:XI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
  HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
  int regno = REGNO (operands[0]);
  rtx ops[6];
  if (lane >= max / 2)
    {
      lane -= max / 2;
      regno += 2;
    }
  ops[0] = gen_rtx_REG (DImode, regno);
  ops[1] = gen_rtx_REG (DImode, regno + 4);
  ops[2] = gen_rtx_REG (DImode, regno + 8);
  ops[3] = gen_rtx_REG (DImode, regno + 12);
  ops[4] = operands[1];
  ops[5] = GEN_INT (lane);
  output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
                   ops);
  return "";
}
  [(set_attr "type" "neon_load4_one_lane<q>")]
)

;; Load one four-element structure and replicate it to all lanes of the
;; four D registers of OImode operand 0.  Modes with a single element per
;; vector are emitted as a vld1 of the register list instead, and the
;; "type" attribute follows the same split.
(define_insn "neon_vld4_dup<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
                    (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4_DUP))]
  "TARGET_NEON"
{
  if (GET_MODE_NUNITS (<MODE>mode) > 1)
    {
      int regno = REGNO (operands[0]);
      rtx ops[5];
      ops[0] = gen_rtx_REG (DImode, regno);
      ops[1] = gen_rtx_REG (DImode, regno + 2);
      ops[2] = gen_rtx_REG (DImode, regno + 4);
      ops[3] = gen_rtx_REG (DImode, regno + 6);
      ops[4] = operands[1];
      output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4",
                       ops);
      return "";
    }
  else
    return "vld1.<V_sz_elem>\t%h0, %A1";
}
  [(set (attr "type")
      (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
                    (const_string "neon_load4_all_lanes<q>")
                    (const_string "neon_load1_1reg<q>")))]
)

;; V8BF variant of the four-element dup load; operand 0 is XImode and the
;; pattern is only enabled for TARGET_BF16_SIMD.  It is tagged with
;; UNSPEC_VLD4_DUP, the same unspec as neon_vld4_dup<mode>.
;; NOTE(review): only the D registers at +0, +2, +4 and +6 of the XImode
;; group are named in the instruction -- confirm against the intrinsic's
;; expected register layout.
(define_insn "neon_vld4_dupv8bf"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
        (unspec:XI [(match_operand:V2BF 1 "neon_struct_operand" "Um")
                    (unspec:V8BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4_DUP))]
  "TARGET_BF16_SIMD"
  {
    rtx ops[5];
    int tabbase = REGNO (operands[0]);

    ops[4] = operands[1];
    ops[0] = gen_rtx_REG (V4BFmode, tabbase);
    ops[1] = gen_rtx_REG (V4BFmode, tabbase + 2);
    ops[2] = gen_rtx_REG (V4BFmode, tabbase + 4);
    ops[3] = gen_rtx_REG (V4BFmode, tabbase + 6);
    output_asm_insn ("vld4.16\t{%P0[], %P1[], %P2[], %P3[]}, %A4", ops);
    return "";
  }
  [(set_attr "type" "neon_load4_all_lanes_q")]
)

;; Named expander for a four-vector store-lanes operation on D-register
;; modes; it has no preparation code and produces the UNSPEC_VST4 set in
;; its template.
(define_expand "vec_store_lanesoi<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand")
        (unspec:OI [(match_operand:OI 1 "s_register_operand")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST4))]
  "TARGET_NEON")

;; Store the four D registers of OImode operand 1 to memory operand 0.
;; 64-bit element modes are emitted as vst1.64, everything else as vst4.
(define_insn "neon_vst4<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
                    (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST4))]
  "TARGET_NEON"
{
  if (<V_sz_elem> == 64)
    return "vst1.64\t%h1, %A0";
  else
    return "vst4.<V_sz_elem>\t%h1, %A0";
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_store1_4reg<q>")
                    (const_string "neon_store4_4reg<q>")))]
)

;; Four-vector store-lanes expander for Q-register modes; it forwards both
;; operands to the neon_vst4 expander for the same mode.
(define_expand "vec_store_lanesxi<mode>"
  [(match_operand:XI 0 "neon_struct_operand")
   (match_operand:XI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  emit_insn (gen_neon_vst4<mode> (operands[0], operands[1]));
  DONE;
})

;; Expand a Q-register vst4 into two instructions, each writing one
;; OImode-sized piece of the XImode memory operand: neon_vst4qa to the
;; first piece, then neon_vst4qb to the piece that follows it.
(define_expand "neon_vst4<mode>"
  [(match_operand:XI 0 "neon_struct_operand")
   (match_operand:XI 1 "s_register_operand")
   (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx mem;

  mem = adjust_address (operands[0], OImode, 0);
  emit_insn (gen_neon_vst4qa<mode> (mem, operands[1]));
  mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
  emit_insn (gen_neon_vst4qb<mode> (mem, operands[1]));
  DONE;
})

;; First half of a Q-register vst4: stores the D registers at register
;; numbers +0, +4, +8 and +12 from the start of XImode operand 1.
(define_insn "neon_vst4qa<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
                    (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST4A))]
  "TARGET_NEON"
{
  int regno = REGNO (operands[1]);
  rtx ops[5];
  ops[0] = operands[0];
  ops[1] = gen_rtx_REG (DImode, regno);
  ops[2] = gen_rtx_REG (DImode, regno + 4);
  ops[3] = gen_rtx_REG (DImode, regno + 8);
  ops[4] = gen_rtx_REG (DImode, regno + 12);
  output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
  return "";
}
  [(set_attr "type" "neon_store4_4reg<q>")]
)

;; Second half of a Q-register vst4: stores the D registers at register
;; numbers +2, +6, +10 and +14.
(define_insn "neon_vst4qb<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
                    (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST4B))]
  "TARGET_NEON"
{
  int regno = REGNO (operands[1]);
  rtx ops[5];
  ops[0] = operands[0];
  ops[1] = gen_rtx_REG (DImode, regno + 2);
  ops[2] = gen_rtx_REG (DImode, regno + 6);
  ops[3] = gen_rtx_REG (DImode, regno + 10);
  ops[4] = gen_rtx_REG (DImode, regno + 14);
  output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
  return "";
}
  [(set_attr "type" "neon_store4_4reg<q>")]
)

;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;; Store lane operand 2 of the four D registers in OImode operand 1 to
;; memory operand 0 as one four-element structure.  The lane number is
;; remapped through NEON_ENDIAN_LANE_N before it is printed.
(define_insn "neon_vst4_lane<mode>"
  [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_four_elem>
          [(match_operand:OI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST4_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  const int base = REGNO (operands[1]);
  /* Consecutive D registers are two register numbers apart.  */
  rtx xops[6] = { operands[0],
                  gen_rtx_REG (DImode, base),
                  gen_rtx_REG (DImode, base + 2),
                  gen_rtx_REG (DImode, base + 4),
                  gen_rtx_REG (DImode, base + 6),
                  GEN_INT (lane) };

  output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
                   xops);
  return "";
}
  [(set_attr "type" "neon_store4_one_lane<q>")]
)

;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;; Q-register lane store of a four-element structure from XImode
;; operand 1.  The four D registers named in the instruction are four
;; register numbers apart; a lane in the upper half of the vector uses the
;; registers two numbers higher and a lane index reduced by half the
;; vector length.  Only one lane is written, so the insn carries the
;; one-lane store type like the other lane stores.
(define_insn "neon_vst4_lane<mode>"
  [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_four_elem>
          [(match_operand:XI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST4_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
  HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
  int regno = REGNO (operands[1]);
  rtx ops[6];
  if (lane >= max / 2)
    {
      lane -= max / 2;
      regno += 2;
    }
  ops[0] = operands[0];
  ops[1] = gen_rtx_REG (DImode, regno);
  ops[2] = gen_rtx_REG (DImode, regno + 4);
  ops[3] = gen_rtx_REG (DImode, regno + 8);
  ops[4] = gen_rtx_REG (DImode, regno + 12);
  ops[5] = GEN_INT (lane);
  output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
                   ops);
  return "";
}
  [(set_attr "type" "neon_store4_one_lane<q>")]
)

;; Extend (sign or zero, per the SE iterator) the half of operand 1 that
;; the vect_par_constant_low parallel in operand 2 selects, using vmovl on
;; the %e half of the source.  Little-endian only.
(define_insn "neon_vec_unpack<US>_lo_<mode>"
  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
        (SE:<V_unpack> (vec_select:<V_HALF>
                         (match_operand:VU 1 "register_operand" "w")
                         (match_operand:VU 2 "vect_par_constant_low" ""))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmovl.<US><V_sz_elem> %q0, %e1"
  [(set_attr "type" "neon_shift_imm_long")]
)

;; As above for the half selected by a vect_par_constant_high parallel,
;; using the %f half of the source.
(define_insn "neon_vec_unpack<US>_hi_<mode>"
  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
        (SE:<V_unpack> (vec_select:<V_HALF>
                         (match_operand:VU 1 "register_operand" "w")
                         (match_operand:VU 2 "vect_par_constant_high" ""))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmovl.<US><V_sz_elem> %q0, %f1"
  [(set_attr "type" "neon_shift_imm_long")]
)

;; Standard-name expander: builds a PARALLEL of the indices
;; nunits/2 .. nunits-1 and hands it to neon_vec_unpack<US>_hi_<mode>.
(define_expand "vec_unpack<US>_hi_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  {
    rtvec v = rtvec_alloc (<V_mode_nunits>/2)  ;
    rtx t1;
    int i;
    for (i = 0; i < (<V_mode_nunits>/2); i++)
      RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i);

    t1 = gen_rtx_PARALLEL (<MODE>mode, v);
    emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0],
                                                 operands[1],
                                                 t1));
    DONE;
  }
)

;; Standard-name expander: builds a PARALLEL of the indices
;; 0 .. nunits/2-1 and hands it to neon_vec_unpack<US>_lo_<mode>.
(define_expand "vec_unpack<US>_lo_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  {
    rtvec v = rtvec_alloc (<V_mode_nunits>/2)  ;
    rtx t1;
    int i;
    for (i = 0; i < (<V_mode_nunits>/2) ; i++)
      RTVEC_ELT (v, i) = GEN_INT (i);
    t1 = gen_rtx_PARALLEL (<MODE>mode, v);
    emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0],
                                                 operands[1],
                                                 t1));
    DONE;
  }
)

;; Widening multiply of the halves of operands 1 and 3 selected by the
;; vect_par_constant_low parallel in operand 2 (shared through match_dup),
;; emitted as vmull on the %e halves.  Little-endian only.
(define_insn "neon_vec_<US>mult_lo_<mode>"
  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
        (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
                           (match_operand:VU 1 "register_operand" "w")
                           (match_operand:VU 2 "vect_par_constant_low" "")))
                         (SE:<V_unpack> (vec_select:<V_HALF>
                           (match_operand:VU 3 "register_operand" "w")
                           (match_dup 2)))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmull.<US><V_sz_elem> %q0, %e1, %e3"
  [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
)

;; Standard-name expander: builds the PARALLEL of indices 0 .. nunits/2-1
;; and emits neon_vec_<US>mult_lo_<mode> on operands 1 and 2.
(define_expand "vec_widen_<US>mult_lo_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
   (SE:<V_unpack> (match_operand:VU 2 "register_operand"))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  {
    rtvec v = rtvec_alloc (<V_mode_nunits>/2)  ;
    rtx t1;
    int i;
    for (i = 0; i < (<V_mode_nunits>/2) ; i++)
      RTVEC_ELT (v, i) = GEN_INT (i);
    t1 = gen_rtx_PARALLEL (<MODE>mode, v);

    emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0],
                                               operands[1],
                                               t1,
                                               operands[2]));
    DONE;
  }
)

;; Widening multiply of the halves selected by a vect_par_constant_high
;; parallel, emitted as vmull on the %f halves.  Little-endian only.
(define_insn "neon_vec_<US>mult_hi_<mode>"
  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
        (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
                           (match_operand:VU 1 "register_operand" "w")
                           (match_operand:VU 2 "vect_par_constant_high" "")))
                         (SE:<V_unpack> (vec_select:<V_HALF>
                           (match_operand:VU 3 "register_operand" "w")
                           (match_dup 2)))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmull.<US><V_sz_elem> %q0, %f1, %f3"
  [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
)

;; Standard-name expander: builds the PARALLEL of indices
;; nunits/2 .. nunits-1 and emits neon_vec_<US>mult_hi_<mode>.
(define_expand "vec_widen_<US>mult_hi_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
   (SE:<V_unpack> (match_operand:VU 2 "register_operand"))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  {
    rtvec v = rtvec_alloc (<V_mode_nunits>/2)  ;
    rtx t1;
    int i;
    for (i = 0; i < (<V_mode_nunits>/2) ; i++)
      RTVEC_ELT (v, i) = GEN_INT (<V_mode_nunits>/2 + i);
    t1 = gen_rtx_PARALLEL (<MODE>mode, v);

    emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0],
                                               operands[1],
                                               t1,
                                               operands[2]));
    DONE;

  }
)

;; Extend the result of shifting operand 1 left by the constant in
;; operand 2, emitted as a single vshll.
(define_insn "neon_vec_<US>shiftl_<mode>"
  [(set (match_operand:<V_widen> 0 "register_operand" "=w")
        (SE:<V_widen> (ashift:VW (match_operand:VW 1 "register_operand" "w")
          (match_operand:<V_innermode> 2 "const_neon_scalar_shift_amount_operand" ""))))]
  "TARGET_NEON"
{
  return "vshll.<US><V_sz_elem> %q0, %P1, %2";
}
  [(set_attr "type" "neon_shift_imm_long")]
)

;; Standard-name expander: applies the half-width shiftl pattern to the
;; subreg of operand 1 at byte offset 0.
(define_expand "vec_widen_<US>shiftl_lo_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  {
    emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
                simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0),
                operands[2]));
    DONE;
  }
)

;; Standard-name expander: applies the half-width shiftl pattern to the
;; subreg of operand 1 at byte offset GET_MODE_SIZE of the half mode.
(define_expand "vec_widen_<US>shiftl_hi_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  {
    emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
                simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
                                     GET_MODE_SIZE (<V_HALF>mode)),
                operands[2]));
    DONE;
  }
)

;; Vectorize for non-neon-quad case
;; Extend a whole D-register vector (operand 1) into its widened mode with
;; vmovl.
(define_insn "neon_unpack<US>_<mode>"
  [(set (match_operand:<V_widen> 0 "register_operand" "=w")
        (SE:<V_widen> (match_operand:VDI 1 "register_operand" "w")))]
  "TARGET_NEON"
  "vmovl.<US><V_sz_elem> %q0, %P1"
  [(set_attr "type" "neon_move")]
)

;; Widen operand 1 into a fresh register of the widened mode, then copy
;; the low half of that register to operand 0 with neon_vget_low.
(define_expand "vec_unpack<US>_lo_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand")
   (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
  "TARGET_NEON"
{
  rtx tmpreg = gen_reg_rtx (<V_widen>mode);
  emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
  emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));

  DONE;
}
)

;; Widen operand 1 into a fresh register of the widened mode, then copy
;; the high half of that register to operand 0 with neon_vget_high.
(define_expand "vec_unpack<US>_hi_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand")
   (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
  "TARGET_NEON"
{
  rtx tmpreg = gen_reg_rtx (<V_widen>mode);
  emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
  emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));

  DONE;
}
)

;; Widening multiply of two whole D-register vectors, emitted as vmull.
(define_insn "neon_vec_<US>mult_<mode>"
  [(set (match_operand:<V_widen> 0 "register_operand" "=w")
        (mult:<V_widen> (SE:<V_widen>
                          (match_operand:VDI 1 "register_operand" "w"))
                        (SE:<V_widen>
                          (match_operand:VDI 2 "register_operand" "w"))))]
  "TARGET_NEON"
  "vmull.<US><V_sz_elem> %q0, %P1, %P2"
  [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
)

;; Multiply into a fresh widened register, then copy its high half to
;; operand 0.
(define_expand "vec_widen_<US>mult_hi_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
   (SE:<V_double_width> (match_operand:VDI 2 "register_operand"))]
  "TARGET_NEON"
  {
    rtx tmpreg = gen_reg_rtx (<V_widen>mode);
    emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
    emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));

    DONE;

  }
)

;; Multiply into a fresh widened register, then copy its low half to
;; operand 0.
(define_expand "vec_widen_<US>mult_lo_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
   (SE:<V_double_width> (match_operand:VDI 2 "register_operand"))]
  "TARGET_NEON"
  {
    rtx tmpreg = gen_reg_rtx (<V_widen>mode);
    emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
    emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));

    DONE;

  }
)

;; Widening shift into a fresh widened register, then copy its high half
;; to operand 0.
(define_expand "vec_widen_<US>shiftl_hi_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON"
  {
    rtx tmpreg = gen_reg_rtx (<V_widen>mode);
    emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
    emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));

    DONE;
  }
)

;; Widening shift into a fresh widened register, then copy its low half
;; to operand 0.
(define_expand "vec_widen_<US>shiftl_lo_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON"
  {
    rtx tmpreg = gen_reg_rtx (<V_widen>mode);
    emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
    emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));

    DONE;
  }
)

; FIXME: These instruction patterns can't be used safely in big-endian mode
; because the ordering of vector elements in Q registers is different from what
; the semantics of the instructions require.

;; Narrow operands 1 and 2 and concatenate the results: two vmovn
;; instructions write the %e and %f halves of operand 0.  The output is
;; marked earlyclobber ("=&w") and the insn length is 8.
(define_insn "vec_pack_trunc_<mode>"
  [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
        (vec_concat:<V_narrow_pack>
          (truncate:<V_narrow>
            (match_operand:VN 1 "register_operand" "w"))
          (truncate:<V_narrow>
            (match_operand:VN 2 "register_operand" "w"))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmovn.i<V_sz_elem>\t%e0, %q1\;vmovn.i<V_sz_elem>\t%f0, %q2"
  [(set_attr "type" "multiple")
   (set_attr "length" "8")]
)

;; For the non-quad case.
;; Narrow a single Q-register vector into a D register with one vmovn.
(define_insn "neon_vec_pack_trunc_<mode>"
  [(set (match_operand:<V_narrow> 0 "register_operand" "=w")
        (truncate:<V_narrow> (match_operand:VN 1 "register_operand" "w")))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmovn.i<V_sz_elem>\t%P0, %q1"
  [(set_attr "type" "neon_move_narrow_q")]
)

;; Pack two D-register vectors: move operand 1 into the low half and
;; operand 2 into the high half of a fresh double-width register, then
;; narrow that register into operand 0.
(define_expand "vec_pack_trunc_<mode>"
  [(match_operand:<V_narrow_pack> 0 "register_operand")
   (match_operand:VSHFT 1 "register_operand")
   (match_operand:VSHFT 2 "register_operand")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  rtx tempreg = gen_reg_rtx (<V_DOUBLE>mode);

  emit_insn (gen_move_lo_quad_<V_double> (tempreg, operands[1]));
  emit_insn (gen_move_hi_quad_<V_double> (tempreg, operands[2]));
  emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg));
  DONE;
})

(define_insn "neon_vabd<mode>_2"
  [(set (match_operand:VF 0 "s_register_operand" "=w")
        (abs:VF (minus:VF (match_operand:VF 1 "s_register_operand" "w")
                          (match_operand:VF 2 "s_register_operand" "w"))))]
  "TARGET_NEON && 
flag_unsafe_math_optimizations" 6662 "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2" 6663 [(set_attr "type" "neon_fp_abd_s<q>")] 6664) 6665 6666(define_insn "neon_vabd<mode>_3" 6667 [(set (match_operand:VF 0 "s_register_operand" "=w") 6668 (abs:VF (unspec:VF [(match_operand:VF 1 "s_register_operand" "w") 6669 (match_operand:VF 2 "s_register_operand" "w")] 6670 UNSPEC_VSUB)))] 6671 "TARGET_NEON && flag_unsafe_math_optimizations" 6672 "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2" 6673 [(set_attr "type" "neon_fp_abd_s<q>")] 6674) 6675 6676(define_insn "neon_<sup>mmlav16qi" 6677 [(set (match_operand:V4SI 0 "register_operand" "=w") 6678 (plus:V4SI 6679 (unspec:V4SI [(match_operand:V16QI 2 "register_operand" "w") 6680 (match_operand:V16QI 3 "register_operand" "w")] MATMUL) 6681 (match_operand:V4SI 1 "register_operand" "0")))] 6682 "TARGET_I8MM" 6683 "v<sup>mmla.<mmla_sfx>\t%q0, %q2, %q3" 6684 [(set_attr "type" "neon_mla_s_q")] 6685) 6686 6687(define_insn "neon_vbfdot<VCVTF:mode>" 6688 [(set (match_operand:VCVTF 0 "register_operand" "=w") 6689 (plus:VCVTF (match_operand:VCVTF 1 "register_operand" "0") 6690 (unspec:VCVTF [ 6691 (match_operand:<VSF2BF> 2 "register_operand" "w") 6692 (match_operand:<VSF2BF> 3 "register_operand" "w")] 6693 UNSPEC_DOT_S)))] 6694 "TARGET_BF16_SIMD" 6695 "vdot.bf16\\t%<V_reg>0, %<V_reg>2, %<V_reg>3" 6696 [(set_attr "type" "neon_dot<q>")] 6697) 6698 6699(define_insn "neon_vbfdot_lanev4bf<VCVTF:mode>" 6700 [(set (match_operand:VCVTF 0 "register_operand" "=w") 6701 (plus:VCVTF (match_operand:VCVTF 1 "register_operand" "0") 6702 (unspec:VCVTF [ 6703 (match_operand:<VSF2BF> 2 "register_operand" "w") 6704 (match_operand:V4BF 3 "register_operand" "x") 6705 (match_operand:SI 4 "immediate_operand" "i")] 6706 UNSPEC_DOT_S)))] 6707 "TARGET_BF16_SIMD" 6708 "vdot.bf16\\t%<V_reg>0, %<V_reg>2, %P3[%c4]" 6709 [(set_attr "type" "neon_dot<q>")] 6710) 6711 6712(define_insn "neon_vbfdot_lanev8bf<VCVTF:mode>" 6713 [(set (match_operand:VCVTF 0 
"register_operand" "=w") 6714 (plus:VCVTF (match_operand:VCVTF 1 "register_operand" "0") 6715 (unspec:VCVTF [ 6716 (match_operand:<VSF2BF> 2 "register_operand" "w") 6717 (match_operand:V8BF 3 "register_operand" "x") 6718 (match_operand:SI 4 "immediate_operand" "i")] 6719 UNSPEC_DOT_S)))] 6720 "TARGET_BF16_SIMD" 6721 { 6722 int lane = INTVAL (operands[4]); 6723 int half = GET_MODE_NUNITS (GET_MODE (operands[3])) / 4; 6724 if (lane < half) 6725 return "vdot.bf16\\t%<V_reg>0, %<V_reg>2, %e3[%c4]"; 6726 else 6727 { 6728 operands[4] = GEN_INT (lane - half); 6729 return "vdot.bf16\\t%<V_reg>0, %<V_reg>2, %f3[%c4]"; 6730 } 6731 } 6732 [(set_attr "type" "neon_dot<q>")] 6733) 6734 6735(define_insn "neon_vbfcvtv4sf<VBFCVT:mode>" 6736 [(set (match_operand:VBFCVT 0 "register_operand" "=w") 6737 (unspec:VBFCVT [(match_operand:V4SF 1 "register_operand" "w")] 6738 UNSPEC_BFCVT))] 6739 "TARGET_BF16_SIMD" 6740 "vcvt.bf16.f32\\t%<V_bf_low>0, %q1" 6741 [(set_attr "type" "neon_fp_cvt_narrow_s_q")] 6742) 6743 6744(define_insn "neon_vbfcvtv4sf_highv8bf" 6745 [(set (match_operand:V8BF 0 "register_operand" "=w") 6746 (unspec:V8BF [(match_operand:V8BF 1 "register_operand" "0") 6747 (match_operand:V4SF 2 "register_operand" "w")] 6748 UNSPEC_BFCVT_HIGH))] 6749 "TARGET_BF16_SIMD" 6750 "vcvt.bf16.f32\\t%f0, %q2" 6751 [(set_attr "type" "neon_fp_cvt_narrow_s_q")] 6752) 6753 6754(define_insn "neon_vbfcvtsf" 6755 [(set (match_operand:BF 0 "register_operand" "=t") 6756 (unspec:BF [(match_operand:SF 1 "register_operand" "t")] 6757 UNSPEC_BFCVT))] 6758 "TARGET_BF16_FP" 6759 "vcvtb.bf16.f32\\t%0, %1" 6760 [(set_attr "type" "f_cvt")] 6761) 6762 6763(define_insn "neon_vbfcvt<VBFCVT:mode>" 6764 [(set (match_operand:V4SF 0 "register_operand" "=w") 6765 (unspec:V4SF [(match_operand:VBFCVT 1 "register_operand" "w")] 6766 UNSPEC_BFCVT))] 6767 "TARGET_BF16_SIMD" 6768 "vshll.u32\\t%q0, %<V_bf_low>1, #16" 6769 [(set_attr "type" "neon_shift_imm_q")] 6770) 6771 6772(define_insn "neon_vbfcvt_highv8bf" 6773 [(set 
(match_operand:V4SF 0 "register_operand" "=w") 6774 (unspec:V4SF [(match_operand:V8BF 1 "register_operand" "w")] 6775 UNSPEC_BFCVT_HIGH))] 6776 "TARGET_BF16_SIMD" 6777 "vshll.u32\\t%q0, %f1, #16" 6778 [(set_attr "type" "neon_shift_imm_q")] 6779) 6780 6781;; Convert a BF scalar operand to SF via VSHL. 6782;; VSHL doesn't accept 32-bit registers where the BF and SF scalar operands 6783;; would be allocated, therefore the operands must be converted to intermediate 6784;; vectors (i.e. V2SI) in order to apply 64-bit registers. 6785(define_expand "neon_vbfcvtbf" 6786 [(match_operand:SF 0 "register_operand") 6787 (unspec:SF [(match_operand:BF 1 "register_operand")] UNSPEC_BFCVT)] 6788 "TARGET_BF16_FP" 6789{ 6790 rtx op0 = gen_reg_rtx (V2SImode); 6791 rtx op1 = gen_reg_rtx (V2SImode); 6792 emit_insn (gen_neon_vbfcvtbf_cvtmodev2si (op1, operands[1])); 6793 emit_insn (gen_neon_vshl_nv2si (op0, op1, gen_int_mode(16, SImode))); 6794 emit_insn (gen_neon_vbfcvtbf_cvtmodesf (operands[0], op0)); 6795 DONE; 6796}) 6797 6798;; Convert BF mode to V2SI and V2SI to SF. 6799;; Implement this by allocating a 32-bit operand in the low half of a 64-bit 6800;; register indexed by a 32-bit sub-register number. 6801;; This will generate reloads but compiler can optimize out the moves. 6802;; Use 'x' constraint to guarantee the 32-bit sub-registers in an indexable 6803;; range so that to avoid extra moves. 
;; Mode-change helper: the input is tied to the output register and the
;; output template is empty.
(define_insn "neon_vbfcvtbf_cvtmode<mode>"
  [(set (match_operand:VBFCVTM 0 "register_operand" "=x")
        (unspec:VBFCVTM
          [(match_operand:<V_bf_cvt_m> 1 "register_operand" "0")]
          UNSPEC_BFCVT))]
  "TARGET_BF16_FP"
  ""
)

;; BF16 matrix multiply-accumulate into operand 1, which is tied to the
;; output.
(define_insn "neon_vmmlav8bf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (plus:V4SF
          (match_operand:V4SF 1 "register_operand" "0")
          (unspec:V4SF
            [(match_operand:V8BF 2 "register_operand" "w")
             (match_operand:V8BF 3 "register_operand" "w")]
            UNSPEC_BFMMLA)))]
  "TARGET_BF16_SIMD"
  "vmmla.bf16\\t%q0, %q2, %q3"
  [(set_attr "type" "neon_fp_mla_s_q")]
)

;; BF16 fused multiply-accumulate (vfma<bt>) on two V8BF inputs.  The
;; accumulator is tied to the output.
(define_insn "neon_vfma<bt>v8bf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (plus:V4SF
          (match_operand:V4SF 1 "register_operand" "0")
          (unspec:V4SF
            [(match_operand:V8BF 2 "register_operand" "w")
             (match_operand:V8BF 3 "register_operand" "w")]
            BF_MA)))]
  "TARGET_BF16_SIMD"
  "vfma<bt>.bf16\\t%q0, %q2, %q3"
  [(set_attr "type" "neon_fp_mla_s_q")]
)

;; By-lane form of the above: operand 4 selects a lane of the V4BF
;; operand 3.
(define_insn "neon_vfma<bt>_lanev8bf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (plus:V4SF
          (match_operand:V4SF 1 "register_operand" "0")
          (unspec:V4SF
            [(match_operand:V8BF 2 "register_operand" "w")
             (match_operand:V4BF 3 "register_operand" "x")
             (match_operand:SI 4 "const_int_operand" "n")]
            BF_MA)))]
  "TARGET_BF16_SIMD"
  "vfma<bt>.bf16\\t%q0, %q2, %P3[%c4]"
  [(set_attr "type" "neon_fp_mla_s_scalar_q")]
)

;; By-lane form taking a V8BF operand 3 and a lane in the range 0-7.
;; Lanes 0-3 go straight to neon_vfma<bt>_lanev8bf; lanes 4-7 first copy
;; the high part of operand 3 into a V4BF temporary and rebase the lane
;; by 4.
;; NOTE(review): for lanes 0-3 operand 3 is forwarded in V8BF although
;; neon_vfma<bt>_lanev8bf declares its operand 3 as V4BF -- confirm that
;; this mode mismatch is intended.
(define_expand "neon_vfma<bt>_laneqv8bf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (plus:V4SF
          (match_operand:V4SF 1 "register_operand" "0")
          (unspec:V4SF
            [(match_operand:V8BF 2 "register_operand" "w")
             (match_operand:V8BF 3 "register_operand" "x")
             (match_operand:SI 4 "const_int_operand" "n")]
            BF_MA)))]
  "TARGET_BF16_SIMD"
  {
    const int lane = INTVAL (operands[4]);
    gcc_assert (IN_RANGE (lane, 0, 7));

    /* Default to operand 3 as given; lanes 4-7 switch to a copy of its
       high part and a rebased lane number.  */
    rtx lane_src = operands[3];
    if (lane >= 4)
      {
        lane_src = gen_reg_rtx (V4BFmode);
        emit_insn (gen_neon_vget_highv8bf (lane_src, operands[3]));
        operands[4] = GEN_INT (lane - 4);
      }
    emit_insn (gen_neon_vfma<bt>_lanev8bf (operands[0], operands[1],
                                           operands[2], lane_src,
                                           operands[4]));
    DONE;
  }
  [(set_attr "type" "neon_fp_mla_s_scalar_q")]
)