xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/config/arm/neon.md (revision 8feb0f0b7eaff0608f8350bbfa3098827b4bb91b)
1;; ARM NEON coprocessor Machine Description
2;; Copyright (C) 2006-2020 Free Software Foundation, Inc.
3;; Written by CodeSourcery.
4;;
5;; This file is part of GCC.
6;;
7;; GCC is free software; you can redistribute it and/or modify it
8;; under the terms of the GNU General Public License as published by
9;; the Free Software Foundation; either version 3, or (at your option)
10;; any later version.
11;;
12;; GCC is distributed in the hope that it will be useful, but
13;; WITHOUT ANY WARRANTY; without even the implied warranty of
14;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15;; General Public License for more details.
16;;
17;; You should have received a copy of the GNU General Public License
18;; along with GCC; see the file COPYING3.  If not see
19;; <http://www.gnu.org/licenses/>.
20
21
;; Attribute used to permit string comparisons against <VQH_mnem> in
;; type attribute definitions.  Its permitted values are vadd, vmin and
;; vmax; the default value is vadd.
(define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd"))
25
;; Store a V8QI register (operand 1) to memory (operand 0) through
;; UNSPEC_UNALIGNED_STORE.  The assembly text is produced by
;; output_move_neon.
(define_insn "unaligned_storev8qi"
  [(set (match_operand:V8QI 0 "memory_operand" "=Un")
	(unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w")]
		     UNSPEC_UNALIGNED_STORE))]
  "TARGET_NEON"
{
  return output_move_neon (operands);
}
  [(set_attr "type" "neon_store1_1reg")])
35
;; Move pattern for the VDXMOV modes.  It is only active under TARGET_NEON
;; and only when at least one of the two operands is a register.  The
;; output code dispatches on the matched constraint alternative:
;;   0     register-to-register vmov
;;   1, 4  handled by output_move_neon
;;   2, 3  immediate moves (constraints Dm and Dn)
;;   5, 6  vmov between operand halves (%Q/%R) and a %P register
;;   9     emits "#" (the md convention for an insn that is split later)
;;   7, 8  everything else, handled by output_move_double
(define_insn "*neon_mov<mode>"
  [(set (match_operand:VDXMOV 0 "nonimmediate_operand"
	  "=w,Un,w, w, w,  ?r,?w,?r, ?Us,*r")
	(match_operand:VDXMOV 1 "general_operand"
	  " w,w, Dm,Dn,Uni, w, r, Usi,r,*r"))]
  "TARGET_NEON
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  switch (which_alternative)
    {
    case 0:
      return "vmov\t%P0, %P1  @ <mode>";

    case 1:
    case 4:
      return output_move_neon (operands);

    case 2:
    case 3:
      {
        /* The returned template must outlive this call, hence the static
           buffer.  */
        static char insn_text[40];
        int elt_width;
        int valid;

        /* simd_immediate_valid_for_move receives the addresses of
           operands[1] and elt_width and may update both.  */
        valid = simd_immediate_valid_for_move (operands[1], <MODE>mode,
                                               &operands[1], &elt_width);
        gcc_assert (valid != 0);

        /* A reported width of zero selects the fixed vmov.f32 form.  */
        if (elt_width == 0)
          return "vmov.f32\t%P0, %1  @ <mode>";

        sprintf (insn_text, "vmov.i%d\t%%P0, %%x1  @ <mode>", elt_width);
        return insn_text;
      }

    case 5:
      return "vmov\t%Q0, %R0, %P1  @ <mode>";

    case 6:
      return "vmov\t%P0, %Q1, %R1  @ <mode>";

    case 9:
      return "#";

    default:
      return output_move_double (operands, true, NULL);
    }
}
 [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\
                    neon_move<q>,neon_load1_1reg, neon_to_gp<q>,\
                    neon_from_gp<q>,neon_load1_2reg, neon_store1_2reg,\
		    multiple")
  (set_attr "length" "4,4,4,4,4,4,4,8,8,8")
  (set_attr "arm_pool_range"     "*,*,*,*,1020,*,*,1020,*,*")
  (set_attr "thumb2_pool_range"     "*,*,*,*,1018,*,*,1018,*,*")
  (set_attr "neg_pool_range" "*,*,*,*,1004,*,*,1004,*,*")])
82
;; Move pattern for the VQXMOV modes.  It is only active under TARGET_NEON
;; and only when at least one of the two operands is a register.  The
;; output code dispatches on the matched constraint alternative:
;;   0     register-to-register vmov
;;   1, 4  handled by output_move_neon
;;   2, 3  immediate moves (constraints Dm and DN)
;;   5, 6  a pair of vmov instructions, one per half (%e/%f) of the
;;         %q register
;;   7-9   everything else, handled by output_move_quad
(define_insn "*neon_mov<mode>"
  [(set (match_operand:VQXMOV 0 "nonimmediate_operand"
	  "=w,Un,w, w, w,  ?r,?w,?r,?r,  ?Us")
	(match_operand:VQXMOV 1 "general_operand"
	  " w,w, Dm,DN,Uni, w, r, r, Usi, r"))]
  "TARGET_NEON
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  switch (which_alternative)
    {
    case 0:
      return "vmov\t%q0, %q1  @ <mode>";

    case 1:
    case 4:
      return output_move_neon (operands);

    case 2:
    case 3:
      {
        /* The returned template must outlive this call, hence the static
           buffer.  */
        static char insn_text[40];
        int elt_width;
        int valid;

        /* simd_immediate_valid_for_move receives the addresses of
           operands[1] and elt_width and may update both.  */
        valid = simd_immediate_valid_for_move (operands[1], <MODE>mode,
                                               &operands[1], &elt_width);
        gcc_assert (valid != 0);

        /* A reported width of zero selects the fixed vmov.f32 form.  */
        if (elt_width == 0)
          return "vmov.f32\t%q0, %1  @ <mode>";

        sprintf (insn_text, "vmov.i%d\t%%q0, %%1  @ <mode>", elt_width);
        return insn_text;
      }

    case 5:
      return "vmov\t%Q0, %R0, %e1  @ <mode>\;vmov\t%J0, %K0, %f1";

    case 6:
      return "vmov\t%e0, %Q1, %R1  @ <mode>\;vmov\t%f0, %J1, %K1";

    default:
      return output_move_quad (operands);
    }
}
  [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_move_q,\
                     neon_move_q,neon_load2_2reg_q,neon_to_gp_q,\
                     neon_from_gp_q,mov_reg,neon_load1_4reg,neon_store1_4reg")
   (set_attr "length" "4,8,4,4,8,8,8,16,8,16")
   (set_attr "arm_pool_range" "*,*,*,*,1020,*,*,*,1020,*")
   (set_attr "thumb2_pool_range" "*,*,*,*,1018,*,*,*,1018,*")
   (set_attr "neg_pool_range" "*,*,*,*,996,*,*,*,996,*")])
127
/* We define these mov expanders to match the standard mov$a optab to prevent
   the mid-end from trying to do a subreg for these modes, which is the most
   inefficient way to expand the move.  Also, big-endian subregs aren't
   allowed for a subset of modes; see TARGET_CAN_CHANGE_MODE_CLASS.
   Without these RTL generation patterns the mid-end would attempt to take a
   subreg and may ICE if it can't.  */
134
;; TImode move expander, available under TARGET_NEON.  Both operands are
;; checked with aligned_operand (checking builds only).  While new pseudos
;; may still be created, a source headed for a non-register destination is
;; first forced into a register.
(define_expand "movti"
  [(set (match_operand:TI 0 "nonimmediate_operand")
	(match_operand:TI 1 "general_operand"))]
  "TARGET_NEON"
{
  gcc_checking_assert (aligned_operand (operands[0], TImode));
  gcc_checking_assert (aligned_operand (operands[1], TImode));

  /* Only a non-register destination needs its source in a register.  */
  if (can_create_pseudo_p () && !REG_P (operands[0]))
    operands[1] = force_reg (TImode, operands[1]);
})
148
;; Move expander for the VSTRUCT modes, available under TARGET_NEON or
;; TARGET_HAVE_MVE.  Both operands are checked with aligned_operand
;; (checking builds only).  While new pseudos may still be created, a source
;; headed for a non-register destination is first forced into a register.
(define_expand "mov<mode>"
  [(set (match_operand:VSTRUCT 0 "nonimmediate_operand")
	(match_operand:VSTRUCT 1 "general_operand"))]
  "TARGET_NEON || TARGET_HAVE_MVE"
{
  gcc_checking_assert (aligned_operand (operands[0], <MODE>mode));
  gcc_checking_assert (aligned_operand (operands[1], <MODE>mode));

  /* Only a non-register destination needs its source in a register.  */
  if (can_create_pseudo_p () && !REG_P (operands[0]))
    operands[1] = force_reg (<MODE>mode, operands[1]);
})
162
;; The mov<mode> patterns for the modes v8hf, v4hf, v4bf and v8bf are split into
;; two groups.  The pattern movv8hf is common to MVE and NEON, so it has been
;; moved into the vec-common.md file.  The remaining mov expand patterns for
;; half-float and bfloat modes are implemented below.
;; Move expander for the VHFBF_split modes, available under TARGET_NEON.
;; Both operands use the s_register_operand predicate and are checked with
;; aligned_operand (checking builds only).  While new pseudos may still be
;; created, a source headed for a non-REG destination is first forced into a
;; register.
(define_expand "mov<mode>"
  [(set (match_operand:VHFBF_split 0 "s_register_operand")
	(match_operand:VHFBF_split 1 "s_register_operand"))]
  "TARGET_NEON"
{
  gcc_checking_assert (aligned_operand (operands[0], <MODE>mode));
  gcc_checking_assert (aligned_operand (operands[1], <MODE>mode));

  /* Only a non-REG destination needs its source in a register.  */
  if (can_create_pseudo_p () && !REG_P (operands[0]))
    operands[1] = force_reg (<MODE>mode, operands[1]);
})
180
;; Move pattern for the VSTRUCT modes, active under TARGET_NEON or
;; TARGET_HAVE_MVE when at least one operand is a register.  The
;; register-to-register alternative emits "#" (the md convention for an insn
;; that is split later); the store and load alternatives are printed by
;; output_move_neon.  The insn length is computed by
;; arm_attr_length_move_neon.
(define_insn "*neon_mov<mode>"
  [(set (match_operand:VSTRUCT 0 "nonimmediate_operand"	"=w,Ut,w")
	(match_operand:VSTRUCT 1 "general_operand"	" w,w, Ut"))]
  "(TARGET_NEON || TARGET_HAVE_MVE)
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  /* Alternative 0: register copy.  */
  if (which_alternative == 0)
    return "#";

  /* Alternatives 1 and 2: store and load.  */
  if (which_alternative == 1 || which_alternative == 2)
    return output_move_neon (operands);

  gcc_unreachable ();
}
  [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_load2_2reg_q")
   (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))])
197
;; After reload, split an EI register-to-register copy into two sets: a
;; TImode part at the base register number and a DImode part four register
;; numbers further on.  The parts are handed to neon_disambiguate_copy
;; together with OPERANDS.
;; NOTE(review): neon_disambiguate_copy is defined elsewhere; it is expected
;; to fill in the operands referenced by the match_dups -- confirm there.
(define_split
  [(set (match_operand:EI 0 "s_register_operand" "")
	(match_operand:EI 1 "s_register_operand" ""))]
  "TARGET_NEON && reload_completed"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 2) (match_dup 3))]
{
  int dst_regno = REGNO (operands[0]);
  int src_regno = REGNO (operands[1]);
  rtx dst_parts[2];
  rtx src_parts[2];

  /* First part: TImode at the base register.  */
  dst_parts[0] = gen_rtx_REG (TImode, dst_regno);
  src_parts[0] = gen_rtx_REG (TImode, src_regno);

  /* Second part: DImode, four register numbers later.  */
  dst_parts[1] = gen_rtx_REG (DImode, dst_regno + 4);
  src_parts[1] = gen_rtx_REG (DImode, src_regno + 4);

  neon_disambiguate_copy (operands, dst_parts, src_parts, 2);
})
216
;; After reload, split an OI register-to-register copy into two TImode sets
;; whose register numbers are four apart.  The parts are handed to
;; neon_disambiguate_copy together with OPERANDS.
;; NOTE(review): neon_disambiguate_copy is defined elsewhere; it is expected
;; to fill in the operands referenced by the match_dups -- confirm there.
(define_split
  [(set (match_operand:OI 0 "s_register_operand" "")
	(match_operand:OI 1 "s_register_operand" ""))]
  "(TARGET_NEON || TARGET_HAVE_MVE)&& reload_completed"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 2) (match_dup 3))]
{
  int dst_regno = REGNO (operands[0]);
  int src_regno = REGNO (operands[1]);
  rtx dst_parts[2];
  rtx src_parts[2];

  /* Each TImode part starts four register numbers after the previous.  */
  for (int part = 0; part < 2; part++)
    {
      dst_parts[part] = gen_rtx_REG (TImode, dst_regno + 4 * part);
      src_parts[part] = gen_rtx_REG (TImode, src_regno + 4 * part);
    }

  neon_disambiguate_copy (operands, dst_parts, src_parts, 2);
})
235
;; After reload, split a CI register-to-register copy into three TImode sets
;; whose register numbers are four apart.  The parts are handed to
;; neon_disambiguate_copy together with OPERANDS.
;; NOTE(review): neon_disambiguate_copy is defined elsewhere; it is expected
;; to fill in the operands referenced by the match_dups -- confirm there.
(define_split
  [(set (match_operand:CI 0 "s_register_operand" "")
	(match_operand:CI 1 "s_register_operand" ""))]
  "TARGET_NEON && reload_completed"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 2) (match_dup 3))
   (set (match_dup 4) (match_dup 5))]
{
  int dst_regno = REGNO (operands[0]);
  int src_regno = REGNO (operands[1]);
  rtx dst_parts[3];
  rtx src_parts[3];

  /* Each TImode part starts four register numbers after the previous.  */
  for (int part = 0; part < 3; part++)
    {
      dst_parts[part] = gen_rtx_REG (TImode, dst_regno + 4 * part);
      src_parts[part] = gen_rtx_REG (TImode, src_regno + 4 * part);
    }

  neon_disambiguate_copy (operands, dst_parts, src_parts, 3);
})
257
;; After reload, split an XI register-to-register copy into four TImode sets
;; whose register numbers are four apart.  The parts are handed to
;; neon_disambiguate_copy together with OPERANDS.
;; NOTE(review): neon_disambiguate_copy is defined elsewhere; it is expected
;; to fill in the operands referenced by the match_dups -- confirm there.
(define_split
  [(set (match_operand:XI 0 "s_register_operand" "")
	(match_operand:XI 1 "s_register_operand" ""))]
  "(TARGET_NEON || TARGET_HAVE_MVE) && reload_completed"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 2) (match_dup 3))
   (set (match_dup 4) (match_dup 5))
   (set (match_dup 6) (match_dup 7))]
{
  int dst_regno = REGNO (operands[0]);
  int src_regno = REGNO (operands[1]);
  rtx dst_parts[4];
  rtx src_parts[4];

  /* Each TImode part starts four register numbers after the previous.  */
  for (int part = 0; part < 4; part++)
    {
      dst_parts[part] = gen_rtx_REG (TImode, dst_regno + 4 * part);
      src_parts[part] = gen_rtx_REG (TImode, src_regno + 4 * part);
    }

  neon_disambiguate_copy (operands, dst_parts, src_parts, 4);
})
282
;; Misaligned move expander for the VDQX modes.  It is available only for
;; TARGET_NEON on little-endian targets with unaligned_access enabled.  The
;; body prepares the operands and then falls through to the
;; UNSPEC_MISALIGNED_ACCESS set in the pattern.
(define_expand "movmisalign<mode>"
  [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
	(unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
		     UNSPEC_MISALIGNED_ACCESS))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
{
  /* Expansion of this pattern is not allowed to fail.  When neither operand
     is a register (e.g. memory := constant, which the auto-vectorizer can
     create), put operand 1 into a register first.  */
  if (!(s_register_operand (operands[0], <MODE>mode)
        || s_register_operand (operands[1], <MODE>mode)))
    operands[1] = force_reg (<MODE>mode, operands[1]);

  /* Pick the operand whose address is checked below: the source when the
     destination is a register, otherwise the destination.  */
  rtx mem_side = (s_register_operand (operands[0], <MODE>mode)
                  ? operands[1]
                  : operands[0]);

  /* Legitimize the address by forcing it into a Pmode register when
     neon_vector_mem_operand rejects it.  */
  if (!neon_vector_mem_operand (mem_side, 2, true))
    XEXP (mem_side, 0) = force_reg (Pmode, XEXP (mem_side, 0));
})
307
;; Misaligned store of a VDX register (operand 1) to memory (operand 0),
;; emitted as vst1 with the register printed via %P.  Little-endian only,
;; and only when unaligned_access is enabled.
(define_insn "*movmisalign<mode>_neon_store"
  [(set (match_operand:VDX 0 "neon_permissive_struct_operand"	"=Um")
	(unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
		    UNSPEC_MISALIGNED_ACCESS))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
  "vst1.<V_sz_elem>\t{%P1}, %A0"
  [(set_attr "type" "neon_store1_1reg<q>")])
315
;; Misaligned load of a VDX register (operand 0) from memory (operand 1),
;; emitted as vld1 with the register printed via %P.  Little-endian only,
;; and only when unaligned_access is enabled.
(define_insn "*movmisalign<mode>_neon_load"
  [(set (match_operand:VDX 0 "s_register_operand"			"=w")
	(unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand"
									" Um")]
		    UNSPEC_MISALIGNED_ACCESS))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
  "vld1.<V_sz_elem>\t{%P0}, %A1"
  [(set_attr "type" "neon_load1_1reg<q>")])
324
;; Misaligned store of a VQX register (operand 1) to memory (operand 0),
;; emitted as vst1 with the register printed via %q.  Little-endian only,
;; and only when unaligned_access is enabled.
(define_insn "*movmisalign<mode>_neon_store"
  [(set (match_operand:VQX 0 "neon_permissive_struct_operand"  "=Um")
	(unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")]
		    UNSPEC_MISALIGNED_ACCESS))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
  "vst1.<V_sz_elem>\t{%q1}, %A0"
  [(set_attr "type" "neon_store1_1reg<q>")])
332
;; Misaligned load of a VQX register (operand 0) from memory (operand 1),
;; emitted as vld1 with the register printed via %q.  Little-endian only,
;; and only when unaligned_access is enabled.
(define_insn "*movmisalign<mode>_neon_load"
  [(set (match_operand:VQX 0 "s_register_operand"			"=w")
	(unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand"
									" Um")]
		    UNSPEC_MISALIGNED_ACCESS))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
  "vld1.<V_sz_elem>\t{%q0}, %A1"
  [(set_attr "type" "neon_load1_1reg<q>")])
341
;; Insert the scalar operand 1 into one lane of a VD_LANE vector.  Operand 3
;; (tied to the output) supplies the remaining lanes and operand 2 is the
;; vec_merge selector.  The scalar comes either from memory (vld1 to a lane)
;; or from a core register (vmov to a lane).
(define_insn "@vec_set<mode>_internal"
  [(set (match_operand:VD_LANE 0 "s_register_operand" "=w,w")
        (vec_merge:VD_LANE
          (vec_duplicate:VD_LANE
            (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
          (match_operand:VD_LANE 3 "s_register_operand" "0,0")
          (match_operand:SI 2 "immediate_operand" "i,i")))]
  "TARGET_NEON"
{
  /* Convert the selector into the index of its lowest set bit.  */
  int lane = ffs ((int) INTVAL (operands[2])) - 1;

  /* On big-endian targets the lane numbering is reversed.  */
  if (BYTES_BIG_ENDIAN)
    lane = GET_MODE_NUNITS (<MODE>mode) - 1 - lane;

  /* From here on operand 2 holds the lane index printed by %c2.  */
  operands[2] = GEN_INT (lane);

  return (which_alternative == 0
          ? "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1"
          : "vmov.<V_sz_elem>\t%P0[%c2], %1");
}
  [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")])
362
;; Insert the scalar operand 1 into one lane of a VQ2 vector.  Operand 3
;; (tied to the output) supplies the remaining lanes and operand 2 is the
;; vec_merge selector.  The output code rewrites operand 0 as the
;; <V_HALF>mode register that contains the selected lane and then emits a
;; lane vld1 (memory source) or lane vmov (core-register source) on it.
(define_insn "@vec_set<mode>_internal"
  [(set (match_operand:VQ2 0 "s_register_operand" "=w,w")
        (vec_merge:VQ2
          (vec_duplicate:VQ2
            (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
          (match_operand:VQ2 3 "s_register_operand" "0,0")
          (match_operand:SI 2 "immediate_operand" "i,i")))]
  "TARGET_NEON"
{
  /* Convert the selector into the index of its lowest set bit.  */
  HOST_WIDE_INT lane = ffs ((int) INTVAL (operands[2])) - 1;

  /* Split that index into a half selector and a lane within the half.  */
  int lanes_per_half = GET_MODE_NUNITS (<MODE>mode) / 2;
  int half_lane = lane % lanes_per_half;
  int regno_offset = (lane / lanes_per_half) * 2;
  int base_regno = REGNO (operands[0]);

  /* On big-endian targets the lane numbering within a half is reversed.  */
  if (BYTES_BIG_ENDIAN)
    half_lane = lanes_per_half - 1 - half_lane;

  operands[0] = gen_rtx_REG (<V_HALF>mode, base_regno + regno_offset);
  operands[2] = GEN_INT (half_lane);

  return (which_alternative == 0
          ? "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1"
          : "vmov.<V_sz_elem>\t%P0[%c2], %1");
}
  [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")]
)
391
;; Insert the DImode operand 1 into one element of a V2DI_ONLY vector.
;; Operand 3 (tied to the output) supplies the other element and operand 2
;; is the vec_merge selector.  The output code rewrites operand 0 as the
;; DImode register for the selected element and emits either vld1.64
;; (memory source) or a vmov from a core-register pair.
(define_insn "@vec_set<mode>_internal"
  [(set (match_operand:V2DI_ONLY 0 "s_register_operand" "=w,w")
        (vec_merge:V2DI_ONLY
          (vec_duplicate:V2DI_ONLY
            (match_operand:DI 1 "nonimmediate_operand" "Um,r"))
          (match_operand:V2DI_ONLY 3 "s_register_operand" "0,0")
          (match_operand:SI 2 "immediate_operand" "i,i")))]
  "TARGET_NEON"
{
  /* Convert the selector into the index of its lowest set bit.  */
  HOST_WIDE_INT lane = ffs ((int) INTVAL (operands[2])) - 1;

  /* Each element is two register numbers past the previous one.  */
  int elt_regno = REGNO (operands[0]) + 2 * lane;

  operands[0] = gen_rtx_REG (DImode, elt_regno);

  return (which_alternative == 0
          ? "vld1.64\t%P0, %A1"
          : "vmov\t%P0, %Q1, %R1");
}
  [(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")]
)
413
;; Extract lane operand 2 of a VD_LANE vector (operand 1) into operand 0,
;; which is either memory (lane vst1) or a core register (lane vmov).
(define_insn "vec_extract<mode><V_elem_l>"
  [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
        (vec_select:<V_elem>
          (match_operand:VD_LANE 1 "s_register_operand" "w,w")
          (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
  "TARGET_NEON"
{
  /* On big-endian targets the lane numbering is reversed; operand 2 is left
     untouched otherwise.  */
  if (BYTES_BIG_ENDIAN)
    {
      int lane = INTVAL (operands[2]);
      int reversed = GET_MODE_NUNITS (<MODE>mode) - 1 - lane;
      operands[2] = GEN_INT (reversed);
    }

  return (which_alternative == 0
          ? "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0"
          : "vmov.<V_uf_sclr>\t%0, %P1[%c2]");
}
  [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
)
435
436;; This pattern is renamed from "vec_extract<mode><V_elem_l>" to
437;; "neon_vec_extract<mode><V_elem_l>" and this pattern is called
438;; by define_expand in vec-common.md file.
;; Extract lane operand 2 of a VQ2 vector (operand 1) into operand 0, which
;; is either memory (lane vst1) or a core register (lane vmov).  The output
;; code rewrites operand 1 as the <V_HALF>mode register that contains the
;; requested lane.
(define_insn "neon_vec_extract<mode><V_elem_l>"
  [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
	(vec_select:<V_elem>
          (match_operand:VQ2 1 "s_register_operand" "w,w")
          (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
  "TARGET_NEON"
{
  /* Split the lane index into a half selector and a lane within the
     half.  */
  int lanes_per_half = GET_MODE_NUNITS (<MODE>mode) / 2;
  int half_lane = INTVAL (operands[2]) % lanes_per_half;
  int regno_offset = (INTVAL (operands[2]) / lanes_per_half) * 2;
  int base_regno = REGNO (operands[1]);

  /* On big-endian targets the lane numbering within a half is reversed.  */
  if (BYTES_BIG_ENDIAN)
    half_lane = lanes_per_half - 1 - half_lane;

  operands[1] = gen_rtx_REG (<V_HALF>mode, base_regno + regno_offset);
  operands[2] = GEN_INT (half_lane);

  return (which_alternative == 0
          ? "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0"
          : "vmov.<V_uf_sclr>\t%0, %P1[%c2]");
}
  [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
)
464
465;; This pattern is renamed from "vec_extractv2didi" to "neon_vec_extractv2didi"
466;; and this pattern is called by define_expand in vec-common.md file.
;; Extract element operand 2 of a V2DI vector (operand 1) into the DImode
;; operand 0, which is either memory (vst1.64) or a core-register pair
;; (vmov).  The output code rewrites operand 1 as the DImode register that
;; holds the requested element.
(define_insn "neon_vec_extractv2didi"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r")
	(vec_select:DI
          (match_operand:V2DI 1 "s_register_operand" "w,w")
          (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
  "TARGET_NEON"
{
  /* Each element is two register numbers past the previous one.  */
  int elt_regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]);

  operands[1] = gen_rtx_REG (DImode, elt_regno);

  return (which_alternative == 0
          ? "vst1.64\t{%P1}, %A0  @ v2di"
          : "vmov\t%Q0, %R0, %P1  @ v2di");
}
  [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")]
)
485
;; Vector initialisation expander for the VDQ modes, available under
;; TARGET_NEON or TARGET_HAVE_MVE.  All of the work is delegated to
;; neon_expand_vector_init, which receives the destination register
;; (operand 0) and the unconstrained operand 1.
(define_expand "vec_init<mode><V_elem_l>"
  [(match_operand:VDQ 0 "s_register_operand")
   (match_operand 1 "" "")]
  "TARGET_NEON || TARGET_HAVE_MVE"
{
  neon_expand_vector_init (operands[0], operands[1]);
  DONE;
})
494
495;; Doubleword and quadword arithmetic.
496
497;; NOTE: some other instructions also support 64-bit integer
498;; element size, which we could potentially use for "long long" operations.
499
;; Vector addition for the VDQ modes (vadd).  Floating-point modes are only
;; matched when flag_unsafe_math_optimizations is set.  The "type" attribute
;; is chosen per mode: neon_fp_addsub_s<q> for floating-point modes and
;; neon_add<q> otherwise.
(define_insn "*add<mode>3_neon"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
        (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
		  (match_operand:VDQ 2 "s_register_operand" "w")))]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
  "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
                    (const_string "neon_fp_addsub_s<q>")
                    (const_string "neon_add<q>")))]
)
511
512;; As with SFmode, full support for HFmode vector arithmetic is only available
513;; when flag-unsafe-math-optimizations is enabled.
514
515;; Add pattern with modes V8HF and V4HF is split into separate patterns to add
516;; support for standard pattern addv8hf3 in MVE.  Following pattern is called
517;; from "addv8hf3" standard pattern inside vec-common.md file.
518
;; V8HF addition (vadd.f16), requiring TARGET_NEON_FP16INST and
;; flag_unsafe_math_optimizations.
;; This pattern uses no mode iterator, so iterator attributes such as
;; <V_reg> are not substituted in its strings; the operand modifier is
;; therefore spelled out.  %q is the modifier the other patterns in this
;; file use for 128-bit vector registers.
(define_insn "addv8hf3_neon"
  [(set
    (match_operand:V8HF 0 "s_register_operand" "=w")
    (plus:V8HF
     (match_operand:V8HF 1 "s_register_operand" "w")
     (match_operand:V8HF 2 "s_register_operand" "w")))]
 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
 "vadd.f16\t%q0, %q1, %q2"
 [(set_attr "type" "neon_fp_addsub_s_q")]
)
529
;; V4HF addition (vadd.f16), requiring TARGET_NEON_FP16INST and
;; flag_unsafe_math_optimizations.
;; This pattern uses no mode iterator, so iterator attributes such as
;; <V_reg> are not substituted in its strings; the operand modifier is
;; therefore spelled out.  %P is the modifier the other patterns in this
;; file use for 64-bit vector registers.  For the same reason the "type" is
;; the plain neon_fp_addsub_s rather than the "_q" form, matching what
;; neon_fp_addsub_s<q> yields for the 64-bit mode in add<mode>3_fp16.
(define_insn "addv4hf3"
  [(set
    (match_operand:V4HF 0 "s_register_operand" "=w")
    (plus:V4HF
     (match_operand:V4HF 1 "s_register_operand" "w")
     (match_operand:V4HF 2 "s_register_operand" "w")))]
 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
 "vadd.f16\t%P0, %P1, %P2"
 [(set_attr "type" "neon_fp_addsub_s")]
)
540
;; Addition for the VH modes (vadd) that needs only TARGET_NEON_FP16INST;
;; unlike the patterns above it does not test
;; flag_unsafe_math_optimizations.
(define_insn "add<mode>3_fp16"
  [(set
    (match_operand:VH 0 "s_register_operand" "=w")
    (plus:VH
     (match_operand:VH 1 "s_register_operand" "w")
     (match_operand:VH 2 "s_register_operand" "w")))]
 "TARGET_NEON_FP16INST"
 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 [(set (attr "type")
   (if_then_else (match_test "<Is_float_mode>")
    (const_string "neon_fp_addsub_s<q>")
    (const_string "neon_add<q>")))]
)
554
;; Vector subtraction for the VDQ modes (vsub): operand 1 minus operand 2.
;; Floating-point modes are only matched when flag_unsafe_math_optimizations
;; is set.  The "type" attribute is neon_fp_addsub_s<q> for floating-point
;; modes and neon_sub<q> otherwise.
(define_insn "*sub<mode>3_neon"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
        (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
                   (match_operand:VDQ 2 "s_register_operand" "w")))]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
  "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
                    (const_string "neon_fp_addsub_s<q>")
                    (const_string "neon_sub<q>")))]
)
566
;; Subtraction for the VH modes (vsub), requiring TARGET_NEON_FP16INST and
;; flag_unsafe_math_optimizations.
;; NOTE(review): the type is neon_sub<q>, whereas add<mode>3_fp16 selects
;; neon_fp_addsub_s<q> for the same modes -- confirm this is intended.
(define_insn "sub<mode>3"
 [(set
   (match_operand:VH 0 "s_register_operand" "=w")
   (minus:VH
    (match_operand:VH 1 "s_register_operand" "w")
    (match_operand:VH 2 "s_register_operand" "w")))]
 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 [(set_attr "type" "neon_sub<q>")]
)
577
;; Subtraction for the VH modes (vsub) that needs only TARGET_NEON_FP16INST;
;; it does not test flag_unsafe_math_optimizations.
;; NOTE(review): the type is neon_sub<q>, whereas add<mode>3_fp16 selects
;; neon_fp_addsub_s<q> for the same modes -- confirm this is intended.
(define_insn "sub<mode>3_fp16"
 [(set
   (match_operand:VH 0 "s_register_operand" "=w")
   (minus:VH
    (match_operand:VH 1 "s_register_operand" "w")
    (match_operand:VH 2 "s_register_operand" "w")))]
 "TARGET_NEON_FP16INST"
 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 [(set_attr "type" "neon_sub<q>")]
)
588
;; Vector multiplication for the VDQW modes (vmul).  Floating-point modes
;; are only matched when flag_unsafe_math_optimizations is set.  The "type"
;; attribute is neon_fp_mul_s<q> for floating-point modes and
;; neon_mul_<V_elem_ch><q> otherwise.
(define_insn "*mul<mode>3_neon"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
                   (match_operand:VDQW 2 "s_register_operand" "w")))]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
  "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
		    (const_string "neon_fp_mul_s<q>")
                    (const_string "neon_mul_<V_elem_ch><q>")))]
)
600
/* Perform division using multiply-by-reciprocal.
   The reciprocal is calculated using the Newton-Raphson method.
   Enabled with -funsafe-math-optimizations -freciprocal-math
   and disabled for -Os since it increases code size.  */
605
;; Division expander for the VCVTF modes.  It is available under TARGET_NEON
;; when not optimizing for size and flag_reciprocal_math is set.  The
;; quotient is formed as operand 1 times a reciprocal of operand 2 that is
;; built from an initial estimate plus two refinement steps.
(define_expand "div<mode>3"
  [(set (match_operand:VCVTF 0 "s_register_operand")
        (div:VCVTF (match_operand:VCVTF 1 "s_register_operand")
		  (match_operand:VCVTF 2 "s_register_operand")))]
  "TARGET_NEON && !optimize_size
   && flag_reciprocal_math"
  {
    rtx recip = gen_reg_rtx (<MODE>mode);
    rtx step = gen_reg_rtx (<MODE>mode);
    int steps_left = 2;

    /* Start from the reciprocal estimate of the divisor.  */
    emit_insn (gen_neon_vrecpe<mode> (recip, operands[2]));

    /* Refine the estimate with two Newton-Raphson steps: each one computes
       a vrecps step value and multiplies it into the estimate.  */
    while (steps_left-- > 0)
      {
        emit_insn (gen_neon_vrecps<mode> (step, recip, operands[2]));
        emit_insn (gen_mul<mode>3 (recip, recip, step));
      }

    /* RECIP now holds the reciprocal, so the result is
       operands[0] = operands[1] * recip.  */
    emit_insn (gen_mul<mode>3 (operands[0], operands[1], recip));
    DONE;
  }
)
631
632
;; Multiply-accumulate for the VDQW modes (vmla): operand 2 times operand 3,
;; plus operand 1.  Operand 1 is tied to the output register by the "0"
;; constraint.  Floating-point modes are only matched when
;; flag_unsafe_math_optimizations is set.
(define_insn "mul<mode>3add<mode>_neon"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
                            (match_operand:VDQW 3 "s_register_operand" "w"))
		  (match_operand:VDQW 1 "s_register_operand" "0")))]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
  "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
		    (const_string "neon_fp_mla_s<q>")
		    (const_string "neon_mla_<V_elem_ch><q>")))]
)
645
;; Multiply-accumulate for the VH modes (vmla.f16): operand 2 times
;; operand 3, plus operand 1.  Operand 1 is tied to the output register by
;; the "0" constraint.  Requires TARGET_NEON_FP16INST; the condition also
;; tests (!<Is_float_mode> || flag_unsafe_math_optimizations).
(define_insn "mul<mode>3add<mode>_neon"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
	(plus:VH (mult:VH (match_operand:VH 2 "s_register_operand" "w")
			  (match_operand:VH 3 "s_register_operand" "w"))
		  (match_operand:VH 1 "s_register_operand" "0")))]
  "TARGET_NEON_FP16INST && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
  "vmla.f16\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)
655
;; Multiply-subtract for the VDQW modes (vmls): operand 1 minus the product
;; of operands 2 and 3.  Operand 1 is tied to the output register by the "0"
;; constraint.  Floating-point modes are only matched when
;; flag_unsafe_math_optimizations is set.
(define_insn "mul<mode>3neg<mode>add<mode>_neon"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0")
                    (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
                               (match_operand:VDQW 3 "s_register_operand" "w"))))]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
  "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
		    (const_string "neon_fp_mla_s<q>")
		    (const_string "neon_mla_<V_elem_ch><q>")))]
)
668
669;; Fused multiply-accumulate
670;; We define each insn twice here:
671;;    1: with flag_unsafe_math_optimizations for the widening multiply phase
672;;       to be able to use when converting to FMA.
673;;    2: without flag_unsafe_math_optimizations for the intrinsics to use.
;; Fused multiply-add for the VCVTF modes (vfma), the variant that requires
;; flag_unsafe_math_optimizations in addition to TARGET_NEON and TARGET_FMA.
;; Operand 3 (the addend) is tied to the output register by the "0"
;; constraint.
(define_insn "fma<VCVTF:mode>4"
  [(set (match_operand:VCVTF 0 "register_operand" "=w")
        (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
		 (match_operand:VCVTF 2 "register_operand" "w")
		 (match_operand:VCVTF 3 "register_operand" "0")))]
  "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
  "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)
683
;; Fused multiply-add for the VCVTF modes (vfma), the variant that does not
;; test flag_unsafe_math_optimizations; only TARGET_NEON and TARGET_FMA are
;; required.  Operand 3 (the addend) is tied to the output register by the
;; "0" constraint.
(define_insn "fma<VCVTF:mode>4_intrinsic"
  [(set (match_operand:VCVTF 0 "register_operand" "=w")
        (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
		 (match_operand:VCVTF 2 "register_operand" "w")
		 (match_operand:VCVTF 3 "register_operand" "0")))]
  "TARGET_NEON && TARGET_FMA"
  "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)
693
;; Fused multiply-add for the VH modes (vfma), the variant that requires
;; flag_unsafe_math_optimizations in addition to TARGET_NEON_FP16INST.
;; Operand 3 (the addend) is tied to the output register by the "0"
;; constraint.
(define_insn "fma<VH:mode>4"
 [(set (match_operand:VH 0 "register_operand" "=w")
   (fma:VH
    (match_operand:VH 1 "register_operand" "w")
    (match_operand:VH 2 "register_operand" "w")
    (match_operand:VH 3 "register_operand" "0")))]
 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 [(set_attr "type" "neon_fp_mla_s<q>")]
)
704
;; Fused multiply-add for the VH modes (vfma), the variant that does not
;; test flag_unsafe_math_optimizations; only TARGET_NEON_FP16INST is
;; required.  Operand 3 (the addend) is tied to the output register by the
;; "0" constraint.
(define_insn "fma<VH:mode>4_intrinsic"
 [(set (match_operand:VH 0 "register_operand" "=w")
   (fma:VH
    (match_operand:VH 1 "register_operand" "w")
    (match_operand:VH 2 "register_operand" "w")
    (match_operand:VH 3 "register_operand" "0")))]
 "TARGET_NEON_FP16INST"
 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 [(set_attr "type" "neon_fp_mla_s<q>")]
)
715
;; Fused multiply-subtract for the VCVTF modes (vfms): an fma whose first
;; multiplicand (operand 1) is negated.  This variant requires
;; flag_unsafe_math_optimizations in addition to TARGET_NEON and TARGET_FMA.
;; Operand 3 is tied to the output register by the "0" constraint.
(define_insn "*fmsub<VCVTF:mode>4"
  [(set (match_operand:VCVTF 0 "register_operand" "=w")
        (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
		   (match_operand:VCVTF 2 "register_operand" "w")
		   (match_operand:VCVTF 3 "register_operand" "0")))]
  "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
  "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)
725
;; Fused multiply-subtract for the VCVTF modes (vfms): an fma whose first
;; multiplicand (operand 1) is negated.  This variant does not test
;; flag_unsafe_math_optimizations; only TARGET_NEON and TARGET_FMA are
;; required.  Operand 3 is tied to the output register by the "0"
;; constraint.
(define_insn "fmsub<VCVTF:mode>4_intrinsic"
 [(set (match_operand:VCVTF 0 "register_operand" "=w")
   (fma:VCVTF
    (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
    (match_operand:VCVTF 2 "register_operand" "w")
    (match_operand:VCVTF 3 "register_operand" "0")))]
 "TARGET_NEON && TARGET_FMA"
 "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 [(set_attr "type" "neon_fp_mla_s<q>")]
)
736
;; Fused multiply-subtract for the VH modes (vfms): an fma whose first
;; multiplicand (operand 1) is negated.  Only TARGET_NEON_FP16INST is
;; required.  Operand 3 is tied to the output register by the "0"
;; constraint.
(define_insn "fmsub<VH:mode>4_intrinsic"
 [(set (match_operand:VH 0 "register_operand" "=w")
   (fma:VH
    (neg:VH (match_operand:VH 1 "register_operand" "w"))
    (match_operand:VH 2 "register_operand" "w")
    (match_operand:VH 3 "register_operand" "0")))]
 "TARGET_NEON_FP16INST"
 "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 [(set_attr "type" "neon_fp_mla_s<q>")]
)
747
;; One pattern per NEON_VRINT unspec for the VCVTF modes, emitting
;; vrint<nvrint_variant>.f32 from operand 1 into operand 0.  Requires
;; TARGET_NEON and TARGET_VFP5.
(define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF [(match_operand:VCVTF 1
		         "s_register_operand" "w")]
		NEON_VRINT))]
  "TARGET_NEON && TARGET_VFP5"
  "vrint<nvrint_variant>.f32\\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
)
757
;; Conversion of a VCVTF vector to <V_cmp_result> through a FIXUORS code
;; applied to a NEON_VCVT unspec, emitted as
;; vcvt<nvrint_variant>.<su>32.f32.  Requires TARGET_NEON and TARGET_VFP5;
;; the insn is marked as not predicable.
(define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>"
  [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
	(FIXUORS:<V_cmp_result> (unspec:VCVTF
			       [(match_operand:VCVTF 1 "register_operand" "w")]
			       NEON_VCVT)))]
  "TARGET_NEON && TARGET_VFP5"
  "vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")
   (set_attr "predicable" "no")]
)
768
;; Bitwise inclusive OR for the VDQ modes.  The first alternative ORs two
;; registers with vorr.  The second takes a "Dl" immediate as operand 2, ties
;; operand 1 to the output, and has neon_output_logic_immediate build the
;; instruction text.
(define_insn "ior<mode>3"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
	(ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
		 (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))]
  "TARGET_NEON"
{
  /* Register form.  */
  if (which_alternative == 0)
    return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2";

  /* Immediate form.  */
  if (which_alternative == 1)
    return neon_output_logic_immediate ("vorr", &operands[2], <MODE>mode, 0,
                                        VALID_NEON_QREG_MODE (<MODE>mode));

  gcc_unreachable ();
}
  [(set_attr "type" "neon_logic<q>")]
)
785
786;; The concrete forms of the Neon immediate-logic instructions are vbic and
787;; vorr. We support the pseudo-instruction vand instead, because that
788;; corresponds to the canonical form the middle-end expects to use for
789;; immediate bitwise-ANDs.
790
;; Bitwise AND for the VDQ modes.  The first alternative ANDs two registers
;; with vand.  The second takes a "DL" immediate as operand 2, ties operand 1
;; to the output, and has neon_output_logic_immediate build the instruction
;; text.
(define_insn "and<mode>3"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
	(and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
		 (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
  "TARGET_NEON"
{
  /* Register form.  */
  if (which_alternative == 0)
    return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2";

  /* Immediate form.  */
  if (which_alternative == 1)
    return neon_output_logic_immediate ("vand", &operands[2], <MODE>mode, 1,
                                        VALID_NEON_QREG_MODE (<MODE>mode));

  gcc_unreachable ();
}
  [(set_attr "type" "neon_logic<q>")]
)
807
;; OR-NOT for the VDQ modes (vorn): operand 1 ORed with the bitwise
;; complement of operand 2.
(define_insn "orn<mode>3_neon"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
	(ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
		 (match_operand:VDQ 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_logic<q>")]
)
816
;; Bit clear for the VDQ modes (vbic): operand 1 ANDed with the bitwise
;; complement of operand 2.
(define_insn "bic<mode>3_neon"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
	(and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
		 (match_operand:VDQ 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_logic<q>")]
)
825
;; Bitwise exclusive OR of two registers for the VDQ modes (veor).
(define_insn "xor<mode>3"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
	(xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
		 (match_operand:VDQ 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_logic<q>")]
)
834
;; Vector bitwise complement, emitted as vmvn.  The scheduling type is
;; neon_move<q> rather than neon_logic<q>.
835(define_insn "one_cmpl<mode>2"
836  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
837        (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
838  "TARGET_NEON"
839  "vmvn\t%<V_reg>0, %<V_reg>1"
840  [(set_attr "type" "neon_move<q>")]
841)
842
;; Element-wise absolute value, emitted as vabs.<V_s_elem>.  The type
;; attribute picks the floating-point or integer scheduling class
;; depending on <Is_float_mode>.
843(define_insn "abs<mode>2"
844  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
845	(abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
846  "TARGET_NEON"
847  "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
848  [(set (attr "type")
849      (if_then_else (match_test "<Is_float_mode>")
850                    (const_string "neon_fp_abs_s<q>")
851                    (const_string "neon_abs<q>")))]
852)
853
;; Element-wise negation, emitted as vneg.<V_s_elem>.  The type attribute
;; picks the floating-point or integer scheduling class depending on
;; <Is_float_mode>.
854(define_insn "neg<mode>2"
855  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
856	(neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
857  "TARGET_NEON"
858  "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
859  [(set (attr "type")
860      (if_then_else (match_test "<Is_float_mode>")
861                    (const_string "neon_fp_neg_s<q>")
862                    (const_string "neon_neg<q>")))]
863)
864
;; Absolute value / negation on the VH modes: one pattern per code in the
;; ABSNEG iterator, with the mnemonic built as v<absneg_str>.  Only enabled
;; under TARGET_NEON_FP16INST.
865(define_insn "<absneg_str><mode>2"
866  [(set (match_operand:VH 0 "s_register_operand" "=w")
867    (ABSNEG:VH (match_operand:VH 1 "s_register_operand" "w")))]
868 "TARGET_NEON_FP16INST"
869 "v<absneg_str>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
870 [(set_attr "type" "neon_abs<q>")]
871)
872
;; Named expander for the VH abs/neg operations.  It does no work of its
;; own: it emits the matching <absneg_str><mode>2 instruction on the same
;; two operands.
873(define_expand "neon_v<absneg_str><mode>"
874 [(set
875   (match_operand:VH 0 "s_register_operand")
876   (ABSNEG:VH (match_operand:VH 1 "s_register_operand")))]
877 "TARGET_NEON_FP16INST"
878{
879  emit_insn (gen_<absneg_str><mode>2 (operands[0], operands[1]));
880  DONE;
881})
882
;; Rounding operations on the VH modes, one pattern per unspec in the
;; FP16_RND iterator.  The mnemonic is supplied by <fp16_rnd_insn>.
883(define_insn "neon_v<fp16_rnd_str><mode>"
884  [(set (match_operand:VH 0 "s_register_operand" "=w")
885    (unspec:VH
886     [(match_operand:VH 1 "s_register_operand" "w")]
887     FP16_RND))]
888 "TARGET_NEON_FP16INST"
889 "<fp16_rnd_insn>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
890 [(set_attr "type" "neon_fp_round_s<q>")]
891)
892
;; vrsqrte.f16 on the VH modes, represented with UNSPEC_VRSQRTE.  The
;; element suffix is hard-coded to .f16 rather than derived from the mode.
893(define_insn "neon_vrsqrte<mode>"
894  [(set (match_operand:VH 0 "s_register_operand" "=w")
895    (unspec:VH
896     [(match_operand:VH 1 "s_register_operand" "w")]
897     UNSPEC_VRSQRTE))]
898  "TARGET_NEON_FP16INST"
899  "vrsqrte.f16\t%<V_reg>0, %<V_reg>1"
900 [(set_attr "type" "neon_fp_rsqrte_s<q>")]
901)
902
;; Element-wise unsigned minimum, emitted as vmin with the unsigned
;; element suffix <V_u_elem>.
903(define_insn "*umin<mode>3_neon"
904  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
905	(umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
906		    (match_operand:VDQIW 2 "s_register_operand" "w")))]
907  "TARGET_NEON"
908  "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
909  [(set_attr "type" "neon_minmax<q>")]
910)
911
;; Element-wise unsigned maximum, emitted as vmax with the unsigned
;; element suffix <V_u_elem>.
912(define_insn "*umax<mode>3_neon"
913  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
914	(umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
915		    (match_operand:VDQIW 2 "s_register_operand" "w")))]
916  "TARGET_NEON"
917  "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
918  [(set_attr "type" "neon_minmax<q>")]
919)
920
;; Element-wise smin, emitted as vmin.<V_s_elem>.  The type attribute
;; picks the floating-point or integer scheduling class depending on
;; <Is_float_mode>.
921(define_insn "*smin<mode>3_neon"
922  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
923	(smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
924		   (match_operand:VDQW 2 "s_register_operand" "w")))]
925  "TARGET_NEON"
926  "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
927  [(set (attr "type")
928      (if_then_else (match_test "<Is_float_mode>")
929                    (const_string "neon_fp_minmax_s<q>")
930                    (const_string "neon_minmax<q>")))]
931)
932
;; Element-wise smax, emitted as vmax.<V_s_elem>.  The type attribute
;; picks the floating-point or integer scheduling class depending on
;; <Is_float_mode>.
933(define_insn "*smax<mode>3_neon"
934  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
935	(smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
936		   (match_operand:VDQW 2 "s_register_operand" "w")))]
937  "TARGET_NEON"
938  "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
939  [(set (attr "type")
940      (if_then_else (match_test "<Is_float_mode>")
941                    (const_string "neon_fp_minmax_s<q>")
942                    (const_string "neon_minmax<q>")))]
943)
944
945; TODO: V2DI shifts are currently disabled because there are bugs in the
946; generic vectorizer code.  It ends up creating a V2DI constructor with
947; SImode elements.
948
;; Vector left shift.  Alternative 0 shifts by a per-element register
;; count with vshl.<V_s_elem>; alternative 1 (constraint "Dm") prints an
;; immediate shift through neon_output_shift_immediate.  The type attribute
;; lists one scheduling class per alternative.
949(define_insn "vashl<mode>3"
950  [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
951	(ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
952		      (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dm")))]
953  "TARGET_NEON"
954  {
955    switch (which_alternative)
956      {
957        case 0: return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
958        case 1: return neon_output_shift_immediate ("vshl", 'i', &operands[2],
959                         			    <MODE>mode,
960						    VALID_NEON_QREG_MODE (<MODE>mode),
961						    true);
962        default: gcc_unreachable ();
963      }
964  }
965  [(set_attr "type" "neon_shift_reg<q>, neon_shift_imm<q>")]
966)
967
;; Arithmetic right shift by an immediate.  The output is produced by
;; neon_output_shift_immediate with mnemonic "vshr" and the 's' character
;; argument.
968(define_insn "vashr<mode>3_imm"
969  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
970	(ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
971			(match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dm")))]
972  "TARGET_NEON"
973  {
974    return neon_output_shift_immediate ("vshr", 's', &operands[2],
975					<MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
976					false);
977  }
978  [(set_attr "type" "neon_shift_imm<q>")]
979)
980
;; Logical right shift by an immediate.  Identical to vashr<mode>3_imm
;; except that the RTL code is lshiftrt and 'u' is passed to
;; neon_output_shift_immediate instead of 's'.
981(define_insn "vlshr<mode>3_imm"
982  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
983	(lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
984			(match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dm")))]
985  "TARGET_NEON"
986  {
987    return neon_output_shift_immediate ("vshr", 'u', &operands[2],
988					<MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
989					false);
990  }
991  [(set_attr "type" "neon_shift_imm<q>")]
992)
993
994; Used for implementing arithmetic shift-right, which is a left-shift by a
995; negative amount, with signed operands. This is essentially the same as the
996; register alternative of vashl<mode>3 above, but using an unspec in case GCC
997; tries anything tricky with negative shift amounts.
998
;; Register-count vshl with the signed element suffix, wrapped in
;; UNSPEC_ASHIFT_SIGNED.  The vashr<mode>3 expander emits this with a
;; negated shift count.
999(define_insn "ashl<mode>3_signed"
1000  [(set (match_operand:VDQI 0 "s_register_operand" "=w")
1001	(unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
1002		      (match_operand:VDQI 2 "s_register_operand" "w")]
1003		     UNSPEC_ASHIFT_SIGNED))]
1004  "TARGET_NEON"
1005  "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1006  [(set_attr "type" "neon_shift_reg<q>")]
1007)
1008
1009; Used for implementing logical shift-right, which is a left-shift by a negative
1010; amount, with unsigned operands.
1011
;; Register-count vshl with the unsigned element suffix, wrapped in
;; UNSPEC_ASHIFT_UNSIGNED.  The vlshr<mode>3 expander emits this with a
;; negated shift count.
1012(define_insn "ashl<mode>3_unsigned"
1013  [(set (match_operand:VDQI 0 "s_register_operand" "=w")
1014	(unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
1015		      (match_operand:VDQI 2 "s_register_operand" "w")]
1016		     UNSPEC_ASHIFT_UNSIGNED))]
1017  "TARGET_NEON"
1018  "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1019  [(set_attr "type" "neon_shift_reg<q>")]
1020)
1021
;; Arithmetic right shift expander.  A register shift count is negated
;; into a fresh pseudo and applied with ashl<mode>3_signed; any other
;; operand 2 accepted by the predicate goes to vashr<mode>3_imm.
1022(define_expand "vashr<mode>3"
1023  [(set (match_operand:VDQIW 0 "s_register_operand")
1024	(ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand")
1025			(match_operand:VDQIW 2 "imm_rshift_or_reg_neon")))]
1026  "TARGET_NEON"
1027{
1028  if (s_register_operand (operands[2], <MODE>mode))
1029    {
1030      rtx neg = gen_reg_rtx (<MODE>mode);
1031      emit_insn (gen_neg<mode>2 (neg, operands[2]));
1032      emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
1033    }
1034  else
1035    emit_insn (gen_vashr<mode>3_imm (operands[0], operands[1], operands[2]));
1036  DONE;
1037})
1038
;; Logical right shift expander.  A register shift count is negated into
;; a fresh pseudo and applied with ashl<mode>3_unsigned; any other
;; operand 2 accepted by the predicate goes to vlshr<mode>3_imm.
1039(define_expand "vlshr<mode>3"
1040  [(set (match_operand:VDQIW 0 "s_register_operand")
1041	(lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand")
1042			(match_operand:VDQIW 2 "imm_rshift_or_reg_neon")))]
1043  "TARGET_NEON"
1044{
1045  if (s_register_operand (operands[2], <MODE>mode))
1046    {
1047      rtx neg = gen_reg_rtx (<MODE>mode);
1048      emit_insn (gen_neg<mode>2 (neg, operands[2]));
1049      emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
1050    }
1051  else
1052    emit_insn (gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2]));
1053  DONE;
1054})
1055
1056;; 64-bit shifts
1057
1058;; This pattern loads a 32-bit shift count into a 64-bit NEON register,
1059;; leaving the upper half uninitialized.  This is OK since the shift
1060;; instruction only looks at the low 8 bits anyway.  To avoid confusing
1061;; data flow analysis however, we pretend the full register is set
1062;; using an unspec.
;; Alternative 0 ("Um") loads the SImode count into lane 0 with vld1.32;
;; alternative 1 ("r") moves it into lane 0 with vmov.32.
1063(define_insn "neon_load_count"
1064  [(set (match_operand:DI 0 "s_register_operand" "=w,w")
1065        (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")]
1066                   UNSPEC_LOAD_COUNT))]
1067  "TARGET_NEON"
1068  "@
1069   vld1.32\t{%P0[0]}, %A1
1070   vmov.32\t%P0[0], %1"
1071  [(set_attr "type" "neon_load1_1reg,neon_from_gp")]
1072)
1073
1074;; Widening operations
1075
;; Signed widening sum for the VQI modes: operand 0 = operand 2 +
;; sign_extend (operand 1).  The accumulator is first copied into
;; operand 0 (when it is a different rtx), then the two halves of
;; operand 1 are added in with the _lo and _hi vec_select patterns.
1076(define_expand "widen_ssum<mode>3"
1077  [(set (match_operand:<V_double_width> 0 "s_register_operand")
1078	(plus:<V_double_width>
1079	 (sign_extend:<V_double_width>
1080	  (match_operand:VQI 1 "s_register_operand"))
1081	 (match_operand:<V_double_width> 2 "s_register_operand")))]
1082  "TARGET_NEON"
1083  {
1084    machine_mode mode = GET_MODE (operands[1]);
1085    rtx p1, p2;
1086
1087    p1  = arm_simd_vect_par_cnst_half (mode, false);
1088    p2  = arm_simd_vect_par_cnst_half (mode, true);
1089
1090    if (operands[0] != operands[2])
1091      emit_move_insn (operands[0], operands[2]);
1092
1093    emit_insn (gen_vec_sel_widen_ssum_lo<mode><V_half>3 (operands[0],
1094							 operands[1],
1095							 p1,
1096							 operands[0]));
1097    emit_insn (gen_vec_sel_widen_ssum_hi<mode><V_half>3 (operands[0],
1098							 operands[1],
1099							 p2,
1100							 operands[0]));
1101    DONE;
1102  }
1103)
1104
;; Accumulate the sign-extended half of operand 1 selected by a
;; vect_par_constant_low parallel into operand 3 (tied to the
;; destination), using vaddw.  The half is printed with %f1 on big-endian
;; targets and %e1 otherwise.
1105(define_insn "vec_sel_widen_ssum_lo<mode><V_half>3"
1106  [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1107	(plus:<V_double_width>
1108	 (sign_extend:<V_double_width>
1109	  (vec_select:<V_HALF>
1110	   (match_operand:VQI 1 "s_register_operand" "%w")
1111	   (match_operand:VQI 2 "vect_par_constant_low" "")))
1112	 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1113  "TARGET_NEON"
1114{
1115  return BYTES_BIG_ENDIAN ?  "vaddw.<V_s_elem>\t%q0, %q3, %f1" :
1116    "vaddw.<V_s_elem>\t%q0, %q3, %e1";
1117}
1118  [(set_attr "type" "neon_add_widen")])
1119
;; Counterpart of the _lo pattern for the half selected by a
;; vect_par_constant_high parallel.  The operand modifiers are mirrored:
;; %e1 on big-endian targets and %f1 otherwise.
1120(define_insn "vec_sel_widen_ssum_hi<mode><V_half>3"
1121  [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1122	(plus:<V_double_width>
1123	 (sign_extend:<V_double_width>
1124	  (vec_select:<V_HALF>
1125			 (match_operand:VQI 1 "s_register_operand" "%w")
1126			 (match_operand:VQI 2 "vect_par_constant_high" "")))
1127	 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1128  "TARGET_NEON"
1129{
1130  return BYTES_BIG_ENDIAN ?  "vaddw.<V_s_elem>\t%q0, %q3, %e1" :
1131    "vaddw.<V_s_elem>\t%q0, %q3, %f1";
1132}
1133  [(set_attr "type" "neon_add_widen")])
1134
;; Signed widening sum for the VW modes, done in a single vaddw:
;; operand 0 = operand 2 + sign_extend (operand 1).  In the assembly the
;; wide accumulator (%q2) comes before the narrow input (%P1).
1135(define_insn "widen_ssum<mode>3"
1136  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1137	(plus:<V_widen>
1138	 (sign_extend:<V_widen>
1139	  (match_operand:VW 1 "s_register_operand" "%w"))
1140	 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1141  "TARGET_NEON"
1142  "vaddw.<V_s_elem>\t%q0, %q2, %P1"
1143  [(set_attr "type" "neon_add_widen")]
1144)
1145
;; Unsigned widening sum for the VQI modes: operand 0 = operand 2 +
;; zero_extend (operand 1).  The accumulator is first copied into
;; operand 0 (when it is a different rtx), then the two halves of
;; operand 1 are added in with the _lo and _hi vec_select patterns.
1146(define_expand "widen_usum<mode>3"
1147  [(set (match_operand:<V_double_width> 0 "s_register_operand")
1148	(plus:<V_double_width>
1149	 (zero_extend:<V_double_width>
1150	  (match_operand:VQI 1 "s_register_operand"))
1151	 (match_operand:<V_double_width> 2 "s_register_operand")))]
1152  "TARGET_NEON"
1153  {
1154    machine_mode mode = GET_MODE (operands[1]);
1155    rtx p1, p2;
1156
1157    p1  = arm_simd_vect_par_cnst_half (mode, false);
1158    p2  = arm_simd_vect_par_cnst_half (mode, true);
1159
1160    if (operands[0] != operands[2])
1161      emit_move_insn (operands[0], operands[2]);
1162
1163    emit_insn (gen_vec_sel_widen_usum_lo<mode><V_half>3 (operands[0],
1164							 operands[1],
1165							 p1,
1166							 operands[0]));
1167    emit_insn (gen_vec_sel_widen_usum_hi<mode><V_half>3 (operands[0],
1168							 operands[1],
1169							 p2,
1170							 operands[0]));
1171    DONE;
1172  }
1173)
1174
;; Accumulate the zero-extended half of operand 1 selected by a
;; vect_par_constant_low parallel into operand 3 (tied to the
;; destination), using vaddw.  The half is printed with %f1 on big-endian
;; targets and %e1 otherwise.
1175(define_insn "vec_sel_widen_usum_lo<mode><V_half>3"
1176  [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1177	(plus:<V_double_width>
1178	 (zero_extend:<V_double_width>
1179	  (vec_select:<V_HALF>
1180	   (match_operand:VQI 1 "s_register_operand" "%w")
1181	   (match_operand:VQI 2 "vect_par_constant_low" "")))
1182	 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1183  "TARGET_NEON"
1184{
1185  return BYTES_BIG_ENDIAN ?  "vaddw.<V_u_elem>\t%q0, %q3, %f1" :
1186    "vaddw.<V_u_elem>\t%q0, %q3, %e1";
1187}
1188  [(set_attr "type" "neon_add_widen")])
1189
;; Counterpart of the _lo pattern for the half selected by a
;; vect_par_constant_high parallel.  The operand modifiers are mirrored:
;; %e1 on big-endian targets and %f1 otherwise.
1190(define_insn "vec_sel_widen_usum_hi<mode><V_half>3"
1191  [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1192	(plus:<V_double_width>
1193	 (zero_extend:<V_double_width>
1194	  (vec_select:<V_HALF>
1195			 (match_operand:VQI 1 "s_register_operand" "%w")
1196			 (match_operand:VQI 2 "vect_par_constant_high" "")))
1197	 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1198  "TARGET_NEON"
1199{
1200 return BYTES_BIG_ENDIAN ?  "vaddw.<V_u_elem>\t%q0, %q3, %e1" :
1201    "vaddw.<V_u_elem>\t%q0, %q3, %f1";
1202}
1203  [(set_attr "type" "neon_add_widen")])
1204
;; Unsigned widening sum for the VW modes, done in a single vaddw:
;; operand 0 = operand 2 + zero_extend (operand 1).  In the assembly the
;; wide accumulator (%q2) comes before the narrow input (%P1).
1205(define_insn "widen_usum<mode>3"
1206  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1207	(plus:<V_widen> (zero_extend:<V_widen>
1208			  (match_operand:VW 1 "s_register_operand" "%w"))
1209		        (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1210  "TARGET_NEON"
1211  "vaddw.<V_u_elem>\t%q0, %q2, %P1"
1212  [(set_attr "type" "neon_add_widen")]
1213)
1214
1215;; Helpers for quad-word reduction operations
1216
1217; Add (or smin, smax...) the low N/2 elements of the N-element vector
1218; operand[1] to the high N/2 elements of same. Put the result in operand[0], an
1219; N/2-element vector.
1220
;; V4SI variant: combines elements {0,1} of operand 1 with elements {2,3}
;; using each code in VQH_OPS, producing a V2SI result.  The mnemonic and
;; signedness letter come from <VQH_mnem> and <VQH_sign>; the vqh_mnem
;; attribute is set so the type attribute can be derived from it.
1221(define_insn "quad_halves_<code>v4si"
1222  [(set (match_operand:V2SI 0 "s_register_operand" "=w")
1223        (VQH_OPS:V2SI
1224          (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
1225                           (parallel [(const_int 0) (const_int 1)]))
1226          (vec_select:V2SI (match_dup 1)
1227                           (parallel [(const_int 2) (const_int 3)]))))]
1228  "TARGET_NEON"
1229  "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1"
1230  [(set_attr "vqh_mnem" "<VQH_mnem>")
1231   (set_attr "type" "neon_reduc_<VQH_type>_q")]
1232)
1233
;; V4SF variant: combines elements {0,1} of operand 1 with elements {2,3}
;; using each code in VQHS_OPS, producing a V2SF result.  Unlike the
;; integer variants this is only enabled with
;; flag_unsafe_math_optimizations, and the element suffix is fixed at .f32.
1234(define_insn "quad_halves_<code>v4sf"
1235  [(set (match_operand:V2SF 0 "s_register_operand" "=w")
1236        (VQHS_OPS:V2SF
1237          (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
1238                           (parallel [(const_int 0) (const_int 1)]))
1239          (vec_select:V2SF (match_dup 1)
1240                           (parallel [(const_int 2) (const_int 3)]))))]
1241  "TARGET_NEON && flag_unsafe_math_optimizations"
1242  "<VQH_mnem>.f32\t%P0, %e1, %f1"
1243  [(set_attr "vqh_mnem" "<VQH_mnem>")
1244   (set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")]
1245)
1246
;; V8HI variant: combines elements {0..3} of operand 1 with elements
;; {4..7} using each code in VQH_OPS, producing a V4HI result.
;; Operand 0 is only written by this pattern, so it uses the "=w"
;; output constraint, like the v4si and v4sf variants.
1247(define_insn "quad_halves_<code>v8hi"
1248  [(set (match_operand:V4HI 0 "s_register_operand" "=w")
1249        (VQH_OPS:V4HI
1250          (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
1251                           (parallel [(const_int 0) (const_int 1)
1252				      (const_int 2) (const_int 3)]))
1253          (vec_select:V4HI (match_dup 1)
1254                           (parallel [(const_int 4) (const_int 5)
1255				      (const_int 6) (const_int 7)]))))]
1256  "TARGET_NEON"
1257  "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1"
1258  [(set_attr "vqh_mnem" "<VQH_mnem>")
1259   (set_attr "type" "neon_reduc_<VQH_type>_q")]
1260)
1261
;; V16QI variant: combines elements {0..7} of operand 1 with elements
;; {8..15} using each code in VQH_OPS, producing a V8QI result.
;; Operand 0 is only written by this pattern, so it uses the "=w"
;; output constraint, like the v4si and v4sf variants.
1262(define_insn "quad_halves_<code>v16qi"
1263  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
1264        (VQH_OPS:V8QI
1265          (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
1266                           (parallel [(const_int 0) (const_int 1)
1267				      (const_int 2) (const_int 3)
1268				      (const_int 4) (const_int 5)
1269				      (const_int 6) (const_int 7)]))
1270          (vec_select:V8QI (match_dup 1)
1271                           (parallel [(const_int 8) (const_int 9)
1272				      (const_int 10) (const_int 11)
1273				      (const_int 12) (const_int 13)
1274				      (const_int 14) (const_int 15)]))))]
1275  "TARGET_NEON"
1276  "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1"
1277  [(set_attr "vqh_mnem" "<VQH_mnem>")
1278   (set_attr "type" "neon_reduc_<VQH_type>_q")]
1279)
1280
;; Store operand 1 (a <V_HALF> value) into the half of operand 0 that
;; starts at byte offset GET_MODE_SIZE (<V_HALF>mode), using a plain move
;; to a subreg of the destination.
1281(define_expand "move_hi_quad_<mode>"
1282 [(match_operand:ANY128 0 "s_register_operand")
1283  (match_operand:<V_HALF> 1 "s_register_operand")]
1284 "TARGET_NEON"
1285{
1286  emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
1287				       GET_MODE_SIZE (<V_HALF>mode)),
1288		  operands[1]);
1289  DONE;
1290})
1291
;; Store operand 1 (a <V_HALF> value) into the half of operand 0 that
;; starts at byte offset 0, using a plain move to a subreg of the
;; destination.
1292(define_expand "move_lo_quad_<mode>"
1293 [(match_operand:ANY128 0 "s_register_operand")
1294  (match_operand:<V_HALF> 1 "s_register_operand")]
1295 "TARGET_NEON"
1296{
1297  emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0],
1298				       <MODE>mode, 0),
1299		  operands[1]);
1300  DONE;
1301})
1302
1303;; Reduction operations
1304
;; Sum reduction of a VD vector to a scalar.  neon_pairwise_reduce is
;; driven with the vpadd generator, then element 0 of the result is
;; extracted into operand 0.  Floating-point modes require
;; flag_unsafe_math_optimizations.
1305(define_expand "reduc_plus_scal_<mode>"
1306  [(match_operand:<V_elem> 0 "nonimmediate_operand")
1307   (match_operand:VD 1 "s_register_operand")]
1308  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1309{
1310  rtx vec = gen_reg_rtx (<MODE>mode);
1311  neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1312			&gen_neon_vpadd_internal<mode>);
1313  /* The same result is actually computed into every element.  */
1314  emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1315  DONE;
1316})
1317
;; Sum reduction of a VQ vector to a scalar.  The two halves are first
;; added together with quad_halves_plus<mode>, and the half-width result
;; is then reduced by the <V_half> reduc_plus_scal expander.  Disabled on
;; big-endian targets.
1318(define_expand "reduc_plus_scal_<mode>"
1319  [(match_operand:<V_elem> 0 "nonimmediate_operand")
1320   (match_operand:VQ 1 "s_register_operand")]
1321  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1322   && !BYTES_BIG_ENDIAN"
1323{
1324  rtx step1 = gen_reg_rtx (<V_HALF>mode);
1325
1326  emit_insn (gen_quad_halves_plus<mode> (step1, operands[1]));
1327  emit_insn (gen_reduc_plus_scal_<V_half> (operands[0], step1));
1328
1329  DONE;
1330})
1331
;; Sum reduction of a V2DI vector to a DImode scalar.  Emits
;; arm_reduc_plus_internal_v2di into a fresh V2DI pseudo and extracts
;; element 0 of it.  Disabled on big-endian targets.
1332(define_expand "reduc_plus_scal_v2di"
1333  [(match_operand:DI 0 "nonimmediate_operand")
1334   (match_operand:V2DI 1 "s_register_operand")]
1335  "TARGET_NEON && !BYTES_BIG_ENDIAN"
1336{
1337  rtx vec = gen_reg_rtx (V2DImode);
1338
1339  emit_insn (gen_arm_reduc_plus_internal_v2di (vec, operands[1]));
1340  emit_insn (gen_vec_extractv2didi (operands[0], vec, const0_rtx));
1341
1342  DONE;
1343})
1344
;; Helper for reduc_plus_scal_v2di: a single vadd.i64 of the %e1 and %f1
;; halves of operand 1, written to %e0.  Modelled as UNSPEC_VPADD over the
;; whole V2DI register.
1345(define_insn "arm_reduc_plus_internal_v2di"
1346  [(set (match_operand:V2DI 0 "s_register_operand" "=w")
1347	(unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")]
1348		     UNSPEC_VPADD))]
1349  "TARGET_NEON && !BYTES_BIG_ENDIAN"
1350  "vadd.i64\t%e0, %e1, %f1"
1351  [(set_attr "type" "neon_add_q")]
1352)
1353
;; smin reduction of a VD vector to a scalar.  neon_pairwise_reduce is
;; driven with the vpsmin generator, then element 0 of the result is
;; extracted into operand 0.  Floating-point modes require
;; flag_unsafe_math_optimizations.
1354(define_expand "reduc_smin_scal_<mode>"
1355  [(match_operand:<V_elem> 0 "nonimmediate_operand")
1356   (match_operand:VD 1 "s_register_operand")]
1357  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1358{
1359  rtx vec = gen_reg_rtx (<MODE>mode);
1360
1361  neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1362			&gen_neon_vpsmin<mode>);
1363  /* The result is computed into every element of the vector.  */
1364  emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1365  DONE;
1366})
1367
;; smin reduction of a VQ vector to a scalar.  The two halves are first
;; combined with quad_halves_smin<mode>, and the half-width result is then
;; reduced by the <V_half> reduc_smin_scal expander.  Disabled on
;; big-endian targets.
1368(define_expand "reduc_smin_scal_<mode>"
1369  [(match_operand:<V_elem> 0 "nonimmediate_operand")
1370   (match_operand:VQ 1 "s_register_operand")]
1371  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1372   && !BYTES_BIG_ENDIAN"
1373{
1374  rtx step1 = gen_reg_rtx (<V_HALF>mode);
1375
1376  emit_insn (gen_quad_halves_smin<mode> (step1, operands[1]));
1377  emit_insn (gen_reduc_smin_scal_<V_half> (operands[0], step1));
1378
1379  DONE;
1380})
1381
;; smax reduction of a VD vector to a scalar.  neon_pairwise_reduce is
;; driven with the vpsmax generator, then element 0 of the result is
;; extracted into operand 0.  Floating-point modes require
;; flag_unsafe_math_optimizations.
1382(define_expand "reduc_smax_scal_<mode>"
1383  [(match_operand:<V_elem> 0 "nonimmediate_operand")
1384   (match_operand:VD 1 "s_register_operand")]
1385  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1386{
1387  rtx vec = gen_reg_rtx (<MODE>mode);
1388  neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1389			&gen_neon_vpsmax<mode>);
1390  /* The result is computed into every element of the vector.  */
1391  emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1392  DONE;
1393})
1394
;; smax reduction of a VQ vector to a scalar.  The two halves are first
;; combined with quad_halves_smax<mode>, and the half-width result is then
;; reduced by the <V_half> reduc_smax_scal expander.  Disabled on
;; big-endian targets.
1395(define_expand "reduc_smax_scal_<mode>"
1396  [(match_operand:<V_elem> 0 "nonimmediate_operand")
1397   (match_operand:VQ 1 "s_register_operand")]
1398  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1399   && !BYTES_BIG_ENDIAN"
1400{
1401  rtx step1 = gen_reg_rtx (<V_HALF>mode);
1402
1403  emit_insn (gen_quad_halves_smax<mode> (step1, operands[1]));
1404  emit_insn (gen_reduc_smax_scal_<V_half> (operands[0], step1));
1405
1406  DONE;
1407})
1408
;; umin reduction of a VDI vector to a scalar.  neon_pairwise_reduce is
;; driven with the vpumin generator, then element 0 of the result is
;; extracted into operand 0.
1409(define_expand "reduc_umin_scal_<mode>"
1410  [(match_operand:<V_elem> 0 "nonimmediate_operand")
1411   (match_operand:VDI 1 "s_register_operand")]
1412  "TARGET_NEON"
1413{
1414  rtx vec = gen_reg_rtx (<MODE>mode);
1415  neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1416			&gen_neon_vpumin<mode>);
1417  /* The result is computed into every element of the vector.  */
1418  emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1419  DONE;
1420})
1421
;; umin reduction of a VQI vector to a scalar.  The two halves are first
;; combined with quad_halves_umin<mode>, and the half-width result is then
;; reduced by the <V_half> reduc_umin_scal expander.  Disabled on
;; big-endian targets.
1422(define_expand "reduc_umin_scal_<mode>"
1423  [(match_operand:<V_elem> 0 "nonimmediate_operand")
1424   (match_operand:VQI 1 "s_register_operand")]
1425  "TARGET_NEON && !BYTES_BIG_ENDIAN"
1426{
1427  rtx step1 = gen_reg_rtx (<V_HALF>mode);
1428
1429  emit_insn (gen_quad_halves_umin<mode> (step1, operands[1]));
1430  emit_insn (gen_reduc_umin_scal_<V_half> (operands[0], step1));
1431
1432  DONE;
1433})
1434
;; umax reduction of a VDI vector to a scalar.  neon_pairwise_reduce is
;; driven with the vpumax generator, then element 0 of the result is
;; extracted into operand 0.
1435(define_expand "reduc_umax_scal_<mode>"
1436  [(match_operand:<V_elem> 0 "nonimmediate_operand")
1437   (match_operand:VDI 1 "s_register_operand")]
1438  "TARGET_NEON"
1439{
1440  rtx vec = gen_reg_rtx (<MODE>mode);
1441  neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1442			&gen_neon_vpumax<mode>);
1443  /* The result is computed into every element of the vector.  */
1444  emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1445  DONE;
1446})
1447
;; umax reduction of a VQI vector to a scalar.  The two halves are first
;; combined with quad_halves_umax<mode>, and the half-width result is then
;; reduced by the <V_half> reduc_umax_scal expander.  Disabled on
;; big-endian targets.
1448(define_expand "reduc_umax_scal_<mode>"
1449  [(match_operand:<V_elem> 0 "nonimmediate_operand")
1450   (match_operand:VQI 1 "s_register_operand")]
1451  "TARGET_NEON && !BYTES_BIG_ENDIAN"
1452{
1453  rtx step1 = gen_reg_rtx (<V_HALF>mode);
1454
1455  emit_insn (gen_quad_halves_umax<mode> (step1, operands[1]));
1456  emit_insn (gen_reduc_umax_scal_<V_half> (operands[0], step1));
1457
1458  DONE;
1459})
1460
;; vpadd on VD modes, modelled as UNSPEC_VPADD.  Its generator is the
;; step function handed to neon_pairwise_reduce by the VD
;; reduc_plus_scal expander.
1461(define_insn "neon_vpadd_internal<mode>"
1462  [(set (match_operand:VD 0 "s_register_operand" "=w")
1463	(unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1464		    (match_operand:VD 2 "s_register_operand" "w")]
1465                   UNSPEC_VPADD))]
1466  "TARGET_NEON"
1467  "vpadd.<V_if_elem>\t%P0, %P1, %P2"
1468  ;; Assume this schedules like vadd.
1469  [(set (attr "type")
1470      (if_then_else (match_test "<Is_float_mode>")
1471                    (const_string "neon_fp_reduc_add_s<q>")
1472                    (const_string "neon_reduc_add<q>")))]
1473)
1474
;; vpadd.f16 on V4HF, modelled as UNSPEC_VPADD and only enabled under
;; TARGET_NEON_FP16INST.
;; NOTE(review): the type is the integer class "neon_reduc_add", whereas
;; neon_vpadd_internal<mode> uses an fp class for float modes -- confirm
;; this is intentional.
1475(define_insn "neon_vpaddv4hf"
1476 [(set
1477   (match_operand:V4HF 0 "s_register_operand" "=w")
1478   (unspec:V4HF [(match_operand:V4HF 1 "s_register_operand" "w")
1479		 (match_operand:V4HF 2 "s_register_operand" "w")]
1480    UNSPEC_VPADD))]
1481 "TARGET_NEON_FP16INST"
1482 "vpadd.f16\t%P0, %P1, %P2"
1483 [(set_attr "type" "neon_reduc_add")]
1484)
1485
;; vpmin with the signed/float element suffix on VD modes, modelled as
;; UNSPEC_VPSMIN.  Its generator is the step function used by the VD
;; reduc_smin_scal expander.
1486(define_insn "neon_vpsmin<mode>"
1487  [(set (match_operand:VD 0 "s_register_operand" "=w")
1488	(unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1489		    (match_operand:VD 2 "s_register_operand" "w")]
1490                   UNSPEC_VPSMIN))]
1491  "TARGET_NEON"
1492  "vpmin.<V_s_elem>\t%P0, %P1, %P2"
1493  [(set (attr "type")
1494      (if_then_else (match_test "<Is_float_mode>")
1495                    (const_string "neon_fp_reduc_minmax_s<q>")
1496                    (const_string "neon_reduc_minmax<q>")))]
1497)
1498
;; vpmax with the signed/float element suffix on VD modes, modelled as
;; UNSPEC_VPSMAX.  Its generator is the step function used by the VD
;; reduc_smax_scal expander.
1499(define_insn "neon_vpsmax<mode>"
1500  [(set (match_operand:VD 0 "s_register_operand" "=w")
1501	(unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1502		    (match_operand:VD 2 "s_register_operand" "w")]
1503                   UNSPEC_VPSMAX))]
1504  "TARGET_NEON"
1505  "vpmax.<V_s_elem>\t%P0, %P1, %P2"
1506  [(set (attr "type")
1507      (if_then_else (match_test "<Is_float_mode>")
1508                    (const_string "neon_fp_reduc_minmax_s<q>")
1509                    (const_string "neon_reduc_minmax<q>")))]
1510)
1511
;; vpmin with the unsigned element suffix on VDI modes, modelled as
;; UNSPEC_VPUMIN.  Its generator is the step function used by the VDI
;; reduc_umin_scal expander.
1512(define_insn "neon_vpumin<mode>"
1513  [(set (match_operand:VDI 0 "s_register_operand" "=w")
1514	(unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1515		     (match_operand:VDI 2 "s_register_operand" "w")]
1516                   UNSPEC_VPUMIN))]
1517  "TARGET_NEON"
1518  "vpmin.<V_u_elem>\t%P0, %P1, %P2"
1519  [(set_attr "type" "neon_reduc_minmax<q>")]
1520)
1521
;; vpmax with the unsigned element suffix on VDI modes, modelled as
;; UNSPEC_VPUMAX.  Its generator is the step function used by the VDI
;; reduc_umax_scal expander.
1522(define_insn "neon_vpumax<mode>"
1523  [(set (match_operand:VDI 0 "s_register_operand" "=w")
1524	(unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1525		     (match_operand:VDI 2 "s_register_operand" "w")]
1526                   UNSPEC_VPUMAX))]
1527  "TARGET_NEON"
1528  "vpmax.<V_u_elem>\t%P0, %P1, %P2"
1529  [(set_attr "type" "neon_reduc_minmax<q>")]
1530)
1531
1532;; Saturating arithmetic
1533
1534; NOTE: Neon supports many more saturating variants of instructions than the
1535; following, but these are all GCC currently understands.
1536; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself
1537; yet either, although these patterns may be used by intrinsics when they're
1538; added.
1539
;; Signed saturating add (ss_plus) on VD modes, emitted as vqadd with the
;; signed element suffix.
1540(define_insn "*ss_add<mode>_neon"
1541  [(set (match_operand:VD 0 "s_register_operand" "=w")
1542       (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1543                   (match_operand:VD 2 "s_register_operand" "w")))]
1544  "TARGET_NEON"
1545  "vqadd.<V_s_elem>\t%P0, %P1, %P2"
1546  [(set_attr "type" "neon_qadd<q>")]
1547)
1548
;; Unsigned saturating add (us_plus) on VD modes, emitted as vqadd with
;; the unsigned element suffix.
1549(define_insn "*us_add<mode>_neon"
1550  [(set (match_operand:VD 0 "s_register_operand" "=w")
1551       (us_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1552                   (match_operand:VD 2 "s_register_operand" "w")))]
1553  "TARGET_NEON"
1554  "vqadd.<V_u_elem>\t%P0, %P1, %P2"
1555  [(set_attr "type" "neon_qadd<q>")]
1556)
1557
;; Signed saturating subtract (ss_minus) on VD modes, emitted as vqsub
;; with the signed element suffix.
1558(define_insn "*ss_sub<mode>_neon"
1559  [(set (match_operand:VD 0 "s_register_operand" "=w")
1560       (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1561                    (match_operand:VD 2 "s_register_operand" "w")))]
1562  "TARGET_NEON"
1563  "vqsub.<V_s_elem>\t%P0, %P1, %P2"
1564  [(set_attr "type" "neon_qsub<q>")]
1565)
1566
;; Unsigned saturating subtract (us_minus) on VD modes, emitted as vqsub
;; with the unsigned element suffix.
1567(define_insn "*us_sub<mode>_neon"
1568  [(set (match_operand:VD 0 "s_register_operand" "=w")
1569       (us_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1570                    (match_operand:VD 2 "s_register_operand" "w")))]
1571  "TARGET_NEON"
1572  "vqsub.<V_u_elem>\t%P0, %P1, %P2"
1573  [(set_attr "type" "neon_qsub<q>")]
1574)
1575
1576;; Conditional instructions.  These are comparisons with conditional moves for
1577;; vectors.  They perform the assignment:
1578;;
1579;;     Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2;
1580;;
1581;; where op3 is <, <=, ==, !=, >= or >.  Operations are performed
1582;; element-wise.
1583
;; Vector conditional select for the VDQW modes.  The expander works in
;; three steps, one switch each on the comparison code in operand 3:
;;   1. decide whether the compare-against-zero form can be used,
;;      otherwise force operand 5 into a register;
;;   2. pick the vcge/vcgt/vceq generator pair and whether the operands
;;      must be swapped ("inverse");
;;   3. emit the comparison(s) that build MASK in a <V_cmp_result> pseudo.
;; Finally neon_vbsl selects between operands 1 and 2 under MASK, with the
;; two data operands swapped when MASK holds the inverse of the requested
;; condition.  Floating-point modes require flag_unsafe_math_optimizations.
1584(define_expand "vcond<mode><mode>"
1585  [(set (match_operand:VDQW 0 "s_register_operand")
1586	(if_then_else:VDQW
1587	  (match_operator 3 "comparison_operator"
1588	    [(match_operand:VDQW 4 "s_register_operand")
1589	     (match_operand:VDQW 5 "nonmemory_operand")])
1590	  (match_operand:VDQW 1 "s_register_operand")
1591	  (match_operand:VDQW 2 "s_register_operand")))]
1592  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1593{
1594  int inverse = 0;
1595  int use_zero_form = 0;
1596  int swap_bsl_operands = 0;
1597  rtx mask = gen_reg_rtx (<V_cmp_result>mode);
1598  rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
1599
1600  rtx (*base_comparison) (rtx, rtx, rtx);
1601  rtx (*complimentary_comparison) (rtx, rtx, rtx);
1602
1603  switch (GET_CODE (operands[3]))
1604    {
1605    case GE:
1606    case GT:
1607    case LE:
1608    case LT:
1609    case EQ:
1610      if (operands[5] == CONST0_RTX (<MODE>mode))
1611	{
1612	  use_zero_form = 1;
1613	  break;
1614	}
1615      /* Fall through.  */
1616    default:
1617      if (!REG_P (operands[5]))
1618	operands[5] = force_reg (<MODE>mode, operands[5]);
1619    }
1620
1621  switch (GET_CODE (operands[3]))
1622    {
1623    case LT:
1624    case UNLT:
1625      inverse = 1;
1626      /* Fall through.  */
1627    case GE:
1628    case UNGE:
1629    case ORDERED:
1630    case UNORDERED:
1631      base_comparison = gen_neon_vcge<mode>;
1632      complimentary_comparison = gen_neon_vcgt<mode>;
1633      break;
1634    case LE:
1635    case UNLE:
1636      inverse = 1;
1637      /* Fall through.  */
1638    case GT:
1639    case UNGT:
1640      base_comparison = gen_neon_vcgt<mode>;
1641      complimentary_comparison = gen_neon_vcge<mode>;
1642      break;
1643    case EQ:
1644    case NE:
1645    case UNEQ:
1646      base_comparison = gen_neon_vceq<mode>;
1647      complimentary_comparison = gen_neon_vceq<mode>;
1648      break;
1649    default:
1650      gcc_unreachable ();
1651    }
1652
1653  switch (GET_CODE (operands[3]))
1654    {
1655    case LT:
1656    case LE:
1657    case GT:
1658    case GE:
1659    case EQ:
1660      /* The easy case.  Here we emit one of vcge, vcgt or vceq.
1661	 As a LT b <=> b GT a && a LE b <=> b GE a.  Our transformations are:
1662	 a GE b -> a GE b
1663	 a GT b -> a GT b
1664	 a LE b -> b GE a
1665	 a LT b -> b GT a
1666	 a EQ b -> a EQ b
1667	 Note that there also exist direct comparison against 0 forms,
1668	 so catch those as a special case.  */
1669      if (use_zero_form)
1670	{
1671	  inverse = 0;
1672	  switch (GET_CODE (operands[3]))
1673	    {
1674	    case LT:
1675	      base_comparison = gen_neon_vclt<mode>;
1676	      break;
1677	    case LE:
1678	      base_comparison = gen_neon_vcle<mode>;
1679	      break;
1680	    default:
1681	      /* Do nothing, other zero form cases already have the correct
1682		 base_comparison.  */
1683	      break;
1684	    }
1685	}
1686
1687      if (!inverse)
1688	emit_insn (base_comparison (mask, operands[4], operands[5]));
1689      else
1690	emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1691      break;
1692    case UNLT:
1693    case UNLE:
1694    case UNGT:
1695    case UNGE:
1696    case NE:
1697      /* Vector compare returns false for lanes which are unordered, so if we use
1698	 the inverse of the comparison we actually want to emit, then
1699	 swap the operands to BSL, we will end up with the correct result.
1700	 Note that a NE NaN and NaN NE b are true for all a, b.
1701
1702	 Our transformations are:
1703	 a GE b -> !(b GT a)
1704	 a GT b -> !(b GE a)
1705	 a LE b -> !(a GT b)
1706	 a LT b -> !(a GE b)
1707	 a NE b -> !(a EQ b)  */
1708
1709      if (inverse)
1710	emit_insn (base_comparison (mask, operands[4], operands[5]));
1711      else
1712	emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1713
1714      swap_bsl_operands = 1;
1715      break;
1716    case UNEQ:
1717      /* We check (a > b ||  b > a).  Combining these comparisons gives us
1718	 true iff (a != b && a ORDERED b); swapping the operands to BSL
1719	 will then give us (a == b ||  a UNORDERED b) as intended.  */
1720
1721      emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5]));
1722      emit_insn (gen_neon_vcgt<mode> (tmp, operands[5], operands[4]));
1723      emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1724      swap_bsl_operands = 1;
1725      break;
1726    case UNORDERED:
1727       /* Operands are ORDERED iff (a > b || b >= a).
1728	 Swapping the operands to BSL will give the UNORDERED case.  */
1729     swap_bsl_operands = 1;
1730     /* Fall through.  */
1731    case ORDERED:
1732      emit_insn (gen_neon_vcgt<mode> (tmp, operands[4], operands[5]));
1733      emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4]));
1734      emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1735      break;
1736    default:
1737      gcc_unreachable ();
1738    }
1739
1740  if (swap_bsl_operands)
1741    emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
1742				    operands[1]));
1743  else
1744    emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
1745				    operands[2]));
1746  DONE;
1747})
1748
;; Unsigned integer vector conditional select:
;;   operands[0] = (operands[4] <cmp> operands[5]) ? operands[1] : operands[2]
;; where <cmp> is the code of operands[3]: GEU, GTU, LEU, LTU, EQ or NE.
;; A per-lane mask is computed with vcgeu/vcgtu/vceq and then used by vbsl
;; to pick between operands[1] and operands[2].
(define_expand "vcondu<mode><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand")
	(if_then_else:VDQIW
	  (match_operator 3 "arm_comparison_operator"
	    [(match_operand:VDQIW 4 "s_register_operand")
	     (match_operand:VDQIW 5 "s_register_operand")])
	  (match_operand:VDQIW 1 "s_register_operand")
	  (match_operand:VDQIW 2 "s_register_operand")))]
  "TARGET_NEON"
{
  rtx mask;
  int inverse = 0;

  mask = gen_reg_rtx (<V_cmp_result>mode);

  /* All comparisons emitted below are register-register forms, so make
     sure the second comparison operand lives in a register.  There is
     deliberately no compare-against-zero shortcut here: the vcle/vclt
     zero forms are signed comparisons and would give the wrong mask for
     lanes with the top bit set when used for LEU/LTU.  */
  if (!REG_P (operands[5]))
    operands[5] = force_reg (<MODE>mode, operands[5]);

  switch (GET_CODE (operands[3]))
    {
    case GEU:
      emit_insn (gen_neon_vcgeu<mode> (mask, operands[4], operands[5]));
      break;

    case GTU:
      emit_insn (gen_neon_vcgtu<mode> (mask, operands[4], operands[5]));
      break;

    case EQ:
      emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
      break;

    case LEU:
      /* a LEU b <=> b GEU a.  */
      emit_insn (gen_neon_vcgeu<mode> (mask, operands[5], operands[4]));
      break;

    case LTU:
      /* a LTU b <=> b GTU a.  */
      emit_insn (gen_neon_vcgtu<mode> (mask, operands[5], operands[4]));
      break;

    case NE:
      /* There is no direct "not equal" compare: compute EQ and swap the
	 two value operands of the select instead.  */
      emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
      inverse = 1;
      break;

    default:
      gcc_unreachable ();
    }

  if (inverse)
    emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
				    operands[1]));
  else
    emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
				    operands[2]));

  DONE;
})
1815
1816;; Patterns for builtins.
1817
1818; good for plain vadd, vaddq.
1819
;; Expander for the vadd builtin over the VCVTF modes.
;; operands[0] is the destination, operands[1] and operands[2] the addends.
;; The canonical add<mode>3 pattern is used when the mode is not a float
;; mode or when -funsafe-math-optimizations is on; otherwise the addition
;; is emitted through neon_vadd<mode>_unspec.
1820(define_expand "neon_vadd<mode>"
1821  [(match_operand:VCVTF 0 "s_register_operand")
1822   (match_operand:VCVTF 1 "s_register_operand")
1823   (match_operand:VCVTF 2 "s_register_operand")]
1824  "TARGET_NEON"
1825{
1826  if (!<Is_float_mode> || flag_unsafe_math_optimizations)
1827    emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
1828  else
1829    emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1],
1830					   operands[2]));
1831  DONE;
1832})
1833
;; Expander for the vadd builtin over the VH modes (TARGET_NEON_FP16INST
;; only).  Unconditionally forwards to add<mode>3_fp16.
1834(define_expand "neon_vadd<mode>"
1835  [(match_operand:VH 0 "s_register_operand")
1836   (match_operand:VH 1 "s_register_operand")
1837   (match_operand:VH 2 "s_register_operand")]
1838  "TARGET_NEON_FP16INST"
1839{
1840  emit_insn (gen_add<mode>3_fp16 (operands[0], operands[1], operands[2]));
1841  DONE;
1842})
1843
;; Expander for the vsub builtin over the VH modes (TARGET_NEON_FP16INST
;; only).  Unconditionally forwards to sub<mode>3_fp16.
1844(define_expand "neon_vsub<mode>"
1845  [(match_operand:VH 0 "s_register_operand")
1846   (match_operand:VH 1 "s_register_operand")
1847   (match_operand:VH 2 "s_register_operand")]
1848  "TARGET_NEON_FP16INST"
1849{
1850  emit_insn (gen_sub<mode>3_fp16 (operands[0], operands[1], operands[2]));
1851  DONE;
1852})
1853
1854; Note that NEON operations don't support the full IEEE 754 standard: in
1855; particular, denormal values are flushed to zero.  This means that GCC cannot
1856; use those instructions for autovectorization, etc. unless
1857; -funsafe-math-optimizations is in effect (in which case flush-to-zero
1858; behavior is permissible).  Intrinsic operations (provided by the arm_neon.h
1859; header) must work in either case: if -funsafe-math-optimizations is given,
1860; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics
1861; expand to unspecs (which may potentially limit the extent to which they might
1862; be optimized by generic code).
1863
1864; Used for intrinsics when flag_unsafe_math_optimizations is false.
; The addition is wrapped in UNSPEC_VADD instead of a canonical add rtx.
; The "type" attribute is neon_fp_addsub_s<q> for float modes and
; neon_add<q> otherwise.
1865
1866(define_insn "neon_vadd<mode>_unspec"
1867  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1868        (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
1869		      (match_operand:VCVTF 2 "s_register_operand" "w")]
1870                     UNSPEC_VADD))]
1871  "TARGET_NEON"
1872  "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1873  [(set (attr "type")
1874      (if_then_else (match_test "<Is_float_mode>")
1875                    (const_string "neon_fp_addsub_s<q>")
1876                    (const_string "neon_add<q>")))]
1877)
1878
;; vaddl: both inputs are VDI vectors and the result is produced in the
;; widened mode <V_widen>.  <sup> is supplied by the VADDL unspec iterator
;; (defined outside this section).
1879(define_insn "neon_vaddl<sup><mode>"
1880  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1881        (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
1882		           (match_operand:VDI 2 "s_register_operand" "w")]
1883                          VADDL))]
1884  "TARGET_NEON"
1885  "vaddl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
1886  [(set_attr "type" "neon_add_long")]
1887)
1888
;; vaddw: operand 1 is already in the widened mode <V_widen>, operand 2 is
;; a VDI vector; the result is in <V_widen>.  <sup> is supplied by the VADDW
;; unspec iterator (defined outside this section).
1889(define_insn "neon_vaddw<sup><mode>"
1890  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1891        (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
1892		           (match_operand:VDI 2 "s_register_operand" "w")]
1893                          VADDW))]
1894  "TARGET_NEON"
1895  "vaddw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
1896  [(set_attr "type" "neon_add_widen")]
1897)
1898
1899; vhadd and vrhadd.
; One pattern covers both mnemonics: <r> and <sup> are attributes of the
; VHADD unspec iterator.
; NOTE(review): the "type" attribute is fixed at neon_add_halve_q for every
; VDQIW mode, whereas neighbouring patterns select the variant with <q> --
; confirm this is intended.
1900
1901(define_insn "neon_v<r>hadd<sup><mode>"
1902  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1903        (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
1904		       (match_operand:VDQIW 2 "s_register_operand" "w")]
1905		      VHADD))]
1906  "TARGET_NEON"
1907  "v<r>hadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1908  [(set_attr "type" "neon_add_halve_q")]
1909)
1910
;; vqadd over the VDQIX modes.  The operation is represented as an unspec
;; from the VQADD iterator, which also supplies <sup>.
1911(define_insn "neon_vqadd<sup><mode>"
1912  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
1913        (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
1914		       (match_operand:VDQIX 2 "s_register_operand" "w")]
1915                     VQADD))]
1916  "TARGET_NEON"
1917  "vqadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1918  [(set_attr "type" "neon_qadd<q>")]
1919)
1920
;; vaddhn / vraddhn: both inputs are VN vectors and the result is produced
;; in the narrowed mode <V_narrow>.  <r> comes from the VADDHN unspec
;; iterator.
1921(define_insn "neon_v<r>addhn<mode>"
1922  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
1923        (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
1924		            (match_operand:VN 2 "s_register_operand" "w")]
1925                           VADDHN))]
1926  "TARGET_NEON"
1927  "v<r>addhn.<V_if_elem>\t%P0, %q1, %q2"
1928  [(set_attr "type" "neon_add_halve_narrow_q")]
1929)
1930
1931;; Polynomial and Float multiplication.
;; Represented as UNSPEC_VMUL over the VPF modes; <pf> selects the element
;; type letter printed in the mnemonic suffix.  The "type" attribute is
;; neon_fp_mul_s<q> for float modes and neon_mul_<V_elem_ch><q> otherwise.
1932(define_insn "neon_vmul<pf><mode>"
1933  [(set (match_operand:VPF 0 "s_register_operand" "=w")
1934        (unspec:VPF [(match_operand:VPF 1 "s_register_operand" "w")
1935		      (match_operand:VPF 2 "s_register_operand" "w")]
1936		     UNSPEC_VMUL))]
1937  "TARGET_NEON"
1938  "vmul.<pf>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1939  [(set (attr "type")
1940      (if_then_else (match_test "<Is_float_mode>")
1941                    (const_string "neon_fp_mul_s<q>")
1942                    (const_string "neon_mul_<V_elem_ch><q>")))]
1943)
1944
;; Named mul<mode>3 pattern for the VH modes, using a canonical mult rtx.
;; Besides TARGET_NEON_FP16INST it also requires
;; flag_unsafe_math_optimizations.
1945(define_insn "mul<mode>3"
1946 [(set
1947   (match_operand:VH 0 "s_register_operand" "=w")
1948   (mult:VH
1949    (match_operand:VH 1 "s_register_operand" "w")
1950    (match_operand:VH 2 "s_register_operand" "w")))]
1951  "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
1952  "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1953 [(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
1954)
1955
;; Same RTL and output template as mul<mode>3 for the VH modes, but enabled
;; by TARGET_NEON_FP16INST alone (no flag_unsafe_math_optimizations test).
;; Presumably this is the entry point used for the intrinsics -- confirm
;; against the builtin tables.
1956(define_insn "neon_vmulf<mode>"
1957 [(set
1958   (match_operand:VH 0 "s_register_operand" "=w")
1959   (mult:VH
1960    (match_operand:VH 1 "s_register_operand" "w")
1961    (match_operand:VH 2 "s_register_operand" "w")))]
1962  "TARGET_NEON_FP16INST"
1963  "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1964 [(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
1965)
1966
;; Expander for the vmla builtin over the VDQW modes.
;; operands[0] is the destination and operands[1..3] are passed through
;; unchanged.  The canonical mul<mode>3add<mode>_neon pattern is used when
;; the mode is not a float mode or -funsafe-math-optimizations is on;
;; otherwise neon_vmla<mode>_unspec is emitted.
1967(define_expand "neon_vmla<mode>"
1968  [(match_operand:VDQW 0 "s_register_operand")
1969   (match_operand:VDQW 1 "s_register_operand")
1970   (match_operand:VDQW 2 "s_register_operand")
1971   (match_operand:VDQW 3 "s_register_operand")]
1972  "TARGET_NEON"
1973{
1974  if (!<Is_float_mode> || flag_unsafe_math_optimizations)
1975    emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1],
1976				             operands[2], operands[3]));
1977  else
1978    emit_insn (gen_neon_vmla<mode>_unspec (operands[0], operands[1],
1979					   operands[2], operands[3]));
1980  DONE;
1981})
1982
;; Expander for the vfma builtin over the VCVTF modes (needs TARGET_FMA).
;; Forwards to fma<mode>4_intrinsic with the builtin's operand 1 moved to
;; the last position, i.e. the argument order is 0, 2, 3, 1.
1983(define_expand "neon_vfma<VCVTF:mode>"
1984  [(match_operand:VCVTF 0 "s_register_operand")
1985   (match_operand:VCVTF 1 "s_register_operand")
1986   (match_operand:VCVTF 2 "s_register_operand")
1987   (match_operand:VCVTF 3 "s_register_operand")]
1988  "TARGET_NEON && TARGET_FMA"
1989{
1990  emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
1991				       operands[1]));
1992  DONE;
1993})
1994
;; Expander for the vfma builtin over the VH modes (TARGET_NEON_FP16INST).
;; Forwards to fma<mode>4_intrinsic with the builtin's operand 1 moved to
;; the last position, i.e. the argument order is 0, 2, 3, 1.
1995(define_expand "neon_vfma<VH:mode>"
1996  [(match_operand:VH 0 "s_register_operand")
1997   (match_operand:VH 1 "s_register_operand")
1998   (match_operand:VH 2 "s_register_operand")
1999   (match_operand:VH 3 "s_register_operand")]
2000  "TARGET_NEON_FP16INST"
2001{
2002  emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
2003				       operands[1]));
2004  DONE;
2005})
2006
;; Expander for the vfms builtin over the VCVTF modes (needs TARGET_FMA).
;; Forwards to fmsub<mode>4_intrinsic with the builtin's operand 1 moved to
;; the last position, i.e. the argument order is 0, 2, 3, 1.
2007(define_expand "neon_vfms<VCVTF:mode>"
2008  [(match_operand:VCVTF 0 "s_register_operand")
2009   (match_operand:VCVTF 1 "s_register_operand")
2010   (match_operand:VCVTF 2 "s_register_operand")
2011   (match_operand:VCVTF 3 "s_register_operand")]
2012  "TARGET_NEON && TARGET_FMA"
2013{
2014  emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
2015					 operands[1]));
2016  DONE;
2017})
2018
;; Expander for the vfms builtin over the VH modes (TARGET_NEON_FP16INST).
;; Forwards to fmsub<mode>4_intrinsic with the builtin's operand 1 moved to
;; the last position, i.e. the argument order is 0, 2, 3, 1.
2019(define_expand "neon_vfms<VH:mode>"
2020  [(match_operand:VH 0 "s_register_operand")
2021   (match_operand:VH 1 "s_register_operand")
2022   (match_operand:VH 2 "s_register_operand")
2023   (match_operand:VH 3 "s_register_operand")]
2024  "TARGET_NEON_FP16INST"
2025{
2026  emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
2027					 operands[1]));
2028  DONE;
2029})
2030
2031;; The expand RTL structure here is not important.
2032;; We use the gen_* functions anyway.
2033;; We just need something to wrap the iterators around.
;; The body dispatches to the matching vfm{a,s}l_{low,high}<mode>_intrinsic
;; insn, passing operands 0-3 through and supplying the half-selection
;; parallel for both multiplicands.
2034
2035(define_expand "neon_vfm<vfml_op>l_<vfml_half><mode>"
2036  [(set (match_operand:VCVTF 0 "s_register_operand")
2037     (unspec:VCVTF
2038	[(match_operand:VCVTF 1 "s_register_operand")
2039	   (PLUSMINUS:<VFML>
2040	     (match_operand:<VFML> 2 "s_register_operand")
2041	     (match_operand:<VFML> 3 "s_register_operand"))] VFMLHALVES))]
2042  "TARGET_FP16FML"
2043{
  /* The same selector rtx is used as operands 4 and 5 of the insn.  */
2044  rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
2045  emit_insn (gen_vfm<vfml_op>l_<vfml_half><mode>_intrinsic (operands[0],
2046							     operands[1],
2047							     operands[2],
2048							     operands[3],
2049							     half, half));
2050  DONE;
2051})
2052
;; vfmal on the low halves: the halves of operands 2 and 3 selected by
;; operands 4 and 5 (both vect_par_constant_low) are float-extended to
;; VCVTF, multiplied and added to operand 1 in a single fma.  Operand 1 is
;; tied to the output register (constraint "0").
2053(define_insn "vfmal_low<mode>_intrinsic"
2054 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2055	(fma:VCVTF
2056	 (float_extend:VCVTF
2057	  (vec_select:<VFMLSEL>
2058	   (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2059	   (match_operand:<VFML> 4 "vect_par_constant_low" "")))
2060	 (float_extend:VCVTF
2061	  (vec_select:<VFMLSEL>
2062	   (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2063	   (match_operand:<VFML> 5 "vect_par_constant_low" "")))
2064	 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2065 "TARGET_FP16FML"
2066 "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3"
2067 [(set_attr "type" "neon_fp_mla_s<q>")]
2068)
2069
;; vfmsl on the high halves: like the vfmal high-half pattern, except that
;; the selected half of operand 2 is negated before being float-extended.
;; Operand 1 is the addend and is tied to the output (constraint "0").
2070(define_insn "vfmsl_high<mode>_intrinsic"
2071 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2072	(fma:VCVTF
2073	 (float_extend:VCVTF
2074	  (neg:<VFMLSEL>
2075	    (vec_select:<VFMLSEL>
2076	      (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2077	      (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
2078	 (float_extend:VCVTF
2079	  (vec_select:<VFMLSEL>
2080	   (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2081	   (match_operand:<VFML> 5 "vect_par_constant_high" "")))
2082	 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2083 "TARGET_FP16FML"
2084 "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3"
2085 [(set_attr "type" "neon_fp_mla_s<q>")]
2086)
2087
;; vfmal on the high halves: the halves of operands 2 and 3 selected by
;; operands 4 and 5 (both vect_par_constant_high) are float-extended to
;; VCVTF, multiplied and added to operand 1 in a single fma.  Operand 1 is
;; tied to the output register (constraint "0").
2088(define_insn "vfmal_high<mode>_intrinsic"
2089 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2090	(fma:VCVTF
2091	 (float_extend:VCVTF
2092	  (vec_select:<VFMLSEL>
2093	   (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2094	   (match_operand:<VFML> 4 "vect_par_constant_high" "")))
2095	 (float_extend:VCVTF
2096	  (vec_select:<VFMLSEL>
2097	   (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2098	   (match_operand:<VFML> 5 "vect_par_constant_high" "")))
2099	 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2100 "TARGET_FP16FML"
2101 "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3"
2102 [(set_attr "type" "neon_fp_mla_s<q>")]
2103)
2104
;; vfmsl on the low halves: like the vfmal low-half pattern, except that
;; the selected half of operand 2 is negated before being float-extended.
;; Operand 1 is the addend and is tied to the output (constraint "0").
2105(define_insn "vfmsl_low<mode>_intrinsic"
2106 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2107	(fma:VCVTF
2108	 (float_extend:VCVTF
2109	  (neg:<VFMLSEL>
2110	    (vec_select:<VFMLSEL>
2111	      (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2112	      (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
2113	 (float_extend:VCVTF
2114	  (vec_select:<VFMLSEL>
2115	   (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2116	   (match_operand:<VFML> 5 "vect_par_constant_low" "")))
2117	 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2118 "TARGET_FP16FML"
2119 "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3"
2120 [(set_attr "type" "neon_fp_mla_s<q>")]
2121)
2122
;; Expander for the by-lane vfmal/vfmsl builtins where operands 2 and 3
;; share the mode <VFML>.  operands[4] is the constant lane number.  The
;; body dispatches to the matching vfm{a,s}l_lane_{low,high}<mode>_intrinsic
;; insn, supplying the half-selection parallel and the lane index.
2123(define_expand "neon_vfm<vfml_op>l_lane_<vfml_half><VCVTF:mode>"
2124  [(set:VCVTF (match_operand:VCVTF 0 "s_register_operand")
2125     (unspec:VCVTF
2126	[(match_operand:VCVTF 1 "s_register_operand")
2127	 (PLUSMINUS:<VFML>
2128	   (match_operand:<VFML> 2 "s_register_operand")
2129	   (match_operand:<VFML> 3 "s_register_operand"))
2130	 (match_operand:SI 4 "const_int_operand")] VFMLHALVES))]
2131  "TARGET_FP16FML"
2132{
  /* Map the user-visible lane number through NEON_ENDIAN_LANE_N before
     handing it to the insn.  */
2133  rtx lane = GEN_INT (NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[4])));
2134  rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
2135  emit_insn (gen_vfm<vfml_op>l_lane_<vfml_half><mode>_intrinsic
2136					       (operands[0], operands[1],
2137						operands[2], operands[3],
2138						half, lane));
2139  DONE;
2140})
2141
;; By-lane vfmal on the low half of operand 2.  The lane of operand 3 given
;; by operand 5 is duplicated across a <VFMLSEL> vector, float-extended and
;; multiplied with the extended low half of operand 2; operand 1 is the
;; addend and is tied to the output (constraint "0").
2142(define_insn "vfmal_lane_low<mode>_intrinsic"
2143 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2144	(fma:VCVTF
2145	 (float_extend:VCVTF
2146	  (vec_select:<VFMLSEL>
2147	   (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2148	   (match_operand:<VFML> 4 "vect_par_constant_low" "")))
2149	 (float_extend:VCVTF
2150	   (vec_duplicate:<VFMLSEL>
2151	     (vec_select:HF
2152	       (match_operand:<VFML> 3 "s_register_operand" "x")
2153	       (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2154	 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2155 "TARGET_FP16FML"
2156 {
    /* A lane index past the first <VFMLSEL>-sized half is printed relative
       to the high half of operand 3; otherwise the low half is used.  */
2157    int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2158    if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2159      {
2160	operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2161	return "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_hi>3[%c5]";
2162      }
2163    else
2164      {
2165	operands[5] = GEN_INT (lane);
2166	return "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3[%c5]";
2167      }
2168  }
2169 [(set_attr "type" "neon_fp_mla_s<q>")]
2170)
2171
;; Expander for the by-lane vfmal/vfmsl builtins where the lane source
;; (operand 3) has mode <VFMLSEL2> rather than <VFML>.  operands[4] is the
;; constant lane number, which is endian-adjusted with respect to
;; <VFMLSEL2> before being passed to the matching
;; vfm{a,s}l_lane_{low,high}<vfmlsel2><mode>_intrinsic insn.
2172(define_expand "neon_vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>"
2173  [(set:VCVTF (match_operand:VCVTF 0 "s_register_operand")
2174     (unspec:VCVTF
2175	[(match_operand:VCVTF 1 "s_register_operand")
2176	 (PLUSMINUS:<VFML>
2177	   (match_operand:<VFML> 2 "s_register_operand")
2178	   (match_operand:<VFMLSEL2> 3 "s_register_operand"))
2179	 (match_operand:SI 4 "const_int_operand")] VFMLHALVES))]
2180  "TARGET_FP16FML"
2181{
2182  rtx lane
2183    = GEN_INT (NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[4])));
2184  rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
2185  emit_insn (gen_vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>_intrinsic
2186		(operands[0], operands[1], operands[2], operands[3],
2187		 half, lane));
2188  DONE;
2189})
2190
2191;; Used to implement the intrinsics:
2192;; float32x4_t vfmlalq_lane_low_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2193;; float32x2_t vfmlal_laneq_low_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2194;; Needs a bit of care to get the modes of the different sub-expressions right
2195;; due to 'a' and 'b' having different sizes and make sure we use the right
2196;; S or D subregister to select the appropriate lane from.
2197
2198(define_insn "vfmal_lane_low<vfmlsel2><mode>_intrinsic"
2199 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2200	(fma:VCVTF
2201	 (float_extend:VCVTF
2202	  (vec_select:<VFMLSEL>
2203	   (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2204	   (match_operand:<VFML> 4 "vect_par_constant_low" "")))
2205	 (float_extend:VCVTF
2206	   (vec_duplicate:<VFMLSEL>
2207	     (vec_select:HF
2208	       (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2209	       (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2210	 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2211 "TARGET_FP16FML"
2212 {
   /* Split the lane number into a register offset (regdiff) and a lane
      within that <VFMLSEL>-sized register (new_lane).  */
2213   int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2214   int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2215   int new_lane = lane % elts_per_reg;
2216   int regdiff = lane / elts_per_reg;
2217   operands[5] = GEN_INT (new_lane);
2218   /* We re-create operands[2] and operands[3] in the halved VFMLSEL modes
2219      because we want the print_operand code to print the appropriate
2220      S or D register prefix.  */
2221   operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2222   operands[2] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[2]));
2223   return "vfmal.f16\\t%<V_reg>0, %<V_lane_reg>2, %<V_lane_reg>3[%c5]";
2224 }
2225 [(set_attr "type" "neon_fp_mla_s<q>")]
2226)
2227
2228;; Used to implement the intrinsics:
2229;; float32x4_t vfmlalq_lane_high_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2230;; float32x2_t vfmlal_laneq_high_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2231;; Needs a bit of care to get the modes of the different sub-expressions right
2232;; due to 'a' and 'b' having different sizes and make sure we use the right
2233;; S or D subregister to select the appropriate lane from.
2234
2235(define_insn "vfmal_lane_high<vfmlsel2><mode>_intrinsic"
2236 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2237	(fma:VCVTF
2238	 (float_extend:VCVTF
2239	  (vec_select:<VFMLSEL>
2240	   (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2241	   (match_operand:<VFML> 4 "vect_par_constant_high" "")))
2242	 (float_extend:VCVTF
2243	   (vec_duplicate:<VFMLSEL>
2244	     (vec_select:HF
2245	       (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2246	       (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2247	 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2248 "TARGET_FP16FML"
2249 {
   /* Split the lane number into a register offset (regdiff) and a lane
      within that <VFMLSEL>-sized register (new_lane).  Operand 2 is left
      alone and printed through %<V_hi>.  */
2250   int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2251   int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2252   int new_lane = lane % elts_per_reg;
2253   int regdiff = lane / elts_per_reg;
2254   operands[5] = GEN_INT (new_lane);
2255   /* We re-create operands[3] in the halved VFMLSEL mode
2256      because we've calculated the correct half-width subreg to extract
2257      the lane from and we want to print *that* subreg instead.  */
2258   operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2259   return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_lane_reg>3[%c5]";
2260 }
2261 [(set_attr "type" "neon_fp_mla_s<q>")]
2262)
2263
;; By-lane vfmal on the high half of operand 2.  The lane of operand 3
;; given by operand 5 is duplicated across a <VFMLSEL> vector,
;; float-extended and multiplied with the extended high half of operand 2;
;; operand 1 is the addend and is tied to the output (constraint "0").
2264(define_insn "vfmal_lane_high<mode>_intrinsic"
2265 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2266	(fma:VCVTF
2267	 (float_extend:VCVTF
2268	  (vec_select:<VFMLSEL>
2269	   (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2270	   (match_operand:<VFML> 4 "vect_par_constant_high" "")))
2271	 (float_extend:VCVTF
2272	   (vec_duplicate:<VFMLSEL>
2273	     (vec_select:HF
2274	       (match_operand:<VFML> 3 "s_register_operand" "x")
2275	       (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2276	 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2277 "TARGET_FP16FML"
2278  {
    /* A lane index past the first <VFMLSEL>-sized half is printed relative
       to the high half of operand 3; otherwise the low half is used.  */
2279    int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2280    if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2281      {
2282	operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2283	return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3[%c5]";
2284      }
2285    else
2286      {
2287	operands[5] = GEN_INT (lane);
2288	return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_lo>3[%c5]";
2289      }
2290  }
2291 [(set_attr "type" "neon_fp_mla_s<q>")]
2292)
2293
;; By-lane vfmsl on the low half of operand 2.  Same shape as the by-lane
;; vfmal low pattern, except that the selected half of operand 2 is negated
;; before being float-extended.  Operand 1 is tied to the output.
2294(define_insn "vfmsl_lane_low<mode>_intrinsic"
2295 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2296	(fma:VCVTF
2297	 (float_extend:VCVTF
2298	  (neg:<VFMLSEL>
2299	    (vec_select:<VFMLSEL>
2300	      (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2301	      (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
2302	 (float_extend:VCVTF
2303	   (vec_duplicate:<VFMLSEL>
2304	     (vec_select:HF
2305	       (match_operand:<VFML> 3 "s_register_operand" "x")
2306	       (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2307	 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2308 "TARGET_FP16FML"
2309 {
    /* A lane index past the first <VFMLSEL>-sized half is printed relative
       to the high half of operand 3; otherwise the low half is used.  */
2310    int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2311    if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2312      {
2313	operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2314	return "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_hi>3[%c5]";
2315      }
2316    else
2317      {
2318	operands[5] = GEN_INT (lane);
2319	return "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3[%c5]";
2320      }
2321  }
2322 [(set_attr "type" "neon_fp_mla_s<q>")]
2323)
2324
2325;; Used to implement the intrinsics:
2326;; float32x4_t vfmlslq_lane_low_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2327;; float32x2_t vfmlsl_laneq_low_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2328;; Needs a bit of care to get the modes of the different sub-expressions right
2329;; due to 'a' and 'b' having different sizes and make sure we use the right
2330;; S or D subregister to select the appropriate lane from.
2331
2332(define_insn "vfmsl_lane_low<vfmlsel2><mode>_intrinsic"
2333 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2334	(fma:VCVTF
2335	 (float_extend:VCVTF
2336	  (neg:<VFMLSEL>
2337	    (vec_select:<VFMLSEL>
2338	      (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2339	      (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
2340	 (float_extend:VCVTF
2341	   (vec_duplicate:<VFMLSEL>
2342	     (vec_select:HF
2343	       (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2344	       (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2345	 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2346 "TARGET_FP16FML"
2347 {
   /* Split the lane number into a register offset (regdiff) and a lane
      within that <VFMLSEL>-sized register (new_lane).  */
2348   int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2349   int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2350   int new_lane = lane % elts_per_reg;
2351   int regdiff = lane / elts_per_reg;
2352   operands[5] = GEN_INT (new_lane);
2353   /* We re-create operands[2] and operands[3] in the halved VFMLSEL modes
2354      because we want the print_operand code to print the appropriate
2355      S or D register prefix.  */
2356   operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2357   operands[2] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[2]));
2358   return "vfmsl.f16\\t%<V_reg>0, %<V_lane_reg>2, %<V_lane_reg>3[%c5]";
2359 }
2360 [(set_attr "type" "neon_fp_mla_s<q>")]
2361)
2362
2363;; Used to implement the intrinsics:
2364;; float32x4_t vfmlslq_lane_high_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2365;; float32x2_t vfmlsl_laneq_high_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2366;; Needs a bit of care to get the modes of the different sub-expressions right
2367;; due to 'a' and 'b' having different sizes and make sure we use the right
2368;; S or D subregister to select the appropriate lane from.
2369
2370(define_insn "vfmsl_lane_high<vfmlsel2><mode>_intrinsic"
2371 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2372	(fma:VCVTF
2373	 (float_extend:VCVTF
2374	  (neg:<VFMLSEL>
2375	    (vec_select:<VFMLSEL>
2376	     (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2377	     (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
2378	 (float_extend:VCVTF
2379	   (vec_duplicate:<VFMLSEL>
2380	     (vec_select:HF
2381	       (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2382	       (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2383	 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2384 "TARGET_FP16FML"
2385 {
   /* Split the lane number into a register offset (regdiff) and a lane
      within that <VFMLSEL>-sized register (new_lane).  Operand 2 is left
      alone and printed through %<V_hi>.  */
2386   int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2387   int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2388   int new_lane = lane % elts_per_reg;
2389   int regdiff = lane / elts_per_reg;
2390   operands[5] = GEN_INT (new_lane);
2391   /* We re-create operands[3] in the halved VFMLSEL mode
2392      because we've calculated the correct half-width subreg to extract
2393      the lane from and we want to print *that* subreg instead.  */
2394   operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2395   return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_lane_reg>3[%c5]";
2396 }
2397 [(set_attr "type" "neon_fp_mla_s<q>")]
2398)
2399
;; By-lane vfmsl on the high half of operand 2.  Same shape as the by-lane
;; vfmal high pattern, except that the selected half of operand 2 is
;; negated before being float-extended.  Operand 1 is tied to the output.
2400(define_insn "vfmsl_lane_high<mode>_intrinsic"
2401 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2402	(fma:VCVTF
2403	 (float_extend:VCVTF
2404	  (neg:<VFMLSEL>
2405	    (vec_select:<VFMLSEL>
2406	     (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2407	     (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
2408	 (float_extend:VCVTF
2409	   (vec_duplicate:<VFMLSEL>
2410	     (vec_select:HF
2411	       (match_operand:<VFML> 3 "s_register_operand" "x")
2412	       (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2413	 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2414 "TARGET_FP16FML"
2415  {
    /* A lane index past the first <VFMLSEL>-sized half is printed relative
       to the high half of operand 3; otherwise the low half is used.  */
2416    int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2417    if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2418      {
2419	operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2420	return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3[%c5]";
2421      }
2422    else
2423      {
2424	operands[5] = GEN_INT (lane);
2425	return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_lo>3[%c5]";
2426      }
2427  }
2428 [(set_attr "type" "neon_fp_mla_s<q>")]
2429)
2430
2431; Used for intrinsics when flag_unsafe_math_optimizations is false.
; Operand 1 is tied to the output register (constraint "0"), so only
; operands 2 and 3 appear as sources in the output template.  The operation
; is wrapped in UNSPEC_VMLA.
2432
2433(define_insn "neon_vmla<mode>_unspec"
2434  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2435	(unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2436		      (match_operand:VDQW 2 "s_register_operand" "w")
2437		      (match_operand:VDQW 3 "s_register_operand" "w")]
2438		    UNSPEC_VMLA))]
2439  "TARGET_NEON"
2440  "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2441  [(set (attr "type")
2442      (if_then_else (match_test "<Is_float_mode>")
2443                    (const_string "neon_fp_mla_s<q>")
2444                    (const_string "neon_mla_<V_elem_ch><q>")))]
2445)
2446
;; vmlal: operand 1 is in the widened mode <V_widen> and is tied to the
;; output (constraint "0"); operands 2 and 3 are VW vectors.  <sup> comes
;; from the VMLAL unspec iterator.
2447(define_insn "neon_vmlal<sup><mode>"
2448  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2449        (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2450		           (match_operand:VW 2 "s_register_operand" "w")
2451		           (match_operand:VW 3 "s_register_operand" "w")]
2452                          VMLAL))]
2453  "TARGET_NEON"
2454  "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2455  [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
2456)
2457
;; Expander for the vmls builtin over the VDQW modes.
;; operands[0] is the destination and operands[1..3] are passed through
;; unchanged.  The canonical mul<mode>3neg<mode>add<mode>_neon pattern is
;; used when the mode is not a float mode or -funsafe-math-optimizations is
;; on; otherwise neon_vmls<mode>_unspec is emitted.
2458(define_expand "neon_vmls<mode>"
2459  [(match_operand:VDQW 0 "s_register_operand")
2460   (match_operand:VDQW 1 "s_register_operand")
2461   (match_operand:VDQW 2 "s_register_operand")
2462   (match_operand:VDQW 3 "s_register_operand")]
2463  "TARGET_NEON"
2464{
2465  if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2466    emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0],
2467		 operands[1], operands[2], operands[3]));
2468  else
2469    emit_insn (gen_neon_vmls<mode>_unspec (operands[0], operands[1],
2470					   operands[2], operands[3]));
2471  DONE;
2472})
2473
2474; Used for intrinsics when flag_unsafe_math_optimizations is false.
; Operand 1 is tied to the output register (constraint "0"), so only
; operands 2 and 3 appear as sources in the output template.  The operation
; is wrapped in UNSPEC_VMLS.
2475
2476(define_insn "neon_vmls<mode>_unspec"
2477  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2478	(unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2479		      (match_operand:VDQW 2 "s_register_operand" "w")
2480		      (match_operand:VDQW 3 "s_register_operand" "w")]
2481		    UNSPEC_VMLS))]
2482  "TARGET_NEON"
2483  "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2484  [(set (attr "type")
2485      (if_then_else (match_test "<Is_float_mode>")
2486                    (const_string "neon_fp_mla_s<q>")
2487                    (const_string "neon_mla_<V_elem_ch><q>")))]
2488)
2489
;; vmlsl: operand 1 is in the widened mode <V_widen> and is tied to the
;; output (constraint "0"); operands 2 and 3 are VW vectors.  <sup> comes
;; from the VMLSL unspec iterator.
2490(define_insn "neon_vmlsl<sup><mode>"
2491  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2492        (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2493		           (match_operand:VW 2 "s_register_operand" "w")
2494		           (match_operand:VW 3 "s_register_operand" "w")]
2495                          VMLSL))]
2496  "TARGET_NEON"
2497  "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2498  [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
2499)
2500
2501;; vqdmulh, vqrdmulh
;; One pattern covers both mnemonics: <r> is an attribute of the VQDMULH
;; unspec iterator.  Inputs and result all share the same VMDQI mode.
2502(define_insn "neon_vq<r>dmulh<mode>"
2503  [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2504        (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "w")
2505		       (match_operand:VMDQI 2 "s_register_operand" "w")]
2506                      VQDMULH))]
2507  "TARGET_NEON"
2508  "vq<r>dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2509  [(set_attr "type" "neon_sat_mul_<V_elem_ch><q>")]
2510)
2511
2512;; vqrdmlah, vqrdmlsh
;; One pattern per VQRDMLH_AS unspec code; the neon_rdma_as attribute fills
;; in the letter between "vqrdml" and "h" in both the pattern name and the
;; mnemonic.  Operand 1 is tied to the destination (constraint "0").
;; Enabled only under TARGET_NEON_RDMA.
;; NOTE(review): the "type" uses the "_long" variant although the result
;; mode equals the source mode (VMDQI) -- confirm this is intended.
2513(define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h<mode>"
2514  [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2515	(unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "0")
2516		       (match_operand:VMDQI 2 "s_register_operand" "w")
2517		       (match_operand:VMDQI 3 "s_register_operand" "w")]
2518		      VQRDMLH_AS))]
2519  "TARGET_NEON_RDMA"
2520  "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2521  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
2522)
2523
;; vqdmlal (UNSPEC_VQDMLAL).  The <V_widen> accumulator (operand 1) is tied
;; to the <V_widen> destination; the two VMDI sources are printed as %P2
;; and %P3.
2524(define_insn "neon_vqdmlal<mode>"
2525  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2526        (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2527		           (match_operand:VMDI 2 "s_register_operand" "w")
2528		           (match_operand:VMDI 3 "s_register_operand" "w")]
2529                          UNSPEC_VQDMLAL))]
2530  "TARGET_NEON"
2531  "vqdmlal.<V_s_elem>\t%q0, %P2, %P3"
2532  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
2533)
2534
;; vqdmlsl (UNSPEC_VQDMLSL).  Same operand layout as neon_vqdmlal<mode>:
;; operand 1 is the <V_widen> accumulator tied to the destination, operands
;; 2 and 3 are the VMDI sources.
2535(define_insn "neon_vqdmlsl<mode>"
2536  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2537        (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2538		           (match_operand:VMDI 2 "s_register_operand" "w")
2539		           (match_operand:VMDI 3 "s_register_operand" "w")]
2540                          UNSPEC_VQDMLSL))]
2541  "TARGET_NEON"
2542  "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3"
2543  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
2544)
2545
;; vmull, one pattern per VMULL unspec code.  Two VW-mode sources (%P1, %P2)
;; produce a <V_widen> result (%q0); no operand is tied.
2546(define_insn "neon_vmull<sup><mode>"
2547  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2548        (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2549		           (match_operand:VW 2 "s_register_operand" "w")]
2550                          VMULL))]
2551  "TARGET_NEON"
2552  "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2553  [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
2554)
2555
;; vqdmull (UNSPEC_VQDMULL).  Two VMDI sources (%P1, %P2) produce a
;; <V_widen> result (%q0); no operand is tied.
2556(define_insn "neon_vqdmull<mode>"
2557  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2558        (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
2559		           (match_operand:VMDI 2 "s_register_operand" "w")]
2560                          UNSPEC_VQDMULL))]
2561  "TARGET_NEON"
2562  "vqdmull.<V_s_elem>\t%q0, %P1, %P2"
2563  [(set_attr "type" "neon_sat_mul_<V_elem_ch>_long")]
2564)
2565
;; Expander for neon_vsub over the VCVTF modes.  When the mode is not a
;; float mode, or -funsafe-math-optimizations is on, it emits the generic
;; sub<mode>3 pattern; otherwise it emits neon_vsub<mode>_unspec.
2566(define_expand "neon_vsub<mode>"
2567  [(match_operand:VCVTF 0 "s_register_operand")
2568   (match_operand:VCVTF 1 "s_register_operand")
2569   (match_operand:VCVTF 2 "s_register_operand")]
2570  "TARGET_NEON"
2571{
2572  if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2573    emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
2574  else
2575    emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1],
2576					   operands[2]));
2577  DONE;
2578})
2579
2580; Used for intrinsics when flag_unsafe_math_optimizations is false.
2581
;; UNSPEC_VSUB form of vsub over the VCVTF modes, emitted by the
;; neon_vsub<mode> expander.  The "type" attribute is selected by
;; <Is_float_mode>: neon_fp_addsub_s<q> for float modes, neon_sub<q>
;; otherwise.
2582(define_insn "neon_vsub<mode>_unspec"
2583  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2584        (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2585		      (match_operand:VCVTF 2 "s_register_operand" "w")]
2586                     UNSPEC_VSUB))]
2587  "TARGET_NEON"
2588  "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2589  [(set (attr "type")
2590      (if_then_else (match_test "<Is_float_mode>")
2591                    (const_string "neon_fp_addsub_s<q>")
2592                    (const_string "neon_sub<q>")))]
2593)
2594
;; vsubl, one pattern per VSUBL unspec code.  Two VDI sources (%P1, %P2)
;; produce a <V_widen> result (%q0).
2595(define_insn "neon_vsubl<sup><mode>"
2596  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2597        (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
2598		           (match_operand:VDI 2 "s_register_operand" "w")]
2599                          VSUBL))]
2600  "TARGET_NEON"
2601  "vsubl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2602  [(set_attr "type" "neon_sub_long")]
2603)
2604
;; vsubw, one pattern per VSUBW unspec code.  Operand 1 already has the
;; <V_widen> mode of the result (printed %q1); operand 2 is the VDI-mode
;; source (printed %P2).
2605(define_insn "neon_vsubw<sup><mode>"
2606  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2607        (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
2608		           (match_operand:VDI 2 "s_register_operand" "w")]
2609			  VSUBW))]
2610  "TARGET_NEON"
2611  "vsubw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
2612  [(set_attr "type" "neon_sub_widen")]
2613)
2614
;; vqsub, one pattern per VQSUB unspec code, over the VDQIX modes.  Source
;; and result share the same mode.
2615(define_insn "neon_vqsub<sup><mode>"
2616  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2617        (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2618		       (match_operand:VDQIX 2 "s_register_operand" "w")]
2619		      VQSUB))]
2620  "TARGET_NEON"
2621  "vqsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2622  [(set_attr "type" "neon_qsub<q>")]
2623)
2624
;; vhsub, one pattern per VHSUB unspec code, over the VDQIW modes.  Source
;; and result share the same mode.
2625(define_insn "neon_vhsub<sup><mode>"
2626  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2627        (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2628		       (match_operand:VDQIW 2 "s_register_operand" "w")]
2629		      VHSUB))]
2630  "TARGET_NEON"
2631  "vhsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2632  [(set_attr "type" "neon_sub_halve<q>")]
2633)
2634
;; v<r>subhn, one pattern per VSUBHN unspec code (<r> fills in the optional
;; "r").  Two VN-mode sources (%q1, %q2) produce a <V_narrow> result (%P0).
2635(define_insn "neon_v<r>subhn<mode>"
2636  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2637        (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2638		            (match_operand:VN 2 "s_register_operand" "w")]
2639                           VSUBHN))]
2640  "TARGET_NEON"
2641  "v<r>subhn.<V_if_elem>\t%P0, %q1, %q2"
2642  [(set_attr "type" "neon_sub_halve_narrow_q")]
2643)
2644
2645;; These may expand to an UNSPEC pattern when a floating point mode is used
2646;; without unsafe math optimizations.
;; Expander for the vc<cmp_op> comparisons over the VDQW modes.  Operand 2
;; may be a register or zero (reg_or_zero_operand).  Float vector modes
;; without -funsafe-math-optimizations are routed to the per-mode
;; _insn_unspec generators (V2SF and V4SF only; any other mode reaches
;; gcc_unreachable); every other case goes to neon_vc<cmp_op><mode>_insn.
2647(define_expand "neon_vc<cmp_op><mode>"
2648  [(match_operand:<V_cmp_result> 0 "s_register_operand")
2649     (neg:<V_cmp_result>
2650       (COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand")
2651                         (match_operand:VDQW 2 "reg_or_zero_operand")))]
2652  "TARGET_NEON"
2653  {
2654    /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
2655       are enabled.  */
2656    if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2657        && !flag_unsafe_math_optimizations)
2658      {
2659        /* We don't just emit a gen_neon_vc<cmp_op><mode>_insn_unspec because
2660           we define gen_neon_vceq<mode>_insn_unspec only for float modes
2661           whereas this expander iterates over the integer modes as well,
2662           but we will never expand to UNSPECs for the integer comparisons.  */
2663        switch (<MODE>mode)
2664          {
2665            case E_V2SFmode:
2666              emit_insn (gen_neon_vc<cmp_op>v2sf_insn_unspec (operands[0],
2667                                                              operands[1],
2668                                                              operands[2]));
2669              break;
2670            case E_V4SFmode:
2671              emit_insn (gen_neon_vc<cmp_op>v4sf_insn_unspec (operands[0],
2672                                                              operands[1],
2673                                                              operands[2]));
2674              break;
2675            default:
2676              gcc_unreachable ();
2677          }
2678      }
2679    else
2680      emit_insn (gen_neon_vc<cmp_op><mode>_insn (operands[0],
2681                                                 operands[1],
2682                                                 operands[2]));
2683    DONE;
2684  }
2685)
2686
;; RTL (non-unspec) comparison insn over the VDQW modes.  The insn condition
;; excludes float vector modes unless -funsafe-math-optimizations is on.
;; Alternative 0 compares against a register, alternative 1 against zero
;; (constraint "Dz").  The output code builds the assembly template at run
;; time: the type letter is "f" for float vector modes and <cmp_type>
;; otherwise, and the last operand is "%<V_reg>2" for alternative 0 or the
;; literal "#0" for alternative 1.  The "type" attribute follows whether
;; operand 2 matches zero_operand.
2687(define_insn "neon_vc<cmp_op><mode>_insn"
2688  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2689        (neg:<V_cmp_result>
2690          (COMPARISONS:<V_cmp_result>
2691            (match_operand:VDQW 1 "s_register_operand" "w,w")
2692            (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz"))))]
2693  "TARGET_NEON && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2694                    && !flag_unsafe_math_optimizations)"
2695  {
2696    char pattern[100];
2697    sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
2698                      " %%<V_reg>1, %s",
2699                       GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2700                         ? "f" : "<cmp_type>",
2701                       which_alternative == 0
2702                         ? "%<V_reg>2" : "#0");
2703    output_asm_insn (pattern, operands);
2704    return "";
2705  }
2706  [(set (attr "type")
2707        (if_then_else (match_operand 2 "zero_operand")
2708                      (const_string "neon_compare_zero<q>")
2709                      (const_string "neon_compare<q>")))]
2710)
2711
;; Unspec comparison insn over the VCVTF modes, one pattern per NEON_VCMP
;; unspec code.  Alternative 0 compares against a register, alternative 1
;; against zero (constraint "Dz"); the output code picks "%<V_reg>2" or the
;; literal "#0" accordingly and always uses the ".f" type letter.
2712(define_insn "neon_vc<cmp_op_unsp><mode>_insn_unspec"
2713  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2714        (unspec:<V_cmp_result>
2715	  [(match_operand:VCVTF 1 "s_register_operand" "w,w")
2716	   (match_operand:VCVTF 2 "reg_or_zero_operand" "w,Dz")]
2717          NEON_VCMP))]
2718  "TARGET_NEON"
2719  {
2720    char pattern[100];
2721    sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
2722                       " %%<V_reg>1, %s",
2723                       which_alternative == 0
2724                         ? "%<V_reg>2" : "#0");
2725    output_asm_insn (pattern, operands);
2726    return "";
2727}
2728  [(set_attr "type" "neon_fp_compare_s<q>")]
2729)
2730
;; Expander for the vc<cmp_op> comparisons over the VH modes, enabled under
;; TARGET_NEON_FP16INST.  Float vector modes without
;; -funsafe-math-optimizations go to the _fp16insn_unspec pattern; otherwise
;; the _fp16insn pattern is used.
2731(define_expand "neon_vc<cmp_op><mode>"
2732 [(match_operand:<V_cmp_result> 0 "s_register_operand")
2733  (neg:<V_cmp_result>
2734   (COMPARISONS:VH
2735    (match_operand:VH 1 "s_register_operand")
2736    (match_operand:VH 2 "reg_or_zero_operand")))]
2737 "TARGET_NEON_FP16INST"
2738{
2739  /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
2740     are enabled.  */
2741  if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2742      && !flag_unsafe_math_optimizations)
2743    emit_insn
2744      (gen_neon_vc<cmp_op><mode>_fp16insn_unspec
2745       (operands[0], operands[1], operands[2]));
2746  else
2747    emit_insn
2748      (gen_neon_vc<cmp_op><mode>_fp16insn
2749       (operands[0], operands[1], operands[2]));
2750  DONE;
2751})
2752
;; RTL (non-unspec) comparison insn over the VH modes.  The insn condition
;; requires TARGET_NEON_FP16INST and excludes float vector modes unless
;; -funsafe-math-optimizations is on.  As in neon_vc<cmp_op><mode>_insn, the
;; template is built at run time: alternative 0 prints register operand 2,
;; alternative 1 prints the literal "#0".
2753(define_insn "neon_vc<cmp_op><mode>_fp16insn"
2754 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2755   (neg:<V_cmp_result>
2756    (COMPARISONS:<V_cmp_result>
2757     (match_operand:VH 1 "s_register_operand" "w,w")
2758     (match_operand:VH 2 "reg_or_zero_operand" "w,Dz"))))]
2759 "TARGET_NEON_FP16INST
2760  && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2761  && !flag_unsafe_math_optimizations)"
2762{
2763  char pattern[100];
2764  sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
2765	   " %%<V_reg>1, %s",
2766	   GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2767	   ? "f" : "<cmp_type>",
2768	   which_alternative == 0
2769	   ? "%<V_reg>2" : "#0");
2770  output_asm_insn (pattern, operands);
2771  return "";
2772}
2773 [(set (attr "type")
2774   (if_then_else (match_operand 2 "zero_operand")
2775    (const_string "neon_compare_zero<q>")
2776    (const_string "neon_compare<q>")))])
2777
;; Unspec comparison insn over the VH modes, one pattern per NEON_VCMP
;; unspec code, enabled under TARGET_NEON_FP16INST.  Alternative 0 compares
;; against a register, alternative 1 against zero (constraint "Dz"); the
;; output code prints "%<V_reg>2" or the literal "#0" accordingly.
2778(define_insn "neon_vc<cmp_op_unsp><mode>_fp16insn_unspec"
2779 [(set
2780   (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2781   (unspec:<V_cmp_result>
2782    [(match_operand:VH 1 "s_register_operand" "w,w")
2783     (match_operand:VH 2 "reg_or_zero_operand" "w,Dz")]
2784    NEON_VCMP))]
2785 "TARGET_NEON_FP16INST"
2786{
2787  char pattern[100];
2788  sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
2789	   " %%<V_reg>1, %s",
2790	   which_alternative == 0
2791	   ? "%<V_reg>2" : "#0");
2792  output_asm_insn (pattern, operands);
2793  return "";
2794}
2795 [(set_attr "type" "neon_fp_compare_s<q>")])
2796
;; Comparison insn for the GTUGEU codes over the VDQIW modes.  Both sources
;; must be registers (there is no compare-with-zero alternative) and the
;; template always uses the ".u" type letter.
2797(define_insn "neon_vc<cmp_op>u<mode>"
2798  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2799        (neg:<V_cmp_result>
2800          (GTUGEU:<V_cmp_result>
2801	    (match_operand:VDQIW 1 "s_register_operand" "w")
2802	    (match_operand:VDQIW 2 "s_register_operand" "w"))))]
2803  "TARGET_NEON"
2804  "vc<cmp_op>.u%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2805  [(set_attr "type" "neon_compare<q>")]
2806)
2807
;; Expander for the absolute-value comparisons (GTGE codes applied to the
;; abs of each VCVTF operand).  With -funsafe-math-optimizations it emits
;; the RTL pattern neon_vca<cmp_op><mode>_insn; otherwise it emits the
;; unspec pattern neon_vca<cmp_op><mode>_insn_unspec.
2808(define_expand "neon_vca<cmp_op><mode>"
2809  [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
2810        (neg:<V_cmp_result>
2811          (GTGE:<V_cmp_result>
2812            (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand"))
2813            (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand")))))]
2814  "TARGET_NEON"
2815  {
2816    if (flag_unsafe_math_optimizations)
2817      emit_insn (gen_neon_vca<cmp_op><mode>_insn (operands[0], operands[1],
2818                                                  operands[2]));
2819    else
2820      emit_insn (gen_neon_vca<cmp_op><mode>_insn_unspec (operands[0],
2821                                                         operands[1],
2822                                                         operands[2]));
2823    DONE;
2824  }
2825)
2826
;; RTL form of the absolute-value comparison over the VCVTF modes; only
;; enabled when -funsafe-math-optimizations is on.  Emits vac<cmp_op>.
2827(define_insn "neon_vca<cmp_op><mode>_insn"
2828  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2829        (neg:<V_cmp_result>
2830          (GTGE:<V_cmp_result>
2831            (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w"))
2832            (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand" "w")))))]
2833  "TARGET_NEON && flag_unsafe_math_optimizations"
2834  "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2835  [(set_attr "type" "neon_fp_compare_s<q>")]
2836)
2837
;; Unspec form of the absolute-value comparison over the VCVTF modes, one
;; pattern per NEON_VACMP unspec code.  Emits vac<cmp_op_unsp>.
2838(define_insn "neon_vca<cmp_op_unsp><mode>_insn_unspec"
2839  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2840        (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
2841		                (match_operand:VCVTF 2 "s_register_operand" "w")]
2842                               NEON_VACMP))]
2843  "TARGET_NEON"
2844  "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2845  [(set_attr "type" "neon_fp_compare_s<q>")]
2846)
2847
;; Expander for the absolute-value comparisons over the VH modes (GLTE
;; codes), enabled under TARGET_NEON_FP16INST.  With
;; -funsafe-math-optimizations it emits the _fp16insn pattern; otherwise
;; the _fp16insn_unspec pattern.
2848(define_expand "neon_vca<cmp_op><mode>"
2849  [(set
2850    (match_operand:<V_cmp_result> 0 "s_register_operand")
2851    (neg:<V_cmp_result>
2852     (GLTE:<V_cmp_result>
2853      (abs:VH (match_operand:VH 1 "s_register_operand"))
2854      (abs:VH (match_operand:VH 2 "s_register_operand")))))]
2855 "TARGET_NEON_FP16INST"
2856{
2857  if (flag_unsafe_math_optimizations)
2858    emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn
2859	       (operands[0], operands[1], operands[2]));
2860  else
2861    emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn_unspec
2862	       (operands[0], operands[1], operands[2]));
2863  DONE;
2864})
2865
;; RTL form of the absolute-value comparison over the VH modes; requires
;; TARGET_NEON_FP16INST and -funsafe-math-optimizations.  Emits vac<cmp_op>.
2866(define_insn "neon_vca<cmp_op><mode>_fp16insn"
2867  [(set
2868    (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2869    (neg:<V_cmp_result>
2870     (GLTE:<V_cmp_result>
2871      (abs:VH (match_operand:VH 1 "s_register_operand" "w"))
2872      (abs:VH (match_operand:VH 2 "s_register_operand" "w")))))]
2873 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
2874 "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2875 [(set_attr "type" "neon_fp_compare_s<q>")]
2876)
2877
;; Unspec form of the absolute-value comparison over the VH modes, one
;; pattern per NEON_VAGLTE unspec code.  Emits vac<cmp_op_unsp>.
;; The insn condition is TARGET_NEON_FP16INST, matching both the
;; neon_vca<cmp_op><mode> VH expander that generates this pattern and the
;; sibling neon_vca<cmp_op><mode>_fp16insn; a bare TARGET_NEON condition
;; would let this VH-mode pattern be recognised without the FP16
;; instructions being available.
2878(define_insn "neon_vca<cmp_op_unsp><mode>_fp16insn_unspec"
2879 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2880   (unspec:<V_cmp_result>
2881    [(match_operand:VH 1 "s_register_operand" "w")
2882     (match_operand:VH 2 "s_register_operand" "w")]
2883    NEON_VAGLTE))]
2884 "TARGET_NEON_FP16INST"
2885 "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2886 [(set_attr "type" "neon_fp_compare_s<q>")]
2887)
2888
;; Compare-with-zero expander over the VH modes, enabled under
;; TARGET_NEON_FP16INST.  It forwards to neon_vc<cmp_op><mode>, passing
;; CONST0_RTX (<MODE>mode) as the second comparison operand.
2889(define_expand "neon_vc<cmp_op>z<mode>"
2890 [(set
2891   (match_operand:<V_cmp_result> 0 "s_register_operand")
2892   (COMPARISONS:<V_cmp_result>
2893    (match_operand:VH 1 "s_register_operand")
2894    (const_int 0)))]
2895 "TARGET_NEON_FP16INST"
2896 {
2897  emit_insn (gen_neon_vc<cmp_op><mode> (operands[0], operands[1],
2898					CONST0_RTX (<MODE>mode)));
2899  DONE;
2900})
2901
;; vtst (UNSPEC_VTST) over the VDQIW modes; two register sources, result in
;; the same mode.
2902(define_insn "neon_vtst<mode>"
2903  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2904        (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2905		       (match_operand:VDQIW 2 "s_register_operand" "w")]
2906		      UNSPEC_VTST))]
2907  "TARGET_NEON"
2908  "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2909  [(set_attr "type" "neon_tst<q>")]
2910)
2911
;; vabd, one pattern per VABD unspec code, over the VDQIW modes; two
;; register sources, result in the same mode.
2912(define_insn "neon_vabd<sup><mode>"
2913  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2914        (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2915		      (match_operand:VDQIW 2 "s_register_operand" "w")]
2916		     VABD))]
2917  "TARGET_NEON"
2918  "vabd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2919  [(set_attr "type" "neon_abd<q>")]
2920)
2921
;; vabd (UNSPEC_VABD_F) over the VH modes, enabled under
;; TARGET_NEON_FP16INST.  The "type" attribute is the floating-point
;; classification neon_fp_abd_s<q>, the same one neon_vabdf<mode> uses for
;; UNSPEC_VABD_F, rather than the integer neon_abd<q>.
2922(define_insn "neon_vabd<mode>"
2923  [(set (match_operand:VH 0 "s_register_operand" "=w")
2924    (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
2925		(match_operand:VH 2 "s_register_operand" "w")]
2926     UNSPEC_VABD_F))]
2927 "TARGET_NEON_FP16INST"
2928 "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2929  [(set_attr "type" "neon_fp_abd_s<q>")]
2930)
2931
;; vabd (UNSPEC_VABD_F) over the VCVTF modes; two register sources, result
;; in the same mode.
2932(define_insn "neon_vabdf<mode>"
2933  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2934        (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2935		      (match_operand:VCVTF 2 "s_register_operand" "w")]
2936		     UNSPEC_VABD_F))]
2937  "TARGET_NEON"
2938  "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2939  [(set_attr "type" "neon_fp_abd_s<q>")]
2940)
2941
;; vabdl, one pattern per VABDL unspec code.  Two VW-mode sources (%P1, %P2)
;; produce a <V_widen> result (%q0).
2942(define_insn "neon_vabdl<sup><mode>"
2943  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2944        (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2945		           (match_operand:VW 2 "s_register_operand" "w")]
2946                          VABDL))]
2947  "TARGET_NEON"
2948  "vabdl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2949  [(set_attr "type" "neon_abd_long")]
2950)
2951
;; vaba: a PLUS of the VABD unspec of operands 2 and 3 with the accumulator
;; operand 1, which is tied to the destination (constraint "0").  Note that
;; the accumulator is the second arm of the PLUS although it is operand 1.
2952(define_insn "neon_vaba<sup><mode>"
2953  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2954        (plus:VDQIW (unspec:VDQIW [(match_operand:VDQIW 2 "s_register_operand" "w")
2955		                   (match_operand:VDQIW 3 "s_register_operand" "w")]
2956		                  VABD)
2957		    (match_operand:VDQIW 1 "s_register_operand" "0")))]
2958  "TARGET_NEON"
2959  "vaba.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2960  [(set_attr "type" "neon_arith_acc<q>")]
2961)
2962
;; vabal: a PLUS of the VABDL unspec of the VW-mode operands 2 and 3 with
;; the <V_widen> accumulator operand 1, which is tied to the destination
;; (constraint "0").
2963(define_insn "neon_vabal<sup><mode>"
2964  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2965        (plus:<V_widen> (unspec:<V_widen> [(match_operand:VW 2 "s_register_operand" "w")
2966                                           (match_operand:VW 3 "s_register_operand" "w")]
2967					   VABDL)
2968			 (match_operand:<V_widen> 1 "s_register_operand" "0")))]
2969  "TARGET_NEON"
2970  "vabal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2971  [(set_attr "type" "neon_arith_acc<q>")]
2972)
2973
;; Expander for <sup>sadv16qi: operands 1 and 2 are V16QI inputs, operand 3
;; is a V4SI accumulator and operand 0 the V4SI result.  The sequence is:
;;   1. vabdl on the low V8QI halves of operands 1 and 2 into a fresh V8HI
;;      register (reduc);
;;   2. vget_high of each input into fresh V8QI registers;
;;   3. vabal of those high halves, accumulating into reduc;
;;   4. vpadal of reduc into operand 3 (operand 3 is updated in place);
;;   5. a move of operand 3 into operand 0.
2974(define_expand "<sup>sadv16qi"
2975  [(use (match_operand:V4SI 0 "register_operand"))
2976   (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
2977                  (use (match_operand:V16QI 2 "register_operand"))] VABAL)
2978   (use (match_operand:V4SI 3 "register_operand"))]
2979  "TARGET_NEON"
2980  {
2981    rtx reduc = gen_reg_rtx (V8HImode);
2982    rtx op1_highpart = gen_reg_rtx (V8QImode);
2983    rtx op2_highpart = gen_reg_rtx (V8QImode);
2984
2985    emit_insn (gen_neon_vabdl<sup>v8qi (reduc,
2986                                        gen_lowpart (V8QImode, operands[1]),
2987                                        gen_lowpart (V8QImode, operands[2])));
2988
2989    emit_insn (gen_neon_vget_highv16qi (op1_highpart, operands[1]));
2990    emit_insn (gen_neon_vget_highv16qi (op2_highpart, operands[2]));
2991    emit_insn (gen_neon_vabal<sup>v8qi (reduc, reduc,
2992                                        op1_highpart, op2_highpart));
2993    emit_insn (gen_neon_vpadal<sup>v8hi (operands[3], operands[3], reduc));
2994
2995    emit_move_insn (operands[0], operands[3]);
2996    DONE;
2997  }
2998)
2999
;; v<maxmin>, one pattern per VMAXMIN unspec code, over the VDQIW modes; two
;; register sources, result in the same mode.
3000(define_insn "neon_v<maxmin><sup><mode>"
3001  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3002        (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
3003		      (match_operand:VDQIW 2 "s_register_operand" "w")]
3004                     VMAXMIN))]
3005  "TARGET_NEON"
3006  "v<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3007  [(set_attr "type" "neon_minmax<q>")]
3008)
3009
;; v<maxmin>, one pattern per VMAXMINF unspec code, over the VCVTF modes.
3010(define_insn "neon_v<maxmin>f<mode>"
3011  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3012        (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3013		      (match_operand:VCVTF 2 "s_register_operand" "w")]
3014                     VMAXMINF))]
3015  "TARGET_NEON"
3016  "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3017  [(set_attr "type" "neon_fp_minmax_s<q>")]
3018)
3019
;; v<maxmin>, one pattern per VMAXMINF unspec code, over the VH modes;
;; enabled under TARGET_NEON_FP16INST.
3020(define_insn "neon_v<maxmin>f<mode>"
3021 [(set (match_operand:VH 0 "s_register_operand" "=w")
3022   (unspec:VH
3023    [(match_operand:VH 1 "s_register_operand" "w")
3024     (match_operand:VH 2 "s_register_operand" "w")]
3025    VMAXMINF))]
3026 "TARGET_NEON_FP16INST"
3027 "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3028 [(set_attr "type" "neon_fp_minmax_s<q>")]
3029)
3030
;; vp<maxmin>.f16, one pattern per VPMAXMINF unspec code, for V4HF only;
;; enabled under TARGET_NEON_FP16INST.  The "type" attribute is the
;; floating-point classification neon_fp_reduc_minmax_s, the non-<q> form of
;; what neon_vp<maxmin>f<mode> uses for VPMAXMINF, rather than the integer
;; neon_reduc_minmax.
3031(define_insn "neon_vp<maxmin>fv4hf"
3032 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3033   (unspec:V4HF
3034    [(match_operand:V4HF 1 "s_register_operand" "w")
3035     (match_operand:V4HF 2 "s_register_operand" "w")]
3036    VPMAXMINF))]
3037 "TARGET_NEON_FP16INST"
3038 "vp<maxmin>.f16\t%P0, %P1, %P2"
3039  [(set_attr "type" "neon_fp_reduc_minmax_s")]
3040)
3041
;; <fmaxmin_op>, one pattern per VMAXMINFNM unspec code, over the VH modes;
;; enabled under TARGET_NEON_FP16INST.
3042(define_insn "neon_<fmaxmin_op><mode>"
3043 [(set
3044   (match_operand:VH 0 "s_register_operand" "=w")
3045   (unspec:VH
3046    [(match_operand:VH 1 "s_register_operand" "w")
3047     (match_operand:VH 2 "s_register_operand" "w")]
3048    VMAXMINFNM))]
3049 "TARGET_NEON_FP16INST"
3050 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3051 [(set_attr "type" "neon_fp_minmax_s<q>")]
3052)
3053
3054;; v<maxmin>nm intrinsics.
;; <fmaxmin_op>, one pattern per VMAXMINFNM unspec code, over the VCVTF
;; modes; requires both TARGET_NEON and TARGET_VFP5.
3055(define_insn "neon_<fmaxmin_op><mode>"
3056  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3057	(unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3058		       (match_operand:VCVTF 2 "s_register_operand" "w")]
3059		       VMAXMINFNM))]
3060  "TARGET_NEON && TARGET_VFP5"
3061  "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3062  [(set_attr "type" "neon_fp_minmax_s<q>")]
3063)
3064
3065;; Vector forms for the IEEE-754 fmax()/fmin() functions
;; Same RTL, condition and output template as the VCVTF
;; neon_<fmaxmin_op><mode> pattern, but exposed under the <fmaxmin><mode>3
;; name.
3066(define_insn "<fmaxmin><mode>3"
3067  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3068	(unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3069		       (match_operand:VCVTF 2 "s_register_operand" "w")]
3070		       VMAXMINFNM))]
3071  "TARGET_NEON && TARGET_VFP5"
3072  "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3073  [(set_attr "type" "neon_fp_minmax_s<q>")]
3074)
3075
;; Expander for neon_vpadd over the VD modes; it forwards all three operands
;; unchanged to neon_vpadd_internal<mode>.
3076(define_expand "neon_vpadd<mode>"
3077  [(match_operand:VD 0 "s_register_operand")
3078   (match_operand:VD 1 "s_register_operand")
3079   (match_operand:VD 2 "s_register_operand")]
3080  "TARGET_NEON"
3081{
3082  emit_insn (gen_neon_vpadd_internal<mode> (operands[0], operands[1],
3083					    operands[2]));
3084  DONE;
3085})
3086
;; vpaddl, one pattern per VPADDL unspec code.  A single VDQIW source
;; produces a <V_double_width> result.
3087(define_insn "neon_vpaddl<sup><mode>"
3088  [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
3089        (unspec:<V_double_width> [(match_operand:VDQIW 1 "s_register_operand" "w")]
3090                                 VPADDL))]
3091  "TARGET_NEON"
3092  "vpaddl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3093  [(set_attr "type" "neon_reduc_add_long")]
3094)
3095
;; vpadal, one pattern per VPADAL unspec code.  Operand 1 is the
;; <V_double_width> accumulator tied to the destination (constraint "0");
;; operand 2 is the VDQIW source.
3096(define_insn "neon_vpadal<sup><mode>"
3097  [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
3098        (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0")
3099                                  (match_operand:VDQIW 2 "s_register_operand" "w")]
3100                                 VPADAL))]
3101  "TARGET_NEON"
3102  "vpadal.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
3103  [(set_attr "type" "neon_reduc_add_acc")]
3104)
3105
;; vp<maxmin>, one pattern per VPMAXMIN unspec code, over the VDI modes.
3106(define_insn "neon_vp<maxmin><sup><mode>"
3107  [(set (match_operand:VDI 0 "s_register_operand" "=w")
3108        (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
3109		    (match_operand:VDI 2 "s_register_operand" "w")]
3110                   VPMAXMIN))]
3111  "TARGET_NEON"
3112  "vp<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3113  [(set_attr "type" "neon_reduc_minmax<q>")]
3114)
3115
;; vp<maxmin>, one pattern per VPMAXMINF unspec code, over the VCVTF modes.
3116(define_insn "neon_vp<maxmin>f<mode>"
3117  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3118        (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3119		    (match_operand:VCVTF 2 "s_register_operand" "w")]
3120                   VPMAXMINF))]
3121  "TARGET_NEON"
3122  "vp<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3123  [(set_attr "type" "neon_fp_reduc_minmax_s<q>")]
3124)
3125
;; vrecps (UNSPEC_VRECPS) over the VCVTF modes; two register sources, result
;; in the same mode.
3126(define_insn "neon_vrecps<mode>"
3127  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3128        (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3129		       (match_operand:VCVTF 2 "s_register_operand" "w")]
3130                      UNSPEC_VRECPS))]
3131  "TARGET_NEON"
3132  "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3133  [(set_attr "type" "neon_fp_recps_s<q>")]
3134)
3135
;; vrecps (UNSPEC_VRECPS) over the VH modes; enabled under
;; TARGET_NEON_FP16INST.
3136(define_insn "neon_vrecps<mode>"
3137  [(set
3138    (match_operand:VH 0 "s_register_operand" "=w")
3139    (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
3140		(match_operand:VH 2 "s_register_operand" "w")]
3141     UNSPEC_VRECPS))]
3142  "TARGET_NEON_FP16INST"
3143  "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3144  [(set_attr "type" "neon_fp_recps_s<q>")]
3145)
3146
;; vrsqrts (UNSPEC_VRSQRTS) over the VCVTF modes; two register sources,
;; result in the same mode.
3147(define_insn "neon_vrsqrts<mode>"
3148  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3149        (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3150		       (match_operand:VCVTF 2 "s_register_operand" "w")]
3151                      UNSPEC_VRSQRTS))]
3152  "TARGET_NEON"
3153  "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3154  [(set_attr "type" "neon_fp_rsqrts_s<q>")]
3155)
3156
;; vrsqrts (UNSPEC_VRSQRTS) over the VH modes; enabled under
;; TARGET_NEON_FP16INST.
3157(define_insn "neon_vrsqrts<mode>"
3158  [(set
3159    (match_operand:VH 0 "s_register_operand" "=w")
3160    (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
3161		 (match_operand:VH 2 "s_register_operand" "w")]
3162     UNSPEC_VRSQRTS))]
3163 "TARGET_NEON_FP16INST"
3164 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3165 [(set_attr "type" "neon_fp_rsqrts_s<q>")]
3166)
3167
;; Expander for neon_vabs over the VDQW modes; it forwards both operands to
;; the generic abs<mode>2 pattern.
3168(define_expand "neon_vabs<mode>"
3169  [(match_operand:VDQW 0 "s_register_operand")
3170   (match_operand:VDQW 1 "s_register_operand")]
3171  "TARGET_NEON"
3172{
3173  emit_insn (gen_abs<mode>2 (operands[0], operands[1]));
3174  DONE;
3175})
3176
;; vqabs (UNSPEC_VQABS) over the VDQIW modes; one register source, result in
;; the same mode.
3177(define_insn "neon_vqabs<mode>"
3178  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3179	(unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3180		      UNSPEC_VQABS))]
3181  "TARGET_NEON"
3182  "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3183  [(set_attr "type" "neon_qabs<q>")]
3184)
3185
;; RTL bswap over the VDQHSD modes, emitted as vrev<V_sz_elem>.8 (the
;; element size selects the vrev variant, operating on 8-bit units).
3186(define_insn "neon_bswap<mode>"
3187  [(set (match_operand:VDQHSD 0 "register_operand" "=w")
3188        (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
3189  "TARGET_NEON"
3190  "vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1"
3191  [(set_attr "type" "neon_rev<q>")]
3192)
3193
;; Expander for neon_vneg over the VDQW modes; it forwards both operands to
;; the generic neg<mode>2 pattern.
3194(define_expand "neon_vneg<mode>"
3195  [(match_operand:VDQW 0 "s_register_operand")
3196   (match_operand:VDQW 1 "s_register_operand")]
3197  "TARGET_NEON"
3198{
3199  emit_insn (gen_neg<mode>2 (operands[0], operands[1]));
3200  DONE;
3201})
3202
3203
3204;; The vcadd and vcmla patterns are made UNSPEC explicitly due to the
3205;; fact that their usage needs to guarantee that the source vectors are
3206;; contiguous.  It would be wrong to describe the operation without being able
3207;; to describe the permute that is also required, but even if that is done
3208;; the permute would have been created as a LOAD_LANES which means the values
3209;; in the registers are in the wrong order.
;; vcadd, one pattern per VCADD unspec code, over the VF modes; <rot> is
;; printed as the trailing immediate.  Enabled under TARGET_COMPLEX.
3210(define_insn "neon_vcadd<rot><mode>"
3211  [(set (match_operand:VF 0 "register_operand" "=w")
3212	(unspec:VF [(match_operand:VF 1 "register_operand" "w")
3213		    (match_operand:VF 2 "register_operand" "w")]
3214		    VCADD))]
3215  "TARGET_COMPLEX"
3216  "vcadd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, #<rot>"
3217  [(set_attr "type" "neon_fcadd")]
3218)
3219
;; vcmla, one pattern per VCMLA unspec code, over the VF modes: a PLUS of
;; the accumulator operand 1 (tied to the destination, constraint "0") and
;; the unspec of operands 2 and 3.  <rot> is printed as the trailing
;; immediate.  Enabled under TARGET_COMPLEX.
3220(define_insn "neon_vcmla<rot><mode>"
3221  [(set (match_operand:VF 0 "register_operand" "=w")
3222	(plus:VF (match_operand:VF 1 "register_operand" "0")
3223		 (unspec:VF [(match_operand:VF 2 "register_operand" "w")
3224			     (match_operand:VF 3 "register_operand" "w")]
3225			     VCMLA)))]
3226  "TARGET_COMPLEX"
3227  "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3, #<rot>"
3228  [(set_attr "type" "neon_fcmla")]
3229)
3230
;; Lane form of vcmla over the VF modes.  Operand 3 has the same VF mode as
;; the other vector operands and uses the <VF_constraint> constraint;
;; operand 4 is a constant lane index.  Before the template is returned the
;; operand array is replaced by the result of
;; neon_vcmla_lane_prepare_operands (defined elsewhere); the template then
;; prints operand 3 as "d%c3[%c4]".
3231(define_insn "neon_vcmla_lane<rot><mode>"
3232  [(set (match_operand:VF 0 "s_register_operand" "=w")
3233	(plus:VF (match_operand:VF 1 "s_register_operand" "0")
3234		 (unspec:VF [(match_operand:VF 2 "s_register_operand" "w")
3235			     (match_operand:VF 3 "s_register_operand" "<VF_constraint>")
3236			     (match_operand:SI 4 "const_int_operand" "n")]
3237			     VCMLA)))]
3238  "TARGET_COMPLEX"
3239  {
3240    operands = neon_vcmla_lane_prepare_operands (operands);
3241    return "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, d%c3[%c4], #<rot>";
3242  }
3243  [(set_attr "type" "neon_fcmla")]
3244)
3245
;; Lane form of vcmla over the VDF modes where the lane source (operand 3)
;; has mode <V_DOUBLE>.  As in neon_vcmla_lane<rot><mode>, the operand array
;; is passed through neon_vcmla_lane_prepare_operands (defined elsewhere)
;; before the template, which prints operand 3 as "d%c3[%c4]", is returned.
3246(define_insn "neon_vcmla_laneq<rot><mode>"
3247  [(set (match_operand:VDF 0 "s_register_operand" "=w")
3248	(plus:VDF (match_operand:VDF 1 "s_register_operand" "0")
3249		  (unspec:VDF [(match_operand:VDF 2 "s_register_operand" "w")
3250			      (match_operand:<V_DOUBLE> 3 "s_register_operand" "<VF_constraint>")
3251			      (match_operand:SI 4 "const_int_operand" "n")]
3252			      VCMLA)))]
3253  "TARGET_COMPLEX"
3254  {
3255    operands = neon_vcmla_lane_prepare_operands (operands);
3256    return "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, d%c3[%c4], #<rot>";
3257  }
3258  [(set_attr "type" "neon_fcmla")]
3259)
3260
;; Lane form of vcmla over the VQ_HSF modes where the lane source
;; (operand 3) has mode <V_HALF>.  As in neon_vcmla_lane<rot><mode>, the
;; operand array is passed through neon_vcmla_lane_prepare_operands (defined
;; elsewhere) before the template, which prints operand 3 as "d%c3[%c4]",
;; is returned.
3261(define_insn "neon_vcmlaq_lane<rot><mode>"
3262  [(set (match_operand:VQ_HSF 0 "s_register_operand" "=w")
3263	(plus:VQ_HSF (match_operand:VQ_HSF 1 "s_register_operand" "0")
3264		 (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "s_register_operand" "w")
3265				 (match_operand:<V_HALF> 3 "s_register_operand" "<VF_constraint>")
3266				 (match_operand:SI 4 "const_int_operand" "n")]
3267				 VCMLA)))]
3268  "TARGET_COMPLEX"
3269  {
3270    operands = neon_vcmla_lane_prepare_operands (operands);
3271    return "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, d%c3[%c4], #<rot>";
3272  }
3273  [(set_attr "type" "neon_fcmla")]
3274)
3275
3276
3277;; These instructions map to the __builtins for the Dot Product operations.
;; v<sup>dot, one pattern per DOTPROD unspec code: a PLUS of the VCVTI
;; accumulator operand 1 (tied to the destination, constraint "0") and the
;; unspec of the two <VSI2QI>-mode sources.  Enabled under TARGET_DOTPROD.
3278(define_insn "neon_<sup>dot<vsi2qi>"
3279  [(set (match_operand:VCVTI 0 "register_operand" "=w")
3280	(plus:VCVTI (match_operand:VCVTI 1 "register_operand" "0")
3281		    (unspec:VCVTI [(match_operand:<VSI2QI> 2
3282							"register_operand" "w")
3283				   (match_operand:<VSI2QI> 3
3284							"register_operand" "w")]
3285		DOTPROD)))]
3286  "TARGET_DOTPROD"
3287  "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
3288  [(set_attr "type" "neon_dot<q>")]
3289)
3290
3291;; These instructions map to the __builtins for the Dot Product operations.
;; vusdot.s8 (UNSPEC_DOT_US): a PLUS of the unspec of the two <VSI2QI>-mode
;; sources and the VCVTI accumulator operand 1, which is tied to the
;; destination (constraint "0").  Here the unspec is the first arm of the
;; PLUS and the accumulator the second.  Enabled under TARGET_I8MM.
3292(define_insn "neon_usdot<vsi2qi>"
3293  [(set (match_operand:VCVTI 0 "register_operand" "=w")
3294	(plus:VCVTI
3295	  (unspec:VCVTI
3296	    [(match_operand:<VSI2QI> 2 "register_operand" "w")
3297	    (match_operand:<VSI2QI> 3 "register_operand" "w")]
3298	    UNSPEC_DOT_US)
3299	  (match_operand:VCVTI 1 "register_operand" "0")))]
3300  "TARGET_I8MM"
3301  "vusdot.s8\\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
3302  [(set_attr "type" "neon_dot<q>")]
3303)
3304
3305;; These instructions map to the __builtins for the Dot Product
3306;; indexed operations.
;; By-lane accumulating dot product (DOTPROD unspecs).  Operand 3 is always
;; a V8QI register (constraint "t") and operand 4 is the lane index, which is
;; remapped with NEON_ENDIAN_LANE_N for V8QImode before being printed.
;; Operand 1 is the accumulator, tied to operand 0.
3307(define_insn "neon_<sup>dot_lane<vsi2qi>"
3308  [(set (match_operand:VCVTI 0 "register_operand" "=w")
3309	(plus:VCVTI (match_operand:VCVTI 1 "register_operand" "0")
3310		    (unspec:VCVTI [(match_operand:<VSI2QI> 2
3311							"register_operand" "w")
3312				   (match_operand:V8QI 3 "register_operand" "t")
3313				   (match_operand:SI 4 "immediate_operand" "i")]
3314		DOTPROD)))]
3315  "TARGET_DOTPROD"
3316  {
3317    operands[4]
3318      = GEN_INT (NEON_ENDIAN_LANE_N (V8QImode, INTVAL (operands[4])));
3319    return "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %P3[%c4]";
3320  }
3321  [(set_attr "type" "neon_dot<q>")]
3322)
3323
3324;; These instructions map to the __builtins for the Dot Product
3325;; indexed operations in the v8.6 I8MM extension.
;; By-lane accumulating dot product for the DOTPROD_I8MM unspecs, enabled by
;; TARGET_I8MM.  The PLUS has the unspec first and the accumulator (operand 1,
;; tied to operand 0) second.  Operand 4 is rebuilt from its own INTVAL, so
;; the lane index is printed unchanged.
;; NOTE(review): unlike the DOTPROD lane pattern, no NEON_ENDIAN_LANE_N
;; remapping is applied here -- confirm this is intended for big-endian.
3326(define_insn "neon_<sup>dot_lane<vsi2qi>"
3327  [(set (match_operand:VCVTI 0 "register_operand" "=w")
3328	(plus:VCVTI
3329	  (unspec:VCVTI
3330	   [(match_operand:<VSI2QI> 2 "register_operand" "w")
3331	    (match_operand:V8QI 3 "register_operand" "t")
3332	    (match_operand:SI 4 "immediate_operand" "i")]
3333	    DOTPROD_I8MM)
3334	  (match_operand:VCVTI 1 "register_operand" "0")))]
3335  "TARGET_I8MM"
3336  {
3337    operands[4] = GEN_INT (INTVAL (operands[4]));
3338    return "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %P3[%c4]";
3339  }
3340  [(set_attr "type" "neon_dot<q>")]
3341)
3342
3343;; These expands map to the Dot Product optab the vectorizer checks for.
3344;; The auto-vectorizer expects a dot product builtin that also does an
3345;; accumulation into the provided register.
3346;; Given the following pattern
3347;;
3348;; for (i=0; i<len; i++) {
3349;;     c = a[i] * b[i];
3350;;     r += c;
3351;; }
3352;; return r;
3353;;
3354;; This can be auto-vectorized to
3355;; r  = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
3356;;
3357;; given enough iterations.  However the vectorizer can keep unrolling the loop
3358;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
3359;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
3360;; ...
3361;;
3362;; and so the vectorizer provides r, in which the result has to be accumulated.
;; Expander for the <sup>dot_prod<vsi2qi> optab: operand 0 receives operand 3
;; plus the DOTPROD unspec of operands 1 and 2.
;; The accumulate is emitted directly into operand 0 with operand 3 as the
;; accumulator input.  Operand 3 is an input of this expander and may have
;; other uses, so it must not be overwritten.
3363(define_expand "<sup>dot_prod<vsi2qi>"
3364  [(set (match_operand:VCVTI 0 "register_operand")
3365	(plus:VCVTI (unspec:VCVTI [(match_operand:<VSI2QI> 1
3366							"register_operand")
3367				   (match_operand:<VSI2QI> 2
3368							"register_operand")]
3369		     DOTPROD)
3370		    (match_operand:VCVTI 3 "register_operand")))]
3371  "TARGET_DOTPROD"
3372{
  /* neon_<sup>dot<vsi2qi> ties its operand 1 to operand 0 through the "0"
     constraint, so register allocation supplies any copy that is needed.  */
3373  emit_insn (
3374    gen_neon_<sup>dot<vsi2qi> (operands[0], operands[3], operands[1],
3375				 operands[2]));
3377  DONE;
3378})
3379
;; Expander for copysign on the VCVTF modes.  It materialises a vector with
;; 0x80000000 in every element (in the <V_cmp_result> mode), copies operand 2
;; into operand 0, and then emits neon_vbsl<mode> with the mask (viewed in
;; <MODE>mode through a subreg), operand 0 and operand 1.
;; NOTE(review): presumably this keeps the sign bit of operand 2 and takes the
;; remaining bits from operand 1; this relies on the neon_vbsl operand
;; semantics, which are defined elsewhere -- confirm there.
3380(define_expand "neon_copysignf<mode>"
3381  [(match_operand:VCVTF 0 "register_operand")
3382   (match_operand:VCVTF 1 "register_operand")
3383   (match_operand:VCVTF 2 "register_operand")]
3384  "TARGET_NEON"
3385  "{
3386     rtx v_bitmask_cast;
3387     rtx v_bitmask = gen_reg_rtx (<VCVTF:V_cmp_result>mode);
3388     rtx c = gen_int_mode (0x80000000, SImode);
3389
3390     emit_move_insn (v_bitmask,
3391		     gen_const_vec_duplicate (<VCVTF:V_cmp_result>mode, c));
3392     emit_move_insn (operands[0], operands[2]);
3393     v_bitmask_cast = simplify_gen_subreg (<MODE>mode, v_bitmask,
3394					   <VCVTF:V_cmp_result>mode, 0);
3395     emit_insn (gen_neon_vbsl<mode> (operands[0], v_bitmask_cast, operands[0],
3396				     operands[1]));
3397
3398     DONE;
3399  }"
3400)
3401
;; VQNEG on the VDQIW modes, modelled as UNSPEC_VQNEG of operand 1.
3402(define_insn "neon_vqneg<mode>"
3403  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3404	(unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3405		      UNSPEC_VQNEG))]
3406  "TARGET_NEON"
3407  "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3408  [(set_attr "type" "neon_qneg<q>")]
3409)
3410
;; VCLS on the VDQIW modes, modelled as UNSPEC_VCLS of operand 1.
3411(define_insn "neon_vcls<mode>"
3412  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3413	(unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3414		      UNSPEC_VCLS))]
3415  "TARGET_NEON"
3416  "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3417  [(set_attr "type" "neon_cls<q>")]
3418)
3419
;; Vector count-leading-zeros for the VDQIW modes, expressed with the generic
;; CLZ rtx code and emitted as VCLZ.
3420(define_insn "clz<mode>2"
3421  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3422        (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))]
3423  "TARGET_NEON"
3424  "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1"
3425  [(set_attr "type" "neon_cnt<q>")]
3426)
3427
;; neon_vclz<mode> expander: forwards both operands to clz<mode>2.
3428(define_expand "neon_vclz<mode>"
3429  [(match_operand:VDQIW 0 "s_register_operand")
3430   (match_operand:VDQIW 1 "s_register_operand")]
3431  "TARGET_NEON"
3432{
3433  emit_insn (gen_clz<mode>2 (operands[0], operands[1]));
3434  DONE;
3435})
3436
;; Vector population count for the VE modes, expressed with the generic
;; POPCOUNT rtx code and emitted as VCNT.
3437(define_insn "popcount<mode>2"
3438  [(set (match_operand:VE 0 "s_register_operand" "=w")
3439        (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))]
3440  "TARGET_NEON"
3441  "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3442  [(set_attr "type" "neon_cnt<q>")]
3443)
3444
;; neon_vcnt<mode> expander: forwards both operands to popcount<mode>2.
3445(define_expand "neon_vcnt<mode>"
3446  [(match_operand:VE 0 "s_register_operand")
3447   (match_operand:VE 1 "s_register_operand")]
3448  "TARGET_NEON"
3449{
3450  emit_insn (gen_popcount<mode>2 (operands[0], operands[1]));
3451  DONE;
3452})
3453
;; VRECPE for the VH modes (UNSPEC_VRECPE); the ".f16" suffix is fixed and the
;; pattern requires TARGET_NEON_FP16INST.
3454(define_insn "neon_vrecpe<mode>"
3455  [(set (match_operand:VH 0 "s_register_operand" "=w")
3456	(unspec:VH [(match_operand:VH 1 "s_register_operand" "w")]
3457		   UNSPEC_VRECPE))]
3458  "TARGET_NEON_FP16INST"
3459  "vrecpe.f16\t%<V_reg>0, %<V_reg>1"
3460  [(set_attr "type" "neon_fp_recpe_s<q>")]
3461)
3462
;; VRECPE for the V32 modes (UNSPEC_VRECPE); the element suffix comes from
;; <V_u_elem>.
3463(define_insn "neon_vrecpe<mode>"
3464  [(set (match_operand:V32 0 "s_register_operand" "=w")
3465	(unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
3466                    UNSPEC_VRECPE))]
3467  "TARGET_NEON"
3468  "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
3469  [(set_attr "type" "neon_fp_recpe_s<q>")]
3470)
3471
;; VRSQRTE for the V32 modes (UNSPEC_VRSQRTE); the element suffix comes from
;; <V_u_elem>.
3472(define_insn "neon_vrsqrte<mode>"
3473  [(set (match_operand:V32 0 "s_register_operand" "=w")
3474	(unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
3475                    UNSPEC_VRSQRTE))]
3476  "TARGET_NEON"
3477  "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
3478  [(set_attr "type" "neon_fp_rsqrte_s<q>")]
3479)
3480
;; neon_vmvn<mode> expander: forwards both operands to one_cmpl<mode>2.
3481(define_expand "neon_vmvn<mode>"
3482  [(match_operand:VDQIW 0 "s_register_operand")
3483   (match_operand:VDQIW 1 "s_register_operand")]
3484  "TARGET_NEON"
3485{
3486  emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[1]));
3487  DONE;
3488})
3489
;; Extract one lane of a VD vector into a core register, sign-extended to SI.
;; Operand 2 is the lane index; for BYTES_BIG_ENDIAN it is mirrored
;; (nunits - 1 - lane) before the "vmov.s<size>" is printed.
3490(define_insn "neon_vget_lane<mode>_sext_internal"
3491  [(set (match_operand:SI 0 "s_register_operand" "=r")
3492	(sign_extend:SI
3493	  (vec_select:<V_elem>
3494	    (match_operand:VD 1 "s_register_operand" "w")
3495	    (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3496  "TARGET_NEON"
3497{
3498  if (BYTES_BIG_ENDIAN)
3499    {
3500      int elt = INTVAL (operands[2]);
3501      elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
3502      operands[2] = GEN_INT (elt);
3503    }
3504  return "vmov.s<V_sz_elem>\t%0, %P1[%c2]";
3505}
3506  [(set_attr "type" "neon_to_gp")]
3507)
3508
;; Extract one lane of a VD vector into a core register, zero-extended to SI.
;; Operand 2 is the lane index; for BYTES_BIG_ENDIAN it is mirrored
;; (nunits - 1 - lane) before the "vmov.u<size>" is printed.
3509(define_insn "neon_vget_lane<mode>_zext_internal"
3510  [(set (match_operand:SI 0 "s_register_operand" "=r")
3511	(zero_extend:SI
3512	  (vec_select:<V_elem>
3513	    (match_operand:VD 1 "s_register_operand" "w")
3514	    (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3515  "TARGET_NEON"
3516{
3517  if (BYTES_BIG_ENDIAN)
3518    {
3519      int elt = INTVAL (operands[2]);
3520      elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
3521      operands[2] = GEN_INT (elt);
3522    }
3523  return "vmov.u<V_sz_elem>\t%0, %P1[%c2]";
3524}
3525  [(set_attr "type" "neon_to_gp")]
3526)
3527
;; Extract one lane of a VQ2 vector into a core register, sign-extended to SI.
;; The lane index selects a <V_HALF>mode register (regno + 2 * (lane /
;; halfelts)) and a lane within it (lane % halfelts, mirrored within the half
;; for BYTES_BIG_ENDIAN).  The instruction is printed with output_asm_insn on
;; a local operand array, so the template returned is empty.
3528(define_insn "neon_vget_lane<mode>_sext_internal"
3529  [(set (match_operand:SI 0 "s_register_operand" "=r")
3530	(sign_extend:SI
3531	  (vec_select:<V_elem>
3532	    (match_operand:VQ2 1 "s_register_operand" "w")
3533	    (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3534  "TARGET_NEON"
3535{
3536  rtx ops[3];
3537  int regno = REGNO (operands[1]);
3538  unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
3539  unsigned int elt = INTVAL (operands[2]);
3540  unsigned int elt_adj = elt % halfelts;
3541
3542  if (BYTES_BIG_ENDIAN)
3543    elt_adj = halfelts - 1 - elt_adj;
3544
3545  ops[0] = operands[0];
3546  ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
3547  ops[2] = GEN_INT (elt_adj);
3548  output_asm_insn ("vmov.s<V_sz_elem>\t%0, %P1[%c2]", ops);
3549
3550  return "";
3551}
3552  [(set_attr "type" "neon_to_gp_q")]
3553)
3554
;; Extract one lane of a VQ2 vector into a core register, zero-extended to SI.
;; The lane index selects a <V_HALF>mode register (regno + 2 * (lane /
;; halfelts)) and a lane within it (lane % halfelts, mirrored within the half
;; for BYTES_BIG_ENDIAN).  The instruction is printed with output_asm_insn on
;; a local operand array, so the template returned is empty.
3555(define_insn "neon_vget_lane<mode>_zext_internal"
3556  [(set (match_operand:SI 0 "s_register_operand" "=r")
3557	(zero_extend:SI
3558	  (vec_select:<V_elem>
3559	    (match_operand:VQ2 1 "s_register_operand" "w")
3560	    (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3561  "TARGET_NEON"
3562{
3563  rtx ops[3];
3564  int regno = REGNO (operands[1]);
3565  unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
3566  unsigned int elt = INTVAL (operands[2]);
3567  unsigned int elt_adj = elt % halfelts;
3568
3569  if (BYTES_BIG_ENDIAN)
3570    elt_adj = halfelts - 1 - elt_adj;
3571
3572  ops[0] = operands[0];
3573  ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
3574  ops[2] = GEN_INT (elt_adj);
3575  output_asm_insn ("vmov.u<V_sz_elem>\t%0, %P1[%c2]", ops);
3576
3577  return "";
3578}
3579  [(set_attr "type" "neon_to_gp_q")]
3580)
3581
;; Lane-extract expander for the VDQW modes (signed variant).
;; On big-endian the lane index is XORed with (lanes per 64 bits - 1) to
;; convert between the two lane-numbering models described in the body.
;; 32-bit elements go through vec_extract<mode><V_elem_l>; narrower elements
;; use the sign-extending internal pattern.
3582(define_expand "neon_vget_lane<mode>"
3583  [(match_operand:<V_ext> 0 "s_register_operand")
3584   (match_operand:VDQW 1 "s_register_operand")
3585   (match_operand:SI 2 "immediate_operand")]
3586  "TARGET_NEON"
3587{
3588  if (BYTES_BIG_ENDIAN)
3589    {
3590      /* The intrinsics are defined in terms of a model where the
3591	 element ordering in memory is vldm order, whereas the generic
3592	 RTL is defined in terms of a model where the element ordering
3593	 in memory is array order.  Convert the lane number to conform
3594	 to this model.  */
3595      unsigned int elt = INTVAL (operands[2]);
3596      unsigned int reg_nelts
3597	= 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3598      elt ^= reg_nelts - 1;
3599      operands[2] = GEN_INT (elt);
3600    }
3601
3602  if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
3603    emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1],
3604						operands[2]));
3605  else
3606    emit_insn (gen_neon_vget_lane<mode>_sext_internal (operands[0],
3607						       operands[1],
3608						       operands[2]));
3609  DONE;
3610})
3611
;; Lane-extract expander for the VDQIW modes (unsigned variant).
;; On big-endian the lane index is XORed with (lanes per 64 bits - 1) to
;; convert between the two lane-numbering models described in the body.
;; 32-bit elements go through vec_extract<mode><V_elem_l>; narrower elements
;; use the zero-extending internal pattern.
3612(define_expand "neon_vget_laneu<mode>"
3613  [(match_operand:<V_ext> 0 "s_register_operand")
3614   (match_operand:VDQIW 1 "s_register_operand")
3615   (match_operand:SI 2 "immediate_operand")]
3616  "TARGET_NEON"
3617{
3618  if (BYTES_BIG_ENDIAN)
3619    {
3620      /* The intrinsics are defined in terms of a model where the
3621	 element ordering in memory is vldm order, whereas the generic
3622	 RTL is defined in terms of a model where the element ordering
3623	 in memory is array order.  Convert the lane number to conform
3624	 to this model.  */
3625      unsigned int elt = INTVAL (operands[2]);
3626      unsigned int reg_nelts
3627	= 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3628      elt ^= reg_nelts - 1;
3629      operands[2] = GEN_INT (elt);
3630    }
3631
3632  if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
3633    emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1],
3634						operands[2]));
3635  else
3636    emit_insn (gen_neon_vget_lane<mode>_zext_internal (operands[0],
3637						       operands[1],
3638						       operands[2]));
3639  DONE;
3640})
3641
;; DImode lane extract: a plain move of operand 1 into operand 0.  The lane
;; index (operand 2) is not used by the expansion.
3642(define_expand "neon_vget_lanedi"
3643  [(match_operand:DI 0 "s_register_operand")
3644   (match_operand:DI 1 "s_register_operand")
3645   (match_operand:SI 2 "immediate_operand")]
3646  "TARGET_NEON"
3647{
3648  emit_move_insn (operands[0], operands[1]);
3649  DONE;
3650})
3651
;; V2DI lane extract.  On big-endian the lane index is flipped (XOR 1) first.
;; The lane is asserted to be 0 or 1; lane 0 moves the DImode low part of
;; operand 1 into operand 0, lane 1 moves the high part.
3652(define_expand "neon_vget_lanev2di"
3653  [(match_operand:DI 0 "s_register_operand")
3654   (match_operand:V2DI 1 "s_register_operand")
3655   (match_operand:SI 2 "immediate_operand")]
3656  "TARGET_NEON"
3657{
3658  int lane;
3659
3660if (BYTES_BIG_ENDIAN)
3661    {
3662      /* The intrinsics are defined in terms of a model where the
3663	 element ordering in memory is vldm order, whereas the generic
3664	 RTL is defined in terms of a model where the element ordering
3665	 in memory is array order.  Convert the lane number to conform
3666	 to this model.  */
3667      unsigned int elt = INTVAL (operands[2]);
3668      unsigned int reg_nelts = 2;
3669      elt ^= reg_nelts - 1;
3670      operands[2] = GEN_INT (elt);
3671    }
3672
3673  lane = INTVAL (operands[2]);
3674  gcc_assert ((lane ==0) || (lane == 1));
3675  emit_move_insn (operands[0], lane == 0
3676				? gen_lowpart (DImode, operands[1])
3677				: gen_highpart (DImode, operands[1]));
3678  DONE;
3679})
3680
;; Lane-insert expander for the VDQ modes: operand 1 is the scalar, operand 2
;; the source vector and operand 3 the lane index.  On big-endian the index is
;; XORed with (lanes per 64 bits - 1).  The lane is handed to
;; vec_set<mode>_internal as the one-hot constant (1 << lane).
3681(define_expand "neon_vset_lane<mode>"
3682  [(match_operand:VDQ 0 "s_register_operand")
3683   (match_operand:<V_elem> 1 "s_register_operand")
3684   (match_operand:VDQ 2 "s_register_operand")
3685   (match_operand:SI 3 "immediate_operand")]
3686  "TARGET_NEON"
3687{
3688  unsigned int elt = INTVAL (operands[3]);
3689
3690  if (BYTES_BIG_ENDIAN)
3691    {
3692      unsigned int reg_nelts
3693	= 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3694      elt ^= reg_nelts - 1;
3695    }
3696
3697  emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
3698                                         GEN_INT (1 << elt), operands[2]));
3699  DONE;
3700})
3701
3702; Operands 2 & 3 are ignored, as the lane index is in neon_vget_lanedi: the
; expansion is a plain move of operand 1 into operand 0.
3703
;; DImode lane insert: emits only a move of operand 1 into operand 0.
3704(define_expand "neon_vset_lanedi"
3705  [(match_operand:DI 0 "s_register_operand")
3706   (match_operand:DI 1 "s_register_operand")
3707   (match_operand:DI 2 "s_register_operand")
3708   (match_operand:SI 3 "immediate_operand")]
3709  "TARGET_NEON"
3710{
3711  emit_move_insn (operands[0], operands[1]);
3712  DONE;
3713})
3714
;; vcreate expander for the VD_RE modes: reinterprets the DImode operand 1 in
;; <MODE>mode with gen_lowpart and moves it into operand 0.
3715(define_expand "neon_vcreate<mode>"
3716  [(match_operand:VD_RE 0 "s_register_operand")
3717   (match_operand:DI 1 "general_operand")]
3718  "TARGET_NEON"
3719{
3720  rtx src = gen_lowpart (<MODE>mode, operands[1]);
3721  emit_move_insn (operands[0], src);
3722  DONE;
3723})
3724
;; Broadcast a scalar held in a core register ("r") to every element of a VX
;; vector (vec_duplicate), emitted as VDUP.
3725(define_insn "neon_vdup_n<mode>"
3726  [(set (match_operand:VX 0 "s_register_operand" "=w")
3727        (vec_duplicate:VX (match_operand:<V_elem> 1 "s_register_operand" "r")))]
3728  "TARGET_NEON"
3729  "vdup.<V_sz_elem>\t%<V_reg>0, %1"
3730  [(set_attr "type" "neon_from_gp<q>")]
3731)
3732
;; Broadcast an HFmode scalar from a core register into a V4HF vector
;; ("vdup.16" into a D register).
3733(define_insn "neon_vdup_nv4hf"
3734  [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3735        (vec_duplicate:V4HF (match_operand:HF 1 "s_register_operand" "r")))]
3736  "TARGET_NEON"
3737  "vdup.16\t%P0, %1"
3738  [(set_attr "type" "neon_from_gp")]
3739)
3740
;; Broadcast an HFmode scalar from a core register into a V8HF vector
;; ("vdup.16" into a Q register).
3741(define_insn "neon_vdup_nv8hf"
3742  [(set (match_operand:V8HF 0 "s_register_operand" "=w")
3743        (vec_duplicate:V8HF (match_operand:HF 1 "s_register_operand" "r")))]
3744  "TARGET_NEON"
3745  "vdup.16\t%q0, %1"
3746  [(set_attr "type" "neon_from_gp_q")]
3747)
3748
;; Broadcast a BFmode scalar from a core register into a V4BF vector
;; ("vdup.16" into a D register).
3749(define_insn "neon_vdup_nv4bf"
3750  [(set (match_operand:V4BF 0 "s_register_operand" "=w")
3751        (vec_duplicate:V4BF (match_operand:BF 1 "s_register_operand" "r")))]
3752  "TARGET_NEON"
3753  "vdup.16\t%P0, %1"
3754  [(set_attr "type" "neon_from_gp")]
3755)
3756
;; Broadcast a BFmode scalar from a core register into a V8BF vector
;; ("vdup.16" into a Q register).
3757(define_insn "neon_vdup_nv8bf"
3758  [(set (match_operand:V8BF 0 "s_register_operand" "=w")
3759        (vec_duplicate:V8BF (match_operand:BF 1 "s_register_operand" "r")))]
3760  "TARGET_NEON"
3761  "vdup.16\t%q0, %1"
3762  [(set_attr "type" "neon_from_gp_q")]
3763)
3764
;; Broadcast a scalar into a V32 vector.  Two alternatives: the scalar in a
;; core register ("r", printed as %1) or in a "t" register (printed with the
;; %y modifier).  Each alternative has its own "type" attribute value.
3765(define_insn "neon_vdup_n<mode>"
3766  [(set (match_operand:V32 0 "s_register_operand" "=w,w")
3767        (vec_duplicate:V32 (match_operand:<V_elem> 1 "s_register_operand" "r,t")))]
3768  "TARGET_NEON"
3769  "@
3770  vdup.<V_sz_elem>\t%<V_reg>0, %1
3771  vdup.<V_sz_elem>\t%<V_reg>0, %y1"
3772  [(set_attr "type" "neon_from_gp<q>,neon_dup<q>")]
3773)
3774
;; DImode "broadcast": a plain move of operand 1 into operand 0.
3775(define_expand "neon_vdup_ndi"
3776  [(match_operand:DI 0 "s_register_operand")
3777   (match_operand:DI 1 "s_register_operand")]
3778  "TARGET_NEON"
3779{
3780  emit_move_insn (operands[0], operands[1]);
3781  DONE;
3782}
3783)
3784
;; Broadcast a DImode value into both halves of a V2DI register.  Two
;; alternatives (source in core registers "r" or in a "w" register); each
;; emits two VMOVs, one per destination half (%e0 and %f0), hence length 8.
3785(define_insn "neon_vdup_nv2di"
3786  [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
3787        (vec_duplicate:V2DI (match_operand:DI 1 "s_register_operand" "r,w")))]
3788  "TARGET_NEON"
3789  "@
3790  vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1
3791  vmov\t%e0, %P1\;vmov\t%f0, %P1"
3792  [(set_attr "length" "8")
3793   (set_attr "type" "multiple")]
3794)
3795
;; Broadcast one lane of operand 1 (mode <V_double_vector_mode>) to every
;; element of a VDQW destination.  For BYTES_BIG_ENDIAN the lane index is
;; mirrored (nunits - 1 - lane).  <Is_d_reg> chooses whether the destination
;; is printed as a D (%P0) or Q (%q0) register.
3796(define_insn "neon_vdup_lane<mode>_internal"
3797  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
3798  	(vec_duplicate:VDQW
3799          (vec_select:<V_elem>
3800            (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3801            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3802  "TARGET_NEON"
3803{
3804  if (BYTES_BIG_ENDIAN)
3805    {
3806      int elt = INTVAL (operands[2]);
3807      elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
3808      operands[2] = GEN_INT (elt);
3809    }
3810  if (<Is_d_reg>)
3811    return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
3812  else
3813    return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
3814}
3815  [(set_attr "type" "neon_dup<q>")]
3816)
3817
;; VHFBF counterpart of the lane-broadcast pattern; additionally requires
;; TARGET_FP16 or TARGET_BF16_SIMD.  For BYTES_BIG_ENDIAN the lane index is
;; mirrored (nunits - 1 - lane).  <Is_d_reg> chooses whether the destination
;; is printed as a D (%P0) or Q (%q0) register.
3818(define_insn "neon_vdup_lane<mode>_internal"
3819 [(set (match_operand:VHFBF 0 "s_register_operand" "=w")
3820   (vec_duplicate:VHFBF
3821    (vec_select:<V_elem>
3822     (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3823     (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3824 "TARGET_NEON && (TARGET_FP16 || TARGET_BF16_SIMD)"
3825{
3826  if (BYTES_BIG_ENDIAN)
3827    {
3828      int elt = INTVAL (operands[2]);
3829      elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
3830      operands[2] = GEN_INT (elt);
3831    }
3832  if (<Is_d_reg>)
3833    return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
3834  else
3835    return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
3836}
3837  [(set_attr "type" "neon_dup<q>")]
3838)
3839
;; Lane-broadcast expander for the VDQW modes.  On big-endian the lane index
;; is XORed with (lanes per 64 bits - 1) before the operands are forwarded to
;; neon_vdup_lane<mode>_internal.
3840(define_expand "neon_vdup_lane<mode>"
3841  [(match_operand:VDQW 0 "s_register_operand")
3842   (match_operand:<V_double_vector_mode> 1 "s_register_operand")
3843   (match_operand:SI 2 "immediate_operand")]
3844  "TARGET_NEON"
3845{
3846  if (BYTES_BIG_ENDIAN)
3847    {
3848      unsigned int elt = INTVAL (operands[2]);
3849      unsigned int reg_nelts
3850	= 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
3851      elt ^= reg_nelts - 1;
3852      operands[2] = GEN_INT (elt);
3853    }
3854    emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
3855                                                  operands[2]));
3856    DONE;
3857})
3858
;; Lane-broadcast expander for the VHFBF modes (requires TARGET_FP16 or
;; TARGET_BF16_SIMD).  On big-endian the lane index is XORed with (lanes per
;; 64 bits - 1) before the operands are forwarded to
;; neon_vdup_lane<mode>_internal.
3859(define_expand "neon_vdup_lane<mode>"
3860  [(match_operand:VHFBF 0 "s_register_operand")
3861   (match_operand:<V_double_vector_mode> 1 "s_register_operand")
3862   (match_operand:SI 2 "immediate_operand")]
3863  "TARGET_NEON && (TARGET_FP16 || TARGET_BF16_SIMD)"
3864{
3865  if (BYTES_BIG_ENDIAN)
3866    {
3867      unsigned int elt = INTVAL (operands[2]);
3868      unsigned int reg_nelts
3869	= 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
3870      elt ^= reg_nelts - 1;
3871      operands[2] = GEN_INT (elt);
3872    }
3873  emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
3874						operands[2]));
3875  DONE;
3876})
3877
3878; Scalar index is ignored, since only zero is valid here.
;; DImode lane broadcast: a plain move of operand 1 into operand 0; operand 2
;; (the lane index) is not used by the expansion.
3879(define_expand "neon_vdup_lanedi"
3880  [(match_operand:DI 0 "s_register_operand")
3881   (match_operand:DI 1 "s_register_operand")
3882   (match_operand:SI 2 "immediate_operand")]
3883  "TARGET_NEON"
3884{
3885  emit_move_insn (operands[0], operands[1]);
3886  DONE;
3887})
3888
3889; Likewise for v2di, as the DImode second operand has only a single element.
;; V2DI lane broadcast from a DImode source: forwards operands 0 and 1 to
;; neon_vdup_nv2di; operand 2 (the lane index) is not used.
3890(define_expand "neon_vdup_lanev2di"
3891  [(match_operand:V2DI 0 "s_register_operand")
3892   (match_operand:DI 1 "s_register_operand")
3893   (match_operand:SI 2 "immediate_operand")]
3894  "TARGET_NEON"
3895{
3896  emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1]));
3897  DONE;
3898})
3899
3900; Disabled before reload because we don't want combine doing something silly,
3901; but used by the post-reload expansion of neon_vcombine.
;; Register swap: two SETs exchanging operands 0 and 1, both read-write
;; ("+w").  Only matches once reload_completed is true.
3902(define_insn "*neon_vswp<mode>"
3903  [(set (match_operand:VDQX 0 "s_register_operand" "+w")
3904	(match_operand:VDQX 1 "s_register_operand" "+w"))
3905   (set (match_dup 1) (match_dup 0))]
3906  "TARGET_NEON && reload_completed"
3907  "vswp\t%<V_reg>0, %<V_reg>1"
3908  [(set_attr "type" "neon_permute<q>")]
3909)
3910
3911;; In this insn, operand 1 should be low, and operand 2 the high part of the
3912;; dest vector.
3913;; FIXME: A different implementation of this builtin could make it much
3914;; more likely that we wouldn't actually need to output anything (we could make
3915;; it so that the reg allocator puts things in the right places magically
3916;; instead). Lack of subregs for vectors makes that tricky though, I think.
3917
;; vec_concat of two VDX vectors into a <V_DOUBLE> result.  The insn has no
;; direct assembly ("#"); it is always split after reload, where
;; neon_split_vcombine emits the actual instructions.
3918(define_insn_and_split "neon_vcombine<mode>"
3919  [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w")
3920        (vec_concat:<V_DOUBLE>
3921	  (match_operand:VDX 1 "s_register_operand" "w")
3922	  (match_operand:VDX 2 "s_register_operand" "w")))]
3923  "TARGET_NEON"
3924  "#"
3925  "&& reload_completed"
3926  [(const_int 0)]
3927{
3928  neon_split_vcombine (operands);
3929  DONE;
3930}
3931[(set_attr "type" "multiple")]
3932)
3933
;; Moves into operand 0 the <V_HALF>mode subreg of operand 1 that starts at
;; byte offset GET_MODE_SIZE (<V_HALF>mode), i.e. the second half of the
;; VQXBF register in subreg terms.
3934(define_expand "neon_vget_high<mode>"
3935  [(match_operand:<V_HALF> 0 "s_register_operand")
3936   (match_operand:VQXBF 1 "s_register_operand")]
3937  "TARGET_NEON"
3938{
3939  emit_move_insn (operands[0],
3940		  simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
3941				       GET_MODE_SIZE (<V_HALF>mode)));
3942  DONE;
3943})
3944
;; Moves into operand 0 the <V_HALF>mode subreg of operand 1 at byte offset 0.
3945(define_expand "neon_vget_low<mode>"
3946  [(match_operand:<V_HALF> 0 "s_register_operand")
3947   (match_operand:VQX 1 "s_register_operand")]
3948  "TARGET_NEON"
3949{
3950  emit_move_insn (operands[0],
3951		  simplify_gen_subreg (<V_HALF>mode, operands[1],
3952				       <MODE>mode, 0));
3953  DONE;
3954})
3955
;; Signed integer vector (VCVTI) to float conversion using the generic FLOAT
;; rtx code.  Disabled when flag_rounding_math is set.
3956(define_insn "float<mode><V_cvtto>2"
3957  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3958        (float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
3959  "TARGET_NEON && !flag_rounding_math"
3960  "vcvt.f32.s32\t%<V_reg>0, %<V_reg>1"
3961  [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
3962)
3963
;; Unsigned integer vector (VCVTI) to float conversion using the generic
;; UNSIGNED_FLOAT rtx code.  Disabled when flag_rounding_math is set.
3964(define_insn "floatuns<mode><V_cvtto>2"
3965  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3966        (unsigned_float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
3967  "TARGET_NEON && !flag_rounding_math"
3968  "vcvt.f32.u32\t%<V_reg>0, %<V_reg>1"
3969  [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
3970)
3971
;; Float vector (VCVTF) to signed integer conversion using the generic FIX
;; rtx code, emitted as "vcvt.s32.f32".
3972(define_insn "fix_trunc<mode><V_cvtto>2"
3973  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3974        (fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
3975  "TARGET_NEON"
3976  "vcvt.s32.f32\t%<V_reg>0, %<V_reg>1"
3977  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
3978)
3979
;; Float vector (VCVTF) to unsigned integer conversion using the generic
;; UNSIGNED_FIX rtx code, emitted as "vcvt.u32.f32".
3980(define_insn "fixuns_trunc<mode><V_cvtto>2"
3981  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3982        (unsigned_fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
3983  "TARGET_NEON"
3984  "vcvt.u32.f32\t%<V_reg>0, %<V_reg>1"
3985  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
3986)
3987
;; Float (VCVTF) to 32-bit integer conversion modelled with the VCVT_US
;; unspecs; <sup> supplies the signedness letter in the mnemonic suffix.
3988(define_insn "neon_vcvt<sup><mode>"
3989  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3990	(unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")]
3991			  VCVT_US))]
3992  "TARGET_NEON"
3993  "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1"
3994  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
3995)
3996
;; 32-bit integer (VCVTI) to float conversion modelled with the VCVT_US
;; unspecs; <sup> supplies the signedness letter of the source type.
3997(define_insn "neon_vcvt<sup><mode>"
3998  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3999	(unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")]
4000			  VCVT_US))]
4001  "TARGET_NEON"
4002  "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1"
4003  [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
4004)
4005
;; Widening conversion V4HF -> V4SF (UNSPEC_VCVT): D-register source,
;; Q-register destination.  Requires TARGET_FP16.
4006(define_insn "neon_vcvtv4sfv4hf"
4007  [(set (match_operand:V4SF 0 "s_register_operand" "=w")
4008	(unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")]
4009			  UNSPEC_VCVT))]
4010  "TARGET_NEON && TARGET_FP16"
4011  "vcvt.f32.f16\t%q0, %P1"
4012  [(set_attr "type" "neon_fp_cvt_widen_h")]
4013)
4014
;; Narrowing conversion V4SF -> V4HF (UNSPEC_VCVT): Q-register source,
;; D-register destination.  Requires TARGET_FP16.
4015(define_insn "neon_vcvtv4hfv4sf"
4016  [(set (match_operand:V4HF 0 "s_register_operand" "=w")
4017	(unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")]
4018			  UNSPEC_VCVT))]
4019  "TARGET_NEON && TARGET_FP16"
4020  "vcvt.f16.f32\t%P0, %q1"
4021  [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
4022)
4023
;; 16-bit integer (VCVTHI) to f16 conversion modelled with the VCVT_US
;; unspecs.  Requires TARGET_NEON_FP16INST.
4024(define_insn "neon_vcvt<sup><mode>"
4025 [(set
4026   (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4027   (unspec:<VH_CVTTO>
4028    [(match_operand:VCVTHI 1 "s_register_operand" "w")]
4029    VCVT_US))]
4030 "TARGET_NEON_FP16INST"
4031 "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1"
4032  [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
4033)
4034
;; f16 (VH) to 16-bit integer conversion modelled with the VCVT_US unspecs.
;; Requires TARGET_NEON_FP16INST.
4035(define_insn "neon_vcvt<sup><mode>"
4036 [(set
4037   (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4038   (unspec:<VH_CVTTO>
4039    [(match_operand:VH 1 "s_register_operand" "w")]
4040    VCVT_US))]
4041 "TARGET_NEON_FP16INST"
4042 "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
4043  [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
4044)
4045
;; Float (VCVTF) to 32-bit integer conversion with an extra immediate
;; (operand 2), modelled with the VCVT_US_N unspecs.  The immediate is
;; checked by arm_const_bounds with bounds (1, 33) and printed as the third
;; assembly operand.
4046(define_insn "neon_vcvt<sup>_n<mode>"
4047  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4048	(unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
4049			   (match_operand:SI 2 "immediate_operand" "i")]
4050			  VCVT_US_N))]
4051  "TARGET_NEON"
4052{
4053  arm_const_bounds (operands[2], 1, 33);
4054  return "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1, %2";
4055}
4056  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
4057)
4058
;; f16 (VH) to 16-bit integer conversion with an extra immediate (operand 2),
;; modelled with the VCVT_US_N unspecs.  The immediate is checked by
;; arm_const_bounds with bounds (0, 17).  Requires TARGET_NEON_FP16INST.
4059(define_insn "neon_vcvt<sup>_n<mode>"
4060 [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4061   (unspec:<VH_CVTTO>
4062    [(match_operand:VH 1 "s_register_operand" "w")
4063     (match_operand:SI 2 "immediate_operand" "i")]
4064    VCVT_US_N))]
4065  "TARGET_NEON_FP16INST"
4066{
4067  arm_const_bounds (operands[2], 0, 17);
4068  return "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1, %2";
4069}
4070 [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
4071)
4072
;; 32-bit integer (VCVTI) to float conversion with an extra immediate
;; (operand 2), modelled with the VCVT_US_N unspecs.  The immediate is
;; checked by arm_const_bounds with bounds (1, 33).
4073(define_insn "neon_vcvt<sup>_n<mode>"
4074  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4075	(unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")
4076			   (match_operand:SI 2 "immediate_operand" "i")]
4077			  VCVT_US_N))]
4078  "TARGET_NEON"
4079{
4080  arm_const_bounds (operands[2], 1, 33);
4081  return "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1, %2";
4082}
4083  [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
4084)
4085
;; 16-bit integer (VCVTHI) to f16 conversion with an extra immediate
;; (operand 2), modelled with the VCVT_US_N unspecs.  The immediate is
;; checked by arm_const_bounds with bounds (0, 17).  Requires
;; TARGET_NEON_FP16INST.
4086(define_insn "neon_vcvt<sup>_n<mode>"
4087 [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4088   (unspec:<VH_CVTTO>
4089    [(match_operand:VCVTHI 1 "s_register_operand" "w")
4090     (match_operand:SI 2 "immediate_operand" "i")]
4091    VCVT_US_N))]
4092 "TARGET_NEON_FP16INST"
4093{
4094  arm_const_bounds (operands[2], 0, 17);
4095  return "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1, %2";
4096}
4097 [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
4098)
4099
;; f16 (VH) to 16-bit integer conversions for the VCVT_HF_US unspecs; the
;; <vcvth_op> attribute is appended to the "vcvt" mnemonic.  Requires
;; TARGET_NEON_FP16INST.
4100(define_insn "neon_vcvt<vcvth_op><sup><mode>"
4101 [(set
4102   (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4103   (unspec:<VH_CVTTO>
4104    [(match_operand:VH 1 "s_register_operand" "w")]
4105    VCVT_HF_US))]
4106 "TARGET_NEON_FP16INST"
4107 "vcvt<vcvth_op>.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
4108  [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
4109)
4110
;; VMOVN (UNSPEC_VMOVN): Q-register source in a VN mode, D-register
;; <V_narrow> destination.
4111(define_insn "neon_vmovn<mode>"
4112  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4113	(unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
4114                           UNSPEC_VMOVN))]
4115  "TARGET_NEON"
4116  "vmovn.<V_if_elem>\t%P0, %q1"
4117  [(set_attr "type" "neon_shift_imm_narrow_q")]
4118)
4119
;; VQMOVN for the VQMOVN unspecs: Q-register source in a VN mode, D-register
;; <V_narrow> destination; <sup> supplies the signedness letter.
4120(define_insn "neon_vqmovn<sup><mode>"
4121  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4122	(unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
4123                           VQMOVN))]
4124  "TARGET_NEON"
4125  "vqmovn.<sup>%#<V_sz_elem>\t%P0, %q1"
4126  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4127)
4128
;; VQMOVUN (UNSPEC_VQMOVUN): Q-register source in a VN mode, D-register
;; <V_narrow> destination.
4129(define_insn "neon_vqmovun<mode>"
4130  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4131	(unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
4132                           UNSPEC_VQMOVUN))]
4133  "TARGET_NEON"
4134  "vqmovun.<V_s_elem>\t%P0, %q1"
4135  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4136)
4137
;; VMOVL for the VMOVL unspecs: D-register source in a VW mode, Q-register
;; <V_widen> destination; <sup> supplies the signedness letter.
4138(define_insn "neon_vmovl<sup><mode>"
4139  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4140	(unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")]
4141                          VMOVL))]
4142  "TARGET_NEON"
4143  "vmovl.<sup>%#<V_sz_elem>\t%q0, %P1"
4144  [(set_attr "type" "neon_shift_imm_long")]
4145)
4146
;; Multiply a VMD vector (operand 1) by lane operand 3 of operand 2
;; (UNSPEC_VMUL_LANE), all in D registers.  The "type" attribute is chosen
;; according to <Is_float_mode>.
4147(define_insn "neon_vmul_lane<mode>"
4148  [(set (match_operand:VMD 0 "s_register_operand" "=w")
4149	(unspec:VMD [(match_operand:VMD 1 "s_register_operand" "w")
4150		     (match_operand:VMD 2 "s_register_operand"
4151                                        "<scalar_mul_constraint>")
4152                     (match_operand:SI 3 "immediate_operand" "i")]
4153                    UNSPEC_VMUL_LANE))]
4154  "TARGET_NEON"
4155{
4156  return "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]";
4157}
4158  [(set (attr "type")
4159     (if_then_else (match_test "<Is_float_mode>")
4160                   (const_string "neon_fp_mul_s_scalar<q>")
4161                   (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
4162)
4163
;; Multiply a VMQ vector (operand 1, Q register) by lane operand 3 of the
;; <V_HALF> vector in operand 2 (D register) (UNSPEC_VMUL_LANE).  The "type"
;; attribute is chosen according to <Is_float_mode>.
4164(define_insn "neon_vmul_lane<mode>"
4165  [(set (match_operand:VMQ 0 "s_register_operand" "=w")
4166	(unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "w")
4167		     (match_operand:<V_HALF> 2 "s_register_operand"
4168                                             "<scalar_mul_constraint>")
4169                     (match_operand:SI 3 "immediate_operand" "i")]
4170                    UNSPEC_VMUL_LANE))]
4171  "TARGET_NEON"
4172{
4173  return "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]";
4174}
4175  [(set (attr "type")
4176     (if_then_else (match_test "<Is_float_mode>")
4177                   (const_string "neon_fp_mul_s_scalar<q>")
4178                   (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
4179)
4180
;; f16 by-lane multiply for the VH modes: operand 2 is always a V4HF register
;; and operand 3 the lane index.  Requires TARGET_NEON_FP16INST.
4181(define_insn "neon_vmul_lane<mode>"
4182  [(set (match_operand:VH 0 "s_register_operand" "=w")
4183	(unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
4184		    (match_operand:V4HF 2 "s_register_operand"
4185		     "<scalar_mul_constraint>")
4186		     (match_operand:SI 3 "immediate_operand" "i")]
4187		     UNSPEC_VMUL_LANE))]
4188  "TARGET_NEON_FP16INST"
4189  "vmul.f16\t%<V_reg>0, %<V_reg>1, %P2[%c3]"
4190  [(set_attr "type" "neon_fp_mul_s_scalar<q>")]
4191)
4192
;; Widening by-lane multiply for the VMDI modes (VMULL_LANE unspecs):
;; D-register inputs, Q-register <V_widen> result; <sup> supplies the
;; signedness letter.
4193(define_insn "neon_vmull<sup>_lane<mode>"
4194  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4195	(unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
4196		           (match_operand:VMDI 2 "s_register_operand"
4197					       "<scalar_mul_constraint>")
4198                           (match_operand:SI 3 "immediate_operand" "i")]
4199                          VMULL_LANE))]
4200  "TARGET_NEON"
4201{
4202  return "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2[%c3]";
4203}
4204  [(set_attr "type" "neon_mul_<V_elem_ch>_scalar_long")]
4205)
4206
;; VQDMULL by lane for the VMDI modes (UNSPEC_VQDMULL_LANE): D-register
;; inputs, Q-register <V_widen> result.
4207(define_insn "neon_vqdmull_lane<mode>"
4208  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4209	(unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
4210		           (match_operand:VMDI 2 "s_register_operand"
4211					       "<scalar_mul_constraint>")
4212                           (match_operand:SI 3 "immediate_operand" "i")]
4213                          UNSPEC_VQDMULL_LANE))]
4214  "TARGET_NEON"
4215{
4216  return "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]";
4217}
4218  [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")]
4219)
4220
;; vqdmulh / vqrdmulh by lane, for modes in VMQI; <r> (from VQDMULH_LANE)
;; selects the plain or the "r" variant in both name and mnemonic.
;; Operands 0 and 1 are printed with %q; operand 2 is a <V_HALF>-mode
;; register holding the scalar (%P); operand 3 is the immediate lane index.
4221(define_insn "neon_vq<r>dmulh_lane<mode>"
4222  [(set (match_operand:VMQI 0 "s_register_operand" "=w")
4223	(unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "w")
4224		      (match_operand:<V_HALF> 2 "s_register_operand"
4225					      "<scalar_mul_constraint>")
4226                      (match_operand:SI 3 "immediate_operand" "i")]
4227                      VQDMULH_LANE))]
4228  "TARGET_NEON"
4229{
4230  return "vq<r>dmulh.<V_s_elem>\t%q0, %q1, %P2[%c3]";
4231}
4232  [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
4233)
4234
;; vqdmulh / vqrdmulh by lane, for modes in VMDI.  All three register
;; operands are printed with %P; operand 3 is the immediate lane index into
;; operand 2.
;; NOTE(review): the "type" attribute below ends in "_scalar_q", the same
;; string the VMQI form of this pattern uses, even though every register here
;; is printed with %P.  The other D/Q pairs in this file (e.g. the vqrdmlah
;; lane patterns) give the VMDI form the un-suffixed type.  This only affects
;; scheduling; confirm whether it is intentional.
4235(define_insn "neon_vq<r>dmulh_lane<mode>"
4236  [(set (match_operand:VMDI 0 "s_register_operand" "=w")
4237	(unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "w")
4238		      (match_operand:VMDI 2 "s_register_operand"
4239					  "<scalar_mul_constraint>")
4240                      (match_operand:SI 3 "immediate_operand" "i")]
4241                      VQDMULH_LANE))]
4242  "TARGET_NEON"
4243{
4244  return "vq<r>dmulh.<V_s_elem>\t%P0, %P1, %P2[%c3]";
4245}
4246  [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
4247)
4248
4249;; vqrdmlah_lane, vqrdmlsh_lane
;; Form for modes in VMQI, enabled under TARGET_NEON_RDMA.
;; Operand 1 is the accumulator and is tied to the destination (constraint
;; "0"), so only operands 0, 2 and 3 appear in the assembly.  Operand 3 is a
;; <V_HALF>-mode register holding the scalar (%P); operand 4 is the immediate
;; lane index into it.
4250(define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
4251  [(set (match_operand:VMQI 0 "s_register_operand" "=w")
4252	(unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "0")
4253		      (match_operand:VMQI 2 "s_register_operand" "w")
4254		      (match_operand:<V_HALF> 3 "s_register_operand"
4255					  "<scalar_mul_constraint>")
4256		      (match_operand:SI 4 "immediate_operand" "i")]
4257		     VQRDMLH_AS))]
4258  "TARGET_NEON_RDMA"
4259{
4260  return
4261   "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%q0, %q2, %P3[%c4]";
4262}
4263  [(set_attr "type" "neon_mla_<V_elem_ch>_scalar<q>")]
4264)
4265
;; vqrdmlah / vqrdmlsh by lane for modes in VMDI, enabled under
;; TARGET_NEON_RDMA.  Operand 1 (the accumulator) is tied to the destination;
;; operands 0, 2 and 3 are all printed with %P and operand 4 is the immediate
;; lane index into operand 3.
4266(define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
4267  [(set (match_operand:VMDI 0 "s_register_operand" "=w")
4268	(unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "0")
4269		      (match_operand:VMDI 2 "s_register_operand" "w")
4270		      (match_operand:VMDI 3 "s_register_operand"
4271					  "<scalar_mul_constraint>")
4272		      (match_operand:SI 4 "immediate_operand" "i")]
4273		     VQRDMLH_AS))]
4274  "TARGET_NEON_RDMA"
4275{
4276  return
4277   "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%P0, %P2, %P3[%c4]";
4278}
4279  [(set_attr "type" "neon_mla_<V_elem_ch>_scalar")]
4280)
4281
;; Multiply-accumulate by lane, for modes in VMD.
;; Operand 1 is the accumulator, tied to the destination (constraint "0");
;; operand 2 is the vector multiplicand; operand 3 holds the scalar and
;; operand 4 is the immediate lane index into it.  All registers are printed
;; with %P.  The "type" attribute distinguishes float from integer modes via
;; <Is_float_mode>.
4282(define_insn "neon_vmla_lane<mode>"
4283  [(set (match_operand:VMD 0 "s_register_operand" "=w")
4284	(unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
4285		     (match_operand:VMD 2 "s_register_operand" "w")
4286                     (match_operand:VMD 3 "s_register_operand"
4287					"<scalar_mul_constraint>")
4288                     (match_operand:SI 4 "immediate_operand" "i")]
4289                     UNSPEC_VMLA_LANE))]
4290  "TARGET_NEON"
4291{
4292  return "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]";
4293}
4294  [(set (attr "type")
4295     (if_then_else (match_test "<Is_float_mode>")
4296                   (const_string "neon_fp_mla_s_scalar<q>")
4297                   (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
4298)
4299
;; Multiply-accumulate by lane, for modes in VMQ.
;; Operand 1 is the accumulator, tied to the destination; operands 0 and 2
;; are printed with %q.  Operand 3 is a <V_HALF>-mode register holding the
;; scalar (%P) and operand 4 is the immediate lane index into it.
4300(define_insn "neon_vmla_lane<mode>"
4301  [(set (match_operand:VMQ 0 "s_register_operand" "=w")
4302	(unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
4303		     (match_operand:VMQ 2 "s_register_operand" "w")
4304                     (match_operand:<V_HALF> 3 "s_register_operand"
4305					     "<scalar_mul_constraint>")
4306                     (match_operand:SI 4 "immediate_operand" "i")]
4307                     UNSPEC_VMLA_LANE))]
4308  "TARGET_NEON"
4309{
4310  return "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]";
4311}
4312  [(set (attr "type")
4313     (if_then_else (match_test "<Is_float_mode>")
4314                   (const_string "neon_fp_mla_s_scalar<q>")
4315                   (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
4316)
4317
;; Widening multiply-accumulate by lane, for modes in VMDI; one insn per
;; unspec in VMLAL_LANE, with <sup> giving the signedness letter.
;; Operand 1 is the <V_widen>-mode accumulator, tied to the destination (%q);
;; operands 2 and 3 are the source vector and the scalar register (%P);
;; operand 4 is the immediate lane index into operand 3.
4318(define_insn "neon_vmlal<sup>_lane<mode>"
4319  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4320	(unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4321			   (match_operand:VMDI 2 "s_register_operand" "w")
4322                           (match_operand:VMDI 3 "s_register_operand"
4323					       "<scalar_mul_constraint>")
4324                           (match_operand:SI 4 "immediate_operand" "i")]
4325                          VMLAL_LANE))]
4326  "TARGET_NEON"
4327{
4328  return "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
4329}
4330  [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
4331)
4332
;; vqdmlal by lane, for modes in VMDI.  Operand 1 is the <V_widen>-mode
;; accumulator, tied to the destination (%q); operands 2 and 3 are printed
;; with %P and operand 4 is the immediate lane index into operand 3.
4333(define_insn "neon_vqdmlal_lane<mode>"
4334  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4335	(unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4336			   (match_operand:VMDI 2 "s_register_operand" "w")
4337                           (match_operand:VMDI 3 "s_register_operand"
4338					       "<scalar_mul_constraint>")
4339                           (match_operand:SI 4 "immediate_operand" "i")]
4340                          UNSPEC_VQDMLAL_LANE))]
4341  "TARGET_NEON"
4342{
4343  return "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]";
4344}
4345  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
4346)
4347
;; Multiply-subtract by lane, for modes in VMD.  Mirrors the VMD vmla lane
;; pattern: operand 1 is tied to the destination, operand 2 is the vector
;; multiplicand, operand 3 holds the scalar and operand 4 is the immediate
;; lane index.  All registers are printed with %P.  The "type" strings are
;; the same "mla" scheduling classes the vmla lane patterns use.
4348(define_insn "neon_vmls_lane<mode>"
4349  [(set (match_operand:VMD 0 "s_register_operand" "=w")
4350	(unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
4351		     (match_operand:VMD 2 "s_register_operand" "w")
4352                     (match_operand:VMD 3 "s_register_operand"
4353					"<scalar_mul_constraint>")
4354                     (match_operand:SI 4 "immediate_operand" "i")]
4355                    UNSPEC_VMLS_LANE))]
4356  "TARGET_NEON"
4357{
4358  return "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]";
4359}
4360  [(set (attr "type")
4361     (if_then_else (match_test "<Is_float_mode>")
4362                   (const_string "neon_fp_mla_s_scalar<q>")
4363                   (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
4364)
4365
;; Multiply-subtract by lane, for modes in VMQ.  Operand 1 is tied to the
;; destination; operands 0 and 2 are printed with %q.  Operand 3 is a
;; <V_HALF>-mode register holding the scalar (%P) and operand 4 is the
;; immediate lane index into it.
4366(define_insn "neon_vmls_lane<mode>"
4367  [(set (match_operand:VMQ 0 "s_register_operand" "=w")
4368	(unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
4369		     (match_operand:VMQ 2 "s_register_operand" "w")
4370                     (match_operand:<V_HALF> 3 "s_register_operand"
4371					     "<scalar_mul_constraint>")
4372                     (match_operand:SI 4 "immediate_operand" "i")]
4373                    UNSPEC_VMLS_LANE))]
4374  "TARGET_NEON"
4375{
4376  return "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]";
4377}
4378  [(set (attr "type")
4379     (if_then_else (match_test "<Is_float_mode>")
4380                   (const_string "neon_fp_mla_s_scalar<q>")
4381                   (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
4382)
4383
;; Widening multiply-subtract by lane, for modes in VMDI; one insn per unspec
;; in VMLSL_LANE, with <sup> giving the signedness letter.
;; Operand 1 is the <V_widen>-mode accumulator, tied to the destination (%q);
;; operands 2 and 3 are printed with %P; operand 4 is the immediate lane
;; index into operand 3.
4384(define_insn "neon_vmlsl<sup>_lane<mode>"
4385  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4386	(unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4387			   (match_operand:VMDI 2 "s_register_operand" "w")
4388                           (match_operand:VMDI 3 "s_register_operand"
4389					       "<scalar_mul_constraint>")
4390                           (match_operand:SI 4 "immediate_operand" "i")]
4391                          VMLSL_LANE))]
4392  "TARGET_NEON"
4393{
4394  return "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
4395}
4396  [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
4397)
4398
;; vqdmlsl by lane, for modes in VMDI.  Operand 1 is the <V_widen>-mode
;; accumulator, tied to the destination (%q); operands 2 and 3 are printed
;; with %P and operand 4 is the immediate lane index into operand 3.
4399(define_insn "neon_vqdmlsl_lane<mode>"
4400  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4401	(unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4402			   (match_operand:VMDI 2 "s_register_operand" "w")
4403                           (match_operand:VMDI 3 "s_register_operand"
4404					       "<scalar_mul_constraint>")
4405                           (match_operand:SI 4 "immediate_operand" "i")]
4406                          UNSPEC_VQDMLSL_LANE))]
4407  "TARGET_NEON"
4408{
4409  return "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]";
4410}
4411  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
4412)
4413
4414; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a
4415; core register into a temp register, then use a scalar taken from that. This
4416; isn't an optimal solution if e.g. the scalar has just been read from memory
4417; or extracted from another vector. In the latter case it is currently better to
4418; use the "_lane" variant, and the former case can probably be implemented
4419; using vld1_lane, but that hasn't been done yet.
4420
;; Expand vmul_n for modes in VMD: multiply vector operand 1 by the scalar in
;; operand 2 (mode <V_elem>), storing to operand 0.
;; The scalar is first inserted into lane 0 of a fresh <MODE>-mode pseudo
;; with neon_vset_lane (the pseudo is passed as both destination and source
;; vector, so its other lanes are not initialised here), then the lane form
;; of the multiply is emitted with lane index 0.
4421(define_expand "neon_vmul_n<mode>"
4422  [(match_operand:VMD 0 "s_register_operand")
4423   (match_operand:VMD 1 "s_register_operand")
4424   (match_operand:<V_elem> 2 "s_register_operand")]
4425  "TARGET_NEON"
4426{
4427  rtx tmp = gen_reg_rtx (<MODE>mode);
4428  emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4429  emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
4430				       const0_rtx));
4431  DONE;
4432})
4433
;; Expand vmul_n for modes in VMQ.  As for the VMD form, but the temporary
;; that carries the scalar in lane 0 has mode <V_HALF>, matching the scalar
;; operand of the VMQ neon_vmul_lane pattern.
4434(define_expand "neon_vmul_n<mode>"
4435  [(match_operand:VMQ 0 "s_register_operand")
4436   (match_operand:VMQ 1 "s_register_operand")
4437   (match_operand:<V_elem> 2 "s_register_operand")]
4438  "TARGET_NEON"
4439{
4440  rtx tmp = gen_reg_rtx (<V_HALF>mode);
4441  emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4442  emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
4443				       const0_rtx));
4444  DONE;
4445})
4446
;; Expand vmul_n for modes in VH (requires TARGET_NEON_FP16INST).  The scalar
;; in operand 2 is inserted into lane 0 of a fresh V4HF pseudo, which is the
;; mode the VH neon_vmul_lane pattern takes for its scalar operand, and the
;; lane multiply is emitted with lane index 0.
4447(define_expand "neon_vmul_n<mode>"
4448  [(match_operand:VH 0 "s_register_operand")
4449   (match_operand:VH 1 "s_register_operand")
4450   (match_operand:<V_elem> 2 "s_register_operand")]
4451  "TARGET_NEON_FP16INST"
4452{
4453  rtx tmp = gen_reg_rtx (V4HFmode);
4454  emit_insn (gen_neon_vset_lanev4hf (tmp, operands[2], tmp, const0_rtx));
4455  emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
4456				       const0_rtx));
4457  DONE;
4458})
4459
;; Expand the signed widening vmull_n for modes in VMDI: place scalar
;; operand 2 in lane 0 of a fresh <MODE>-mode pseudo, then emit
;; neon_vmulls_lane with lane index 0.  Operand 0 has mode <V_widen>.
4460(define_expand "neon_vmulls_n<mode>"
4461  [(match_operand:<V_widen> 0 "s_register_operand")
4462   (match_operand:VMDI 1 "s_register_operand")
4463   (match_operand:<V_elem> 2 "s_register_operand")]
4464  "TARGET_NEON"
4465{
4466  rtx tmp = gen_reg_rtx (<MODE>mode);
4467  emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4468  emit_insn (gen_neon_vmulls_lane<mode> (operands[0], operands[1], tmp,
4469					 const0_rtx));
4470  DONE;
4471})
4472
;; Expand the unsigned widening vmull_n for modes in VMDI: place scalar
;; operand 2 in lane 0 of a fresh <MODE>-mode pseudo, then emit
;; neon_vmullu_lane with lane index 0.  Operand 0 has mode <V_widen>.
4473(define_expand "neon_vmullu_n<mode>"
4474  [(match_operand:<V_widen> 0 "s_register_operand")
4475   (match_operand:VMDI 1 "s_register_operand")
4476   (match_operand:<V_elem> 2 "s_register_operand")]
4477  "TARGET_NEON"
4478{
4479  rtx tmp = gen_reg_rtx (<MODE>mode);
4480  emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4481  emit_insn (gen_neon_vmullu_lane<mode> (operands[0], operands[1], tmp,
4482					 const0_rtx));
4483  DONE;
4484})
4485
;; Expand vqdmull_n for modes in VMDI: place scalar operand 2 in lane 0 of a
;; fresh <MODE>-mode pseudo, then emit neon_vqdmull_lane with lane index 0.
;; Operand 0 has mode <V_widen>.
4486(define_expand "neon_vqdmull_n<mode>"
4487  [(match_operand:<V_widen> 0 "s_register_operand")
4488   (match_operand:VMDI 1 "s_register_operand")
4489   (match_operand:<V_elem> 2 "s_register_operand")]
4490  "TARGET_NEON"
4491{
4492  rtx tmp = gen_reg_rtx (<MODE>mode);
4493  emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4494  emit_insn (gen_neon_vqdmull_lane<mode> (operands[0], operands[1], tmp,
4495				          const0_rtx));
4496  DONE;
4497})
4498
;; Expand vqdmulh_n for modes in VMDI: place scalar operand 2 in lane 0 of a
;; fresh <MODE>-mode pseudo, then emit neon_vqdmulh_lane with lane index 0.
4499(define_expand "neon_vqdmulh_n<mode>"
4500  [(match_operand:VMDI 0 "s_register_operand")
4501   (match_operand:VMDI 1 "s_register_operand")
4502   (match_operand:<V_elem> 2 "s_register_operand")]
4503  "TARGET_NEON"
4504{
4505  rtx tmp = gen_reg_rtx (<MODE>mode);
4506  emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4507  emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
4508				          const0_rtx));
4509  DONE;
4510})
4511
;; Expand vqrdmulh_n for modes in VMDI: place scalar operand 2 in lane 0 of a
;; fresh <MODE>-mode pseudo, then emit neon_vqrdmulh_lane with lane index 0.
4512(define_expand "neon_vqrdmulh_n<mode>"
4513  [(match_operand:VMDI 0 "s_register_operand")
4514   (match_operand:VMDI 1 "s_register_operand")
4515   (match_operand:<V_elem> 2 "s_register_operand")]
4516  "TARGET_NEON"
4517{
4518  rtx tmp = gen_reg_rtx (<MODE>mode);
4519  emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4520  emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
4521				          const0_rtx));
4522  DONE;
4523})
4524
;; Expand vqdmulh_n for modes in VMQI.  The scalar in operand 2 goes into
;; lane 0 of a fresh <V_HALF>-mode pseudo (the mode the VMQI lane pattern
;; takes for its scalar operand), then neon_vqdmulh_lane is emitted with
;; lane index 0.
4525(define_expand "neon_vqdmulh_n<mode>"
4526  [(match_operand:VMQI 0 "s_register_operand")
4527   (match_operand:VMQI 1 "s_register_operand")
4528   (match_operand:<V_elem> 2 "s_register_operand")]
4529  "TARGET_NEON"
4530{
4531  rtx tmp = gen_reg_rtx (<V_HALF>mode);
4532  emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4533  emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
4534					  const0_rtx));
4535  DONE;
4536})
4537
;; Expand vqrdmulh_n for modes in VMQI.  The scalar in operand 2 goes into
;; lane 0 of a fresh <V_HALF>-mode pseudo, then neon_vqrdmulh_lane is emitted
;; with lane index 0.
4538(define_expand "neon_vqrdmulh_n<mode>"
4539  [(match_operand:VMQI 0 "s_register_operand")
4540   (match_operand:VMQI 1 "s_register_operand")
4541   (match_operand:<V_elem> 2 "s_register_operand")]
4542  "TARGET_NEON"
4543{
4544  rtx tmp = gen_reg_rtx (<V_HALF>mode);
4545  emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4546  emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
4547					   const0_rtx));
4548  DONE;
4549})
4550
;; Expand vmla_n for modes in VMD.  Operands 1 and 2 are passed straight to
;; neon_vmla_lane as its accumulator and vector multiplicand; the scalar in
;; operand 3 is first placed in lane 0 of a fresh <MODE>-mode pseudo, which
;; becomes the lane source with lane index 0.
4551(define_expand "neon_vmla_n<mode>"
4552  [(match_operand:VMD 0 "s_register_operand")
4553   (match_operand:VMD 1 "s_register_operand")
4554   (match_operand:VMD 2 "s_register_operand")
4555   (match_operand:<V_elem> 3 "s_register_operand")]
4556  "TARGET_NEON"
4557{
4558  rtx tmp = gen_reg_rtx (<MODE>mode);
4559  emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4560  emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
4561				       tmp, const0_rtx));
4562  DONE;
4563})
4564
;; Expand vmla_n for modes in VMQ.  As for the VMD form, except that the
;; temporary carrying the scalar in lane 0 has mode <V_HALF>.
4565(define_expand "neon_vmla_n<mode>"
4566  [(match_operand:VMQ 0 "s_register_operand")
4567   (match_operand:VMQ 1 "s_register_operand")
4568   (match_operand:VMQ 2 "s_register_operand")
4569   (match_operand:<V_elem> 3 "s_register_operand")]
4570  "TARGET_NEON"
4571{
4572  rtx tmp = gen_reg_rtx (<V_HALF>mode);
4573  emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
4574  emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
4575				       tmp, const0_rtx));
4576  DONE;
4577})
4578
;; Expand the signed widening vmlal_n for modes in VMDI.  Operand 1 is the
;; <V_widen>-mode accumulator and operand 2 the vector multiplicand; the
;; scalar in operand 3 is placed in lane 0 of a fresh <MODE>-mode pseudo and
;; neon_vmlals_lane is emitted with lane index 0.
4579(define_expand "neon_vmlals_n<mode>"
4580  [(match_operand:<V_widen> 0 "s_register_operand")
4581   (match_operand:<V_widen> 1 "s_register_operand")
4582   (match_operand:VMDI 2 "s_register_operand")
4583   (match_operand:<V_elem> 3 "s_register_operand")]
4584  "TARGET_NEON"
4585{
4586  rtx tmp = gen_reg_rtx (<MODE>mode);
4587  emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4588  emit_insn (gen_neon_vmlals_lane<mode> (operands[0], operands[1], operands[2],
4589					 tmp, const0_rtx));
4590  DONE;
4591})
4592
;; Expand the unsigned widening vmlal_n for modes in VMDI.  Operand 1 is the
;; <V_widen>-mode accumulator and operand 2 the vector multiplicand; the
;; scalar in operand 3 is placed in lane 0 of a fresh <MODE>-mode pseudo and
;; neon_vmlalu_lane is emitted with lane index 0.
4593(define_expand "neon_vmlalu_n<mode>"
4594  [(match_operand:<V_widen> 0 "s_register_operand")
4595   (match_operand:<V_widen> 1 "s_register_operand")
4596   (match_operand:VMDI 2 "s_register_operand")
4597   (match_operand:<V_elem> 3 "s_register_operand")]
4598  "TARGET_NEON"
4599{
4600  rtx tmp = gen_reg_rtx (<MODE>mode);
4601  emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4602  emit_insn (gen_neon_vmlalu_lane<mode> (operands[0], operands[1], operands[2],
4603					 tmp, const0_rtx));
4604  DONE;
4605})
4606
;; Expand vqdmlal_n for modes in VMDI.  Operand 1 is the <V_widen>-mode
;; accumulator and operand 2 the vector multiplicand; the scalar in operand 3
;; is placed in lane 0 of a fresh <MODE>-mode pseudo and neon_vqdmlal_lane is
;; emitted with lane index 0.
4607(define_expand "neon_vqdmlal_n<mode>"
4608  [(match_operand:<V_widen> 0 "s_register_operand")
4609   (match_operand:<V_widen> 1 "s_register_operand")
4610   (match_operand:VMDI 2 "s_register_operand")
4611   (match_operand:<V_elem> 3 "s_register_operand")]
4612  "TARGET_NEON"
4613{
4614  rtx tmp = gen_reg_rtx (<MODE>mode);
4615  emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4616  emit_insn (gen_neon_vqdmlal_lane<mode> (operands[0], operands[1], operands[2],
4617					  tmp, const0_rtx));
4618  DONE;
4619})
4620
;; Expand vmls_n for modes in VMD.  Operands 1 and 2 are passed straight to
;; neon_vmls_lane; the scalar in operand 3 is first placed in lane 0 of a
;; fresh <MODE>-mode pseudo, which becomes the lane source with lane index 0.
4621(define_expand "neon_vmls_n<mode>"
4622  [(match_operand:VMD 0 "s_register_operand")
4623   (match_operand:VMD 1 "s_register_operand")
4624   (match_operand:VMD 2 "s_register_operand")
4625   (match_operand:<V_elem> 3 "s_register_operand")]
4626  "TARGET_NEON"
4627{
4628  rtx tmp = gen_reg_rtx (<MODE>mode);
4629  emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4630  emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
4631				       tmp, const0_rtx));
4632  DONE;
4633})
4634
;; Expand vmls_n for modes in VMQ.  As for the VMD form, except that the
;; temporary carrying the scalar in lane 0 has mode <V_HALF>.
4635(define_expand "neon_vmls_n<mode>"
4636  [(match_operand:VMQ 0 "s_register_operand")
4637   (match_operand:VMQ 1 "s_register_operand")
4638   (match_operand:VMQ 2 "s_register_operand")
4639   (match_operand:<V_elem> 3 "s_register_operand")]
4640  "TARGET_NEON"
4641{
4642  rtx tmp = gen_reg_rtx (<V_HALF>mode);
4643  emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
4644  emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
4645				       tmp, const0_rtx));
4646  DONE;
4647})
4648
;; Expand the signed widening vmlsl_n for modes in VMDI.  Operand 1 is the
;; <V_widen>-mode accumulator and operand 2 the vector multiplicand; the
;; scalar in operand 3 is placed in lane 0 of a fresh <MODE>-mode pseudo and
;; neon_vmlsls_lane is emitted with lane index 0.
4649(define_expand "neon_vmlsls_n<mode>"
4650  [(match_operand:<V_widen> 0 "s_register_operand")
4651   (match_operand:<V_widen> 1 "s_register_operand")
4652   (match_operand:VMDI 2 "s_register_operand")
4653   (match_operand:<V_elem> 3 "s_register_operand")]
4654  "TARGET_NEON"
4655{
4656  rtx tmp = gen_reg_rtx (<MODE>mode);
4657  emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4658  emit_insn (gen_neon_vmlsls_lane<mode> (operands[0], operands[1], operands[2],
4659					tmp, const0_rtx));
4660  DONE;
4661})
4662
;; Expand the unsigned widening vmlsl_n for modes in VMDI.  Operand 1 is the
;; <V_widen>-mode accumulator and operand 2 the vector multiplicand; the
;; scalar in operand 3 is placed in lane 0 of a fresh <MODE>-mode pseudo and
;; neon_vmlslu_lane is emitted with lane index 0.
4663(define_expand "neon_vmlslu_n<mode>"
4664  [(match_operand:<V_widen> 0 "s_register_operand")
4665   (match_operand:<V_widen> 1 "s_register_operand")
4666   (match_operand:VMDI 2 "s_register_operand")
4667   (match_operand:<V_elem> 3 "s_register_operand")]
4668  "TARGET_NEON"
4669{
4670  rtx tmp = gen_reg_rtx (<MODE>mode);
4671  emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4672  emit_insn (gen_neon_vmlslu_lane<mode> (operands[0], operands[1], operands[2],
4673					tmp, const0_rtx));
4674  DONE;
4675})
4676
;; Expand vqdmlsl_n for modes in VMDI.  Operand 1 is the <V_widen>-mode
;; accumulator and operand 2 the vector multiplicand; the scalar in operand 3
;; is placed in lane 0 of a fresh <MODE>-mode pseudo and neon_vqdmlsl_lane is
;; emitted with lane index 0.
4677(define_expand "neon_vqdmlsl_n<mode>"
4678  [(match_operand:<V_widen> 0 "s_register_operand")
4679   (match_operand:<V_widen> 1 "s_register_operand")
4680   (match_operand:VMDI 2 "s_register_operand")
4681   (match_operand:<V_elem> 3 "s_register_operand")]
4682  "TARGET_NEON"
4683{
4684  rtx tmp = gen_reg_rtx (<MODE>mode);
4685  emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4686  emit_insn (gen_neon_vqdmlsl_lane<mode> (operands[0], operands[1], operands[2],
4687					  tmp, const0_rtx));
4688  DONE;
4689})
4690
;; vext for modes in VDQX: operands 1 and 2 are the two source vectors and
;; operand 3 is the immediate passed through as the instruction's last
;; argument.  Before printing, operand 3 is checked with arm_const_bounds
;; against the bounds 0 and the number of units in <MODE>.
;; The "@" prefix makes this a parameterized name.
4691(define_insn "@neon_vext<mode>"
4692  [(set (match_operand:VDQX 0 "s_register_operand" "=w")
4693	(unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
4694		      (match_operand:VDQX 2 "s_register_operand" "w")
4695                      (match_operand:SI 3 "immediate_operand" "i")]
4696                     UNSPEC_VEXT))]
4697  "TARGET_NEON"
4698{
4699  arm_const_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
4700  return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3";
4701}
4702  [(set_attr "type" "neon_ext<q>")]
4703)
4704
;; vrev64 for modes in VDQ: single-source instruction, element size taken
;; from <V_sz_elem>.  Parameterized ("@") name.
4705(define_insn "@neon_vrev64<mode>"
4706  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
4707	(unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")]
4708                    UNSPEC_VREV64))]
4709  "TARGET_NEON"
4710  "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4711  [(set_attr "type" "neon_rev<q>")]
4712)
4713
;; vrev32 for modes in VX: single-source instruction, element size taken
;; from <V_sz_elem>.  Parameterized ("@") name.
4714(define_insn "@neon_vrev32<mode>"
4715  [(set (match_operand:VX 0 "s_register_operand" "=w")
4716	(unspec:VX [(match_operand:VX 1 "s_register_operand" "w")]
4717                   UNSPEC_VREV32))]
4718  "TARGET_NEON"
4719  "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4720  [(set_attr "type" "neon_rev<q>")]
4721)
4722
;; vrev16 for modes in VE: single-source instruction, element size taken
;; from <V_sz_elem>.  Parameterized ("@") name.
4723(define_insn "@neon_vrev16<mode>"
4724  [(set (match_operand:VE 0 "s_register_operand" "=w")
4725	(unspec:VE [(match_operand:VE 1 "s_register_operand" "w")]
4726                   UNSPEC_VREV16))]
4727  "TARGET_NEON"
4728  "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4729  [(set_attr "type" "neon_rev<q>")]
4730)
4731
4732; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register
4733; allocation. For an intrinsic of form:
4734;   rD = vbsl_* (rS, rN, rM)
4735; We can use any of:
4736;   vbsl rS, rN, rM  (if D = S)
4737;   vbit rD, rN, rS  (if D = M, so 1-bits in rS choose bits from rN, else rM)
4738;   vbif rD, rM, rS  (if D = N, so 0-bits in rS choose bits from rM, else rN)
4739
;; Bitwise select for modes in VDQX, with all three inputs in <MODE>.
;; Three alternatives, chosen by which input is tied to the destination:
;;   alt 0: operand 1 tied ("0") -> vbsl with operands 2 and 3;
;;   alt 1: operand 3 tied       -> vbit with operands 2 and 1;
;;   alt 2: operand 2 tied       -> vbif with operands 3 and 1.
4740(define_insn "neon_vbsl<mode>_internal"
4741  [(set (match_operand:VDQX 0 "s_register_operand"		 "=w,w,w")
4742	(unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" " 0,w,w")
4743		      (match_operand:VDQX 2 "s_register_operand" " w,w,0")
4744                      (match_operand:VDQX 3 "s_register_operand" " w,0,w")]
4745                     UNSPEC_VBSL))]
4746  "TARGET_NEON"
4747  "@
4748  vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3
4749  vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1
4750  vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1"
4751  [(set_attr "type" "neon_bsl<q>")]
4752)
4753
;; Expander for the vbsl builtin on modes in VDQX.  Operand 1 arrives in
;; mode <V_cmp_result>; it is replaced by its <MODE>-mode lowpart so that
;; the emitted UNSPEC_VBSL has all three inputs in <MODE>, as the
;; neon_vbsl<mode>_internal insn requires.  There is no DONE, so the RTL
;; template above is emitted with the rewritten operand.
4754(define_expand "neon_vbsl<mode>"
4755  [(set (match_operand:VDQX 0 "s_register_operand")
4756        (unspec:VDQX [(match_operand:<V_cmp_result> 1 "s_register_operand")
4757                      (match_operand:VDQX 2 "s_register_operand")
4758                      (match_operand:VDQX 3 "s_register_operand")]
4759                     UNSPEC_VBSL))]
4760  "TARGET_NEON"
4761{
4762  /* We can't alias operands together if they have different modes.  */
4763  operands[1] = gen_lowpart (<MODE>mode, operands[1]);
4764})
4765
4766;; vshl, vrshl
;; Shift by a per-element count held in a register, for modes in VDQIX; one
;; insn per unspec in VSHL.  <shift_op> supplies the mnemonic stem and <sup>
;; the signedness letter.  Operand 1 is the value and operand 2 the vector of
;; shift counts.
4767(define_insn "neon_v<shift_op><sup><mode>"
4768  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4769	(unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4770		       (match_operand:VDQIX 2 "s_register_operand" "w")]
4771                      VSHL))]
4772  "TARGET_NEON"
4773  "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
4774  [(set_attr "type" "neon_shift_imm<q>")]
4775)
4776
4777;; vqshl, vqrshl
;; Same shape as the vshl/vrshl pattern, iterating over VQSHL instead, and
;; using the "neon_sat_shift_imm" scheduling type.
4778(define_insn "neon_v<shift_op><sup><mode>"
4779  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4780	(unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4781		       (match_operand:VDQIX 2 "s_register_operand" "w")]
4782                      VQSHL))]
4783  "TARGET_NEON"
4784  "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
4785  [(set_attr "type" "neon_sat_shift_imm<q>")]
4786)
4787
4788;; vshr_n, vrshr_n
;; Shift by an immediate (operand 2) for modes in VDQIX; one insn per unspec
;; in VSHR_N.  Operand 2 is checked with arm_const_bounds using the bounds 1
;; and element-bits + 1 before the instruction is printed.
4789(define_insn "neon_v<shift_op><sup>_n<mode>"
4790  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4791	(unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4792		       (match_operand:SI 2 "immediate_operand" "i")]
4793                      VSHR_N))]
4794  "TARGET_NEON"
4795{
4796  arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) + 1);
4797  return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
4798}
4799  [(set_attr "type" "neon_shift_imm<q>")]
4800)
4801
4802;; vshrn_n, vrshrn_n
;; Narrowing shift by an immediate for modes in VN; one insn per unspec in
;; VSHRN_N.  The source (operand 1) is printed with %q and the
;; <V_narrow>-mode result with %P.  Operand 2 is checked with
;; arm_const_bounds using the bounds 1 and element-bits / 2 + 1, where the
;; element size is that of the source mode.
4803(define_insn "neon_v<shift_op>_n<mode>"
4804  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4805	(unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
4806			    (match_operand:SI 2 "immediate_operand" "i")]
4807                           VSHRN_N))]
4808  "TARGET_NEON"
4809{
4810  arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
4811  return "v<shift_op>.<V_if_elem>\t%P0, %q1, %2";
4812}
4813  [(set_attr "type" "neon_shift_imm_narrow_q")]
4814)
4815
4816;; vqshrn_n, vqrshrn_n
;; Same shape as the vshrn_n pattern, iterating over VQSHRN_N, with <sup>
;; adding the signedness letter to the name and the mnemonic's element type.
;; The immediate is checked against the same bounds (1, element-bits / 2 + 1).
4817(define_insn "neon_v<shift_op><sup>_n<mode>"
4818  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4819	(unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
4820			    (match_operand:SI 2 "immediate_operand" "i")]
4821                           VQSHRN_N))]
4822  "TARGET_NEON"
4823{
4824  arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
4825  return "v<shift_op>.<sup>%#<V_sz_elem>\t%P0, %q1, %2";
4826}
4827  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4828)
4829
4830;; vqshrun_n, vqrshrun_n
;; Same shape as the vshrn_n pattern, iterating over VQSHRUN_N; the element
;; type in the mnemonic comes from <V_s_elem>.  The immediate is checked
;; against the bounds (1, element-bits / 2 + 1).
4831(define_insn "neon_v<shift_op>_n<mode>"
4832  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4833	(unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
4834			    (match_operand:SI 2 "immediate_operand" "i")]
4835                           VQSHRUN_N))]
4836  "TARGET_NEON"
4837{
4838  arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
4839  return "v<shift_op>.<V_s_elem>\t%P0, %q1, %2";
4840}
4841  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4842)
4843
;; vshl by an immediate (operand 2) for modes in VDQIX.  Operand 2 is checked
;; with arm_const_bounds using the bounds 0 and element-bits.
4844(define_insn "neon_vshl_n<mode>"
4845  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4846	(unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4847		       (match_operand:SI 2 "immediate_operand" "i")]
4848                      UNSPEC_VSHL_N))]
4849  "TARGET_NEON"
4850{
4851  arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
4852  return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2";
4853}
4854  [(set_attr "type" "neon_shift_imm<q>")]
4855)
4856
;; vqshl by an immediate for modes in VDQIX; one insn per unspec in VQSHL_N,
;; with <sup> giving the signedness letter.  Operand 2 is checked with
;; arm_const_bounds using the bounds 0 and element-bits.
4857(define_insn "neon_vqshl_<sup>_n<mode>"
4858  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4859	(unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4860		       (match_operand:SI 2 "immediate_operand" "i")]
4861                      VQSHL_N))]
4862  "TARGET_NEON"
4863{
4864  arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
4865  return "vqshl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
4866}
4867  [(set_attr "type" "neon_sat_shift_imm<q>")]
4868)
4869
;; vqshlu by an immediate for modes in VDQIX.  Operand 2 is checked with
;; arm_const_bounds using the bounds 0 and element-bits; the element type in
;; the mnemonic comes from <V_s_elem>.
4870(define_insn "neon_vqshlu_n<mode>"
4871  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4872	(unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4873		       (match_operand:SI 2 "immediate_operand" "i")]
4874                      UNSPEC_VQSHLU_N))]
4875  "TARGET_NEON"
4876{
4877  arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
4878  return "vqshlu.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %2";
4879}
4880  [(set_attr "type" "neon_sat_shift_imm<q>")]
4881)
4882
;; Widening shift left by an immediate for modes in VW; one insn per unspec
;; in VSHLL_N, with <sup> giving the signedness letter.  The source is
;; printed with %P and the <V_widen>-mode result with %q.
;; NOTE(review): the comment in the body states "0 < imm", but the lower
;; bound handed to arm_const_bounds is 0, whereas the right-shift patterns in
;; this file pass 1 as their lower bound.  Confirm whether arm_const_bounds
;; treats its lower bound as inclusive and whether an immediate of 0 is meant
;; to be accepted here.
4883(define_insn "neon_vshll<sup>_n<mode>"
4884  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4885	(unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
4886			   (match_operand:SI 2 "immediate_operand" "i")]
4887			  VSHLL_N))]
4888  "TARGET_NEON"
4889{
4890  /* The boundaries are: 0 < imm <= size.  */
4891  arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode) + 1);
4892  return "vshll.<sup>%#<V_sz_elem>\t%q0, %P1, %2";
4893}
4894  [(set_attr "type" "neon_shift_imm_long")]
4895)
4896
4897;; vsra_n, vrsra_n
;; Shift-and-accumulate by an immediate for modes in VDQIX; one insn per
;; unspec in VSRA_N.  Operand 1 is the accumulator, tied to the destination
;; (constraint "0"); operand 2 is the value shifted; operand 3 is the
;; immediate, checked with arm_const_bounds using the bounds 1 and
;; element-bits + 1.
4898(define_insn "neon_v<shift_op><sup>_n<mode>"
4899  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4900	(unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
4901		       (match_operand:VDQIX 2 "s_register_operand" "w")
4902                       (match_operand:SI 3 "immediate_operand" "i")]
4903                      VSRA_N))]
4904  "TARGET_NEON"
4905{
4906  arm_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
4907  return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
4908}
4909  [(set_attr "type" "neon_shift_acc<q>")]
4910)
4911
;; vsri by an immediate for modes in VDQIX.  Operand 1 is tied to the
;; destination (constraint "0"), so the instruction updates it in place from
;; operand 2.  Operand 3 is checked with arm_const_bounds using the bounds 1
;; and element-bits + 1.
4912(define_insn "neon_vsri_n<mode>"
4913  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4914	(unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
4915        	       (match_operand:VDQIX 2 "s_register_operand" "w")
4916                       (match_operand:SI 3 "immediate_operand" "i")]
4917                      UNSPEC_VSRI))]
4918  "TARGET_NEON"
4919{
4920  arm_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
4921  return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
4922}
4923  [(set_attr "type" "neon_shift_reg<q>")]
4924)
4925
;; vsli by an immediate for modes in VDQIX.  Operand 1 is tied to the
;; destination (constraint "0"), so the instruction updates it in place from
;; operand 2.  Operand 3 is checked with arm_const_bounds using the bounds 0
;; and element-bits.
4926(define_insn "neon_vsli_n<mode>"
4927  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4928	(unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
4929        	       (match_operand:VDQIX 2 "s_register_operand" "w")
4930                       (match_operand:SI 3 "immediate_operand" "i")]
4931                      UNSPEC_VSLI))]
4932  "TARGET_NEON"
4933{
4934  arm_const_bounds (operands[3], 0, neon_element_bits (<MODE>mode));
4935  return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
4936}
4937  [(set_attr "type" "neon_shift_reg<q>")]
4938)
4939
;; V8QI table lookup with a one-register table: operand 1 is the table
;; (printed inside the register list) and operand 2 the selector.
4940(define_insn "neon_vtbl1v8qi"
4941  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4942	(unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w")
4943		      (match_operand:V8QI 2 "s_register_operand" "w")]
4944                     UNSPEC_VTBL))]
4945  "TARGET_NEON"
4946  "vtbl.8\t%P0, {%P1}, %P2"
4947  [(set_attr "type" "neon_tbl1")]
4948)
4949
;; V8QI table lookup with a two-register table held in a TImode operand.
;; The register list is rebuilt as V8QI registers at REGNO (operands[1]) and
;; REGNO + 2 (presumably because each D register spans two hard register
;; numbers -- confirm).  The instruction is printed directly with
;; output_asm_insn, so the returned template is empty.
4950(define_insn "neon_vtbl2v8qi"
4951  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4952	(unspec:V8QI [(match_operand:TI 1 "s_register_operand" "w")
4953		      (match_operand:V8QI 2 "s_register_operand" "w")]
4954                     UNSPEC_VTBL))]
4955  "TARGET_NEON"
4956{
4957  rtx ops[4];
4958  int tabbase = REGNO (operands[1]);
4959
4960  ops[0] = operands[0];
4961  ops[1] = gen_rtx_REG (V8QImode, tabbase);
4962  ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4963  ops[3] = operands[2];
4964  output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", ops);
4965
4966  return "";
4967}
4968  [(set_attr "type" "neon_tbl2")]
4969)
4970
;; V8QI table lookup with a three-register table held in an EImode operand.
;; The register list is rebuilt as V8QI registers at REGNO (operands[1]),
;; +2 and +4; the instruction is printed directly with output_asm_insn, so
;; the returned template is empty.
4971(define_insn "neon_vtbl3v8qi"
4972  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4973	(unspec:V8QI [(match_operand:EI 1 "s_register_operand" "w")
4974		      (match_operand:V8QI 2 "s_register_operand" "w")]
4975                     UNSPEC_VTBL))]
4976  "TARGET_NEON"
4977{
4978  rtx ops[5];
4979  int tabbase = REGNO (operands[1]);
4980
4981  ops[0] = operands[0];
4982  ops[1] = gen_rtx_REG (V8QImode, tabbase);
4983  ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4984  ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4985  ops[4] = operands[2];
4986  output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
4987
4988  return "";
4989}
4990  [(set_attr "type" "neon_tbl3")]
4991)
4992
;; V8QI table lookup with a four-register table held in an OImode operand.
;; The register list is rebuilt as V8QI registers at REGNO (operands[1]),
;; +2, +4 and +6; the instruction is printed directly with output_asm_insn,
;; so the returned template is empty.
4993(define_insn "neon_vtbl4v8qi"
4994  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4995	(unspec:V8QI [(match_operand:OI 1 "s_register_operand" "w")
4996		      (match_operand:V8QI 2 "s_register_operand" "w")]
4997                     UNSPEC_VTBL))]
4998  "TARGET_NEON"
4999{
5000  rtx ops[6];
5001  int tabbase = REGNO (operands[1]);
5002
5003  ops[0] = operands[0];
5004  ops[1] = gen_rtx_REG (V8QImode, tabbase);
5005  ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5006  ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
5007  ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
5008  ops[5] = operands[2];
5009  output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
5010
5011  return "";
5012}
5013  [(set_attr "type" "neon_tbl4")]
5014)
5015
5016;; These three are used by the vec_perm infrastructure for V16QImode.
;; neon_vtbl1v16qi: V16QI lookup in a single V16QI table (operand 1) using
;; the V16QI selector in operand 2.  The output template is "#", so the insn
;; is always split; the split runs once reload has completed.
;; The table is viewed as TImode and each half of the result is produced by
;; one neon_vtbl2v8qi, using the whole table and the matching half (low,
;; then high) of the selector.  The destination is earlyclobber ("=&w").
5017(define_insn_and_split "neon_vtbl1v16qi"
5018  [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
5019	(unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w")
5020		       (match_operand:V16QI 2 "s_register_operand" "w")]
5021		      UNSPEC_VTBL))]
5022  "TARGET_NEON"
5023  "#"
5024  "&& reload_completed"
5025  [(const_int 0)]
5026{
5027  rtx op0, op1, op2, part0, part2;
5028  unsigned ofs;
5029
5030  op0 = operands[0];
5031  op1 = gen_lowpart (TImode, operands[1]);
5032  op2 = operands[2];
5033
5034  ofs = subreg_lowpart_offset (V8QImode, V16QImode);
5035  part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
5036  part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
5037  emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
5038
5039  ofs = subreg_highpart_offset (V8QImode, V16QImode);
5040  part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
5041  part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
5042  emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
5043  DONE;
5044}
5045  [(set_attr "type" "multiple")]
5046)
5047
;; A V16QI lookup in an OImode table (operand 1).  After reload it is split
;; into two neon_vtbl2v8qi instructions, one per V8QI half of the result and
;; of the index vector; operand 1 is passed through unchanged to both.
(define_insn_and_split "neon_vtbl2v16qi"
  [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
        (unspec:V16QI
          [(match_operand:OI 1 "s_register_operand" "w")
           (match_operand:V16QI 2 "s_register_operand" "w")]
          UNSPEC_VTBL))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx table = operands[1];
  unsigned half_offset[2];
  int half;

  /* Subreg byte offsets of the low and high V8QI halves, in that order.  */
  half_offset[0] = subreg_lowpart_offset (V8QImode, V16QImode);
  half_offset[1] = subreg_highpart_offset (V8QImode, V16QImode);

  for (half = 0; half < 2; half++)
    {
      rtx dest_half = simplify_subreg (V8QImode, operands[0], V16QImode,
                                       half_offset[half]);
      rtx index_half = simplify_subreg (V8QImode, operands[2], V16QImode,
                                        half_offset[half]);
      emit_insn (gen_neon_vtbl2v8qi (dest_half, table, index_half));
    }
  DONE;
}
  [(set_attr "type" "multiple")]
)
5078
;; Concatenate two V16QI registers into one OImode value.
;;
;; ??? The logical approach would be to extend the regular neon_vcombine
;; pattern to accept quad-word inputs and produce octa-word outputs.  That
;; would require move support for octa-word vector modes, which seems
;; overkill for this single use in vec_perm.
;;
;; The insn has no assembly of its own ("#"); once reload has completed it
;; is split by neon_split_vcombine.
(define_insn_and_split "neon_vcombinev16qi"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI
          [(match_operand:V16QI 1 "s_register_operand" "w")
           (match_operand:V16QI 2 "s_register_operand" "w")]
          UNSPEC_VCONCAT))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  neon_split_vcombine (operands);
  DONE;
}
  [(set_attr "type" "multiple")]
)
5098
;; Table lookup extension (VTBX) with a one-register table.  Operand 1 is
;; tied to the destination (constraint "0"), operand 2 is the table and
;; operand 3 the byte indices.
(define_insn "neon_vtbx1v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:V8QI 1 "s_register_operand" "0")
           (match_operand:V8QI 2 "s_register_operand" "w")
           (match_operand:V8QI 3 "s_register_operand" "w")]
          UNSPEC_VTBX))]
  "TARGET_NEON"
  "vtbx.8\t%P0, {%P2}, %P3"
  [(set_attr "type" "neon_tbl1")]
)
5109
;; Table lookup extension (VTBX) with a two-register table.  Operand 1 is
;; tied to the destination, operand 2 (TI) is the table and operand 3 the
;; byte indices.
(define_insn "neon_vtbx2v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:V8QI 1 "s_register_operand" "0")
           (match_operand:TI 2 "s_register_operand" "w")
           (match_operand:V8QI 3 "s_register_operand" "w")]
          UNSPEC_VTBX))]
  "TARGET_NEON"
{
  /* Asm operands: 0 = result, 1..2 = table registers, 3 = indices.  */
  rtx asm_ops[4];
  int first_table_reg = REGNO (operands[2]);
  int i;

  asm_ops[0] = operands[0];
  /* Successive table registers are two hard register numbers apart.  */
  for (i = 0; i < 2; i++)
    asm_ops[1 + i] = gen_rtx_REG (V8QImode, first_table_reg + 2 * i);
  asm_ops[3] = operands[3];

  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", asm_ops);
  return "";
}
  [(set_attr "type" "neon_tbl2")]
)
5131
;; Table lookup extension (VTBX) with a three-register table.  Operand 1 is
;; tied to the destination, operand 2 (EI) is the table and operand 3 the
;; byte indices.
(define_insn "neon_vtbx3v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:V8QI 1 "s_register_operand" "0")
           (match_operand:EI 2 "s_register_operand" "w")
           (match_operand:V8QI 3 "s_register_operand" "w")]
          UNSPEC_VTBX))]
  "TARGET_NEON"
{
  /* Asm operands: 0 = result, 1..3 = table registers, 4 = indices.  */
  rtx asm_ops[5];
  int first_table_reg = REGNO (operands[2]);
  int i;

  asm_ops[0] = operands[0];
  /* Successive table registers are two hard register numbers apart.  */
  for (i = 0; i < 3; i++)
    asm_ops[1 + i] = gen_rtx_REG (V8QImode, first_table_reg + 2 * i);
  asm_ops[4] = operands[3];

  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", asm_ops);
  return "";
}
  [(set_attr "type" "neon_tbl3")]
)
5154
;; Table lookup extension (VTBX) with a four-register table.  Operand 1 is
;; tied to the destination, operand 2 (OI) is the table and operand 3 the
;; byte indices.
(define_insn "neon_vtbx4v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:V8QI 1 "s_register_operand" "0")
           (match_operand:OI 2 "s_register_operand" "w")
           (match_operand:V8QI 3 "s_register_operand" "w")]
          UNSPEC_VTBX))]
  "TARGET_NEON"
{
  /* Asm operands: 0 = result, 1..4 = table registers, 5 = indices.  */
  rtx asm_ops[6];
  int first_table_reg = REGNO (operands[2]);
  int i;

  asm_ops[0] = operands[0];
  /* Successive table registers are two hard register numbers apart.  */
  for (i = 0; i < 4; i++)
    asm_ops[1 + i] = gen_rtx_REG (V8QImode, first_table_reg + 2 * i);
  asm_ops[5] = operands[3];

  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", asm_ops);
  return "";
}
  [(set_attr "type" "neon_tbl4")]
)
5178
;; Expander for VTRN.  It builds one parallel with two results computed
;; from the same inputs (operands 1 and 2): operand 0 receives UNSPEC_VTRN1
;; and operand 3 receives UNSPEC_VTRN2.  There are no preparation
;; statements.
(define_expand "@neon_vtrn<mode>_internal"
  [(parallel
     [(set (match_operand:VDQWH 0 "s_register_operand")
           (unspec:VDQWH
             [(match_operand:VDQWH 1 "s_register_operand")
              (match_operand:VDQWH 2 "s_register_operand")]
             UNSPEC_VTRN1))
      (set (match_operand:VDQWH 3 "s_register_operand")
           (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
  "TARGET_NEON"
  ""
)
5190
;; The VTRN instruction itself.  Note that the operands are numbered
;; differently from the expander so that tied registers are handled
;; correctly: input 1 is tied to output 0 and input 3 to output 2, and
;; the assembly names only those two registers.
(define_insn "*neon_vtrn<mode>_insn"
  [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
        (unspec:VDQWH
          [(match_operand:VDQWH 1 "s_register_operand" "0")
           (match_operand:VDQWH 3 "s_register_operand" "2")]
          UNSPEC_VTRN1))
   (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
        (unspec:VDQWH [(match_dup 1) (match_dup 3)] UNSPEC_VTRN2))]
  "TARGET_NEON"
  "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
  [(set_attr "type" "neon_permute<q>")]
)
5204
;; Expander for VZIP.  It builds one parallel with two results computed
;; from the same inputs (operands 1 and 2): operand 0 receives UNSPEC_VZIP1
;; and operand 3 receives UNSPEC_VZIP2.  There are no preparation
;; statements.
(define_expand "@neon_vzip<mode>_internal"
  [(parallel
     [(set (match_operand:VDQWH 0 "s_register_operand")
           (unspec:VDQWH
             [(match_operand:VDQWH 1 "s_register_operand")
              (match_operand:VDQWH 2 "s_register_operand")]
             UNSPEC_VZIP1))
      (set (match_operand:VDQWH 3 "s_register_operand")
           (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
  "TARGET_NEON"
  ""
)
5216
;; The VZIP instruction itself.  Note that the operands are numbered
;; differently from the expander so that tied registers are handled
;; correctly: input 1 is tied to output 0 and input 3 to output 2, and
;; the assembly names only those two registers.
(define_insn "*neon_vzip<mode>_insn"
  [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
        (unspec:VDQWH
          [(match_operand:VDQWH 1 "s_register_operand" "0")
           (match_operand:VDQWH 3 "s_register_operand" "2")]
          UNSPEC_VZIP1))
   (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
        (unspec:VDQWH [(match_dup 1) (match_dup 3)] UNSPEC_VZIP2))]
  "TARGET_NEON"
  "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
  [(set_attr "type" "neon_zip<q>")]
)
5230
;; Expander for VUZP.  It builds one parallel with two results computed
;; from the same inputs (operands 1 and 2): operand 0 receives UNSPEC_VUZP1
;; and operand 3 receives UNSPEC_VUZP2.  There are no preparation
;; statements.
(define_expand "@neon_vuzp<mode>_internal"
  [(parallel
     [(set (match_operand:VDQWH 0 "s_register_operand")
           (unspec:VDQWH
             [(match_operand:VDQWH 1 "s_register_operand")
              (match_operand:VDQWH 2 "s_register_operand")]
             UNSPEC_VUZP1))
      (set (match_operand:VDQWH 3 "s_register_operand")
           (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
  "TARGET_NEON"
  ""
)
5242
;; The VUZP instruction itself.  Note that the operands are numbered
;; differently from the expander so that tied registers are handled
;; correctly: input 1 is tied to output 0 and input 3 to output 2, and
;; the assembly names only those two registers.
(define_insn "*neon_vuzp<mode>_insn"
  [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
        (unspec:VDQWH
          [(match_operand:VDQWH 1 "s_register_operand" "0")
           (match_operand:VDQWH 3 "s_register_operand" "2")]
          UNSPEC_VUZP1))
   (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
        (unspec:VDQWH [(match_dup 1) (match_dup 3)] UNSPEC_VUZP2))]
  "TARGET_NEON"
  "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
  [(set_attr "type" "neon_zip<q>")]
)
5256
;; Standard-named expander for a one-vector "load lanes": the register
;; operand 0 is set from an UNSPEC_VLD1 of the structure memory operand 1.
(define_expand "vec_load_lanes<mode><mode>"
  [(set (match_operand:VDQX 0 "s_register_operand")
        (unspec:VDQX
          [(match_operand:VDQX 1 "neon_struct_operand")]
          UNSPEC_VLD1))]
  "TARGET_NEON")
5262
;; VLD1 of a whole vector: load register operand 0 from the structure
;; memory operand 1.
(define_insn "neon_vld1<mode>"
  [(set (match_operand:VDQX 0 "s_register_operand" "=w")
        (unspec:VDQX
          [(match_operand:VDQX 1 "neon_struct_operand" "Um")]
          UNSPEC_VLD1))]
  "TARGET_NEON"
  "vld1.<V_sz_elem>\t%h0, %A1"
  [(set_attr "type" "neon_load1_1reg<q>")]
)
5271
;; VLD1 into a single lane of a double-word vector.  Operand 2 (tied to the
;; destination) supplies the lanes that are left untouched.
;;
;; The lane numbers in the RTL are in GCC lane order, having been flipped
;; in arm_expand_neon_args.  They are restored to architectural lane order
;; here before being printed.
(define_insn "neon_vld1_lane<mode>"
  [(set (match_operand:VDX 0 "s_register_operand" "=w")
        (unspec:VDX
          [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
           (match_operand:VDX 2 "s_register_operand" "0")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VLD1_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);

  operands[3] = GEN_INT (arch_lane);

  /* A one-element vector is loaded as a whole register, without a lane
     index.  */
  return (nunits == 1
          ? "vld1.<V_sz_elem>\t%P0, %A1"
          : "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1");
}
  [(set_attr "type" "neon_load1_one_lane<q>")]
)
5293
;; VLD1 into a single lane of a quad-word vector.  Operand 2 (tied to the
;; destination) supplies the lanes that are left untouched.
;;
;; The lane number in operand 3 is in GCC lane order (flipped in
;; arm_expand_neon_args) and is converted back to architectural lane order
;; here, which reverses it on big-endian targets.  The lane is then
;; addressed through a <V_HALF>mode register: lanes in the upper half use
;; the register two hard register numbers up, with the lane index rebased.
(define_insn "neon_vld1_lane<mode>"
  [(set (match_operand:VQX 0 "s_register_operand" "=w")
        (unspec:VQX
          [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
           (match_operand:VQX 2 "s_register_operand" "0")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VLD1_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  int half_regno = REGNO (operands[0]);

  if (arch_lane >= nunits / 2)
    {
      /* The lane lives in the upper half-register.  */
      arch_lane -= nunits / 2;
      half_regno += 2;
    }

  operands[3] = GEN_INT (arch_lane);
  operands[0] = gen_rtx_REG (<V_HALF>mode, half_regno);

  /* With two elements each half holds exactly one, so no lane index is
     printed.  */
  return (nunits == 2
          ? "vld1.<V_sz_elem>\t%P0, %A1"
          : "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1");
}
  [(set_attr "type" "neon_load1_one_lane<q>")]
)
5322
;; VLD1 to all lanes of a double-word vector: one element loaded from
;; operand 1 is duplicated (vec_duplicate) into operand 0.
(define_insn "neon_vld1_dup<mode>"
  [(set (match_operand:VD_LANE 0 "s_register_operand" "=w")
        (vec_duplicate:VD_LANE
          (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
  "TARGET_NEON"
  "vld1.<V_sz_elem>\t{%P0[]}, %A1"
  [(set_attr "type" "neon_load1_all_lanes<q>")]
)
5330
;; Special case for DImode: a "duplicating" load of the only element is
;; treated exactly like a simple load, so this expands to a plain
;; UNSPEC_VLD1 set with no preparation statements.
(define_expand "neon_vld1_dupdi"
  [(set (match_operand:DI 0 "s_register_operand")
        (unspec:DI
          [(match_operand:DI 1 "neon_struct_operand")]
          UNSPEC_VLD1))]
  "TARGET_NEON"
  ""
)
5339
;; VLD1 to all lanes of a quad-word vector: one element loaded from
;; operand 1 is duplicated into operand 0.  The assembly names the
;; destination as two registers via the %e and %f operand modifiers.
(define_insn "neon_vld1_dup<mode>"
  [(set (match_operand:VQ2 0 "s_register_operand" "=w")
        (vec_duplicate:VQ2
          (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
  "TARGET_NEON"
{
  return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
}
  [(set_attr "type" "neon_load1_all_lanes<q>")]
)
5349
;; Duplicate a DImode memory value into both halves of a V2DI register.
;; After reload this is split into a load of the low DImode half (through
;; neon_vld1_dupdi) followed by a move of that half into the high half.
(define_insn_and_split "neon_vld1_dupv2di"
  [(set (match_operand:V2DI 0 "s_register_operand" "=w")
        (vec_duplicate:V2DI
          (match_operand:DI 1 "neon_struct_operand" "Um")))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx low_half = gen_lowpart (DImode, operands[0]);

  /* Load once into the low half, then copy it into the high half.  */
  emit_insn (gen_neon_vld1_dupdi (low_half, operands[1]));
  emit_move_insn (gen_highpart (DImode, operands[0]), low_half);
  DONE;
}
  [(set_attr "length" "8")
   (set_attr "type" "neon_load1_all_lanes_q")]
)
5366
;; Standard-named expander for a one-vector "store lanes": the structure
;; memory operand 0 is set from an UNSPEC_VST1 of register operand 1.
(define_expand "vec_store_lanes<mode><mode>"
  [(set (match_operand:VDQX 0 "neon_struct_operand")
        (unspec:VDQX
          [(match_operand:VDQX 1 "s_register_operand")]
          UNSPEC_VST1))]
  "TARGET_NEON")
5372
;; VST1 of a whole vector: store register operand 1 to the structure
;; memory operand 0.
(define_insn "neon_vst1<mode>"
  [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um")
        (unspec:VDQX
          [(match_operand:VDQX 1 "s_register_operand" "w")]
          UNSPEC_VST1))]
  "TARGET_NEON"
  "vst1.<V_sz_elem>\t%h1, %A0"
  [(set_attr "type" "neon_store1_1reg<q>")])
5380
;; VST1 of a single lane of a double-word vector.
;;
;; The lane number in operand 2 is in GCC lane order (flipped in
;; arm_expand_neon_args) and is converted back to architectural lane order
;; here, which reverses it on big-endian targets.
(define_insn "neon_vst1_lane<mode>"
  [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_elem>
          [(match_operand:VDX 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          UNSPEC_VST1_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);

  operands[2] = GEN_INT (arch_lane);

  /* A one-element vector is stored as a whole register, without a lane
     index.  */
  return (nunits == 1
          ? "vst1.<V_sz_elem>\t{%P1}, %A0"
          : "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0");
}
  [(set_attr "type" "neon_store1_one_lane<q>")]
)
5401
;; VST1 of a single lane of a quad-word vector.
;;
;; The lane number in operand 2 is in GCC lane order (flipped in
;; arm_expand_neon_args) and is converted back to architectural lane order
;; here, which reverses it on big-endian targets.  The lane is then
;; addressed through a <V_HALF>mode register: lanes in the upper half use
;; the register two hard register numbers up, with the lane index rebased.
(define_insn "neon_vst1_lane<mode>"
  [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_elem>
          [(match_operand:VQX 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          UNSPEC_VST1_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  int half_regno = REGNO (operands[1]);

  if (arch_lane >= nunits / 2)
    {
      /* The lane lives in the upper half-register.  */
      arch_lane -= nunits / 2;
      half_regno += 2;
    }

  operands[2] = GEN_INT (arch_lane);
  operands[1] = gen_rtx_REG (<V_HALF>mode, half_regno);

  /* With two elements each half holds exactly one, so no lane index is
     printed.  */
  return (nunits == 2
          ? "vst1.<V_sz_elem>\t{%P1}, %A0"
          : "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0");
}
  [(set_attr "type" "neon_store1_one_lane<q>")]
)
5429
;; Standard-named expander for a two-vector "load lanes" into a TImode
;; register group.  The dummy unspec only carries the vector mode (VDX)
;; that selects the pattern variant.
(define_expand "vec_load_lanesti<mode>"
  [(set (match_operand:TI 0 "s_register_operand")
        (unspec:TI
          [(match_operand:TI 1 "neon_struct_operand")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2))]
  "TARGET_NEON")
5436
;; Two-register structure load into a TImode register group.  For 64-bit
;; elements the instruction emitted is vld1.64; otherwise it is vld2.  The
;; "type" attribute follows the same split.
(define_insn "neon_vld2<mode>"
  [(set (match_operand:TI 0 "s_register_operand" "=w")
        (unspec:TI
          [(match_operand:TI 1 "neon_struct_operand" "Um")
           (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2))]
  "TARGET_NEON"
{
  return (<V_sz_elem> == 64
          ? "vld1.64\t%h0, %A1"
          : "vld2.<V_sz_elem>\t%h0, %A1");
}
  [(set (attr "type")
        (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                      (const_string "neon_load1_2reg<q>")
                      (const_string "neon_load2_2reg<q>")))]
)
5454
;; Standard-named expander for a two-vector "load lanes" into an OImode
;; register group.  The dummy unspec only carries the vector mode (VQ2)
;; that selects the pattern variant.
(define_expand "vec_load_lanesoi<mode>"
  [(set (match_operand:OI 0 "s_register_operand")
        (unspec:OI
          [(match_operand:OI 1 "neon_struct_operand")
           (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2))]
  "TARGET_NEON")
5461
;; Two-vector structure load (VLD2) into an OImode register group from the
;; structure memory operand 1.
(define_insn "neon_vld2<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI
          [(match_operand:OI 1 "neon_struct_operand" "Um")
           (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2))]
  "TARGET_NEON"
  "vld2.<V_sz_elem>\t%h0, %A1"
  [(set_attr "type" "neon_load2_2reg_q")])
5470
;; VLD2 into one lane of each register of a TImode pair.  Operand 2 (tied
;; to the destination) supplies the lanes that are left untouched.
;;
;; The lane number in operand 3 is in GCC lane order (flipped in
;; arm_expand_neon_args) and is converted back to architectural lane order
;; here, which reverses it on big-endian targets.
(define_insn "neon_vld2_lane<mode>"
  [(set (match_operand:TI 0 "s_register_operand" "=w")
        (unspec:TI
          [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
           (match_operand:TI 2 "s_register_operand" "0")
           (match_operand:SI 3 "immediate_operand" "i")
           (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2_LANE))]
  "TARGET_NEON"
{
  /* Asm operands: 0..1 = destination registers, 2 = memory, 3 = lane.  */
  rtx asm_ops[4];
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
  int first_regno = REGNO (operands[0]);
  int i;

  /* The two destination registers are two hard register numbers apart.  */
  for (i = 0; i < 2; i++)
    asm_ops[i] = gen_rtx_REG (DImode, first_regno + 2 * i);
  asm_ops[2] = operands[1];
  asm_ops[3] = GEN_INT (arch_lane);

  output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", asm_ops);
  return "";
}
  [(set_attr "type" "neon_load2_one_lane<q>")]
)
5494
;; VLD2 into one lane of each vector of an OImode group of quad-word
;; vectors.  Operand 2 (tied to the destination) supplies the lanes that
;; are left untouched.
;;
;; The lane number in operand 3 is in GCC lane order (flipped in
;; arm_expand_neon_args) and is converted back to architectural lane order
;; here, which reverses it on big-endian targets.  Lanes in the upper half
;; of the elements are reached by starting two hard register numbers up
;; and rebasing the lane; the two registers named are four numbers apart.
(define_insn "neon_vld2_lane<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI
          [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
           (match_operand:OI 2 "s_register_operand" "0")
           (match_operand:SI 3 "immediate_operand" "i")
           (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2_LANE))]
  "TARGET_NEON"
{
  /* Asm operands: 0..1 = destination registers, 2 = memory, 3 = lane.  */
  rtx asm_ops[4];
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  int first_regno = REGNO (operands[0]);
  int i;

  if (arch_lane >= nunits / 2)
    {
      /* Upper-half lane: use the odd half-registers instead.  */
      arch_lane -= nunits / 2;
      first_regno += 2;
    }

  for (i = 0; i < 2; i++)
    asm_ops[i] = gen_rtx_REG (DImode, first_regno + 4 * i);
  asm_ops[2] = operands[1];
  asm_ops[3] = GEN_INT (arch_lane);

  output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", asm_ops);
  return "";
}
  [(set_attr "type" "neon_load2_one_lane<q>")]
)
5524
;; VLD2 to all lanes of a TImode register pair.  For modes with a single
;; element per vector there is nothing to duplicate, so a plain vld1 is
;; emitted instead; the "type" attribute follows the same split.
(define_insn "neon_vld2_dup<mode>"
  [(set (match_operand:TI 0 "s_register_operand" "=w")
        (unspec:TI
          [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
           (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2_DUP))]
  "TARGET_NEON"
{
  return (GET_MODE_NUNITS (<MODE>mode) > 1
          ? "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1"
          : "vld1.<V_sz_elem>\t%h0, %A1");
}
  [(set (attr "type")
        (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
                      (const_string "neon_load2_all_lanes<q>")
                      (const_string "neon_load1_1reg<q>")))]
)
5542
;; UNSPEC_VLD2_DUP for V8BF: loads an OImode register group from a V2BF
;; memory operand.  Only available with TARGET_BF16_SIMD.
;;
;; NOTE(review): unlike neon_vld2_dup<mode>, the assembly template has no
;; "[]" all-lanes suffix and names four registers, although operand 1 is
;; only V2BF wide -- confirm this is the intended instruction.
(define_insn "neon_vld2_dupv8bf"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI
          [(match_operand:V2BF 1 "neon_struct_operand" "Um")
           (unspec:V8BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2_DUP))]
  "TARGET_BF16_SIMD"
{
  /* Asm operands: 0..3 = destination registers, 4 = memory.  */
  rtx asm_ops[5];
  int first_regno = REGNO (operands[0]);
  int i;

  /* The destination registers are two hard register numbers apart.  */
  for (i = 0; i < 4; i++)
    asm_ops[i] = gen_rtx_REG (V4BFmode, first_regno + 2 * i);
  asm_ops[4] = operands[1];

  output_asm_insn ("vld2.16\t{%P0, %P1, %P2, %P3}, %A4", asm_ops);
  return "";
}
  [(set_attr "type" "neon_load2_all_lanes_q")]
)
5563
;; Standard-named expander for a two-vector "store lanes" from a TImode
;; register group.  The dummy unspec only carries the vector mode (VDX)
;; that selects the pattern variant.
(define_expand "vec_store_lanesti<mode>"
  [(set (match_operand:TI 0 "neon_struct_operand")
        (unspec:TI
          [(match_operand:TI 1 "s_register_operand")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2))]
  "TARGET_NEON")
5570
;; Two-register structure store from a TImode register group.  For 64-bit
;; elements the instruction emitted is vst1.64; otherwise it is vst2.
;;
;; NOTE(review): the non-64-bit case is typed "neon_store2_one_lane<q>"
;; even though this stores whole registers (the matching load,
;; neon_vld2<mode>, uses "neon_load2_2reg<q>") -- confirm the scheduling
;; type is intended.
(define_insn "neon_vst2<mode>"
  [(set (match_operand:TI 0 "neon_struct_operand" "=Um")
        (unspec:TI
          [(match_operand:TI 1 "s_register_operand" "w")
           (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2))]
  "TARGET_NEON"
{
  return (<V_sz_elem> == 64
          ? "vst1.64\t%h1, %A0"
          : "vst2.<V_sz_elem>\t%h1, %A0");
}
  [(set (attr "type")
        (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                      (const_string "neon_store1_2reg<q>")
                      (const_string "neon_store2_one_lane<q>")))]
)
5588
;; Standard-named expander for a two-vector "store lanes" from an OImode
;; register group.  The dummy unspec only carries the vector mode (VQ2)
;; that selects the pattern variant.
(define_expand "vec_store_lanesoi<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand")
        (unspec:OI
          [(match_operand:OI 1 "s_register_operand")
           (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2))]
  "TARGET_NEON")
5595
;; Two-vector structure store (VST2) from an OImode register group to the
;; structure memory operand 0.
(define_insn "neon_vst2<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI
          [(match_operand:OI 1 "s_register_operand" "w")
           (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2))]
  "TARGET_NEON"
  "vst2.<V_sz_elem>\t%h1, %A0"
  [(set_attr "type" "neon_store2_4reg<q>")]
)
5605
;; VST2 of one lane from each register of a TImode pair.
;;
;; The lane number in operand 2 is in GCC lane order (flipped in
;; arm_expand_neon_args) and is converted back to architectural lane order
;; here, which reverses it on big-endian targets.
(define_insn "neon_vst2_lane<mode>"
  [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_two_elem>
          [(match_operand:TI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2_LANE))]
  "TARGET_NEON"
{
  /* Asm operands: 0 = memory, 1..2 = source registers, 3 = lane.  */
  rtx asm_ops[4];
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
  int first_regno = REGNO (operands[1]);
  int i;

  asm_ops[0] = operands[0];
  /* The two source registers are two hard register numbers apart.  */
  for (i = 0; i < 2; i++)
    asm_ops[1 + i] = gen_rtx_REG (DImode, first_regno + 2 * i);
  asm_ops[3] = GEN_INT (arch_lane);

  output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", asm_ops);
  return "";
}
  [(set_attr "type" "neon_store2_one_lane<q>")]
)
5629
;; VST2 of one lane from each vector of an OImode group of quad-word
;; vectors.
;;
;; The lane number in operand 2 is in GCC lane order (flipped in
;; arm_expand_neon_args) and is converted back to architectural lane order
;; here, which reverses it on big-endian targets.  Lanes in the upper half
;; of the elements are reached by starting two hard register numbers up
;; and rebasing the lane; the two registers named are four numbers apart.
(define_insn "neon_vst2_lane<mode>"
  [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_two_elem>
          [(match_operand:OI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2_LANE))]
  "TARGET_NEON"
{
  /* Asm operands: 0 = memory, 1..2 = source registers, 3 = lane.  */
  rtx asm_ops[4];
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  int first_regno = REGNO (operands[1]);
  int i;

  if (arch_lane >= nunits / 2)
    {
      /* Upper-half lane: use the odd half-registers instead.  */
      arch_lane -= nunits / 2;
      first_regno += 2;
    }

  asm_ops[0] = operands[0];
  for (i = 0; i < 2; i++)
    asm_ops[1 + i] = gen_rtx_REG (DImode, first_regno + 4 * i);
  asm_ops[3] = GEN_INT (arch_lane);

  output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", asm_ops);
  return "";
}
  [(set_attr "type" "neon_store2_one_lane<q>")]
)
5659
;; Standard-named expander for a three-vector "load lanes" into an EImode
;; register group.  The dummy unspec only carries the vector mode (VDX)
;; that selects the pattern variant.
(define_expand "vec_load_lanesei<mode>"
  [(set (match_operand:EI 0 "s_register_operand")
        (unspec:EI
          [(match_operand:EI 1 "neon_struct_operand")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD3))]
  "TARGET_NEON")
5666
;; Three-register structure load into an EImode register group.  For
;; 64-bit elements the instruction emitted is vld1.64; otherwise it is
;; vld3.  The "type" attribute follows the same split.
(define_insn "neon_vld3<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI
          [(match_operand:EI 1 "neon_struct_operand" "Um")
           (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD3))]
  "TARGET_NEON"
{
  return (<V_sz_elem> == 64
          ? "vld1.64\t%h0, %A1"
          : "vld3.<V_sz_elem>\t%h0, %A1");
}
  [(set (attr "type")
        (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                      (const_string "neon_load1_3reg<q>")
                      (const_string "neon_load3_3reg<q>")))]
)
5684
;; Standard-named expander for a three-vector "load lanes" into a CImode
;; register group; it simply forwards both operands to neon_vld3<mode>.
(define_expand "vec_load_lanesci<mode>"
  [(match_operand:CI 0 "s_register_operand")
   (match_operand:CI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src_mem = operands[1];

  emit_insn (gen_neon_vld3<mode> (dest, src_mem));
  DONE;
})
5694
;; Three-vector structure load into a CImode register group, done as two
;; instructions.  neon_vld3qa reads the first EImode-sized block of memory
;; and neon_vld3qb reads the block immediately after it; the second insn
;; also takes the partly-filled destination as an input.
(define_expand "neon_vld3<mode>"
  [(match_operand:CI 0 "s_register_operand")
   (match_operand:CI 1 "neon_struct_operand")
   (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx first_block = adjust_address (operands[1], EImode, 0);
  emit_insn (gen_neon_vld3qa<mode> (operands[0], first_block));

  rtx second_block
    = adjust_address (first_block, EImode, GET_MODE_SIZE (EImode));
  emit_insn (gen_neon_vld3qb<mode> (operands[0], second_block, operands[0]));
  DONE;
})
5709
;; First half of a CImode three-vector load: a vld3 from the EImode memory
;; operand into the registers numbered REGNO + 0, + 4 and + 8 of the
;; destination group.
(define_insn "neon_vld3qa<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI
          [(match_operand:EI 1 "neon_struct_operand" "Um")
           (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD3A))]
  "TARGET_NEON"
{
  /* Asm operands: 0..2 = destination registers, 3 = memory.  */
  rtx asm_ops[4];
  int first_regno = REGNO (operands[0]);
  int i;

  for (i = 0; i < 3; i++)
    asm_ops[i] = gen_rtx_REG (DImode, first_regno + 4 * i);
  asm_ops[3] = operands[1];

  output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", asm_ops);
  return "";
}
  [(set_attr "type" "neon_load3_3reg<q>")]
)
5728
;; Second half of a CImode three-vector load: a vld3 from the EImode
;; memory operand into the registers numbered REGNO + 2, + 6 and + 10 of
;; the destination group.  Operand 2 is tied to the destination so that
;; the registers written by the first half are kept.
(define_insn "neon_vld3qb<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI
          [(match_operand:EI 1 "neon_struct_operand" "Um")
           (match_operand:CI 2 "s_register_operand" "0")
           (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD3B))]
  "TARGET_NEON"
{
  /* Asm operands: 0..2 = destination registers, 3 = memory.  */
  rtx asm_ops[4];
  int first_regno = REGNO (operands[0]) + 2;
  int i;

  for (i = 0; i < 3; i++)
    asm_ops[i] = gen_rtx_REG (DImode, first_regno + 4 * i);
  asm_ops[3] = operands[1];

  output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", asm_ops);
  return "";
}
  [(set_attr "type" "neon_load3_3reg<q>")]
)
5748
;; VLD3 into one lane of each register of an EImode triple.  Operand 2
;; (tied to the destination) supplies the lanes that are left untouched.
;;
;; The lane number in operand 3 is in GCC lane order (flipped in
;; arm_expand_neon_args) and is converted back to architectural lane order
;; here, which reverses it on big-endian targets.
(define_insn "neon_vld3_lane<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI
          [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
           (match_operand:EI 2 "s_register_operand" "0")
           (match_operand:SI 3 "immediate_operand" "i")
           (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD3_LANE))]
  "TARGET_NEON"
{
  /* Asm operands: 0..2 = destination registers, 3 = memory, 4 = lane.  */
  rtx asm_ops[5];
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  int first_regno = REGNO (operands[0]);
  int i;

  /* The destination registers are two hard register numbers apart.  */
  for (i = 0; i < 3; i++)
    asm_ops[i] = gen_rtx_REG (DImode, first_regno + 2 * i);
  asm_ops[3] = operands[1];
  asm_ops[4] = GEN_INT (arch_lane);

  /* The memory operand is printed with plain %3 here, not %A3.  */
  output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
                   asm_ops);
  return "";
}
  [(set_attr "type" "neon_load3_one_lane<q>")]
)
5774
;; VLD3 into one lane of each vector of a CImode group of quad-word
;; vectors.  Operand 2 (tied to the destination) supplies the lanes that
;; are left untouched.
;;
;; The lane number in operand 3 is in GCC lane order (flipped in
;; arm_expand_neon_args) and is converted back to architectural lane order
;; here, which reverses it on big-endian targets.  Lanes in the upper half
;; of the elements are reached by starting two hard register numbers up
;; and rebasing the lane; the three registers named are four numbers
;; apart.
(define_insn "neon_vld3_lane<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI
          [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
           (match_operand:CI 2 "s_register_operand" "0")
           (match_operand:SI 3 "immediate_operand" "i")
           (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD3_LANE))]
  "TARGET_NEON"
{
  /* Asm operands: 0..2 = destination registers, 3 = memory, 4 = lane.  */
  rtx asm_ops[5];
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  int first_regno = REGNO (operands[0]);
  int i;

  if (arch_lane >= nunits / 2)
    {
      /* Upper-half lane: use the odd half-registers instead.  */
      arch_lane -= nunits / 2;
      first_regno += 2;
    }

  for (i = 0; i < 3; i++)
    asm_ops[i] = gen_rtx_REG (DImode, first_regno + 4 * i);
  asm_ops[3] = operands[1];
  asm_ops[4] = GEN_INT (arch_lane);

  /* The memory operand is printed with plain %3 here, not %A3.  */
  output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
                   asm_ops);
  return "";
}
  [(set_attr "type" "neon_load3_one_lane<q>")]
)
5806
;; VLD3 to all lanes of an EImode register triple.  For modes with a
;; single element per vector there is nothing to duplicate, so a plain
;; vld1 is emitted instead; the "type" attribute follows the same split.
(define_insn "neon_vld3_dup<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI
          [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
           (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD3_DUP))]
  "TARGET_NEON"
{
  /* Asm operands: 0..2 = destination registers, 3 = memory.  */
  rtx asm_ops[4];
  int first_regno;
  int i;

  if (!(GET_MODE_NUNITS (<MODE>mode) > 1))
    return "vld1.<V_sz_elem>\t%h0, %A1";

  first_regno = REGNO (operands[0]);
  /* The destination registers are two hard register numbers apart.  */
  for (i = 0; i < 3; i++)
    asm_ops[i] = gen_rtx_REG (DImode, first_regno + 2 * i);
  asm_ops[3] = operands[1];

  /* The memory operand is printed with plain %3 here, not %A3.  */
  output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %3", asm_ops);
  return "";
}
  [(set (attr "type")
        (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
                      (const_string "neon_load3_all_lanes<q>")
                      (const_string "neon_load1_1reg<q>")))])
5832
;; VLD3 to all lanes for V8BF: loads a CImode register group from memory
;; operand 1.  Only available with TARGET_BF16_SIMD.
;;
;; The pattern is tagged UNSPEC_VLD3_DUP, matching neon_vld3_dup<mode> and
;; the vld3 instruction it emits; it used to carry UNSPEC_VLD2_DUP, copied
;; from the vld2 pattern.
;;
;; NOTE(review): the three registers named are two hard register numbers
;; apart, whereas the other CImode patterns in this file (neon_vld3qa,
;; neon_vld3qb, neon_vld3_lane) step by four -- confirm that every register
;; of the CImode destination is meant to be covered.
(define_insn "neon_vld3_dupv8bf"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI [(match_operand:V2BF 1 "neon_struct_operand" "Um")
                    (unspec:V8BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3_DUP))]
  "TARGET_BF16_SIMD"
  {
    /* Asm operands: 0..2 = destination registers, 3 = memory.  */
    rtx ops[4];
    int tabbase = REGNO (operands[0]);

    ops[3] = operands[1];
    ops[0] = gen_rtx_REG (V4BFmode, tabbase);
    ops[1] = gen_rtx_REG (V4BFmode, tabbase + 2);
    ops[2] = gen_rtx_REG (V4BFmode, tabbase + 4);
    output_asm_insn ("vld3.16\t{%P0[], %P1[], %P2[]}, %A3", ops);
    return "";
  }
  [(set_attr "type" "neon_load3_all_lanes_q")]
)
5852
;; Standard-named expander for a three-vector "store lanes" from an EImode
;; register group.  The dummy unspec only carries the vector mode (VDX)
;; that selects the pattern variant.
(define_expand "vec_store_lanesei<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand")
        (unspec:EI
          [(match_operand:EI 1 "s_register_operand")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST3))]
  "TARGET_NEON")
5859
;; Three-register structure store from an EImode register group.  For
;; 64-bit elements the instruction emitted is vst1.64; otherwise it is
;; vst3.
;;
;; NOTE(review): the non-64-bit case is typed "neon_store3_one_lane<q>"
;; even though this stores whole registers (neon_vst3qa<mode> uses
;; "neon_store3_3reg<q>") -- confirm the scheduling type is intended.
(define_insn "neon_vst3<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
        (unspec:EI
          [(match_operand:EI 1 "s_register_operand" "w")
           (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST3))]
  "TARGET_NEON"
{
  return (<V_sz_elem> == 64
          ? "vst1.64\t%h1, %A0"
          : "vst3.<V_sz_elem>\t%h1, %A0");
}
  [(set (attr "type")
        (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                      (const_string "neon_store1_3reg<q>")
                      (const_string "neon_store3_one_lane<q>")))])
5876
;; Expander vec_store_lanesci<mode>, iterated over VQ2.  It forwards both
;; operands (CI memory destination 0, CI register source 1) to
;; gen_neon_vst3<mode> and ends with DONE, so only the insns emitted by that
;; call are produced.
5877(define_expand "vec_store_lanesci<mode>"
5878  [(match_operand:CI 0 "neon_struct_operand")
5879   (match_operand:CI 1 "s_register_operand")
5880   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5881  "TARGET_NEON"
5882{
5883  emit_insn (gen_neon_vst3<mode> (operands[0], operands[1]));
5884  DONE;
5885})
5886
;; Expander for the CI-mode form of vst3 (modes in VQ2BF).  The store is
;; split into two EI-mode pieces: neon_vst3qa<mode> writes at offset 0 of
;; operand 0 and neon_vst3qb<mode> writes GET_MODE_SIZE (EImode) bytes past
;; that.  Both insns are given the whole CI source register, operand 1.
5887(define_expand "neon_vst3<mode>"
5888  [(match_operand:CI 0 "neon_struct_operand")
5889   (match_operand:CI 1 "s_register_operand")
5890   (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5891  "TARGET_NEON"
5892{
5893  rtx mem;
5894
5895  mem = adjust_address (operands[0], EImode, 0);
5896  emit_insn (gen_neon_vst3qa<mode> (mem, operands[1]));
5897  mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
5898  emit_insn (gen_neon_vst3qb<mode> (mem, operands[1]));
5899  DONE;
5900})
5901
;; First piece of a CI-mode vst3 (UNSPEC_VST3A).  Prints one
;; "vst3.<V_sz_elem>" whose register list is built, as DImode registers, from
;; REGNO (operands[1]) + 0, + 4 and + 8, storing to the EI-mode memory
;; operand 0.
5902(define_insn "neon_vst3qa<mode>"
5903  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
5904        (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
5905                    (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5906                   UNSPEC_VST3A))]
5907  "TARGET_NEON"
5908{
5909  int regno = REGNO (operands[1]);
5910  rtx ops[4];
5911  ops[0] = operands[0];
5912  ops[1] = gen_rtx_REG (DImode, regno);
5913  ops[2] = gen_rtx_REG (DImode, regno + 4);
5914  ops[3] = gen_rtx_REG (DImode, regno + 8);
5915  output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
5916  return "";
5917}
5918  [(set_attr "type" "neon_store3_3reg<q>")]
5919)
5920
;; Second piece of a CI-mode vst3 (UNSPEC_VST3B).  Same shape as
;; neon_vst3qa<mode>, but the register list starts two register numbers
;; later: REGNO (operands[1]) + 2, + 6 and + 10.
5921(define_insn "neon_vst3qb<mode>"
5922  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
5923        (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
5924                    (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5925                   UNSPEC_VST3B))]
5926  "TARGET_NEON"
5927{
5928  int regno = REGNO (operands[1]);
5929  rtx ops[4];
5930  ops[0] = operands[0];
5931  ops[1] = gen_rtx_REG (DImode, regno + 2);
5932  ops[2] = gen_rtx_REG (DImode, regno + 6);
5933  ops[3] = gen_rtx_REG (DImode, regno + 10);
5934  output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
5935  return "";
5936}
5937  [(set_attr "type" "neon_store3_3reg<q>")]
5938)
5939
5940;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5941;; here on big endian targets.
;; Single-lane vst3 for the D-register modes in VD_LANE.  Operand 0 is the
;; <V_three_elem> memory destination, operand 1 the EI source group and
;; operand 2 the lane number, which is mapped through NEON_ENDIAN_LANE_N
;; before printing.  The register list is REGNO (operands[1]) + 0, + 2, + 4.
;; Note that the memory operand is printed with plain %0 here, not %A0.
5942(define_insn "neon_vst3_lane<mode>"
5943  [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
5944        (unspec:<V_three_elem>
5945           [(match_operand:EI 1 "s_register_operand" "w")
5946            (match_operand:SI 2 "immediate_operand" "i")
5947            (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5948           UNSPEC_VST3_LANE))]
5949  "TARGET_NEON"
5950{
5951  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5952  int regno = REGNO (operands[1]);
5953  rtx ops[5];
5954  ops[0] = operands[0];
5955  ops[1] = gen_rtx_REG (DImode, regno);
5956  ops[2] = gen_rtx_REG (DImode, regno + 2);
5957  ops[3] = gen_rtx_REG (DImode, regno + 4);
5958  ops[4] = GEN_INT (lane);
5959  output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
5960                   ops);
5961  return "";
5962}
5963  [(set_attr "type" "neon_store3_one_lane<q>")]
5964)
5965
5966;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5967;; here on big endian targets.
;; Single-lane vst3 for the Q-register modes in VQ_HS (CI source group).
;; After the NEON_ENDIAN_LANE_N mapping, a lane in the upper half of the
;; vector (lane >= nunits / 2) is rebased into the lower half and the
;; starting register number is advanced by 2.  The register list then steps
;; by 4 from that start (+ 0, + 4, + 8).  The memory operand is printed with
;; plain %0.
5968(define_insn "neon_vst3_lane<mode>"
5969  [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
5970        (unspec:<V_three_elem>
5971           [(match_operand:CI 1 "s_register_operand" "w")
5972            (match_operand:SI 2 "immediate_operand" "i")
5973            (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5974           UNSPEC_VST3_LANE))]
5975  "TARGET_NEON"
5976{
5977  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5978  HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5979  int regno = REGNO (operands[1]);
5980  rtx ops[5];
5981  if (lane >= max / 2)
5982    {
5983      lane -= max / 2;
5984      regno += 2;
5985    }
5986  ops[0] = operands[0];
5987  ops[1] = gen_rtx_REG (DImode, regno);
5988  ops[2] = gen_rtx_REG (DImode, regno + 4);
5989  ops[3] = gen_rtx_REG (DImode, regno + 8);
5990  ops[4] = GEN_INT (lane);
5991  output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
5992                   ops);
5993  return "";
5994}
5995  [(set_attr "type" "neon_store3_one_lane<q>")]
5996)
5997
;; Expander vec_load_lanesoi<mode>, iterated over VDX.  It has no preparation
;; code, so it emits its template as written: an OI-mode load from structure
;; memory operand 1 into register operand 0, tagged UNSPEC_VLD4 (the same
;; shape as the neon_vld4<mode> insn pattern).  Enabled under TARGET_NEON.
5998(define_expand "vec_load_lanesoi<mode>"
5999  [(set (match_operand:OI 0 "s_register_operand")
6000        (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
6001                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6002		   UNSPEC_VLD4))]
6003  "TARGET_NEON")
6004
;; Load a four-register structure (OI mode) for the D-register modes in
;; VDXBF.  Prints "vld1.64" when <V_sz_elem> is 64 and "vld4.<V_sz_elem>"
;; otherwise; the "type" attribute follows the same split
;; (neon_load1_4reg<q> versus neon_load4_4reg<q>).
6005(define_insn "neon_vld4<mode>"
6006  [(set (match_operand:OI 0 "s_register_operand" "=w")
6007        (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
6008                    (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6009                   UNSPEC_VLD4))]
6010  "TARGET_NEON"
6011{
6012  if (<V_sz_elem> == 64)
6013    return "vld1.64\t%h0, %A1";
6014  else
6015    return "vld4.<V_sz_elem>\t%h0, %A1";
6016}
6017  [(set (attr "type")
6018      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
6019                    (const_string "neon_load1_4reg<q>")
6020                    (const_string "neon_load4_4reg<q>")))]
6021)
6022
;; Expander vec_load_lanesxi<mode>, iterated over VQ2.  It forwards both
;; operands (XI register destination 0, XI memory source 1) to
;; gen_neon_vld4<mode> and ends with DONE.
6023(define_expand "vec_load_lanesxi<mode>"
6024  [(match_operand:XI 0 "s_register_operand")
6025   (match_operand:XI 1 "neon_struct_operand")
6026   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6027  "TARGET_NEON"
6028{
6029  emit_insn (gen_neon_vld4<mode> (operands[0], operands[1]));
6030  DONE;
6031})
6032
;; Expander for the XI-mode form of vld4 (modes in VQ2BF).  The load is split
;; into two OI-mode pieces: neon_vld4qa<mode> reads at offset 0 of operand 1
;; and neon_vld4qb<mode> reads GET_MODE_SIZE (OImode) bytes past that.  The
;; second insn is also passed operands[0] as an input; in neon_vld4qb<mode>
;; that operand is tied to the output by constraint "0".
6033(define_expand "neon_vld4<mode>"
6034  [(match_operand:XI 0 "s_register_operand")
6035   (match_operand:XI 1 "neon_struct_operand")
6036   (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6037  "TARGET_NEON"
6038{
6039  rtx mem;
6040
6041  mem = adjust_address (operands[1], OImode, 0);
6042  emit_insn (gen_neon_vld4qa<mode> (operands[0], mem));
6043  mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
6044  emit_insn (gen_neon_vld4qb<mode> (operands[0], mem, operands[0]));
6045  DONE;
6046})
6047
;; First piece of an XI-mode vld4 (UNSPEC_VLD4A).  Prints one
;; "vld4.<V_sz_elem>" whose register list is built, as DImode registers, from
;; REGNO (operands[0]) + 0, + 4, + 8 and + 12, loading from the OI-mode
;; memory operand 1.
6048(define_insn "neon_vld4qa<mode>"
6049  [(set (match_operand:XI 0 "s_register_operand" "=w")
6050        (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
6051                    (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6052                   UNSPEC_VLD4A))]
6053  "TARGET_NEON"
6054{
6055  int regno = REGNO (operands[0]);
6056  rtx ops[5];
6057  ops[0] = gen_rtx_REG (DImode, regno);
6058  ops[1] = gen_rtx_REG (DImode, regno + 4);
6059  ops[2] = gen_rtx_REG (DImode, regno + 8);
6060  ops[3] = gen_rtx_REG (DImode, regno + 12);
6061  ops[4] = operands[1];
6062  output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
6063  return "";
6064}
6065  [(set_attr "type" "neon_load4_4reg<q>")]
6066)
6067
;; Second piece of an XI-mode vld4 (UNSPEC_VLD4B).  Operand 2 is the XI
;; register group tied to the output (constraint "0").  The register list
;; starts two register numbers later than in neon_vld4qa<mode>:
;; REGNO (operands[0]) + 2, + 6, + 10 and + 14.
6068(define_insn "neon_vld4qb<mode>"
6069  [(set (match_operand:XI 0 "s_register_operand" "=w")
6070        (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
6071                    (match_operand:XI 2 "s_register_operand" "0")
6072                    (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6073                   UNSPEC_VLD4B))]
6074  "TARGET_NEON"
6075{
6076  int regno = REGNO (operands[0]);
6077  rtx ops[5];
6078  ops[0] = gen_rtx_REG (DImode, regno + 2);
6079  ops[1] = gen_rtx_REG (DImode, regno + 6);
6080  ops[2] = gen_rtx_REG (DImode, regno + 10);
6081  ops[3] = gen_rtx_REG (DImode, regno + 14);
6082  ops[4] = operands[1];
6083  output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
6084  return "";
6085}
6086  [(set_attr "type" "neon_load4_4reg<q>")]
6087)
6088
6089;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
6090;; here on big endian targets.
;; Single-lane vld4 for the D-register modes in VD_LANE.  Operand 1 is the
;; <V_four_elem> memory source, operand 2 the OI register group tied to the
;; output (constraint "0"), and operand 3 the lane number, which is mapped
;; through NEON_ENDIAN_LANE_N before printing.  The register list is
;; REGNO (operands[0]) + 0, + 2, + 4 and + 6.
6091(define_insn "neon_vld4_lane<mode>"
6092  [(set (match_operand:OI 0 "s_register_operand" "=w")
6093        (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
6094                    (match_operand:OI 2 "s_register_operand" "0")
6095                    (match_operand:SI 3 "immediate_operand" "i")
6096                    (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6097                   UNSPEC_VLD4_LANE))]
6098  "TARGET_NEON"
6099{
6100  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
6101  int regno = REGNO (operands[0]);
6102  rtx ops[6];
6103  ops[0] = gen_rtx_REG (DImode, regno);
6104  ops[1] = gen_rtx_REG (DImode, regno + 2);
6105  ops[2] = gen_rtx_REG (DImode, regno + 4);
6106  ops[3] = gen_rtx_REG (DImode, regno + 6);
6107  ops[4] = operands[1];
6108  ops[5] = GEN_INT (lane);
6109  output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
6110                   ops);
6111  return "";
6112}
6113  [(set_attr "type" "neon_load4_one_lane<q>")]
6114)
6115
6116;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
6117;; here on big endian targets.
;; Single-lane vld4 for the Q-register modes in VQ_HS (XI register group,
;; operand 2 tied to the output).  After the NEON_ENDIAN_LANE_N mapping, a
;; lane in the upper half of the vector (lane >= nunits / 2) is rebased into
;; the lower half and the starting register number is advanced by 2.  The
;; register list then steps by 4 from that start (+ 0, + 4, + 8, + 12).
6118(define_insn "neon_vld4_lane<mode>"
6119  [(set (match_operand:XI 0 "s_register_operand" "=w")
6120        (unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
6121                    (match_operand:XI 2 "s_register_operand" "0")
6122                    (match_operand:SI 3 "immediate_operand" "i")
6123                    (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6124                   UNSPEC_VLD4_LANE))]
6125  "TARGET_NEON"
6126{
6127  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
6128  HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
6129  int regno = REGNO (operands[0]);
6130  rtx ops[6];
6131  if (lane >= max / 2)
6132    {
6133      lane -= max / 2;
6134      regno += 2;
6135    }
6136  ops[0] = gen_rtx_REG (DImode, regno);
6137  ops[1] = gen_rtx_REG (DImode, regno + 4);
6138  ops[2] = gen_rtx_REG (DImode, regno + 8);
6139  ops[3] = gen_rtx_REG (DImode, regno + 12);
6140  ops[4] = operands[1];
6141  ops[5] = GEN_INT (lane);
6142  output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
6143                   ops);
6144  return "";
6145}
6146  [(set_attr "type" "neon_load4_one_lane<q>")]
6147)
6148
;; vld4 load-to-all-lanes for the D-register modes in VDXBF.  When the mode
;; has more than one element, prints "vld4.<V_sz_elem>" with all-lanes
;; ("[]") syntax on REGNO (operands[0]) + 0, + 2, + 4 and + 6.  Otherwise it
;; falls back to a plain "vld1.<V_sz_elem>".  The "type" attribute makes the
;; same distinction using <V_mode_nunits>.
6149(define_insn "neon_vld4_dup<mode>"
6150  [(set (match_operand:OI 0 "s_register_operand" "=w")
6151        (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
6152                    (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6153                   UNSPEC_VLD4_DUP))]
6154  "TARGET_NEON"
6155{
6156  if (GET_MODE_NUNITS (<MODE>mode) > 1)
6157    {
6158      int regno = REGNO (operands[0]);
6159      rtx ops[5];
6160      ops[0] = gen_rtx_REG (DImode, regno);
6161      ops[1] = gen_rtx_REG (DImode, regno + 2);
6162      ops[2] = gen_rtx_REG (DImode, regno + 4);
6163      ops[3] = gen_rtx_REG (DImode, regno + 6);
6164      ops[4] = operands[1];
6165      output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4",
6166                       ops);
6167      return "";
6168    }
6169  else
6170    return "vld1.<V_sz_elem>\t%h0, %A1";
6171}
6172  [(set (attr "type")
6173      (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
6174                    (const_string "neon_load4_all_lanes<q>")
6175                    (const_string "neon_load1_1reg<q>")))]
6176)
6177
;; V8BF form of vld4_dup (TARGET_BF16_SIMD).  Operand 0 is an XI-mode register
;; group and operand 1 the structure memory operand.  The output code prints
;; one "vld4.16" with all-lanes ("[]") syntax on the four V4BF registers at
;; REGNO (operands[0]) + 0, + 2, + 4 and + 6.
;; The unspec is UNSPEC_VLD4_DUP, the same code neon_vld4_dup<mode> uses, so
;; that the RTL names the vld4 operation the output code actually prints.
;; NOTE(review): operand 1 is declared V2BF although the printed instruction
;; is a four-element load -- confirm against the builtin expander before
;; touching the operand mode.
6178(define_insn "neon_vld4_dupv8bf"
6179  [(set (match_operand:XI 0 "s_register_operand" "=w")
6180        (unspec:XI [(match_operand:V2BF 1 "neon_struct_operand" "Um")
6181                    (unspec:V8BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6182                   UNSPEC_VLD4_DUP))]
6183  "TARGET_BF16_SIMD"
6184  {
6185    rtx ops[5];
6186    int tabbase = REGNO (operands[0]);
6187
6188    ops[4] = operands[1];
6189    ops[0] = gen_rtx_REG (V4BFmode, tabbase);
6190    ops[1] = gen_rtx_REG (V4BFmode, tabbase + 2);
6191    ops[2] = gen_rtx_REG (V4BFmode, tabbase + 4);
6192    ops[3] = gen_rtx_REG (V4BFmode, tabbase + 6);
6193    output_asm_insn ("vld4.16\t{%P0[], %P1[], %P2[], %P3[]}, %A4", ops);
6194    return "";
6195  }
6196  [(set_attr "type" "neon_load4_all_lanes_q")]
6197)
6198
;; Expander vec_store_lanesoi<mode>, iterated over VDX.  It has no
;; preparation code, so it emits its template as written: an OI-mode store of
;; register operand 1 to structure memory operand 0, tagged UNSPEC_VST4 (the
;; same shape as the neon_vst4<mode> insn pattern).  Enabled under
;; TARGET_NEON.
6199(define_expand "vec_store_lanesoi<mode>"
6200  [(set (match_operand:OI 0 "neon_struct_operand")
6201	(unspec:OI [(match_operand:OI 1 "s_register_operand")
6202                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6203                   UNSPEC_VST4))]
6204  "TARGET_NEON")
6205
;; Store a four-register structure (OI mode) for the D-register modes in
;; VDXBF.  Prints "vst1.64" when <V_sz_elem> is 64 and "vst4.<V_sz_elem>"
;; otherwise; the "type" attribute follows the same split
;; (neon_store1_4reg<q> versus neon_store4_4reg<q>).
6206(define_insn "neon_vst4<mode>"
6207  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
6208        (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
6209                    (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6210                   UNSPEC_VST4))]
6211  "TARGET_NEON"
6212{
6213  if (<V_sz_elem> == 64)
6214    return "vst1.64\t%h1, %A0";
6215  else
6216    return "vst4.<V_sz_elem>\t%h1, %A0";
6217}
6218  [(set (attr "type")
6219      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
6220                    (const_string "neon_store1_4reg<q>")
6221                    (const_string "neon_store4_4reg<q>")))]
6222)
6223
;; Expander vec_store_lanesxi<mode>, iterated over VQ2.  It forwards both
;; operands (XI memory destination 0, XI register source 1) to
;; gen_neon_vst4<mode> and ends with DONE.
6224(define_expand "vec_store_lanesxi<mode>"
6225  [(match_operand:XI 0 "neon_struct_operand")
6226   (match_operand:XI 1 "s_register_operand")
6227   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6228  "TARGET_NEON"
6229{
6230  emit_insn (gen_neon_vst4<mode> (operands[0], operands[1]));
6231  DONE;
6232})
6233
;; Expander for the XI-mode form of vst4 (modes in VQ2BF).  The store is
;; split into two OI-mode pieces: neon_vst4qa<mode> writes at offset 0 of
;; operand 0 and neon_vst4qb<mode> writes GET_MODE_SIZE (OImode) bytes past
;; that.  Both insns are given the whole XI source register, operand 1.
6234(define_expand "neon_vst4<mode>"
6235  [(match_operand:XI 0 "neon_struct_operand")
6236   (match_operand:XI 1 "s_register_operand")
6237   (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6238  "TARGET_NEON"
6239{
6240  rtx mem;
6241
6242  mem = adjust_address (operands[0], OImode, 0);
6243  emit_insn (gen_neon_vst4qa<mode> (mem, operands[1]));
6244  mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
6245  emit_insn (gen_neon_vst4qb<mode> (mem, operands[1]));
6246  DONE;
6247})
6248
;; First piece of an XI-mode vst4 (UNSPEC_VST4A).  Prints one
;; "vst4.<V_sz_elem>" whose register list is built, as DImode registers, from
;; REGNO (operands[1]) + 0, + 4, + 8 and + 12, storing to the OI-mode memory
;; operand 0.
6249(define_insn "neon_vst4qa<mode>"
6250  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
6251        (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
6252                    (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6253                   UNSPEC_VST4A))]
6254  "TARGET_NEON"
6255{
6256  int regno = REGNO (operands[1]);
6257  rtx ops[5];
6258  ops[0] = operands[0];
6259  ops[1] = gen_rtx_REG (DImode, regno);
6260  ops[2] = gen_rtx_REG (DImode, regno + 4);
6261  ops[3] = gen_rtx_REG (DImode, regno + 8);
6262  ops[4] = gen_rtx_REG (DImode, regno + 12);
6263  output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
6264  return "";
6265}
6266  [(set_attr "type" "neon_store4_4reg<q>")]
6267)
6268
;; Second piece of an XI-mode vst4 (UNSPEC_VST4B).  Same shape as
;; neon_vst4qa<mode>, but the register list starts two register numbers
;; later: REGNO (operands[1]) + 2, + 6, + 10 and + 14.
6269(define_insn "neon_vst4qb<mode>"
6270  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
6271        (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
6272                    (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6273                   UNSPEC_VST4B))]
6274  "TARGET_NEON"
6275{
6276  int regno = REGNO (operands[1]);
6277  rtx ops[5];
6278  ops[0] = operands[0];
6279  ops[1] = gen_rtx_REG (DImode, regno + 2);
6280  ops[2] = gen_rtx_REG (DImode, regno + 6);
6281  ops[3] = gen_rtx_REG (DImode, regno + 10);
6282  ops[4] = gen_rtx_REG (DImode, regno + 14);
6283  output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
6284  return "";
6285}
6286  [(set_attr "type" "neon_store4_4reg<q>")]
6287)
6288
6289;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
6290;; here on big endian targets.
;; Single-lane vst4 for the D-register modes in VD_LANE.  Operand 0 is the
;; <V_four_elem> memory destination, operand 1 the OI source group and
;; operand 2 the lane number, which is mapped through NEON_ENDIAN_LANE_N
;; before printing.  The register list is REGNO (operands[1]) + 0, + 2, + 4
;; and + 6.
6291(define_insn "neon_vst4_lane<mode>"
6292  [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
6293        (unspec:<V_four_elem>
6294           [(match_operand:OI 1 "s_register_operand" "w")
6295            (match_operand:SI 2 "immediate_operand" "i")
6296            (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6297           UNSPEC_VST4_LANE))]
6298  "TARGET_NEON"
6299{
6300  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
6301  int regno = REGNO (operands[1]);
6302  rtx ops[6];
6303  ops[0] = operands[0];
6304  ops[1] = gen_rtx_REG (DImode, regno);
6305  ops[2] = gen_rtx_REG (DImode, regno + 2);
6306  ops[3] = gen_rtx_REG (DImode, regno + 4);
6307  ops[4] = gen_rtx_REG (DImode, regno + 6);
6308  ops[5] = GEN_INT (lane);
6309  output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
6310                   ops);
6311  return "";
6312}
6313  [(set_attr "type" "neon_store4_one_lane<q>")]
6314)
6315
6316;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
6317;; here on big endian targets.
;; Single-lane vst4 for the Q-register modes in VQ_HS (XI source group).
;; After the NEON_ENDIAN_LANE_N mapping, a lane in the upper half of the
;; vector (lane >= nunits / 2) is rebased into the lower half and the
;; starting register number is advanced by 2.  The register list then steps
;; by 4 from that start (+ 0, + 4, + 8, + 12).
;; The "type" is neon_store4_one_lane<q>, the same class the D-register
;; neon_vst4_lane<mode> pattern uses: this insn stores a single lane, not
;; four whole registers.
6318(define_insn "neon_vst4_lane<mode>"
6319  [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
6320        (unspec:<V_four_elem>
6321           [(match_operand:XI 1 "s_register_operand" "w")
6322            (match_operand:SI 2 "immediate_operand" "i")
6323            (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6324           UNSPEC_VST4_LANE))]
6325  "TARGET_NEON"
6326{
6327  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
6328  HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
6329  int regno = REGNO (operands[1]);
6330  rtx ops[6];
6331  if (lane >= max / 2)
6332    {
6333      lane -= max / 2;
6334      regno += 2;
6335    }
6336  ops[0] = operands[0];
6337  ops[1] = gen_rtx_REG (DImode, regno);
6338  ops[2] = gen_rtx_REG (DImode, regno + 4);
6339  ops[3] = gen_rtx_REG (DImode, regno + 8);
6340  ops[4] = gen_rtx_REG (DImode, regno + 12);
6341  ops[5] = GEN_INT (lane);
6342  output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
6343                   ops);
6344  return "";
6345}
6346  [(set_attr "type" "neon_store4_one_lane<q>")]
6347)
6348
;; Widen the low half of a VU vector.  Operand 2 must satisfy
;; vect_par_constant_low and selects the <V_HALF> sub-vector of operand 1,
;; which is then extended (SE iterator) to <V_unpack>.  Prints vmovl on %e1.
;; Only enabled when !BYTES_BIG_ENDIAN.
6349(define_insn "neon_vec_unpack<US>_lo_<mode>"
6350  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6351        (SE:<V_unpack> (vec_select:<V_HALF>
6352			  (match_operand:VU 1 "register_operand" "w")
6353			  (match_operand:VU 2 "vect_par_constant_low" ""))))]
6354  "TARGET_NEON && !BYTES_BIG_ENDIAN"
6355  "vmovl.<US><V_sz_elem> %q0, %e1"
6356  [(set_attr "type" "neon_shift_imm_long")]
6357)
6358
;; Widen the high half of a VU vector.  Operand 2 must satisfy
;; vect_par_constant_high and selects the <V_HALF> sub-vector of operand 1,
;; which is then extended (SE iterator) to <V_unpack>.  Prints vmovl on %f1.
;; Only enabled when !BYTES_BIG_ENDIAN.
6359(define_insn "neon_vec_unpack<US>_hi_<mode>"
6360  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6361        (SE:<V_unpack> (vec_select:<V_HALF>
6362			  (match_operand:VU 1 "register_operand" "w")
6363			  (match_operand:VU 2 "vect_par_constant_high" ""))))]
6364  "TARGET_NEON && !BYTES_BIG_ENDIAN"
6365  "vmovl.<US><V_sz_elem> %q0, %f1"
6366  [(set_attr "type" "neon_shift_imm_long")]
6367)
6368
;; Expander vec_unpack<US>_hi_<mode> for the VU modes.  Builds a PARALLEL
;; holding the indices <V_mode_nunits>/2 .. <V_mode_nunits>-1 and passes it
;; as the selector operand of neon_vec_unpack<US>_hi_<mode>.
;; Only enabled when !BYTES_BIG_ENDIAN.
6369(define_expand "vec_unpack<US>_hi_<mode>"
6370  [(match_operand:<V_unpack> 0 "register_operand")
6371   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
6372 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6373  {
6374   rtvec v = rtvec_alloc (<V_mode_nunits>/2)  ;
6375   rtx t1;
6376   int i;
6377   for (i = 0; i < (<V_mode_nunits>/2); i++)
6378     RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i);
6379
6380   t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6381   emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0],
6382                                                 operands[1],
6383					         t1));
6384   DONE;
6385  }
6386)
6387
;; Expander vec_unpack<US>_lo_<mode> for the VU modes.  Builds a PARALLEL
;; holding the indices 0 .. <V_mode_nunits>/2 - 1 and passes it as the
;; selector operand of neon_vec_unpack<US>_lo_<mode>.
;; Only enabled when !BYTES_BIG_ENDIAN.
6388(define_expand "vec_unpack<US>_lo_<mode>"
6389  [(match_operand:<V_unpack> 0 "register_operand")
6390   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
6391 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6392  {
6393   rtvec v = rtvec_alloc (<V_mode_nunits>/2)  ;
6394   rtx t1;
6395   int i;
6396   for (i = 0; i < (<V_mode_nunits>/2) ; i++)
6397     RTVEC_ELT (v, i) = GEN_INT (i);
6398   t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6399   emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0],
6400                                                 operands[1],
6401				   	         t1));
6402   DONE;
6403  }
6404)
6405
;; Widening multiply of the low halves of two VU vectors (operands 1 and 3).
;; Operand 2 (vect_par_constant_low) is the selector for operand 1, and
;; (match_dup 2) makes operand 3 use the same selector.  Prints vmull on
;; %e1 and %e3.  Only enabled when !BYTES_BIG_ENDIAN.
6406(define_insn "neon_vec_<US>mult_lo_<mode>"
6407 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6408       (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
6409			   (match_operand:VU 1 "register_operand" "w")
6410                           (match_operand:VU 2 "vect_par_constant_low" "")))
6411 		        (SE:<V_unpack> (vec_select:<V_HALF>
6412                           (match_operand:VU 3 "register_operand" "w")
6413                           (match_dup 2)))))]
6414  "TARGET_NEON && !BYTES_BIG_ENDIAN"
6415  "vmull.<US><V_sz_elem> %q0, %e1, %e3"
6416  [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
6417)
6418
;; Expander vec_widen_<US>mult_lo_<mode> for the VU modes.  Builds a PARALLEL
;; of the indices 0 .. <V_mode_nunits>/2 - 1 and emits
;; neon_vec_<US>mult_lo_<mode> with it.  Note the argument order: the
;; selector is the insn's operand 2, so expander operand 2 is passed last.
;; Only enabled when !BYTES_BIG_ENDIAN.
6419(define_expand "vec_widen_<US>mult_lo_<mode>"
6420  [(match_operand:<V_unpack> 0 "register_operand")
6421   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
6422   (SE:<V_unpack> (match_operand:VU 2 "register_operand"))]
6423 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6424 {
6425   rtvec v = rtvec_alloc (<V_mode_nunits>/2)  ;
6426   rtx t1;
6427   int i;
6428   for (i = 0; i < (<V_mode_nunits>/2) ; i++)
6429     RTVEC_ELT (v, i) = GEN_INT (i);
6430   t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6431
6432   emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0],
6433 					       operands[1],
6434					       t1,
6435					       operands[2]));
6436   DONE;
6437 }
6438)
6439
;; Widening multiply of the high halves of two VU vectors (operands 1 and 3).
;; Operand 2 (vect_par_constant_high) is the selector for operand 1, and
;; (match_dup 2) makes operand 3 use the same selector.  Prints vmull on
;; %f1 and %f3.  Only enabled when !BYTES_BIG_ENDIAN.
6440(define_insn "neon_vec_<US>mult_hi_<mode>"
6441 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6442      (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
6443			    (match_operand:VU 1 "register_operand" "w")
6444			    (match_operand:VU 2 "vect_par_constant_high" "")))
6445		       (SE:<V_unpack> (vec_select:<V_HALF>
6446			    (match_operand:VU 3 "register_operand" "w")
6447			    (match_dup 2)))))]
6448  "TARGET_NEON && !BYTES_BIG_ENDIAN"
6449  "vmull.<US><V_sz_elem> %q0, %f1, %f3"
6450  [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
6451)
6452
;; Expander vec_widen_<US>mult_hi_<mode> for the VU modes.  Builds a PARALLEL
;; of the indices <V_mode_nunits>/2 .. <V_mode_nunits>-1 and emits
;; neon_vec_<US>mult_hi_<mode> with it.  Note the argument order: the
;; selector is the insn's operand 2, so expander operand 2 is passed last.
;; Only enabled when !BYTES_BIG_ENDIAN.
6453(define_expand "vec_widen_<US>mult_hi_<mode>"
6454  [(match_operand:<V_unpack> 0 "register_operand")
6455   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
6456   (SE:<V_unpack> (match_operand:VU 2 "register_operand"))]
6457 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6458 {
6459   rtvec v = rtvec_alloc (<V_mode_nunits>/2)  ;
6460   rtx t1;
6461   int i;
6462   for (i = 0; i < (<V_mode_nunits>/2) ; i++)
6463     RTVEC_ELT (v, i) = GEN_INT (<V_mode_nunits>/2 + i);
6464   t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6465
6466   emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0],
6467 					       operands[1],
6468					       t1,
6469					       operands[2]));
6470   DONE;
6471
6472 }
6473)
6474
;; Widening shift left: operand 1 (a VW vector) is shifted left by the
;; constant operand 2 and the result extended (SE iterator) to <V_widen>.
;; Prints "vshll.<US><V_sz_elem>" with a Q destination and a D source.
6475(define_insn "neon_vec_<US>shiftl_<mode>"
6476 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6477       (SE:<V_widen> (ashift:VW (match_operand:VW 1 "register_operand" "w")
6478       (match_operand:<V_innermode> 2 "const_neon_scalar_shift_amount_operand" ""))))]
6479  "TARGET_NEON"
6480{
6481  return "vshll.<US><V_sz_elem> %q0, %P1, %2";
6482}
6483  [(set_attr "type" "neon_shift_imm_long")]
6484)
6485
;; Expander vec_widen_<US>shiftl_lo_<mode> for the VU modes.  Takes the
;; <V_HALF>mode subreg of operand 1 at byte offset 0 and hands it, with the
;; shift amount in operand 2, to neon_vec_<US>shiftl_<V_half>.
;; Only enabled when !BYTES_BIG_ENDIAN.
6486(define_expand "vec_widen_<US>shiftl_lo_<mode>"
6487  [(match_operand:<V_unpack> 0 "register_operand")
6488   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
6489   (match_operand:SI 2 "immediate_operand")]
6490 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6491 {
6492  emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
6493		simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0),
6494		operands[2]));
6495   DONE;
6496 }
6497)
6498
;; Expander vec_widen_<US>shiftl_hi_<mode> for the VU modes.  Takes the
;; <V_HALF>mode subreg of operand 1 at byte offset
;; GET_MODE_SIZE (<V_HALF>mode) and hands it, with the shift amount in
;; operand 2, to neon_vec_<US>shiftl_<V_half>.
;; Only enabled when !BYTES_BIG_ENDIAN.
6499(define_expand "vec_widen_<US>shiftl_hi_<mode>"
6500  [(match_operand:<V_unpack> 0 "register_operand")
6501   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
6502   (match_operand:SI 2 "immediate_operand")]
6503 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6504 {
6505  emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
6506                simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
6507				     GET_MODE_SIZE (<V_HALF>mode)),
6508                operands[2]));
6509   DONE;
6510 }
6511)
6512
6513;; Vectorize for non-neon-quad case
;; Extend (SE iterator) a whole VDI vector, operand 1, to <V_widen>.
;; Prints vmovl with a Q destination and a D source.
6514(define_insn "neon_unpack<US>_<mode>"
6515 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6516       (SE:<V_widen> (match_operand:VDI 1 "register_operand" "w")))]
6517 "TARGET_NEON"
6518 "vmovl.<US><V_sz_elem> %q0, %P1"
6519  [(set_attr "type" "neon_move")]
6520)
6521
;; Expander vec_unpack<US>_lo_<mode> for the VDI modes.  Widens all of
;; operand 1 into a fresh <V_widen>mode temporary with
;; neon_unpack<US>_<mode>, then copies the low half of that temporary into
;; operand 0 via neon_vget_low<V_widen_l>.
6522(define_expand "vec_unpack<US>_lo_<mode>"
6523 [(match_operand:<V_double_width> 0 "register_operand")
6524  (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
6525 "TARGET_NEON"
6526{
6527  rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6528  emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
6529  emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
6530
6531  DONE;
6532}
6533)
6534
;; Expander vec_unpack<US>_hi_<mode> for the VDI modes.  Widens all of
;; operand 1 into a fresh <V_widen>mode temporary with
;; neon_unpack<US>_<mode>, then copies the high half of that temporary into
;; operand 0 via neon_vget_high<V_widen_l>.
6535(define_expand "vec_unpack<US>_hi_<mode>"
6536 [(match_operand:<V_double_width> 0 "register_operand")
6537  (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
6538 "TARGET_NEON"
6539{
6540  rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6541  emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
6542  emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
6543
6544  DONE;
6545}
6546)
6547
;; Widening multiply of two whole VDI vectors: both inputs are extended (SE
;; iterator) to <V_widen> and multiplied.  Prints vmull with a Q destination
;; and two D sources.
6548(define_insn "neon_vec_<US>mult_<mode>"
6549 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6550       (mult:<V_widen> (SE:<V_widen>
6551		 	   (match_operand:VDI 1 "register_operand" "w"))
6552 		       (SE:<V_widen>
6553			   (match_operand:VDI 2 "register_operand" "w"))))]
6554  "TARGET_NEON"
6555  "vmull.<US><V_sz_elem> %q0, %P1, %P2"
6556  [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
6557)
6558
;; Expander vec_widen_<US>mult_hi_<mode> for the VDI modes.  Computes the
;; full widening product of operands 1 and 2 into a fresh <V_widen>mode
;; temporary (neon_vec_<US>mult_<mode>), then copies its high half into
;; operand 0 via neon_vget_high<V_widen_l>.
6559(define_expand "vec_widen_<US>mult_hi_<mode>"
6560  [(match_operand:<V_double_width> 0 "register_operand")
6561   (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
6562   (SE:<V_double_width> (match_operand:VDI 2 "register_operand"))]
6563 "TARGET_NEON"
6564 {
6565   rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6566   emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
6567   emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
6568
6569   DONE;
6570
6571 }
6572)
6573
;; Expander vec_widen_<US>mult_lo_<mode> for the VDI modes.  Computes the
;; full widening product of operands 1 and 2 into a fresh <V_widen>mode
;; temporary (neon_vec_<US>mult_<mode>), then copies its low half into
;; operand 0 via neon_vget_low<V_widen_l>.
6574(define_expand "vec_widen_<US>mult_lo_<mode>"
6575  [(match_operand:<V_double_width> 0 "register_operand")
6576   (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
6577   (SE:<V_double_width> (match_operand:VDI 2 "register_operand"))]
6578 "TARGET_NEON"
6579 {
6580   rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6581   emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
6582   emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
6583
6584   DONE;
6585
6586 }
6587)
6588
;; Expander vec_widen_<US>shiftl_hi_<mode> for the VDI modes.  Performs the
;; widening shift of operand 1 by operand 2 into a fresh <V_widen>mode
;; temporary (neon_vec_<US>shiftl_<mode>), then copies its high half into
;; operand 0 via neon_vget_high<V_widen_l>.
6589(define_expand "vec_widen_<US>shiftl_hi_<mode>"
6590 [(match_operand:<V_double_width> 0 "register_operand")
6591   (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
6592   (match_operand:SI 2 "immediate_operand")]
6593 "TARGET_NEON"
6594 {
6595   rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6596   emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
6597   emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
6598
6599   DONE;
6600 }
6601)
6602
;; Expander vec_widen_<US>shiftl_lo_<mode> for the VDI modes.  Performs the
;; widening shift of operand 1 by operand 2 into a fresh <V_widen>mode
;; temporary (neon_vec_<US>shiftl_<mode>), then copies its low half into
;; operand 0 via neon_vget_low<V_widen_l>.
6603(define_expand "vec_widen_<US>shiftl_lo_<mode>"
6604  [(match_operand:<V_double_width> 0 "register_operand")
6605   (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
6606   (match_operand:SI 2 "immediate_operand")]
6607 "TARGET_NEON"
6608 {
6609   rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6610   emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
6611   emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
6612
6613   DONE;
6614 }
6615)
6616
6617; FIXME: These instruction patterns can't be used safely in big-endian mode
6618; because the ordering of vector elements in Q registers is different from what
6619; the semantics of the instructions require.
6620
;; Pack two VN vectors by truncation: the result is the concatenation of
;; truncate (operand 1) and truncate (operand 2).  Emitted as two vmovn
;; instructions (hence type "multiple", length 8).  The first writes %e0
;; before the second reads %q2, which is why operand 0 is earlyclobber
;; ("=&w").  Only enabled when !BYTES_BIG_ENDIAN.
6621(define_insn "vec_pack_trunc_<mode>"
6622 [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
6623       (vec_concat:<V_narrow_pack>
6624		(truncate:<V_narrow>
6625			(match_operand:VN 1 "register_operand" "w"))
6626		(truncate:<V_narrow>
6627			(match_operand:VN 2 "register_operand" "w"))))]
6628 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6629 "vmovn.i<V_sz_elem>\t%e0, %q1\;vmovn.i<V_sz_elem>\t%f0, %q2"
6630 [(set_attr "type" "multiple")
6631  (set_attr "length" "8")]
6632)
6633
6634;; For the non-quad case.
;; Truncate a single VN vector, operand 1, to <V_narrow> with one vmovn
;; (Q source, D destination).  Only enabled when !BYTES_BIG_ENDIAN.
6635(define_insn "neon_vec_pack_trunc_<mode>"
6636 [(set (match_operand:<V_narrow> 0 "register_operand" "=w")
6637       (truncate:<V_narrow> (match_operand:VN 1 "register_operand" "w")))]
6638 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6639 "vmovn.i<V_sz_elem>\t%P0, %q1"
6640 [(set_attr "type" "neon_move_narrow_q")]
6641)
6642
;; Expander vec_pack_trunc_<mode> for the VSHFT modes.  Assembles operands 1
;; and 2 into the low and high halves of a fresh <V_DOUBLE>mode temporary
;; (move_lo_quad / move_hi_quad), then narrows that temporary into operand 0
;; with neon_vec_pack_trunc_<V_double>.  Only enabled when !BYTES_BIG_ENDIAN.
6643(define_expand "vec_pack_trunc_<mode>"
6644 [(match_operand:<V_narrow_pack> 0 "register_operand")
6645  (match_operand:VSHFT 1 "register_operand")
6646  (match_operand:VSHFT 2 "register_operand")]
6647 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6648{
6649  rtx tempreg = gen_reg_rtx (<V_DOUBLE>mode);
6650
6651  emit_insn (gen_move_lo_quad_<V_double> (tempreg, operands[1]));
6652  emit_insn (gen_move_hi_quad_<V_double> (tempreg, operands[2]));
6653  emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg));
6654  DONE;
6655})
6656
;; Matches abs (operand 1 - operand 2) on VF vectors written with a plain
;; (minus ...) and prints a single vabd.  Only enabled under
;; flag_unsafe_math_optimizations.
6657(define_insn "neon_vabd<mode>_2"
6658 [(set (match_operand:VF 0 "s_register_operand" "=w")
6659       (abs:VF (minus:VF (match_operand:VF 1 "s_register_operand" "w")
6660			 (match_operand:VF 2 "s_register_operand" "w"))))]
6661 "TARGET_NEON && flag_unsafe_math_optimizations"
6662 "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
6663 [(set_attr "type" "neon_fp_abd_s<q>")]
6664)
6665
;; Same as neon_vabd<mode>_2, but matches the subtraction when it is
;; expressed as an UNSPEC_VSUB of operands 1 and 2 instead of (minus ...).
;; Only enabled under flag_unsafe_math_optimizations.
6666(define_insn "neon_vabd<mode>_3"
6667 [(set (match_operand:VF 0 "s_register_operand" "=w")
6668       (abs:VF (unspec:VF [(match_operand:VF 1 "s_register_operand" "w")
6669			    (match_operand:VF 2 "s_register_operand" "w")]
6670		UNSPEC_VSUB)))]
6671 "TARGET_NEON && flag_unsafe_math_optimizations"
6672 "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
6673 [(set_attr "type" "neon_fp_abd_s<q>")]
6674)
6675
;; Accumulating matrix multiply (TARGET_I8MM).  The V4SI result is operand 1
;; (tied to the output by constraint "0") plus a MATMUL-iterator unspec of
;; the two V16QI operands 2 and 3.  Prints "v<sup>mmla.<mmla_sfx>" on Q
;; registers.
6676(define_insn "neon_<sup>mmlav16qi"
6677  [(set (match_operand:V4SI 0 "register_operand" "=w")
6678	(plus:V4SI
6679	 (unspec:V4SI [(match_operand:V16QI 2 "register_operand" "w")
6680		       (match_operand:V16QI 3 "register_operand" "w")] MATMUL)
6681	 (match_operand:V4SI 1 "register_operand" "0")))]
6682  "TARGET_I8MM"
6683  "v<sup>mmla.<mmla_sfx>\t%q0, %q2, %q3"
6684  [(set_attr "type" "neon_mla_s_q")]
6685)
6686
;; BF16 dot product with accumulate (TARGET_BF16_SIMD).  Operand 1 is the
;; VCVTF accumulator tied to the output (constraint "0"); operands 2 and 3
;; are the <VSF2BF> inputs wrapped in UNSPEC_DOT_S.  Prints "vdot.bf16".
6687(define_insn "neon_vbfdot<VCVTF:mode>"
6688  [(set (match_operand:VCVTF 0 "register_operand" "=w")
6689	(plus:VCVTF (match_operand:VCVTF 1 "register_operand" "0")
6690		    (unspec:VCVTF [
6691			    (match_operand:<VSF2BF> 2 "register_operand" "w")
6692			    (match_operand:<VSF2BF> 3 "register_operand" "w")]
6693		     UNSPEC_DOT_S)))]
6694  "TARGET_BF16_SIMD"
6695  "vdot.bf16\\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
6696  [(set_attr "type" "neon_dot<q>")]
6697)
6698
;; By-lane BF16 dot product with accumulate, where the indexed vector
;; (operand 3) is V4BF and uses register constraint "x".  Operand 4 is the
;; immediate lane index, printed directly as %P3[%c4].
6699(define_insn "neon_vbfdot_lanev4bf<VCVTF:mode>"
6700  [(set (match_operand:VCVTF 0 "register_operand" "=w")
6701	(plus:VCVTF (match_operand:VCVTF 1 "register_operand" "0")
6702		    (unspec:VCVTF [
6703			    (match_operand:<VSF2BF> 2 "register_operand" "w")
6704			    (match_operand:V4BF 3 "register_operand" "x")
6705			    (match_operand:SI 4 "immediate_operand" "i")]
6706		     UNSPEC_DOT_S)))]
6707  "TARGET_BF16_SIMD"
6708  "vdot.bf16\\t%<V_reg>0, %<V_reg>2, %P3[%c4]"
6709  [(set_attr "type" "neon_dot<q>")]
6710)
6711
;; By-lane BF16 dot product with accumulate, where the indexed vector
;; (operand 3) is V8BF.  "half" is the number of units in operand 3's mode
;; divided by 4.  A lane index below "half" is printed against %e3
;; unchanged; otherwise operand 4 is rewritten to (lane - half) and printed
;; against %f3.
6712(define_insn "neon_vbfdot_lanev8bf<VCVTF:mode>"
6713  [(set (match_operand:VCVTF 0 "register_operand" "=w")
6714	(plus:VCVTF (match_operand:VCVTF 1 "register_operand" "0")
6715		    (unspec:VCVTF [
6716			    (match_operand:<VSF2BF> 2 "register_operand" "w")
6717			    (match_operand:V8BF 3 "register_operand" "x")
6718			    (match_operand:SI 4 "immediate_operand" "i")]
6719		     UNSPEC_DOT_S)))]
6720  "TARGET_BF16_SIMD"
6721  {
6722    int lane = INTVAL (operands[4]);
6723    int half = GET_MODE_NUNITS (GET_MODE (operands[3])) / 4;
6724    if (lane < half)
6725      return "vdot.bf16\\t%<V_reg>0, %<V_reg>2, %e3[%c4]";
6726    else
6727      {
6728	operands[4] = GEN_INT (lane - half);
6729	return "vdot.bf16\\t%<V_reg>0, %<V_reg>2, %f3[%c4]";
6730      }
6731  }
6732  [(set_attr "type" "neon_dot<q>")]
6733)
6734
;; Narrowing conversion of a V4SF vector to BFloat16 (UNSPEC_BFCVT).
;; Instantiated for each mode of the VBFCVT iterator; the destination is
;; printed through the <V_bf_low> mode attribute and the V4SF source with %q.
;; Requires TARGET_BF16_SIMD.
6735(define_insn "neon_vbfcvtv4sf<VBFCVT:mode>"
6736  [(set (match_operand:VBFCVT 0 "register_operand" "=w")
6737       (unspec:VBFCVT [(match_operand:V4SF 1 "register_operand" "w")]
6738	UNSPEC_BFCVT))]
6739  "TARGET_BF16_SIMD"
6740  "vcvt.bf16.f32\\t%<V_bf_low>0, %q1"
6741  [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
6742)
6743
;; Narrowing conversion of a V4SF vector (operand 2) into a V8BF destination,
;; "high" form (UNSPEC_BFCVT_HIGH).
;; Operand 1 is an existing V8BF value tied to the destination by the "0"
;; constraint; the instruction itself only names the %f0 part of that
;; register as its output.
;; Requires TARGET_BF16_SIMD.
6744(define_insn "neon_vbfcvtv4sf_highv8bf"
6745  [(set (match_operand:V8BF 0 "register_operand" "=w")
6746       (unspec:V8BF [(match_operand:V8BF 1 "register_operand" "0")
6747		     (match_operand:V4SF 2 "register_operand" "w")]
6748	UNSPEC_BFCVT_HIGH))]
6749  "TARGET_BF16_SIMD"
6750  "vcvt.bf16.f32\\t%f0, %q2"
6751  [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
6752)
6753
;; Scalar conversion of an SF value to BF (UNSPEC_BFCVT) via vcvtb.bf16.f32.
;; Both operands use the "t" register constraint.  Unlike the vector
;; conversions this is gated on TARGET_BF16_FP and typed as "f_cvt".
6754(define_insn "neon_vbfcvtsf"
6755  [(set (match_operand:BF 0 "register_operand" "=t")
6756       (unspec:BF [(match_operand:SF 1 "register_operand" "t")]
6757	UNSPEC_BFCVT))]
6758  "TARGET_BF16_FP"
6759  "vcvtb.bf16.f32\\t%0, %1"
6760  [(set_attr "type" "f_cvt")]
6761)
6762
;; Widening conversion from a BFloat16 vector (any VBFCVT mode) to V4SF
;; (UNSPEC_BFCVT).  No dedicated convert instruction is used: the pattern
;; emits a widening left shift by 16 (vshll) from the <V_bf_low> part of the
;; source into the full destination register.
;; NOTE(review): the template uses the .u32 element suffix although the
;; source elements are 16-bit; confirm against the VSHLL description in the
;; Arm ARM that this yields the intended per-element result.
;; Requires TARGET_BF16_SIMD.
6763(define_insn "neon_vbfcvt<VBFCVT:mode>"
6764  [(set (match_operand:V4SF 0 "register_operand" "=w")
6765       (unspec:V4SF [(match_operand:VBFCVT 1 "register_operand" "w")]
6766	UNSPEC_BFCVT))]
6767  "TARGET_BF16_SIMD"
6768  "vshll.u32\\t%q0, %<V_bf_low>1, #16"
6769  [(set_attr "type" "neon_shift_imm_q")]
6770)
6771
;; Widening conversion of the "high" part of a V8BF vector to V4SF
;; (UNSPEC_BFCVT_HIGH).  Emitted as a widening left shift by 16 (vshll) whose
;; source is the %f1 part of operand 1.
;; NOTE(review): as with the non-high variant, the .u32 element suffix is
;; used with 16-bit source elements; confirm against the Arm ARM.
;; Requires TARGET_BF16_SIMD.
6772(define_insn "neon_vbfcvt_highv8bf"
6773  [(set (match_operand:V4SF 0 "register_operand" "=w")
6774       (unspec:V4SF [(match_operand:V8BF 1 "register_operand" "w")]
6775	UNSPEC_BFCVT_HIGH))]
6776  "TARGET_BF16_SIMD"
6777  "vshll.u32\\t%q0, %f1, #16"
6778  [(set_attr "type" "neon_shift_imm_q")]
6779)
6780
6781;; Convert a BF scalar operand to SF via VSHL.
6782;; VSHL doesn't accept 32-bit registers where the BF and SF scalar operands
6783;; would be allocated, therefore the operands must be converted to intermediate
6784;; vectors (i.e. V2SI) in order to apply 64-bit registers.
;; The expander always emits its own three-insn sequence and finishes with
;; DONE, so the RTL template below only describes the operands.
6785(define_expand "neon_vbfcvtbf"
6786  [(match_operand:SF 0 "register_operand")
6787   (unspec:SF [(match_operand:BF 1 "register_operand")] UNSPEC_BFCVT)]
6788  "TARGET_BF16_FP"
6789{
  /* op1 receives the BF input viewed as V2SI; op0 receives the shifted
     result, which is then viewed as SF.  */
6790  rtx op0 = gen_reg_rtx (V2SImode);
6791  rtx op1 = gen_reg_rtx (V2SImode);
  /* Step 1: BF -> V2SI through the cvtmode helper pattern.  */
6792  emit_insn (gen_neon_vbfcvtbf_cvtmodev2si (op1, operands[1]));
  /* Step 2: shift the V2SI value left by 16.  */
6793  emit_insn (gen_neon_vshl_nv2si (op0, op1, gen_int_mode(16, SImode)));
  /* Step 3: V2SI -> SF through the cvtmode helper pattern.  */
6794  emit_insn (gen_neon_vbfcvtbf_cvtmodesf (operands[0], op0));
6795  DONE;
6796})
6797
6798;; Convert BF mode to V2SI and V2SI to SF.
6799;; Implement this by allocating a 32-bit operand in the low half of a 64-bit
6800;; register indexed by a 32-bit sub-register number.
6801;; This will generate reloads but the compiler can optimize out the moves.
6802;; Use 'x' constraint to guarantee the 32-bit sub-registers are in an
6803;; indexable range, so as to avoid extra moves.
;; The input is tied to the output by the "0" constraint and the output
;; template is the empty string, so the pattern itself prints no instruction.
;; It is instantiated for each mode of VBFCVTM, with the source mode given by
;; the <V_bf_cvt_m> attribute.
6804(define_insn "neon_vbfcvtbf_cvtmode<mode>"
6805  [(set (match_operand:VBFCVTM 0 "register_operand" "=x")
6806       (unspec:VBFCVTM [(match_operand:<V_bf_cvt_m> 1 "register_operand" "0")]
6807	UNSPEC_BFCVT))]
6808  "TARGET_BF16_FP"
6809  ""
6810)
6811
;; BFloat16 matrix multiply-accumulate.
;; Operand 0 (V4SF) is accumulator operand 1, tied to the destination by "0",
;; plus the UNSPEC_BFMMLA of the two V8BF inputs (operands 2 and 3).
;; Emits vmmla.bf16 with all registers printed via %q.
;; Requires TARGET_BF16_SIMD.
6812(define_insn "neon_vmmlav8bf"
6813  [(set (match_operand:V4SF 0 "register_operand" "=w")
6814        (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
6815                   (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
6816                                 (match_operand:V8BF 3 "register_operand" "w")]
6817                    UNSPEC_BFMMLA)))]
6818  "TARGET_BF16_SIMD"
6819  "vmmla.bf16\\t%q0, %q2, %q3"
6820  [(set_attr "type" "neon_fp_mla_s_q")]
6821)
6822
;; BFloat16 fused multiply-accumulate into V4SF, vector by vector.
;; One pattern is generated per member of the BF_MA iterator; the <bt>
;; attribute supplies the matching suffix for both the pattern name and the
;; vfma<bt>.bf16 mnemonic.  Operand 1 is the accumulator (tied to the
;; destination), operands 2 and 3 are the V8BF inputs.
;; Requires TARGET_BF16_SIMD.
6823(define_insn "neon_vfma<bt>v8bf"
6824  [(set (match_operand:V4SF 0 "register_operand" "=w")
6825        (plus: V4SF (match_operand:V4SF 1 "register_operand" "0")
6826                    (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
6827                                  (match_operand:V8BF 3 "register_operand" "w")]
6828                     BF_MA)))]
6829  "TARGET_BF16_SIMD"
6830  "vfma<bt>.bf16\\t%q0, %q2, %q3"
6831  [(set_attr "type" "neon_fp_mla_s_q")]
6832)
6833
;; BFloat16 fused multiply-accumulate into V4SF, by-lane form.
;; Operand 1 is the accumulator (tied to the destination), operand 2 the V8BF
;; vector input, operand 3 a V4BF register ("x" constraint) that is indexed,
;; and operand 4 the constant lane number printed verbatim as [%c4].
;; Note that operand 3 must be in V4BF mode even though the pattern name ends
;; in v8bf; callers holding a V8BF value have to pass one half of it.
;; Requires TARGET_BF16_SIMD.
6834(define_insn "neon_vfma<bt>_lanev8bf"
6835  [(set (match_operand:V4SF 0 "register_operand" "=w")
6836        (plus: V4SF (match_operand:V4SF 1 "register_operand" "0")
6837                    (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
6838                                  (match_operand:V4BF 3 "register_operand" "x")
6839                                  (match_operand:SI 4 "const_int_operand" "n")]
6840                     BF_MA)))]
6841  "TARGET_BF16_SIMD"
6842  "vfma<bt>.bf16\\t%q0, %q2, %P3[%c4]"
6843  [(set_attr "type" "neon_fp_mla_s_scalar_q")]
6844)
6845
;; BFloat16 fused multiply-accumulate into V4SF, by-lane form where the lane
;; source (operand 3) is a full V8BF vector and operand 4 is a lane number in
;; the range 0..7.
;; The work is delegated to neon_vfma<bt>_lanev8bf, whose operand 3 is a V4BF
;; register.  The expander therefore first extracts the V4BF half of operand 3
;; that contains the requested lane (low half for lanes 0..3, high half for
;; lanes 4..7, with the index rebased by 4) and passes that half on.
;; Requires TARGET_BF16_SIMD.
6846(define_expand "neon_vfma<bt>_laneqv8bf"
6847  [(set (match_operand:V4SF 0 "register_operand" "=w")
6848        (plus: V4SF (match_operand:V4SF 1 "register_operand" "0")
6849                    (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
6850                                  (match_operand:V8BF 3 "register_operand" "x")
6851                                  (match_operand:SI 4 "const_int_operand" "n")]
6852                     BF_MA)))]
6853  "TARGET_BF16_SIMD"
6854  {
6855    int lane = INTVAL (operands[4]);
6856    gcc_assert (IN_RANGE(lane, 0, 7));
6857    if (lane < 4)
6858    {
	/* The lane pattern matches operand 3 in V4BFmode only, so the V8BF
	   input must not be passed through unchanged: hand it the low half,
	   mirroring what the other branch does with the high half.  */
	rtx op_lowpart = gen_reg_rtx (V4BFmode);
	emit_insn (gen_neon_vget_lowv8bf (op_lowpart, operands[3]));
6859	emit_insn (gen_neon_vfma<bt>_lanev8bf (operands[0], operands[1], operands[2], op_lowpart, operands[4]));
6860    }
6861    else
6862      {
	/* Lanes 4..7 live in the high half; rebase the index to 0..3.  */
6863	rtx op_highpart = gen_reg_rtx (V4BFmode);
6864	emit_insn (gen_neon_vget_highv8bf (op_highpart, operands[3]));
6865	operands[4] = GEN_INT (lane - 4);
6866	emit_insn (gen_neon_vfma<bt>_lanev8bf (operands[0], operands[1], operands[2], op_highpart, operands[4]));
6867      }
6868    DONE;
6869  }
6870  [(set_attr "type" "neon_fp_mla_s_scalar_q")]
6871)
6872