xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/config/aarch64/aarch64-simd.md (revision fb5eed702691094bd687fbf1ded189c87457cd35)
1;; Machine description for AArch64 AdvSIMD architecture.
2;; Copyright (C) 2011-2019 Free Software Foundation, Inc.
3;; Contributed by ARM Ltd.
4;;
5;; This file is part of GCC.
6;;
7;; GCC is free software; you can redistribute it and/or modify it
8;; under the terms of the GNU General Public License as published by
9;; the Free Software Foundation; either version 3, or (at your option)
10;; any later version.
11;;
12;; GCC is distributed in the hope that it will be useful, but
13;; WITHOUT ANY WARRANTY; without even the implied warranty of
14;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15;; General Public License for more details.
16;;
17;; You should have received a copy of the GNU General Public License
18;; along with GCC; see the file COPYING3.  If not see
19;; <http://www.gnu.org/licenses/>.
20
;; Expander for general vector moves (all vector modes incl. FP16).  Legalises
;; the operands: a store of anything other than a usable zero immediate is
;; forced through a register, per the C comment below.
21(define_expand "mov<mode>"
22  [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
23	(match_operand:VALL_F16 1 "general_operand" ""))]
24  "TARGET_SIMD"
25  "
26  /* Force the operand into a register if it is not an
27     immediate whose use can be replaced with xzr.
28     If the mode is 16 bytes wide, then we will be doing
29     a stp in DI mode, so we check the validity of that.
30     If the mode is 8 bytes wide, then we will be doing a
31     normal str, so the check need not apply.  */
32  if (GET_CODE (operands[0]) == MEM
33      && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
34	   && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
35		&& aarch64_mem_pair_operand (operands[0], DImode))
36	       || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
37      operands[1] = force_reg (<MODE>mode, operands[1]);
38  "
39)
40
;; Expander for misaligned vector moves.  Only legalisation done here is the
;; memory := non-register case described in the C comment; otherwise the move
;; is emitted as-is.
41(define_expand "movmisalign<mode>"
42  [(set (match_operand:VALL 0 "nonimmediate_operand" "")
43        (match_operand:VALL 1 "general_operand" ""))]
44  "TARGET_SIMD"
45{
46  /* This pattern is not permitted to fail during expansion: if both arguments
47     are non-registers (e.g. memory := constant, which can be created by the
48     auto-vectorizer), force operand 1 into a register.  */
49  if (!register_operand (operands[0], <MODE>mode)
50      && !register_operand (operands[1], <MODE>mode))
51    operands[1] = force_reg (<MODE>mode, operands[1]);
52})
53
;; Broadcast a scalar into every lane of an integer vector.  Alternative 0
;; duplicates lane 0 of a SIMD register; alternative 1 (discouraged, "?r")
;; duplicates from a general-purpose register.
54(define_insn "aarch64_simd_dup<mode>"
55  [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
56	(vec_duplicate:VDQ_I
57	  (match_operand:<VEL> 1 "register_operand" "w,?r")))]
58  "TARGET_SIMD"
59  "@
60   dup\\t%0.<Vtype>, %1.<Vetype>[0]
61   dup\\t%0.<Vtype>, %<vw>1"
62  [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
63)
64
;; Broadcast a floating-point scalar (already in a SIMD register) into every
;; lane of a floating-point vector.
65(define_insn "aarch64_simd_dup<mode>"
66  [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
67	(vec_duplicate:VDQF_F16
68	  (match_operand:<VEL> 1 "register_operand" "w")))]
69  "TARGET_SIMD"
70  "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
71  [(set_attr "type" "neon_dup<q>")]
72)
73
;; Broadcast one selected lane of a vector to all lanes of a same-width
;; vector.  The lane index is remapped for big-endian via
;; aarch64_endian_lane_rtx before being printed.
74(define_insn "aarch64_dup_lane<mode>"
75  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
76	(vec_duplicate:VALL_F16
77	  (vec_select:<VEL>
78	    (match_operand:VALL_F16 1 "register_operand" "w")
79	    (parallel [(match_operand:SI 2 "immediate_operand" "i")])
80          )))]
81  "TARGET_SIMD"
82  {
83    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
84    return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
85  }
86  [(set_attr "type" "neon_dup<q>")]
87)
88
;; As aarch64_dup_lane, but the source vector has the opposite (swapped)
;; total width from the destination, so the lane index is endian-adjusted in
;; the source's mode (<VSWAP_WIDTH>).
89(define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
90  [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
91	(vec_duplicate:VALL_F16_NO_V2Q
92	  (vec_select:<VEL>
93	    (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
94	    (parallel [(match_operand:SI 2 "immediate_operand" "i")])
95          )))]
96  "TARGET_SIMD"
97  {
98    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
99    return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
100  }
101  [(set_attr "type" "neon_dup<q>")]
102)
103
;; 64-bit (D-register) vector move.  Alternatives: 0 load, 1 store of zero via
;; XZR, 2 store, 3 SIMD reg-reg, 4 SIMD->GP, 5 GP->SIMD, 6 GP->GP,
;; 7 valid SIMD immediate (printed by aarch64_output_simd_mov_immediate).
104(define_insn "*aarch64_simd_mov<VD:mode>"
105  [(set (match_operand:VD 0 "nonimmediate_operand"
106		"=w, m,  m,  w, ?r, ?w, ?r, w")
107	(match_operand:VD 1 "general_operand"
108		"m,  Dz, w,  w,  w,  r,  r, Dn"))]
109  "TARGET_SIMD
110   && (register_operand (operands[0], <MODE>mode)
111       || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
112{
113   switch (which_alternative)
114     {
115     case 0: return "ldr\t%d0, %1";
116     case 1: return "str\txzr, %0";
117     case 2: return "str\t%d1, %0";
118     case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
119     case 4: return "umov\t%0, %1.d[0]";
120     case 5: return "fmov\t%d0, %1";
121     case 6: return "mov\t%0, %1";
122     case 7:
123	return aarch64_output_simd_mov_immediate (operands[1], 64);
124     default: gcc_unreachable ();
125     }
126}
127  [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
128		     neon_logic<q>, neon_to_gp<q>, f_mcr,\
129		     mov_reg, neon_move<q>")]
130)
131
;; 128-bit (Q-register) vector move.  Zero stores use STP of two XZRs (hence
;; the Umn pair-capable constraint on alt 1).  Moves involving GP registers
;; (alts 4-6) emit "#" and are split later into DImode moves; see the
;; define_splits and aarch64_split_simd_mov below (length 8 for those alts).
132(define_insn "*aarch64_simd_mov<VQ:mode>"
133  [(set (match_operand:VQ 0 "nonimmediate_operand"
134		"=w, Umn,  m,  w, ?r, ?w, ?r, w")
135	(match_operand:VQ 1 "general_operand"
136		"m,  Dz, w,  w,  w,  r,  r, Dn"))]
137  "TARGET_SIMD
138   && (register_operand (operands[0], <MODE>mode)
139       || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
140{
141  switch (which_alternative)
142    {
143    case 0:
144	return "ldr\t%q0, %1";
145    case 1:
146	return "stp\txzr, xzr, %0";
147    case 2:
148	return "str\t%q1, %0";
149    case 3:
150	return "mov\t%0.<Vbtype>, %1.<Vbtype>";
151    case 4:
152    case 5:
153    case 6:
154	return "#";
155    case 7:
156	return aarch64_output_simd_mov_immediate (operands[1], 128);
157    default:
158	gcc_unreachable ();
159    }
160}
161  [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
162		     neon_logic<q>, multiple, multiple,\
163		     multiple, neon_move<q>")
164   (set_attr "length" "4,4,4,4,8,8,8,4")]
165)
166
167;; When storing lane zero we can use the normal STR and its more permissive
168;; addressing modes.
169
;; Store lane 0 of a vector with a scalar STR (more permissive addressing
;; than ST1); condition checks the selected lane is architectural lane 0
;; after endian remapping.
170(define_insn "aarch64_store_lane0<mode>"
171  [(set (match_operand:<VEL> 0 "memory_operand" "=m")
172	(vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
173			(parallel [(match_operand 2 "const_int_operand" "n")])))]
174  "TARGET_SIMD
175   && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
176  "str\\t%<Vetype>1, %0"
177  [(set_attr "type" "neon_store1_1reg<q>")]
178)
179
;; LDP of two D registers.  The condition requires the second memory address
;; to be exactly the first plus the size of the first mode, i.e. the two
;; loads are adjacent.
180(define_insn "load_pair<DREG:mode><DREG2:mode>"
181  [(set (match_operand:DREG 0 "register_operand" "=w")
182	(match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
183   (set (match_operand:DREG2 2 "register_operand" "=w")
184	(match_operand:DREG2 3 "memory_operand" "m"))]
185  "TARGET_SIMD
186   && rtx_equal_p (XEXP (operands[3], 0),
187		   plus_constant (Pmode,
188				  XEXP (operands[1], 0),
189				  GET_MODE_SIZE (<DREG:MODE>mode)))"
190  "ldp\\t%d0, %d2, %1"
191  [(set_attr "type" "neon_ldp")]
192)
193
;; STP of two D registers; mirror image of load_pair above with the same
;; adjacent-address requirement.
194(define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
195  [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump")
196	(match_operand:DREG 1 "register_operand" "w"))
197   (set (match_operand:DREG2 2 "memory_operand" "=m")
198	(match_operand:DREG2 3 "register_operand" "w"))]
199  "TARGET_SIMD
200   && rtx_equal_p (XEXP (operands[2], 0),
201		   plus_constant (Pmode,
202				  XEXP (operands[0], 0),
203				  GET_MODE_SIZE (<DREG:MODE>mode)))"
204  "stp\\t%d1, %d3, %0"
205  [(set_attr "type" "neon_stp")]
206)
207
;; LDP of two Q registers; adjacent-address requirement as for the D-register
;; pair patterns.
208(define_insn "load_pair<VQ:mode><VQ2:mode>"
209  [(set (match_operand:VQ 0 "register_operand" "=w")
210	(match_operand:VQ 1 "aarch64_mem_pair_operand" "Ump"))
211   (set (match_operand:VQ2 2 "register_operand" "=w")
212	(match_operand:VQ2 3 "memory_operand" "m"))]
213  "TARGET_SIMD
214    && rtx_equal_p (XEXP (operands[3], 0),
215		    plus_constant (Pmode,
216			       XEXP (operands[1], 0),
217			       GET_MODE_SIZE (<VQ:MODE>mode)))"
218  "ldp\\t%q0, %q2, %1"
219  [(set_attr "type" "neon_ldp_q")]
220)
221
;; STP of two Q registers; adjacent-address requirement as above.
222(define_insn "vec_store_pair<VQ:mode><VQ2:mode>"
223  [(set (match_operand:VQ 0 "aarch64_mem_pair_operand" "=Ump")
224	(match_operand:VQ 1 "register_operand" "w"))
225   (set (match_operand:VQ2 2 "memory_operand" "=m")
226	(match_operand:VQ2 3 "register_operand" "w"))]
227  "TARGET_SIMD && rtx_equal_p (XEXP (operands[2], 0),
228		plus_constant (Pmode,
229			       XEXP (operands[0], 0),
230			       GET_MODE_SIZE (<VQ:MODE>mode)))"
231  "stp\\t%q1, %q3, %0"
232  [(set_attr "type" "neon_stp_q")]
233)
234
235
;; After reload, split a 128-bit move where both sides ended up in
;; general-purpose registers into two DImode reg-reg moves.
236(define_split
237  [(set (match_operand:VQ 0 "register_operand" "")
238      (match_operand:VQ 1 "register_operand" ""))]
239  "TARGET_SIMD && reload_completed
240   && GP_REGNUM_P (REGNO (operands[0]))
241   && GP_REGNUM_P (REGNO (operands[1]))"
242  [(const_int 0)]
243{
244  aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
245  DONE;
246})
247
;; After reload, split a 128-bit move that crosses between the FP/SIMD and
;; general-purpose register files; aarch64_split_simd_move emits the halves.
248(define_split
249  [(set (match_operand:VQ 0 "register_operand" "")
250        (match_operand:VQ 1 "register_operand" ""))]
251  "TARGET_SIMD && reload_completed
252   && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
253       || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
254  [(const_int 0)]
255{
256  aarch64_split_simd_move (operands[0], operands[1]);
257  DONE;
258})
259
;; Split a 128-bit move between register files into two 64-bit half moves.
;; GP source: write the low then high halves of the SIMD destination
;; (move_lo_quad / move_hi_quad).  SIMD source: extract the low and high
;; halves into the GP destination (mov_from_<mode>low / high).
260(define_expand "@aarch64_split_simd_mov<mode>"
261  [(set (match_operand:VQ 0)
262        (match_operand:VQ 1))]
263  "TARGET_SIMD"
264  {
265    rtx dst = operands[0];
266    rtx src = operands[1];
267
268    if (GP_REGNUM_P (REGNO (src)))
269      {
270        rtx src_low_part = gen_lowpart (<VHALF>mode, src);
271        rtx src_high_part = gen_highpart (<VHALF>mode, src);
272
273        emit_insn
274          (gen_move_lo_quad_<mode> (dst, src_low_part));
275        emit_insn
276          (gen_move_hi_quad_<mode> (dst, src_high_part));
277      }
278
279    else
280      {
281        rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
282        rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
283	rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
284	rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
285
286        emit_insn
287          (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
288        emit_insn
289          (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
290      }
291    DONE;
292  }
293)
294
;; Move the low 64-bit half of a Q register to a general-purpose register
;; (UMOV of d[0]).  Only valid after reload.
295(define_insn "aarch64_simd_mov_from_<mode>low"
296  [(set (match_operand:<VHALF> 0 "register_operand" "=r")
297        (vec_select:<VHALF>
298          (match_operand:VQ 1 "register_operand" "w")
299          (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
300  "TARGET_SIMD && reload_completed"
301  "umov\t%0, %1.d[0]"
302  [(set_attr "type" "neon_to_gp<q>")
303   (set_attr "length" "4")
304  ])
305
;; Move the high 64-bit half of a Q register to a general-purpose register
;; (UMOV of d[1]).  Only valid after reload.
306(define_insn "aarch64_simd_mov_from_<mode>high"
307  [(set (match_operand:<VHALF> 0 "register_operand" "=r")
308        (vec_select:<VHALF>
309          (match_operand:VQ 1 "register_operand" "w")
310          (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
311  "TARGET_SIMD && reload_completed"
312  "umov\t%0, %1.d[1]"
313  [(set_attr "type" "neon_to_gp<q>")
314   (set_attr "length" "4")
315  ])
316
;; OR-NOT: op0 = op2 | ~op1.  Note the operand swap in the output template:
;; ORN negates its second assembly operand, which is RTL operand 1 here.
317(define_insn "orn<mode>3"
318 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
319       (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
320		(match_operand:VDQ_I 2 "register_operand" "w")))]
321 "TARGET_SIMD"
322 "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
323  [(set_attr "type" "neon_logic<q>")]
324)
325
;; Bit-clear: op0 = op2 & ~op1.  As with ORN, the negated RTL operand 1 is
;; printed as BIC's second assembly operand.
326(define_insn "bic<mode>3"
327 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
328       (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
329		(match_operand:VDQ_I 2 "register_operand" "w")))]
330 "TARGET_SIMD"
331 "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
332  [(set_attr "type" "neon_logic<q>")]
333)
334
;; Vector integer addition.
335(define_insn "add<mode>3"
336  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
337        (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
338		  (match_operand:VDQ_I 2 "register_operand" "w")))]
339  "TARGET_SIMD"
340  "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
341  [(set_attr "type" "neon_add<q>")]
342)
343
;; Vector integer subtraction.
344(define_insn "sub<mode>3"
345  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
346        (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
347		   (match_operand:VDQ_I 2 "register_operand" "w")))]
348  "TARGET_SIMD"
349  "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
350  [(set_attr "type" "neon_sub<q>")]
351)
352
;; Vector integer multiplication (byte/half/word element sizes only —
;; VDQ_BHSI excludes 64-bit elements).
353(define_insn "mul<mode>3"
354  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
355        (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
356		   (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
357  "TARGET_SIMD"
358  "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
359  [(set_attr "type" "neon_mul_<Vetype><q>")]
360)
361
;; Per-element byte swap, implemented with the REV family (suffix selected
;; by <Vrevsuff> from the element size).
362(define_insn "bswap<mode>2"
363  [(set (match_operand:VDQHSD 0 "register_operand" "=w")
364        (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
365  "TARGET_SIMD"
366  "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
367  [(set_attr "type" "neon_rev<q>")]
368)
369
;; Bit-reverse within each byte element (RBIT); byte-vector modes only.
370(define_insn "aarch64_rbit<mode>"
371  [(set (match_operand:VB 0 "register_operand" "=w")
372	(unspec:VB [(match_operand:VB 1 "register_operand" "w")]
373		   UNSPEC_RBIT))]
374  "TARGET_SIMD"
375  "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
376  [(set_attr "type" "neon_rbit")]
377)
378
;; Count trailing zeros: byte-swap each element, then RBIT every byte (the
;; two together reverse all bits of the element), then count leading zeros
;; of the result.  The subreg recasts the vector to the matching byte-vector
;; mode that aarch64_rbit requires.
379(define_expand "ctz<mode>2"
380  [(set (match_operand:VS 0 "register_operand")
381        (ctz:VS (match_operand:VS 1 "register_operand")))]
382  "TARGET_SIMD"
383  {
384     emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
385     rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
386					     <MODE>mode, 0);
387     emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
388     emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
389     DONE;
390  }
391)
392
;; xorsign(a, b) = a with its sign flipped by b's sign bit.  Performed in the
;; integer-equivalent vector mode: mask b down to its sign bits (AND with a
;; per-lane value of ~0 << (unit_bits - 1)), then XOR into a.
393(define_expand "xorsign<mode>3"
394  [(match_operand:VHSDF 0 "register_operand")
395   (match_operand:VHSDF 1 "register_operand")
396   (match_operand:VHSDF 2 "register_operand")]
397  "TARGET_SIMD"
398{
399
400  machine_mode imode = <V_INT_EQUIV>mode;
401  rtx v_bitmask = gen_reg_rtx (imode);
402  rtx op1x = gen_reg_rtx (imode);
403  rtx op2x = gen_reg_rtx (imode);
404
405  rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
406  rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);
407
408  int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
409
410  emit_move_insn (v_bitmask,
411		  aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
412						     HOST_WIDE_INT_M1U << bits));
413
414  emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
415  emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
416  emit_move_insn (operands[0],
417		  lowpart_subreg (<MODE>mode, op1x, imode));
418  DONE;
419}
420)
421
422;; The fcadd and fcmla patterns are made UNSPEC explicitly due to the
423;; fact that their usage needs to guarantee that the source vectors are
424;; contiguous.  It would be wrong to describe the operation without being able
425;; to describe the permute that is also required, but even if that is done
426;; the permute would have been created as a LOAD_LANES which means the values
427;; in the registers are in the wrong order.
;; Floating-point complex add with rotation <rot> (FCADD); requires the
;; Armv8.3-A complex-number extension (TARGET_COMPLEX).
428(define_insn "aarch64_fcadd<rot><mode>"
429  [(set (match_operand:VHSDF 0 "register_operand" "=w")
430	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
431		       (match_operand:VHSDF 2 "register_operand" "w")]
432		       FCADD))]
433  "TARGET_COMPLEX"
434  "fcadd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>, #<rot>"
435  [(set_attr "type" "neon_fcadd")]
436)
437
;; Floating-point complex multiply-accumulate with rotation <rot> (FCMLA);
;; operand 1 is the accumulator and is tied to the destination ("0").
438(define_insn "aarch64_fcmla<rot><mode>"
439  [(set (match_operand:VHSDF 0 "register_operand" "=w")
440	(plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
441		    (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
442				   (match_operand:VHSDF 3 "register_operand" "w")]
443				   FCMLA)))]
444  "TARGET_COMPLEX"
445  "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>, #<rot>"
446  [(set_attr "type" "neon_fcmla")]
447)
448
449
;; FCMLA by a selected lane.  The lane index counts complex (two-element)
;; pairs, which is why the endian remap is done in the half-width <VHALF>
;; mode rather than the full vector mode.
450(define_insn "aarch64_fcmla_lane<rot><mode>"
451  [(set (match_operand:VHSDF 0 "register_operand" "=w")
452	(plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
453		    (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
454				   (match_operand:VHSDF 3 "register_operand" "w")
455				   (match_operand:SI 4 "const_int_operand" "n")]
456				   FCMLA)))]
457  "TARGET_COMPLEX"
458{
459  operands[4] = aarch64_endian_lane_rtx (<VHALF>mode, INTVAL (operands[4]));
460  return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
461}
462  [(set_attr "type" "neon_fcmla")]
463)
464
;; V4HF FCMLA taking its lane from a 128-bit (V8HF) operand; the lane index
;; is endian-remapped in V4HF mode (complex-pair granularity, as above).
465(define_insn "aarch64_fcmla_laneq<rot>v4hf"
466  [(set (match_operand:V4HF 0 "register_operand" "=w")
467	(plus:V4HF (match_operand:V4HF 1 "register_operand" "0")
468		   (unspec:V4HF [(match_operand:V4HF 2 "register_operand" "w")
469				 (match_operand:V8HF 3 "register_operand" "w")
470				 (match_operand:SI 4 "const_int_operand" "n")]
471				 FCMLA)))]
472  "TARGET_COMPLEX"
473{
474  operands[4] = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
475  return "fcmla\t%0.4h, %2.4h, %3.h[%4], #<rot>";
476}
477  [(set_attr "type" "neon_fcmla")]
478)
479
;; Q-register FCMLA taking its lane from a 64-bit (<VHALF>) operand.  The
;; lane remap is computed by hand: nunits/2 complex pairs in the half-width
;; vector, remapped for endianness with ENDIAN_LANE_N.
480(define_insn "aarch64_fcmlaq_lane<rot><mode>"
481  [(set (match_operand:VQ_HSF 0 "register_operand" "=w")
482	(plus:VQ_HSF (match_operand:VQ_HSF 1 "register_operand" "0")
483		     (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "register_operand" "w")
484				     (match_operand:<VHALF> 3 "register_operand" "w")
485				     (match_operand:SI 4 "const_int_operand" "n")]
486				     FCMLA)))]
487  "TARGET_COMPLEX"
488{
489  int nunits = GET_MODE_NUNITS (<VHALF>mode).to_constant ();
490  operands[4]
491    = gen_int_mode (ENDIAN_LANE_N (nunits / 2, INTVAL (operands[4])), SImode);
492  return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
493}
494  [(set_attr "type" "neon_fcmla")]
495)
496
497;; These instructions map to the __builtins for the Dot Product operations.
;; SDOT/UDOT: dot-product of byte vectors accumulated into 32-bit lanes;
;; operand 1 is the accumulator, tied to the destination.
498(define_insn "aarch64_<sur>dot<vsi2qi>"
499  [(set (match_operand:VS 0 "register_operand" "=w")
500	(plus:VS (match_operand:VS 1 "register_operand" "0")
501		(unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
502			    (match_operand:<VSI2QI> 3 "register_operand" "w")]
503		DOTPROD)))]
504  "TARGET_DOTPROD"
505  "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
506  [(set_attr "type" "neon_dot<q>")]
507)
508
509;; These expands map to the Dot Product optab the vectorizer checks for.
510;; The auto-vectorizer expects a dot product builtin that also does an
511;; accumulation into the provided register.
512;; Given the following pattern
513;;
514;; for (i=0; i<len; i++) {
515;;     c = a[i] * b[i];
516;;     r += c;
517;; }
518;; return result;
519;;
520;; This can be auto-vectorized to
521;; r  = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
522;;
523;; given enough iterations.  However the vectorizer can keep unrolling the loop
524;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
525;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
526;; ...
527;;
528;; and so the vectorizer provides r, in which the result has to be accumulated.
;; Dot-product optab expander (see comment block above): accumulate into the
;; vectorizer-supplied operand 3 in place, then copy it to operand 0.
529(define_expand "<sur>dot_prod<vsi2qi>"
530  [(set (match_operand:VS 0 "register_operand")
531	(plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
532			    (match_operand:<VSI2QI> 2 "register_operand")]
533		 DOTPROD)
534		(match_operand:VS 3 "register_operand")))]
535  "TARGET_DOTPROD"
536{
537  emit_insn (
538    gen_aarch64_<sur>dot<vsi2qi> (operands[3], operands[3], operands[1],
539				    operands[2]));
540  emit_insn (gen_rtx_SET (operands[0], operands[3]));
541  DONE;
542})
543
544;; These instructions map to the __builtins for the Dot Product
545;; indexed operations.
;; SDOT/UDOT against a selected 32-bit group (4 bytes) of a 64-bit register;
;; lane index endian-remapped in V8QI mode.
546(define_insn "aarch64_<sur>dot_lane<vsi2qi>"
547  [(set (match_operand:VS 0 "register_operand" "=w")
548	(plus:VS (match_operand:VS 1 "register_operand" "0")
549		(unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
550			    (match_operand:V8QI 3 "register_operand" "<h_con>")
551			    (match_operand:SI 4 "immediate_operand" "i")]
552		DOTPROD)))]
553  "TARGET_DOTPROD"
554  {
555    operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
556    return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
557  }
558  [(set_attr "type" "neon_dot<q>")]
559)
560
;; As dot_lane, but the lane comes from a 128-bit (V16QI) register.
561(define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
562  [(set (match_operand:VS 0 "register_operand" "=w")
563	(plus:VS (match_operand:VS 1 "register_operand" "0")
564		(unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
565			    (match_operand:V16QI 3 "register_operand" "<h_con>")
566			    (match_operand:SI 4 "immediate_operand" "i")]
567		DOTPROD)))]
568  "TARGET_DOTPROD"
569  {
570    operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
571    return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
572  }
573  [(set_attr "type" "neon_dot<q>")]
574)
575
;; copysign(x, y): bit-select with a mask holding only the sign bit of each
;; lane (~0 << (unit_bits - 1)) — take the sign from operand 2 and the rest
;; from operand 1 via aarch64_simd_bsl.
576(define_expand "copysign<mode>3"
577  [(match_operand:VHSDF 0 "register_operand")
578   (match_operand:VHSDF 1 "register_operand")
579   (match_operand:VHSDF 2 "register_operand")]
580  "TARGET_FLOAT && TARGET_SIMD"
581{
582  rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
583  int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
584
585  emit_move_insn (v_bitmask,
586		  aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
587						     HOST_WIDE_INT_M1U << bits));
588  emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
589					 operands[2], operands[1]));
590  DONE;
591}
592)
593
;; Multiply a vector by a broadcast lane of another same-mode vector
;; ((F)MUL by element); lane index endian-remapped.
594(define_insn "*aarch64_mul3_elt<mode>"
595 [(set (match_operand:VMUL 0 "register_operand" "=w")
596    (mult:VMUL
597      (vec_duplicate:VMUL
598	  (vec_select:<VEL>
599	    (match_operand:VMUL 1 "register_operand" "<h_con>")
600	    (parallel [(match_operand:SI 2 "immediate_operand")])))
601      (match_operand:VMUL 3 "register_operand" "w")))]
602  "TARGET_SIMD"
603  {
604    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
605    return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
606  }
607  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
608)
609
;; As *aarch64_mul3_elt, but the lane is taken from the opposite-width
;; vector mode, so the endian remap uses <VSWAP_WIDTH>.
610(define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
611  [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
612     (mult:VMUL_CHANGE_NLANES
613       (vec_duplicate:VMUL_CHANGE_NLANES
614	  (vec_select:<VEL>
615	    (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
616	    (parallel [(match_operand:SI 2 "immediate_operand")])))
617      (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
618  "TARGET_SIMD"
619  {
620    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
621    return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
622  }
623  [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
624)
625
;; Multiply a vector by a duplicated scalar, emitted as a by-element multiply
;; of lane 0.  Note: the stray ";" after the template string starts an md
;; comment and is harmless.
626(define_insn "*aarch64_mul3_elt_from_dup<mode>"
627 [(set (match_operand:VMUL 0 "register_operand" "=w")
628    (mult:VMUL
629      (vec_duplicate:VMUL
630	    (match_operand:<VEL> 1 "register_operand" "<h_con>"))
631      (match_operand:VMUL 2 "register_operand" "w")))]
632  "TARGET_SIMD"
633  "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
634  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
635)
636
;; Reciprocal square-root estimate (FRSQRTE), vector and scalar FP modes.
637(define_insn "@aarch64_rsqrte<mode>"
638  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
639	(unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
640		     UNSPEC_RSQRTE))]
641  "TARGET_SIMD"
642  "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
643  [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
644
;; Reciprocal square-root step (FRSQRTS), the Newton-iteration companion to
;; FRSQRTE.
645(define_insn "@aarch64_rsqrts<mode>"
646  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
647	(unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
648			    (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
649	 UNSPEC_RSQRTS))]
650  "TARGET_SIMD"
651  "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
652  [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
653
;; Expand 1/sqrt(x) through the approximate-sqrt emitter (estimate + steps).
654(define_expand "rsqrt<mode>2"
655  [(set (match_operand:VALLF 0 "register_operand" "=w")
656	(unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
657		     UNSPEC_RSQRT))]
658  "TARGET_SIMD"
659{
660  aarch64_emit_approx_sqrt (operands[0], operands[1], true);
661  DONE;
662})
663
;; Scalar DF multiply where one factor is a selected lane of a V2DF vector;
;; matched so it can use the by-element FMUL form.
664(define_insn "*aarch64_mul3_elt_to_64v2df"
665  [(set (match_operand:DF 0 "register_operand" "=w")
666     (mult:DF
667       (vec_select:DF
668	 (match_operand:V2DF 1 "register_operand" "w")
669	 (parallel [(match_operand:SI 2 "immediate_operand")]))
670       (match_operand:DF 3 "register_operand" "w")))]
671  "TARGET_SIMD"
672  {
673    operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
674    return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
675  }
676  [(set_attr "type" "neon_fp_mul_d_scalar_q")]
677)
678
;; Vector integer negation.
679(define_insn "neg<mode>2"
680  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
681	(neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
682  "TARGET_SIMD"
683  "neg\t%0.<Vtype>, %1.<Vtype>"
684  [(set_attr "type" "neon_neg<q>")]
685)
686
;; Vector integer absolute value (combinable RTL form; cf. aarch64_abs below).
687(define_insn "abs<mode>2"
688  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
689        (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
690  "TARGET_SIMD"
691  "abs\t%0.<Vtype>, %1.<Vtype>"
692  [(set_attr "type" "neon_abs<q>")]
693)
694
695;; The intrinsic version of integer ABS must not be allowed to
696;; combine with any operation with an integrated ABS step, such
697;; as SABD.
;; Intrinsic ABS, kept as an UNSPEC so combine cannot merge it (see the
;; comment above).
698(define_insn "aarch64_abs<mode>"
699  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
700	  (unspec:VSDQ_I_DI
701	    [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
702	   UNSPEC_ABS))]
703  "TARGET_SIMD"
704  "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
705  [(set_attr "type" "neon_abs<q>")]
706)
707
708;; It's tempting to represent SABD as ABS (MINUS op1 op2).
709;; This isn't accurate as ABS always treats its input as a signed value.
710;; So (ABS:QI (minus:QI 64 -128)) == (ABS:QI (192 or -64 signed)) == 64.
711;; Whereas SABD would return 192 (-64 signed) on the above example.
712;; Use MINUS ([us]max (op1, op2), [us]min (op1, op2)) instead.
;; SABD/UABD expressed as max(a,b) - min(a,b) (see comment above for why not
;; abs(a-b)); match_operator 3 pins the inner op to the opposite of USMAX.
713(define_insn "*aarch64_<su>abd<mode>_3"
714  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
715	(minus:VDQ_BHSI
716	  (USMAX:VDQ_BHSI
717	    (match_operand:VDQ_BHSI 1 "register_operand" "w")
718	    (match_operand:VDQ_BHSI 2 "register_operand" "w"))
719	  (match_operator 3 "aarch64_<max_opp>"
720	    [(match_dup 1)
721	     (match_dup 2)])))]
722  "TARGET_SIMD"
723  "<su>abd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
724  [(set_attr "type" "neon_abd<q>")]
725)
726
;; SABDL2/UABDL2: widening absolute difference of the high halves of the
;; inputs.
727(define_insn "aarch64_<sur>abdl2<mode>_3"
728  [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
729	(unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
730			  (match_operand:VDQV_S 2 "register_operand" "w")]
731	ABDL2))]
732  "TARGET_SIMD"
733  "<sur>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
734  [(set_attr "type" "neon_abd<q>")]
735)
736
;; SABAL/UABAL: widening absolute difference accumulated into operand 3
;; (tied to the destination).
737(define_insn "aarch64_<sur>abal<mode>_4"
738  [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
739	(unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
740			  (match_operand:VDQV_S 2 "register_operand" "w")
741			 (match_operand:<VDBLW> 3 "register_operand" "0")]
742	ABAL))]
743  "TARGET_SIMD"
744  "<sur>abal\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
745  [(set_attr "type" "neon_arith_acc<q>")]
746)
747
;; SADALP/UADALP: widening pairwise add accumulated into operand 2 (tied to
;; the destination).
748(define_insn "aarch64_<sur>adalp<mode>_3"
749  [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
750	(unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
751			  (match_operand:<VDBLW> 2 "register_operand" "0")]
752	ADALP))]
753  "TARGET_SIMD"
754  "<sur>adalp\t%0.<Vwtype>, %1.<Vtype>"
755  [(set_attr "type" "neon_reduc_add<q>")]
756)
757
758;; Emit a sequence to produce a sum-of-absolute-differences of the V16QI
759;; inputs in operands 1 and 2.  The sequence also has to perform a widening
760;; reduction of the difference into a V4SI vector and accumulate that into
761;; operand 3 before copying that into the result operand 0.
762;; Perform that with a sequence of:
763;; UABDL2	tmp.8h, op1.16b, op2.16b
764;; UABAL	tmp.8h, op1.16b, op2.16b
765;; UADALP	op3.4s, tmp.8h
766;; MOV		op0, op3 // should be eliminated in later passes.
767;; The signed version just uses the signed variants of the above instructions.
768
;; Sum-of-absolute-differences expander; emits the ABDL2/ABAL/ADALP sequence
;; documented in the comment block above, then copies the accumulator to the
;; result.
769(define_expand "<sur>sadv16qi"
770  [(use (match_operand:V4SI 0 "register_operand"))
771   (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
772		  (use (match_operand:V16QI 2 "register_operand"))] ABAL)
773   (use (match_operand:V4SI 3 "register_operand"))]
774  "TARGET_SIMD"
775  {
776    rtx reduc = gen_reg_rtx (V8HImode);
777    emit_insn (gen_aarch64_<sur>abdl2v16qi_3 (reduc, operands[1],
778					       operands[2]));
779    emit_insn (gen_aarch64_<sur>abalv16qi_4 (reduc, operands[1],
780					      operands[2], reduc));
781    emit_insn (gen_aarch64_<sur>adalpv8hi_3 (operands[3], reduc,
782					      operands[3]));
783    emit_move_insn (operands[0], operands[3]);
784    DONE;
785  }
786)
787
;; SABA: signed absolute difference accumulated into operand 3 (tied to the
;; destination).
788(define_insn "aba<mode>_3"
789  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
790	(plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
791			 (match_operand:VDQ_BHSI 1 "register_operand" "w")
792			 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
793		       (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
794  "TARGET_SIMD"
795  "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
796  [(set_attr "type" "neon_arith_acc<q>")]
797)
798
;; FABD: floating-point absolute difference, vector and scalar modes.
799(define_insn "fabd<mode>3"
800  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
801	(abs:VHSDF_HSDF
802	  (minus:VHSDF_HSDF
803	    (match_operand:VHSDF_HSDF 1 "register_operand" "w")
804	    (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
805  "TARGET_SIMD"
806  "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
807  [(set_attr "type" "neon_fp_abd_<stype><q>")]
808)
809
810;; For AND (vector, register) and BIC (vector, immediate)
;; Vector AND.  Alternative 0 is register AND; alternative 1 is BIC with an
;; inverted immediate (Db constraint), printed by
;; aarch64_output_simd_mov_immediate with AARCH64_CHECK_BIC.
811(define_insn "and<mode>3"
812  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
813	(and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
814		   (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
815  "TARGET_SIMD"
816  {
817    switch (which_alternative)
818      {
819      case 0:
820	return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
821      case 1:
822	return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
823						  AARCH64_CHECK_BIC);
824      default:
825	gcc_unreachable ();
826      }
827  }
828  [(set_attr "type" "neon_logic<q>")]
829)
830
831;; For ORR (vector, register) and ORR (vector, immediate)
;; Vector OR.  Alternative 0 is register ORR; alternative 1 is ORR with an
;; immediate (Do constraint), printed with AARCH64_CHECK_ORR.
832(define_insn "ior<mode>3"
833  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
834	(ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
835		   (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
836  "TARGET_SIMD"
837  {
838    switch (which_alternative)
839      {
840      case 0:
841	return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
842      case 1:
843	return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
844						  AARCH64_CHECK_ORR);
845      default:
846	gcc_unreachable ();
847      }
848  }
849  [(set_attr "type" "neon_logic<q>")]
850)
851
;; Vector XOR (EOR).
852(define_insn "xor<mode>3"
853  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
854        (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
855		 (match_operand:VDQ_I 2 "register_operand" "w")))]
856  "TARGET_SIMD"
857  "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
858  [(set_attr "type" "neon_logic<q>")]
859)
860
;; Vector bitwise complement (NOT).
861(define_insn "one_cmpl<mode>2"
862  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
863        (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
864  "TARGET_SIMD"
865  "not\t%0.<Vbtype>, %1.<Vbtype>"
866  [(set_attr "type" "neon_logic<q>")]
867)
868
;; Insert a single element into a vector: operand 1 (SIMD reg, GP reg, or
;; memory) replaces the lane of operand 3 selected by the one-hot mask in
;; operand 2; all other lanes are preserved.  Operand 2 arrives as a
;; little-endian one-hot lane mask; it is rewritten below to the
;; architectural (endian-corrected) lane mask before printing with %p2.
(define_insn "aarch64_simd_vec_set<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
	(vec_merge:VALL_F16
	    (vec_duplicate:VALL_F16
		(match_operand:<VEL> 1 "aarch64_simd_general_operand" "w,?r,Utv"))
	    (match_operand:VALL_F16 3 "register_operand" "0,0,0")
	    (match_operand:SI 2 "immediate_operand" "i,i,i")))]
  "TARGET_SIMD"
  {
   int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
   /* Use HOST_WIDE_INT_1 for the one-hot mask, matching the companion
      *aarch64_simd_vec_copy_lane patterns below.  */
   operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
   switch (which_alternative)
     {
     case 0:
	/* Element-to-element insert from another SIMD register.  */
	return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
     case 1:
	/* Insert from a general-purpose register.  */
	return "ins\\t%0.<Vetype>[%p2], %<vwcore>1";
     case 2:
	/* Load one lane directly from memory.  */
        return "ld1\\t{%0.<Vetype>}[%p2], %1";
     default:
	gcc_unreachable ();
     }
  }
  [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]
)
894
;; Copy one lane of operand 3 (lane index operand 4) into the lane of
;; operand 1 selected by the one-hot mask in operand 2, same element size.
;; Both lane operands are corrected for endianness before printing.
(define_insn "*aarch64_simd_vec_copy_lane<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
	(vec_merge:VALL_F16
	    (vec_duplicate:VALL_F16
	      (vec_select:<VEL>
		(match_operand:VALL_F16 3 "register_operand" "w")
		(parallel
		  [(match_operand:SI 4 "immediate_operand" "i")])))
	    (match_operand:VALL_F16 1 "register_operand" "0")
	    (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
    operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
    operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));

    return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_ins<q>")]
)
915
;; As *aarch64_simd_vec_copy_lane, but the source vector (operand 3) has
;; the opposite width (<VSWAP_WIDTH>) — copy a lane between a 64-bit and a
;; 128-bit vector of the same element type.
(define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
  [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
	(vec_merge:VALL_F16_NO_V2Q
	    (vec_duplicate:VALL_F16_NO_V2Q
	      (vec_select:<VEL>
		(match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
		(parallel
		  [(match_operand:SI 4 "immediate_operand" "i")])))
	    (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
	    (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
    operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
    ;; The source lane index is interpreted in the source vector's mode.
    operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
					   INTVAL (operands[4]));

    return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_ins<q>")]
)
937
;; Immediate vector shifts.  The shift amount (operand 2) is a vector of
;; identical immediates: "Dr" accepts 1..element-size for right shifts,
;; "Dl" accepts 0..element-size-1 for left shifts, matching the
;; instruction encodings.

;; Logical (unsigned) shift right by immediate.
(define_insn "aarch64_simd_lshr<mode>"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		     (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
 "TARGET_SIMD"
 "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)

;; Arithmetic (signed) shift right by immediate.
(define_insn "aarch64_simd_ashr<mode>"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		     (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
 "TARGET_SIMD"
 "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)

;; Shift left by immediate.
(define_insn "aarch64_simd_imm_shl<mode>"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		   (match_operand:VDQ_I  2 "aarch64_simd_lshift_imm" "Dl")))]
 "TARGET_SIMD"
  "shl\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)
964
;; Register-controlled vector shifts.  SSHL/USHL take a per-element shift
;; count in operand 2; the expanders below negate the count to obtain
;; right shifts, so the right-shift variants are kept behind unspecs.

;; Left shift by register (signed form of SSHL used as plain left shift).
(define_insn "aarch64_simd_reg_sshl<mode>"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		   (match_operand:VDQ_I 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)

;; USHL with an unspecified (possibly negative) count — used by vlshr/lshr
;; expanders with a negated count to get a logical right shift.
(define_insn "aarch64_simd_reg_shl<mode>_unsigned"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
		    (match_operand:VDQ_I 2 "register_operand" "w")]
		   UNSPEC_ASHIFT_UNSIGNED))]
 "TARGET_SIMD"
 "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)

;; SSHL with an unspecified (possibly negative) count — used by vashr/ashr
;; expanders with a negated count to get an arithmetic right shift.
(define_insn "aarch64_simd_reg_shl<mode>_signed"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
		    (match_operand:VDQ_I 2 "register_operand" "w")]
		   UNSPEC_ASHIFT_SIGNED))]
 "TARGET_SIMD"
 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)
993
;; Expand a vector shift-left with a scalar (SI) shift count.
;; In-range constant counts (0..bit_width-1) use the immediate SHL form;
;; anything else is forced into a register, duplicated across a vector,
;; and handled with the register-controlled SSHL form.
(define_expand "ashl<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:SI  2 "general_operand" "")]
 "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      /* SHL encodes immediates 0..element-size-1.  */
      if (shift_amount >= 0 && shift_amount < bit_width)
        {
	  rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
						       shift_amount);
	  emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
						     operands[1],
						     tmp));
          DONE;
        }
      else
        {
          operands[2] = force_reg (SImode, operands[2]);
        }
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      /* Broadcast the scalar count to every lane and use SSHL.  */
      rtx tmp = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_aarch64_simd_dup<mode> (tmp,
					     convert_to_mode (<VEL>mode,
							      operands[2],
							      0)));
      emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
						  tmp));
      DONE;
    }
  else
    FAIL;
}
)
1040
;; Expand a vector logical shift-right with a scalar (SI) shift count.
;; In-range constant counts (1..bit_width) use the immediate USHR form;
;; otherwise the count is negated, broadcast, and fed to USHL (a negative
;; register count shifts right).
(define_expand "lshr<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:SI  2 "general_operand" "")]
 "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      /* USHR encodes immediates 1..element-size.  */
      if (shift_amount > 0 && shift_amount <= bit_width)
        {
	  rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
						       shift_amount);
          emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
						  operands[1],
						  tmp));
	  DONE;
	}
      else
        operands[2] = force_reg (SImode, operands[2]);
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      /* Negate the count, broadcast it, and shift with USHL.  */
      rtx tmp = gen_reg_rtx (SImode);
      rtx tmp1 = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_negsi2 (tmp, operands[2]));
      emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
					     convert_to_mode (<VEL>mode,
							      tmp, 0)));
      emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
							  operands[1],
							  tmp1));
      DONE;
    }
  else
    FAIL;
}
)
1087
;; Expand a vector arithmetic shift-right with a scalar (SI) shift count.
;; Mirrors lshr<mode>3: in-range constants (1..bit_width) use SSHR;
;; otherwise the count is negated, broadcast, and fed to SSHL.
(define_expand "ashr<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:SI  2 "general_operand" "")]
 "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      /* SSHR encodes immediates 1..element-size.  */
      if (shift_amount > 0 && shift_amount <= bit_width)
        {
	  rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
						       shift_amount);
          emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
						  operands[1],
						  tmp));
          DONE;
	}
      else
        operands[2] = force_reg (SImode, operands[2]);
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      /* Negate the count, broadcast it, and shift with SSHL.  */
      rtx tmp = gen_reg_rtx (SImode);
      rtx tmp1 = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_negsi2 (tmp, operands[2]));
      emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
					     convert_to_mode (<VEL>mode,
							      tmp, 0)));
      emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
							operands[1],
							tmp1));
      DONE;
    }
  else
    FAIL;
}
)
1134
;; Vector-by-vector shift left: maps directly onto SSHL, which takes a
;; per-element shift count.
(define_expand "vashl<mode>3"
 [(match_operand:VDQ_I 0 "register_operand" "")
  (match_operand:VDQ_I 1 "register_operand" "")
  (match_operand:VDQ_I 2 "register_operand" "")]
 "TARGET_SIMD"
{
  emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
					      operands[2]));
  DONE;
})
1145
1146;; Using mode VDQ_BHSI as there is no V2DImode neg!
;; Negating individual lanes would most likely cancel out any
;; gain from vectorization.
;; Vector-by-vector arithmetic shift right: negate the per-element counts
;; and use SSHL (negative counts shift right).
(define_expand "vashr<mode>3"
 [(match_operand:VDQ_BHSI 0 "register_operand" "")
  (match_operand:VDQ_BHSI 1 "register_operand" "")
  (match_operand:VDQ_BHSI 2 "register_operand" "")]
 "TARGET_SIMD"
{
  rtx neg = gen_reg_rtx (<MODE>mode);
  emit (gen_neg<mode>2 (neg, operands[2]));
  emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
						    neg));
  DONE;
})
1161
;; DI vector shift
;; Scalar-DI arithmetic shift right for the SIMD intrinsics, which permit
;; a shift count of 64 (unlike the standard ashrdi3 pattern).
(define_expand "aarch64_ashr_simddi"
  [(match_operand:DI 0 "register_operand" "=w")
   (match_operand:DI 1 "register_operand" "w")
   (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
  "TARGET_SIMD"
  {
    /* An arithmetic shift right by 64 fills the result with copies of the sign
       bit, just like asr by 63 - however the standard pattern does not handle
       a shift by 64.  */
    if (INTVAL (operands[2]) == 64)
      operands[2] = GEN_INT (63);
    emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
    DONE;
  }
)
1178
;; Vector-by-vector logical shift right: negate the per-element counts
;; and use USHL (negative counts shift right).
(define_expand "vlshr<mode>3"
 [(match_operand:VDQ_BHSI 0 "register_operand" "")
  (match_operand:VDQ_BHSI 1 "register_operand" "")
  (match_operand:VDQ_BHSI 2 "register_operand" "")]
 "TARGET_SIMD"
{
  rtx neg = gen_reg_rtx (<MODE>mode);
  emit (gen_neg<mode>2 (neg, operands[2]));
  emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
						      neg));
  DONE;
})
1191
;; Scalar-DI logical shift right for the SIMD intrinsics: a shift by 64
;; (not encodable by lshrdi3) yields zero, so emit a zero move for it.
(define_expand "aarch64_lshr_simddi"
  [(match_operand:DI 0 "register_operand" "=w")
   (match_operand:DI 1 "register_operand" "w")
   (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
  "TARGET_SIMD"
  {
    if (INTVAL (operands[2]) == 64)
      emit_move_insn (operands[0], const0_rtx);
    else
      emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
    DONE;
  }
)
1205
;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
;; Whole-vector shift towards element 0 by an immediate number of bits.
;; On big-endian the element order within the 64-bit register is reversed,
;; so the direction of the underlying bit shift flips.
(define_insn "vec_shr_<mode>"
  [(set (match_operand:VD 0 "register_operand" "=w")
        (unspec:VD [(match_operand:VD 1 "register_operand" "w")
		    (match_operand:SI 2 "immediate_operand" "i")]
		   UNSPEC_VEC_SHR))]
  "TARGET_SIMD"
  {
    if (BYTES_BIG_ENDIAN)
      return "shl %d0, %d1, %2";
    else
      return "ushr %d0, %d1, %2";
  }
  [(set_attr "type" "neon_shift_imm")]
)
1221
;; Standard vec_set expander: insert scalar operand 1 into lane
;; operand 2 of vector operand 0.  The lane index is converted to the
;; one-hot vec_merge mask expected by aarch64_simd_vec_set<mode>.
(define_expand "vec_set<mode>"
  [(match_operand:VALL_F16 0 "register_operand" "+w")
   (match_operand:<VEL> 1 "register_operand" "w")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_SIMD"
  {
    /* Use HOST_WIDE_INT_1 for the one-hot mask, as in the lane-copy
       patterns above.  */
    HOST_WIDE_INT elem = HOST_WIDE_INT_1 << INTVAL (operands[2]);
    emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
					  GEN_INT (elem), operands[0]));
    DONE;
  }
)
1234
1235
;; Multiply-accumulate: operand 0 = operand 1 + operand 2 * operand 3.
;; MLA accumulates into its destination, so operand 1 is tied to it ("0").
(define_insn "aarch64_mla<mode>"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (plus:VDQ_BHSI (mult:VDQ_BHSI
			(match_operand:VDQ_BHSI 2 "register_operand" "w")
			(match_operand:VDQ_BHSI 3 "register_operand" "w"))
		      (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
 "TARGET_SIMD"
 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype><q>")]
)
1246
;; MLA variants where one multiplicand is a single lane (or scalar)
;; broadcast across the vector, matching the MLA by-element forms.

;; Lane taken from a vector of the same width.
(define_insn "*aarch64_mla_elt<mode>"
 [(set (match_operand:VDQHS 0 "register_operand" "=w")
       (plus:VDQHS
	 (mult:VDQHS
	   (vec_duplicate:VDQHS
	      (vec_select:<VEL>
		(match_operand:VDQHS 1 "register_operand" "<h_con>")
		  (parallel [(match_operand:SI 2 "immediate_operand")])))
	   (match_operand:VDQHS 3 "register_operand" "w"))
	 (match_operand:VDQHS 4 "register_operand" "0")))]
 "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

;; Lane taken from a vector of the opposite (swapped) width.
(define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
 [(set (match_operand:VDQHS 0 "register_operand" "=w")
       (plus:VDQHS
	 (mult:VDQHS
	   (vec_duplicate:VDQHS
	      (vec_select:<VEL>
		(match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
		  (parallel [(match_operand:SI 2 "immediate_operand")])))
	   (match_operand:VDQHS 3 "register_operand" "w"))
	 (match_operand:VDQHS 4 "register_operand" "0")))]
 "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

;; Scalar multiplicand duplicated directly — use lane 0 of its register.
(define_insn "*aarch64_mla_elt_merge<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
	(plus:VDQHS
	  (mult:VDQHS (vec_duplicate:VDQHS
		  (match_operand:<VEL> 1 "register_operand" "<h_con>"))
		(match_operand:VDQHS 2 "register_operand" "w"))
	  (match_operand:VDQHS 3 "register_operand" "0")))]
 "TARGET_SIMD"
 "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)
1294
;; Multiply-subtract: operand 0 = operand 1 - operand 2 * operand 3.
;; MLS subtracts from its destination, so operand 1 is tied to it ("0").
(define_insn "aarch64_mls<mode>"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
		   (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
			      (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
 "TARGET_SIMD"
 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype><q>")]
)
1304
;; MLS variants where one multiplicand is a single lane (or scalar)
;; broadcast across the vector, matching the MLS by-element forms.

;; Lane taken from a vector of the same width.
(define_insn "*aarch64_mls_elt<mode>"
 [(set (match_operand:VDQHS 0 "register_operand" "=w")
       (minus:VDQHS
	 (match_operand:VDQHS 4 "register_operand" "0")
	 (mult:VDQHS
	   (vec_duplicate:VDQHS
	      (vec_select:<VEL>
		(match_operand:VDQHS 1 "register_operand" "<h_con>")
		  (parallel [(match_operand:SI 2 "immediate_operand")])))
	   (match_operand:VDQHS 3 "register_operand" "w"))))]
 "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

;; Lane taken from a vector of the opposite (swapped) width.
(define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
 [(set (match_operand:VDQHS 0 "register_operand" "=w")
       (minus:VDQHS
	 (match_operand:VDQHS 4 "register_operand" "0")
	 (mult:VDQHS
	   (vec_duplicate:VDQHS
	      (vec_select:<VEL>
		(match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
		  (parallel [(match_operand:SI 2 "immediate_operand")])))
	   (match_operand:VDQHS 3 "register_operand" "w"))))]
 "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

;; Scalar multiplicand duplicated directly — use lane 0 of its register.
(define_insn "*aarch64_mls_elt_merge<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
	(minus:VDQHS
	  (match_operand:VDQHS 1 "register_operand" "0")
	  (mult:VDQHS (vec_duplicate:VDQHS
		  (match_operand:<VEL> 2 "register_operand" "<h_con>"))
		(match_operand:VDQHS 3 "register_operand" "w"))))]
  "TARGET_SIMD"
  "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)
1352
;; Max/Min operations.
;; Element-wise signed/unsigned SMAX/SMIN/UMAX/UMIN for byte, half and
;; word elements (V2DI has no native form; see the expander below).
(define_insn "<su><maxmin><mode>3"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
		    (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)
1362
;; V2DI max/min: there is no 64-bit-element SMAX/UMAX etc., so expand to a
;; compare-and-select (vcond) using the comparison that corresponds to the
;; requested max/min operation.
(define_expand "<su><maxmin>v2di3"
 [(set (match_operand:V2DI 0 "register_operand" "")
       (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
                    (match_operand:V2DI 2 "register_operand" "")))]
 "TARGET_SIMD"
{
  enum rtx_code cmp_operator;
  rtx cmp_fmt;

  /* Select the comparison whose true case picks operand 1.  */
  switch (<CODE>)
    {
    case UMIN:
      cmp_operator = LTU;
      break;
    case SMIN:
      cmp_operator = LT;
      break;
    case UMAX:
      cmp_operator = GTU;
      break;
    case SMAX:
      cmp_operator = GT;
      break;
    default:
      gcc_unreachable ();
    }

  cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
  emit_insn (gen_vcondv2div2di (operands[0], operands[1],
              operands[2], cmp_fmt, operands[1], operands[2]));
  DONE;
})
1395
;; Pairwise Integer Max/Min operations.
;; Each result element is the max/min of a pair of adjacent elements
;; drawn from the concatenation of operands 1 and 2.
(define_insn "aarch64_<maxmin_uns>p<mode>"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
			 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
			MAXMINV))]
 "TARGET_SIMD"
 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)

;; Pairwise FP Max/Min operations.
;; Floating-point counterpart, covering both FMAXP/FMINP and the
;; FMAXNMP/FMINNMP (NaN-propagation-variant) unspecs.
(define_insn "aarch64_<maxmin_uns>p<mode>"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
		      (match_operand:VHSDF 2 "register_operand" "w")]
		      FMAXMINV))]
 "TARGET_SIMD"
 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)
1417
1418;; vec_concat gives a new vector with the low elements from operand 1, and
1419;; the high elements from operand 2.  That is to say, given op1 = { a, b }
1420;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1421;; What that means, is that the RTL descriptions of the below patterns
1422;; need to change depending on endianness.
1423
1424;; Move to the low architectural bits of the register.
1425;; On little-endian this is { operand, zeroes }
1426;; On big-endian this is { zeroes, operand }
1427
;; Four variants of "write operand 1 into the low 64 bits and zero the
;; high 64 bits": {VQ_NO2E, VQ_2E} x {little-endian, big-endian}.  The
;; VQ_2E (two-element) modes have a scalar half, so the zero half is a
;; plain (const_int 0) rather than a vec_duplicate of zero; on big-endian
;; the zero half comes first in the vec_concat.  All variants emit the
;; same instructions, since a 64-bit write zeroes the rest of the Q reg.
(define_insn "move_lo_quad_internal_<mode>"
  [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
	(vec_concat:VQ_NO2E
	  (match_operand:<VHALF> 1 "register_operand" "w,r,r")
	  (vec_duplicate:<VHALF> (const_int 0))))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "length" "4")
   (set_attr "arch" "simd,fp,simd")]
)

(define_insn "move_lo_quad_internal_<mode>"
  [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
	(vec_concat:VQ_2E
	  (match_operand:<VHALF> 1 "register_operand" "w,r,r")
	  (const_int 0)))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "length" "4")
   (set_attr "arch" "simd,fp,simd")]
)

(define_insn "move_lo_quad_internal_be_<mode>"
  [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
	(vec_concat:VQ_NO2E
	  (vec_duplicate:<VHALF> (const_int 0))
	  (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "length" "4")
   (set_attr "arch" "simd,fp,simd")]
)

(define_insn "move_lo_quad_internal_be_<mode>"
  [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
	(vec_concat:VQ_2E
	  (const_int 0)
	  (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "length" "4")
   (set_attr "arch" "simd,fp,simd")]
)
1487
;; Dispatch to the endianness-appropriate move_lo_quad pattern.
(define_expand "move_lo_quad_<mode>"
  [(match_operand:VQ 0 "register_operand")
   (match_operand:VQ 1 "register_operand")]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
  else
    emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
  DONE;
}
)
1500
1501;; Move operand1 to the high architectural bits of the register, keeping
1502;; the low architectural bits of operand2.
1503;; For little-endian this is { operand2, operand1 }
1504;; For big-endian this is { operand1, operand2 }
1505
;; Insert operand 1 into the high 64 bits of operand 0, keeping the low
;; half (selected from operand 0 itself via the lo-half parallel).  The
;; big-endian variant swaps the vec_concat order; both emit INS into d[1].
(define_insn "aarch64_simd_move_hi_quad_<mode>"
  [(set (match_operand:VQ 0 "register_operand" "+w,w")
        (vec_concat:VQ
          (vec_select:<VHALF>
                (match_dup 0)
                (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
	  (match_operand:<VHALF> 1 "register_operand" "w,r")))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   ins\\t%0.d[1], %1.d[0]
   ins\\t%0.d[1], %1"
  [(set_attr "type" "neon_ins")]
)

(define_insn "aarch64_simd_move_hi_quad_be_<mode>"
  [(set (match_operand:VQ 0 "register_operand" "+w,w")
        (vec_concat:VQ
	  (match_operand:<VHALF> 1 "register_operand" "w,r")
          (vec_select:<VHALF>
                (match_dup 0)
                (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   ins\\t%0.d[1], %1.d[0]
   ins\\t%0.d[1], %1"
  [(set_attr "type" "neon_ins")]
)
1533
;; Dispatch to the endianness-appropriate move_hi_quad pattern, supplying
;; the parallel that selects the low half of the destination.
(define_expand "move_hi_quad_<mode>"
 [(match_operand:VQ 0 "register_operand" "")
  (match_operand:<VHALF> 1 "register_operand" "")]
 "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
		    operands[1], p));
  else
    emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
		    operands[1], p));
  DONE;
})
1548
1549;; Narrowing operations.
1550
;; For doubles.
;; Truncate each element of a quad vector to half width, producing a
;; 64-bit narrow vector (XTN).
(define_insn "aarch64_simd_vec_pack_trunc_<mode>"
 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
       (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
 "TARGET_SIMD"
 "xtn\\t%0.<Vntype>, %1.<Vtype>"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)
1559
;; Pack-truncate two 64-bit vectors: concatenate them into a quad
;; temporary (low/high swapped on big-endian so the architectural layout
;; matches the RTL element order), then narrow with XTN.
(define_expand "vec_pack_trunc_<mode>"
 [(match_operand:<VNARROWD> 0 "register_operand" "")
  (match_operand:VDN 1 "register_operand" "")
  (match_operand:VDN 2 "register_operand" "")]
 "TARGET_SIMD"
{
  rtx tempreg = gen_reg_rtx (<VDBL>mode);
  int lo = BYTES_BIG_ENDIAN ? 2 : 1;
  int hi = BYTES_BIG_ENDIAN ? 1 : 2;

  emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
  emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
  emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
  DONE;
})
1575
;; For quads.

;; Narrow two quad vectors into one: XTN fills the low half, XTN2 the
;; high half.  Operand order in the pair swaps for big-endian.  The
;; destination is earlyclobbered ("=&w") since it is written before both
;; inputs are fully consumed.
(define_insn "vec_pack_trunc_<mode>"
 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
       (vec_concat:<VNARROWQ2>
	 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
	 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
 "TARGET_SIMD"
 {
   if (BYTES_BIG_ENDIAN)
     return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
   else
     return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
 }
  [(set_attr "type" "multiple")
   (set_attr "length" "8")]
)
1593
1594;; Widening operations.
1595
;; Widen (sign- or zero-extend) the low / high half of a quad vector to a
;; full-width vector of double-size elements (SXTL/UXTL and SXTL2/UXTL2).
(define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			       (match_operand:VQW 1 "register_operand" "w")
			       (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
			    )))]
  "TARGET_SIMD"
  "<su>xtl\t%0.<Vwtype>, %1.<Vhalftype>"
  [(set_attr "type" "neon_shift_imm_long")]
)

(define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			       (match_operand:VQW 1 "register_operand" "w")
			       (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
			    )))]
  "TARGET_SIMD"
  "<su>xtl2\t%0.<Vwtype>, %1.<Vtype>"
  [(set_attr "type" "neon_shift_imm_long")]
)
1617
;; Standard vec_unpack expanders: build the hi/lo half-selection parallel
;; and hand off to the insns above.
(define_expand "vec_unpack<su>_hi_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
    emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
							  operands[1], p));
    DONE;
  }
)

(define_expand "vec_unpack<su>_lo_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
    emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
							  operands[1], p));
    DONE;
  }
)
1641
1642;; Widening arithmetic.
1643
;; Widening multiply-accumulate / multiply-subtract on the low or high
;; half of quad inputs (SMLAL/UMLAL, SMLAL2/UMLAL2, SMLSL/UMLSL,
;; SMLSL2/UMLSL2).  Both multiplicands select the same half (match_dup 3);
;; the accumulator (operand 1) is tied to the destination.
(define_insn "*aarch64_<su>mlal_lo<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (plus:<VWIDE>
          (mult:<VWIDE>
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 2 "register_operand" "w")
                 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 4 "register_operand" "w")
                 (match_dup 3))))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

(define_insn "*aarch64_<su>mlal_hi<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (plus:<VWIDE>
          (mult:<VWIDE>
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 2 "register_operand" "w")
                 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 4 "register_operand" "w")
                 (match_dup 3))))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

(define_insn "*aarch64_<su>mlsl_lo<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (mult:<VWIDE>
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 2 "register_operand" "w")
                 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 4 "register_operand" "w")
                 (match_dup 3))))))]
  "TARGET_SIMD"
  "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

(define_insn "*aarch64_<su>mlsl_hi<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (mult:<VWIDE>
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 2 "register_operand" "w")
                 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 4 "register_operand" "w")
                 (match_dup 3))))))]
  "TARGET_SIMD"
  "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)
1707
;; Widening multiply-accumulate / multiply-subtract on whole 64-bit
;; vectors (SMLAL/UMLAL, SMLSL/UMLSL); the accumulator is tied to the
;; destination.
(define_insn "*aarch64_<su>mlal<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (plus:<VWIDE>
          (mult:<VWIDE>
            (ANY_EXTEND:<VWIDE>
              (match_operand:VD_BHSI 1 "register_operand" "w"))
            (ANY_EXTEND:<VWIDE>
              (match_operand:VD_BHSI 2 "register_operand" "w")))
          (match_operand:<VWIDE> 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

(define_insn "*aarch64_<su>mlsl<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (mult:<VWIDE>
            (ANY_EXTEND:<VWIDE>
              (match_operand:VD_BHSI 2 "register_operand" "w"))
            (ANY_EXTEND:<VWIDE>
              (match_operand:VD_BHSI 3 "register_operand" "w")))))]
  "TARGET_SIMD"
  "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)
1735
;; Widening multiply of the low halves of two quad vectors
;; (SMULL/UMULL); both inputs select the same half via match_dup 3.
(define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
       (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			   (match_operand:VQW 1 "register_operand" "w")
                           (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
		     (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                           (match_operand:VQW 2 "register_operand" "w")
                           (match_dup 3)))))]
  "TARGET_SIMD"
  "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_mul_<Vetype>_long")]
)

;; Standard expander: build the lo-half parallel and emit the insn above.
(define_expand "vec_widen_<su>mult_lo_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
 "TARGET_SIMD"
 {
   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
   emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
						       operands[1],
						       operands[2], p));
   DONE;
 }
)
1762
1763(define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1764 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1765      (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1766			    (match_operand:VQW 1 "register_operand" "w")
1767			    (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1768		    (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1769			    (match_operand:VQW 2 "register_operand" "w")
1770			    (match_dup 3)))))]
1771  "TARGET_SIMD"
1772  "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1773  [(set_attr "type" "neon_mul_<Vetype>_long")]
1774)
1775
1776(define_expand "vec_widen_<su>mult_hi_<mode>"
1777  [(match_operand:<VWIDE> 0 "register_operand" "")
1778   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1779   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1780 "TARGET_SIMD"
1781 {
1782   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1783   emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1784						       operands[1],
1785						       operands[2], p));
1786   DONE;
1787
1788 }
1789)
1790
1791;; FP vector operations.
1792;; AArch64 AdvSIMD supports single-precision (32-bit) and
1793;; double-precision (64-bit) floating-point data types and arithmetic as
1794;; defined by the IEEE 754-2008 standard.  This makes them vectorizable
1795;; without the need for -ffast-math or -funsafe-math-optimizations.
1796;;
;; Floating-point operations can raise an exception.  Vectorizing such
;; operations is safe for the reasons explained below.
1799;;
1800;; ARMv8 permits an extension to enable trapped floating-point
1801;; exception handling, however this is an optional feature.  In the
1802;; event of a floating-point exception being raised by vectorised
1803;; code then:
1804;; 1.  If trapped floating-point exceptions are available, then a trap
1805;;     will be taken when any lane raises an enabled exception.  A trap
1806;;     handler may determine which lane raised the exception.
1807;; 2.  Alternatively a sticky exception flag is set in the
1808;;     floating-point status register (FPSR).  Software may explicitly
1809;;     test the exception flags, in which case the tests will either
1810;;     prevent vectorisation, allowing precise identification of the
1811;;     failing operation, or if tested outside of vectorisable regions
1812;;     then the specific operation and lane are not of interest.
1813
1814;; FP arithmetic operations.
1815
;; Vector floating-point addition (FADD).
(define_insn "add<mode>3"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		   (match_operand:VHSDF 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_addsub_<stype><q>")]
)

;; Vector floating-point subtraction (FSUB).
(define_insn "sub<mode>3"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		    (match_operand:VHSDF 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_addsub_<stype><q>")]
)

;; Vector floating-point multiplication (FMUL).
(define_insn "mul<mode>3"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		   (match_operand:VHSDF 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_mul_<stype><q>")]
)

;; Division: first try an approximate-division sequence via
;; aarch64_emit_approx_div; if that declines, fall through to the
;; *div<mode>3 FDIV insn below.
;; NOTE(review): operands[1] already satisfies register_operand here, so
;; the force_reg looks redundant — confirm before removing.
(define_expand "div<mode>3"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		  (match_operand:VHSDF 2 "register_operand" "w")))]
 "TARGET_SIMD"
{
  if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
    DONE;

  operands[1] = force_reg (<MODE>mode, operands[1]);
})

;; Vector floating-point division (FDIV).
(define_insn "*div<mode>3"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		 (match_operand:VHSDF 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_div_<stype><q>")]
)

;; Vector floating-point negation (FNEG).
(define_insn "neg<mode>2"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
 "TARGET_SIMD"
 "fneg\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_neg_<stype><q>")]
)

;; Vector floating-point absolute value (FABS).
(define_insn "abs<mode>2"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
 "TARGET_SIMD"
 "fabs\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_abs_<stype><q>")]
)

;; Fused multiply-add (FMLA): operand 0 = operand 1 * operand 2 + operand 3,
;; with the addend tied to the destination register.
(define_insn "fma<mode>4"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		  (match_operand:VHSDF 2 "register_operand" "w")
		  (match_operand:VHSDF 3 "register_operand" "0")))]
  "TARGET_SIMD"
 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_mla_<stype><q>")]
)
1889
;; FMLA with one multiplicand broadcast from a selected lane of operand 1.
;; aarch64_endian_lane_rtx translates GCC's lane numbering to the
;; architectural lane number for big-endian targets.
(define_insn "*aarch64_fma4_elt<mode>"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
    (fma:VDQF
      (vec_duplicate:VDQF
	(vec_select:<VEL>
	  (match_operand:VDQF 1 "register_operand" "<h_con>")
	  (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VDQF 3 "register_operand" "w")
      (match_operand:VDQF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)

;; As above, but the lane is taken from a vector of the opposite width
;; (e.g. a V2SF lane used with a V4SF operation).
(define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
    (fma:VDQSF
      (vec_duplicate:VDQSF
	(vec_select:<VEL>
	  (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
	  (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VDQSF 3 "register_operand" "w")
      (match_operand:VDQSF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)

;; FMLA with the multiplicand duplicated from a scalar register; maps to
;; the by-element form using lane 0.
(define_insn "*aarch64_fma4_elt_from_dup<mode>"
  [(set (match_operand:VMUL 0 "register_operand" "=w")
    (fma:VMUL
      (vec_duplicate:VMUL
	  (match_operand:<VEL> 1 "register_operand" "<h_con>"))
      (match_operand:VMUL 2 "register_operand" "w")
      (match_operand:VMUL 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
)

;; Scalar DF fma taking one multiplicand from a V2DF lane.
(define_insn "*aarch64_fma4_elt_to_64v2df"
  [(set (match_operand:DF 0 "register_operand" "=w")
    (fma:DF
	(vec_select:DF
	  (match_operand:V2DF 1 "register_operand" "w")
	  (parallel [(match_operand:SI 2 "immediate_operand")]))
      (match_operand:DF 3 "register_operand" "w")
      (match_operand:DF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
    return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
  }
  [(set_attr "type" "neon_fp_mla_d_scalar_q")]
)

;; Fused multiply-subtract (FMLS): operand 0 = operand 3 - operand 1 * operand 2,
;; expressed as fma with a negated multiplicand.
(define_insn "fnma<mode>4"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(fma:VHSDF
	  (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
	  (match_operand:VHSDF 2 "register_operand" "w")
	  (match_operand:VHSDF 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_mla_<stype><q>")]
)

;; Lane-indexed FMLS variants; mirror the FMLA lane patterns above with
;; one multiplicand negated.
(define_insn "*aarch64_fnma4_elt<mode>"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
    (fma:VDQF
      (neg:VDQF
        (match_operand:VDQF 3 "register_operand" "w"))
      (vec_duplicate:VDQF
	(vec_select:<VEL>
	  (match_operand:VDQF 1 "register_operand" "<h_con>")
	  (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VDQF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)

;; FMLS with the lane taken from a vector of the opposite width.
(define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
    (fma:VDQSF
      (neg:VDQSF
        (match_operand:VDQSF 3 "register_operand" "w"))
      (vec_duplicate:VDQSF
	(vec_select:<VEL>
	  (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
	  (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VDQSF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)

;; FMLS with the multiplicand duplicated from a scalar register (lane 0 form).
(define_insn "*aarch64_fnma4_elt_from_dup<mode>"
  [(set (match_operand:VMUL 0 "register_operand" "=w")
    (fma:VMUL
      (neg:VMUL
        (match_operand:VMUL 2 "register_operand" "w"))
      (vec_duplicate:VMUL
	(match_operand:<VEL> 1 "register_operand" "<h_con>"))
      (match_operand:VMUL 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
)

;; Scalar DF fmls taking one multiplicand from a V2DF lane.
(define_insn "*aarch64_fnma4_elt_to_64v2df"
  [(set (match_operand:DF 0 "register_operand" "=w")
    (fma:DF
      (vec_select:DF
	(match_operand:V2DF 1 "register_operand" "w")
	(parallel [(match_operand:SI 2 "immediate_operand")]))
      (neg:DF
        (match_operand:DF 3 "register_operand" "w"))
      (match_operand:DF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
    return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
  }
  [(set_attr "type" "neon_fp_mla_d_scalar_q")]
)
2028
2029;; Vector versions of the floating-point frint patterns.
2030;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
;; Round-to-integral in floating-point format; the FRINT iterator selects
;; the unspec and <frint_suffix> picks the matching FRINT* instruction.
(define_insn "<frint_pattern><mode>2"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
		       FRINT))]
  "TARGET_SIMD"
  "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_round_<stype><q>")]
)

;; Round-then-convert to signed/unsigned integer (FCVT<rounding><S|U>);
;; the inner unspec is the rounding step, FIXUORS the conversion.
(define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
  [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
	(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
			       [(match_operand:VHSDF 1 "register_operand" "w")]
			       FCVT)))]
  "TARGET_SIMD"
  "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_to_int_<stype><q>")]
)

;; Scalar HF -> HI round-and-convert; requires the scalar FP16 extension.
(define_insn "l<fcvt_pattern><su_optab>hfhi2"
  [(set (match_operand:HI 0 "register_operand" "=w")
	(FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
		      FCVT)))]
  "TARGET_SIMD_F16INST"
  "fcvt<frint_suffix><su>\t%h0, %h1"
  [(set_attr "type" "neon_fp_to_int_s")]
)

;; Scalar HF -> HI truncating conversion (FCVTZS/FCVTZU).
(define_insn "<optab>_trunchfhi2"
  [(set (match_operand:HI 0 "register_operand" "=w")
	(FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
  "TARGET_SIMD_F16INST"
  "fcvtz<su>\t%h0, %h1"
  [(set_attr "type" "neon_fp_to_int_s")]
)

;; Scalar HI -> HF conversion (SCVTF/UCVTF).
(define_insn "<optab>hihf2"
  [(set (match_operand:HF 0 "register_operand" "=w")
	(FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
  "TARGET_SIMD_F16INST"
  "<su_optab>cvtf\t%h0, %h1"
  [(set_attr "type" "neon_int_to_fp_s")]
)

;; Combine (x * 2^n) followed by truncating conversion into a single
;; fixed-point FCVTZ<su> with #fbits, when n is in the representable range.
(define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
  [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
	(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
			       [(mult:VDQF
	 (match_operand:VDQF 1 "register_operand" "w")
	 (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
			       UNSPEC_FRINTZ)))]
  "TARGET_SIMD
   && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
		GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
  {
    int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
    char buf[64];
    snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
    output_asm_insn (buf, operands);
    return "";
  }
  [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
)

;; Standard-name expanders for float->integer conversion; the pattern
;; already matches the l<fcvt_pattern> insn above, so no code is needed.
(define_expand "<optab><VHSDF:mode><fcvt_target>2"
  [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
	(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
			       [(match_operand:VHSDF 1 "register_operand")]
				UNSPEC_FRINTZ)))]
  "TARGET_SIMD"
  {})

(define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
  [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
	(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
			       [(match_operand:VHSDF 1 "register_operand")]
				UNSPEC_FRINTZ)))]
  "TARGET_SIMD"
  {})

;; Round toward zero, result kept in floating-point format (FRINTZ).
(define_expand "ftrunc<VHSDF:mode>2"
  [(set (match_operand:VHSDF 0 "register_operand")
	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
		       UNSPEC_FRINTZ))]
  "TARGET_SIMD"
  {})

;; Integer -> float conversion (SCVTF/UCVTF).
(define_insn "<optab><fcvt_target><VHSDF:mode>2"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(FLOATUORS:VHSDF
	  (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_int_to_fp_<stype><q>")]
)
2129
2130;; Conversions between vectors of floats and doubles.
2131;; Contains a mix of patterns to match standard pattern names
2132;; and those for intrinsics.
2133
2134;; Float widening operations.
2135
;; Widen the low half of a HF/SF Q-register vector to the next wider
;; float mode (FCVTL).  Operand 2 selects the low-half lanes.
(define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (float_extend:<VWIDE> (vec_select:<VHALF>
			       (match_operand:VQ_HSF 1 "register_operand" "w")
			       (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
			    )))]
  "TARGET_SIMD"
  "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
  [(set_attr "type" "neon_fp_cvt_widen_s")]
)

;; Convert between fixed-point and floating-point (vector modes)

;; Float -> fixed-point with #fbits given by immediate operand 2.
(define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
  [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
	(unspec:<VHSDF:FCVT_TARGET>
	  [(match_operand:VHSDF 1 "register_operand" "w")
	   (match_operand:SI 2 "immediate_operand" "i")]
	 FCVT_F2FIXED))]
  "TARGET_SIMD"
  "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
  [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
)

;; Fixed-point -> float with #fbits given by immediate operand 2.
(define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
  [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
	(unspec:<VDQ_HSDI:FCVT_TARGET>
	  [(match_operand:VDQ_HSDI 1 "register_operand" "w")
	   (match_operand:SI 2 "immediate_operand" "i")]
	 FCVT_FIXED2F))]
  "TARGET_SIMD"
  "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
  [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
)
2170
2171;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
2172;; is inconsistent with vector ordering elsewhere in the compiler, in that
2173;; the meaning of HI and LO changes depending on the target endianness.
2174;; While elsewhere we map the higher numbered elements of a vector to
2175;; the lower architectural lanes of the vector, for these patterns we want
2176;; to always treat "hi" as referring to the higher architectural lanes.
2177;; Consequently, while the patterns below look inconsistent with our
2178;; other big-endian patterns their behavior is as required.
2179
;; Standard-name expander: widen the low architectural half (third
;; argument false => low half; see the endianness note above).
(define_expand "vec_unpacks_lo_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (match_operand:VQ_HSF 1 "register_operand" "")]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
    emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
						       operands[1], p));
    DONE;
  }
)

;; Widen the high half of a HF/SF Q-register vector (FCVTL2); operand 2
;; must be a hi-half lane-selection parallel.
(define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (float_extend:<VWIDE> (vec_select:<VHALF>
			       (match_operand:VQ_HSF 1 "register_operand" "w")
			       (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
			    )))]
  "TARGET_SIMD"
  "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_cvt_widen_s")]
)
2202
;; Standard-name expander: widen the high architectural half (third
;; argument true => high half; see the endianness note above).
;; Fixed: this must emit the _hi_ insn — the previous code emitted
;; gen_aarch64_simd_vec_unpacks_lo_<mode> with a hi-half parallel, which
;; cannot match the _lo_ insn's vect_par_cnst_lo_half predicate and
;; would produce an unrecognizable insn.
(define_expand "vec_unpacks_hi_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (match_operand:VQ_HSF 1 "register_operand" "")]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
    emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
						       operands[1], p));
    DONE;
  }
)

;; Widen a 64-bit float vector to the next wider mode (FCVTL).
(define_insn "aarch64_float_extend_lo_<Vwide>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(float_extend:<VWIDE>
	  (match_operand:VDF 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
  [(set_attr "type" "neon_fp_cvt_widen_s")]
)

;; Float narrowing operations.

;; Narrow a Q-register float vector into a D-register result (FCVTN).
(define_insn "aarch64_float_truncate_lo_<mode>"
  [(set (match_operand:VDF 0 "register_operand" "=w")
      (float_truncate:VDF
	(match_operand:<VWIDE> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)

;; FCVTN2: narrow operand 2 into the high half of the destination while
;; keeping operand 1 in the low half.  Little-endian vec_concat order.
(define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w")
    (vec_concat:<VDBL>
      (match_operand:VDF 1 "register_operand" "0")
      (float_truncate:VDF
	(match_operand:<VWIDE> 2 "register_operand" "w"))))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)

;; Big-endian twin of the above: the vec_concat halves are swapped so the
;; RTL describes the same architectural operation.
(define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w")
    (vec_concat:<VDBL>
      (float_truncate:VDF
	(match_operand:<VWIDE> 2 "register_operand" "w"))
      (match_operand:VDF 1 "register_operand" "0")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)

;; Dispatch to the _le/_be variant according to target endianness.
(define_expand "aarch64_float_truncate_hi_<Vdbl>"
  [(match_operand:<VDBL> 0 "register_operand" "=w")
   (match_operand:VDF 1 "register_operand" "0")
   (match_operand:<VWIDE> 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
			     ? gen_aarch64_float_truncate_hi_<Vdbl>_be
			     : gen_aarch64_float_truncate_hi_<Vdbl>_le;
  emit_insn (gen (operands[0], operands[1], operands[2]));
  DONE;
}
)

;; Pack two V2DF vectors into one V4SF: FCVTN into the low half, then
;; FCVTN2 into the high half.  The lo/hi operand choice is swapped for
;; big-endian to preserve GCC's element ordering.
(define_expand "vec_pack_trunc_v2df"
  [(set (match_operand:V4SF 0 "register_operand")
      (vec_concat:V4SF
	(float_truncate:V2SF
	    (match_operand:V2DF 1 "register_operand"))
	(float_truncate:V2SF
	    (match_operand:V2DF 2 "register_operand"))
	  ))]
  "TARGET_SIMD"
  {
    rtx tmp = gen_reg_rtx (V2SFmode);
    int lo = BYTES_BIG_ENDIAN ? 2 : 1;
    int hi = BYTES_BIG_ENDIAN ? 1 : 2;

    emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
    emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
						   tmp, operands[hi]));
    DONE;
  }
)

;; Pack two scalar DFs into a V2SF: assemble a V2DF from the two inputs
;; with move_lo/hi_quad, then narrow it with FCVTN.
(define_expand "vec_pack_trunc_df"
  [(set (match_operand:V2SF 0 "register_operand")
      (vec_concat:V2SF
	(float_truncate:SF
	    (match_operand:DF 1 "register_operand"))
	(float_truncate:SF
	    (match_operand:DF 2 "register_operand"))
	  ))]
  "TARGET_SIMD"
  {
    rtx tmp = gen_reg_rtx (V2SFmode);
    int lo = BYTES_BIG_ENDIAN ? 2 : 1;
    int hi = BYTES_BIG_ENDIAN ? 1 : 2;

    emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
    emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
    emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
    DONE;
  }
)
2311
2312;; FP Max/Min
2313;; Max/Min are introduced by idiom recognition by GCC's mid-end.  An
2314;; expression like:
2315;;      a = (b < c) ? b : c;
2316;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only and
2317;; -fno-signed-zeros are enabled either explicitly or indirectly via
2318;; -ffast-math.
2319;;
2320;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2321;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2322;; operand will be returned when both operands are zero (i.e. they may not
2323;; honour signed zeroes), or when either operand is NaN.  Therefore GCC
2324;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
2325;; NaNs.
2326
;; smax/smin standard names map to FMAXNM/FMINNM (only generated in
;; fast-math-ish modes, per the comment above).
(define_insn "<su><maxmin><mode>3"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		       (match_operand:VHSDF 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_minmax_<stype><q>")]
)

;; Vector forms for fmax, fmin, fmaxnm, fminnm.
;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
;; which implement the IEEE fmax ()/fmin () functions.
(define_insn "<maxmin_uns><mode>3"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
		      (match_operand:VHSDF 2 "register_operand" "w")]
		      FMAXMIN_UNS))]
  "TARGET_SIMD"
  "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_minmax_<stype><q>")]
)
2348
2349;; 'across lanes' add.
2350
;; Integer sum-across-lanes: reduce into a scratch vector, then extract
;; the (endian-corrected) element 0 as the scalar result.
(define_expand "reduc_plus_scal_<mode>"
  [(match_operand:<VEL> 0 "register_operand" "=w")
   (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
	       UNSPEC_ADDV)]
  "TARGET_SIMD"
  {
    rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
    emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
    DONE;
  }
)

;; Pairwise floating-point add of two vectors (FADDP).
(define_insn "aarch64_faddp<mode>"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
		      (match_operand:VHSDF 2 "register_operand" "w")]
	UNSPEC_FADDV))]
 "TARGET_SIMD"
 "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
)

;; Across-lanes integer add (ADDV/ADDP per <vp>), scalar result.
(define_insn "aarch64_reduc_plus_internal<mode>"
 [(set (match_operand:VDQV 0 "register_operand" "=w")
       (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
		    UNSPEC_ADDV))]
 "TARGET_SIMD"
 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_reduc_add<q>")]
)

;; V2SI has no across-lanes ADDV; a pairwise ADDP of the vector with
;; itself produces the sum in each element.
(define_insn "aarch64_reduc_plus_internalv2si"
 [(set (match_operand:V2SI 0 "register_operand" "=w")
       (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
		    UNSPEC_ADDV))]
 "TARGET_SIMD"
 "addp\\t%0.2s, %1.2s, %1.2s"
  [(set_attr "type" "neon_reduc_add")]
)

;; Two-element float vectors: scalar FADDP sums the pair directly.
(define_insn "reduc_plus_scal_<mode>"
 [(set (match_operand:<VEL> 0 "register_operand" "=w")
       (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
		   UNSPEC_FADDV))]
 "TARGET_SIMD"
 "faddp\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
)

;; V4SF sum-across-lanes: two pairwise FADDPs reduce four elements, then
;; extract the (endian-corrected) element 0.
(define_expand "reduc_plus_scal_v4sf"
 [(set (match_operand:SF 0 "register_operand")
       (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
		    UNSPEC_FADDV))]
 "TARGET_SIMD"
{
  rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
  rtx scratch = gen_reg_rtx (V4SFmode);
  emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
  emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
  emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
  DONE;
})
2415
;; Count leading sign bits, per element (CLS).
(define_insn "clrsb<mode>2"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "cls\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_cls<q>")]
)

;; Count leading zeros, per element (CLZ).
(define_insn "clz<mode>2"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
 "TARGET_SIMD"
 "clz\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_cls<q>")]
)

;; Population count (CNT) — byte elements only, hence the VB iterator.
(define_insn "popcount<mode>2"
  [(set (match_operand:VB 0 "register_operand" "=w")
        (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_cnt<q>")]
)
2439
2440;; 'across lanes' max and min ops.
2441
2442;; Template for outputting a scalar, so we can create __builtins which can be
2443;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function.  (This is FP smax/smin).
;; FP max/min across lanes: reduce into a scratch vector, then extract
;; the (endian-corrected) element 0 as the scalar result.
(define_expand "reduc_<maxmin_uns>_scal_<mode>"
  [(match_operand:<VEL> 0 "register_operand")
   (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
		  FMAXMINV)]
  "TARGET_SIMD"
  {
    rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
							      operands[1]));
    emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
    DONE;
  }
)

;; Likewise for integer cases, signed and unsigned.
(define_expand "reduc_<maxmin_uns>_scal_<mode>"
  [(match_operand:<VEL> 0 "register_operand")
   (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
		    MAXMINV)]
  "TARGET_SIMD"
  {
    rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
							      operands[1]));
    emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
    DONE;
  }
)

;; Across-lanes integer max/min (SMAXV/SMINV/UMAXV/UMINV).
(define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
 [(set (match_operand:VDQV_S 0 "register_operand" "=w")
       (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
		    MAXMINV))]
 "TARGET_SIMD"
 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_reduc_minmax<q>")]
)

;; V2SI has no across-lanes form; use the pairwise op with both inputs
;; the same vector.
(define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
 [(set (match_operand:V2SI 0 "register_operand" "=w")
       (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
		    MAXMINV))]
 "TARGET_SIMD"
 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
  [(set_attr "type" "neon_reduc_minmax")]
)

;; Across-lanes FP max/min (<vp> selects the v or p form as needed).
(define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
		      FMAXMINV))]
 "TARGET_SIMD"
 "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
)
2501
2502;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2503;; allocation.
2504;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2505;; to select.
2506;;
2507;; Thus our BSL is of the form:
2508;;   op0 = bsl (mask, op2, op3)
2509;; We can use any of:
2510;;
;;   if (op0 = mask)
;;     bsl mask, op2, op3
;;   if (op0 = op3) (so 1-bits in mask choose bits from op2, else op0)
;;     bit op0, op2, mask
;;   if (op0 = op2) (so 0-bits in mask choose bits from op3, else op0)
;;     bif op0, op3, mask
2517;;
2518;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
2519;; Some forms of straight-line code may generate the equivalent form
2520;; in *aarch64_simd_bsl<mode>_alt.
2521
;; BSL expressed canonically as op0 = ((op2 ^ op3) & mask) ^ op3.
;; The three alternatives differ only in which input is tied to the
;; destination: mask => BSL, op3 => BIT, op2 => BIF.
(define_insn "aarch64_simd_bsl<mode>_internal"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
	(xor:VDQ_I
	   (and:VDQ_I
	     (xor:VDQ_I
	       (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
	       (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
	     (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
	  (match_dup:<V_INT_EQUIV> 3)
	))]
  "TARGET_SIMD"
  "@
  bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
  bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
  bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_bsl<q>")]
)

;; We need this form in addition to the above pattern to match the case
;; when combine tries merging three insns such that the second operand of
;; the outer XOR matches the second operand of the inner XOR rather than
;; the first.  The two are equivalent but since recog doesn't try all
;; permutations of commutative operations, we have to have a separate pattern.

;; Commuted form: op0 = ((op3 ^ op2) & mask) ^ op2; operand roles in the
;; assembler templates are swapped accordingly.
(define_insn "*aarch64_simd_bsl<mode>_alt"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
	(xor:VDQ_I
	   (and:VDQ_I
	     (xor:VDQ_I
	       (match_operand:VDQ_I 3 "register_operand" "w,w,0")
	       (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
	      (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
	  (match_dup:<V_INT_EQUIV> 2)))]
  "TARGET_SIMD"
  "@
  bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
  bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
  bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_bsl<q>")]
)
2562
2563;; DImode is special, we want to avoid computing operations which are
2564;; more naturally computed in general purpose registers in the vector
2565;; registers.  If we do that, we need to move all three operands from general
2566;; purpose registers to vector registers, then back again.  However, we
2567;; don't want to make this pattern an UNSPEC as we'd lose scope for
2568;; optimizations based on the component operations of a BSL.
2569;;
2570;; That means we need a splitter back to the individual operations, if they
2571;; would be better calculated on the integer side.
2572
;; DImode BSL.  Alternatives 0-2 are the single-instruction SIMD forms
;; (BSL/BIT/BIF on the 8b views); alternative 3 keeps everything in general
;; purpose registers and is split after reload back into EOR/AND/EOR, using
;; the early-clobbered destination as the scratch.
2573(define_insn_and_split "aarch64_simd_bsldi_internal"
2574  [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2575	(xor:DI
2576	   (and:DI
2577	     (xor:DI
2578	       (match_operand:DI 3 "register_operand" "w,0,w,r")
2579	       (match_operand:DI 2 "register_operand" "w,w,0,r"))
2580	     (match_operand:DI 1 "register_operand" "0,w,w,r"))
2581	  (match_dup:DI 3)
2582	))]
2583  "TARGET_SIMD"
2584  "@
2585  bsl\\t%0.8b, %2.8b, %3.8b
2586  bit\\t%0.8b, %2.8b, %1.8b
2587  bif\\t%0.8b, %3.8b, %1.8b
2588  #"
2589  "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2590  [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
2591{
2592  /* Split back to individual operations.  If we're before reload, and
2593     able to create a temporary register, do so.  If we're after reload,
2594     we've got an early-clobber destination register, so use that.
2595     Otherwise, we can't create pseudos and we can't yet guarantee that
2596     operands[0] is safe to write, so FAIL to split.  */
2597
2598  rtx scratch;
2599  if (reload_completed)
2600    scratch = operands[0];
2601  else if (can_create_pseudo_p ())
2602    scratch = gen_reg_rtx (DImode);
2603  else
2604    FAIL;
2605
2606  /* op0 = ((op2 ^ op3) & op1) ^ op3, computed on the integer side.  */
2607  emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2608  emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2609  emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
2610  DONE;
2611}
2612  [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2613   (set_attr "length" "4,4,4,12")]
2614)
2614
;; DImode BSL, commuted form: the outer XOR is against operand 2 rather than
;; operand 3, mirroring *aarch64_simd_bsl<mode>_alt.  Alternative 3 is the
;; all-GP-register form, split after reload into EOR/AND/EOR.
2615(define_insn_and_split "aarch64_simd_bsldi_alt"
2616  [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2617	(xor:DI
2618	   (and:DI
2619	     (xor:DI
2620	       (match_operand:DI 3 "register_operand" "w,w,0,r")
2621	       (match_operand:DI 2 "register_operand" "w,0,w,r"))
2622	     (match_operand:DI 1 "register_operand" "0,w,w,r"))
2623	  (match_dup:DI 2)
2624	))]
2625  "TARGET_SIMD"
2626  "@
2627  bsl\\t%0.8b, %3.8b, %2.8b
2628  bit\\t%0.8b, %3.8b, %1.8b
2629  bif\\t%0.8b, %2.8b, %1.8b
2630  #"
2631  "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2632  [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
2633{
2634  /* Split back to individual operations.  If we're before reload, and
2635     able to create a temporary register, do so.  If we're after reload,
2636     we've got an early-clobber destination register, so use that.
2637     Otherwise, we can't create pseudos and we can't yet guarantee that
2638     operands[0] is safe to write, so FAIL to split.  */
2639
2640  rtx scratch;
2641  if (reload_completed)
2642    scratch = operands[0];
2643  else if (can_create_pseudo_p ())
2644    scratch = gen_reg_rtx (DImode);
2645  else
2646    FAIL;
2647
2648  /* op0 = ((op2 ^ op3) & op1) ^ op2, computed on the integer side.  */
2649  emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2650  emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2651  emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
2652  DONE;
2653}
2654  [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2655   (set_attr "length" "4,4,4,12")]
2656)
2656
;; Public entry point for a bitwise select in any mode (including FP and DI).
;; The internal pattern only exists for integer modes, so FP operands are
;; rewritten as lowpart subregs of the equivalent integer mode and the
;; result copied back through a temporary of that mode.
2657(define_expand "aarch64_simd_bsl<mode>"
2658  [(match_operand:VALLDIF 0 "register_operand")
2659   (match_operand:<V_INT_EQUIV> 1 "register_operand")
2660   (match_operand:VALLDIF 2 "register_operand")
2661   (match_operand:VALLDIF 3 "register_operand")]
2662 "TARGET_SIMD"
2663{
2664  /* We can't alias operands together if they have different modes.  */
2665  rtx tmp = operands[0];
2666  if (FLOAT_MODE_P (<MODE>mode))
2667    {
2668      operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
2669      operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
2670      tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2671    }
2672  operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
2673  emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
2674							 operands[1],
2675							 operands[2],
2676							 operands[3]));
2677  /* For FP modes, tmp is a fresh integer-mode pseudo; copy it back.  */
2678  if (tmp != operands[0])
2679    emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
2679
2680  DONE;
2681})
2682
;; Select between operands 1 and 2 under the control of mask operand 3:
;; op0 = op1 where the mask is set, op2 elsewhere.  The all-ones/all-zeros
;; constant cases reduce to a move or a NOT of the mask; everything else
;; goes through the BSL expander.
2683(define_expand "vcond_mask_<mode><v_int_equiv>"
2684  [(match_operand:VALLDI 0 "register_operand")
2685   (match_operand:VALLDI 1 "nonmemory_operand")
2686   (match_operand:VALLDI 2 "nonmemory_operand")
2687   (match_operand:<V_INT_EQUIV> 3 "register_operand")]
2688  "TARGET_SIMD"
2689{
2690  /* If we have (a = (P) ? -1 : 0);
2691     Then we can simply move the generated mask (result must be int).  */
2692  if (operands[1] == CONSTM1_RTX (<MODE>mode)
2693      && operands[2] == CONST0_RTX (<MODE>mode))
2694    emit_move_insn (operands[0], operands[3]);
2695  /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask.  */
2696  else if (operands[1] == CONST0_RTX (<MODE>mode)
2697	   && operands[2] == CONSTM1_RTX (<MODE>mode))
2698    emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
2699  else
2700    {
2701      if (!REG_P (operands[1]))
2702	operands[1] = force_reg (<MODE>mode, operands[1]);
2703      if (!REG_P (operands[2]))
2704	operands[2] = force_reg (<MODE>mode, operands[2]);
2705      emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
2706					     operands[1], operands[2]));
2707    }
2708
2709  DONE;
2710})
2711
2712;; Patterns comparing two vectors to produce a mask.
2713
;; Integer vector comparison producing an all-ones/all-zeros mask per lane.
;; Signed comparisons against zero can use the CM<cc>-with-zero forms, so a
;; zero operand 3 is kept; any other non-register operand is forced into a
;; register.  LTU/LEU have no instruction of their own and are emitted as
;; the swapped-operand GTU/GEU; NE is emitted as EQ followed by NOT.
2714(define_expand "vec_cmp<mode><mode>"
2715  [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2716	  (match_operator 1 "comparison_operator"
2717	    [(match_operand:VSDQ_I_DI 2 "register_operand")
2718	     (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2719  "TARGET_SIMD"
2720{
2721  rtx mask = operands[0];
2722  enum rtx_code code = GET_CODE (operands[1]);
2723
2724  /* Legitimize operand 3: only a literal zero may stay an immediate.  */
2725  switch (code)
2726    {
2726    case NE:
2727    case LE:
2728    case LT:
2729    case GE:
2730    case GT:
2731    case EQ:
2732      if (operands[3] == CONST0_RTX (<MODE>mode))
2733	break;
2734
2735      /* Fall through.  */
2736    default:
2737      if (!REG_P (operands[3]))
2738	operands[3] = force_reg (<MODE>mode, operands[3]);
2739
2740      break;
2741    }
2742
2743  switch (code)
2744    {
2745    case LT:
2746      emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
2747      break;
2748
2749    case GE:
2750      emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
2751      break;
2752
2753    case LE:
2754      emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
2755      break;
2756
2757    case GT:
2758      emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
2759      break;
2760
2761    case LTU:
2762      /* a LTU b  ==>  b GTU a.  */
2762      emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
2763      break;
2764
2765    case GEU:
2766      emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
2767      break;
2768
2769    case LEU:
2770      /* a LEU b  ==>  b GEU a.  */
2770      emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
2771      break;
2772
2773    case GTU:
2774      emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
2775      break;
2776
2777    case NE:
2778      /* Handle NE as !EQ.  */
2779      emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2780      emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
2781      break;
2782
2783    case EQ:
2784      emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2785      break;
2786
2787    default:
2788      gcc_unreachable ();
2789    }
2790
2791  DONE;
2792})
2793
;; Floating-point vector comparison producing an integer lane mask.
;; Only GE/GT/EQ (and their zero forms) map directly to FCM instructions;
;; LT/LE are handled by swapping operands, NE as !EQ, and the unordered
;; codes (UN*, ORDERED, UNORDERED, LTGT, UNEQ) are synthesized from
;; combinations of FCMEQ-with-self NaN tests and logic ops.
2794(define_expand "vec_cmp<mode><v_int_equiv>"
2795  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
2796	(match_operator 1 "comparison_operator"
2797	    [(match_operand:VDQF 2 "register_operand")
2798	     (match_operand:VDQF 3 "nonmemory_operand")]))]
2799  "TARGET_SIMD"
2800{
2801  int use_zero_form = 0;
2802  enum rtx_code code = GET_CODE (operands[1]);
2803  rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2804
2805  rtx (*comparison) (rtx, rtx, rtx) = NULL;
2806
2807  /* Legitimize operand 3: a literal zero selects the CM<cc>-with-zero
2808     instruction forms; anything else must live in a register.  */
2807  switch (code)
2808    {
2809    case LE:
2810    case LT:
2811    case GE:
2812    case GT:
2813    case EQ:
2814      if (operands[3] == CONST0_RTX (<MODE>mode))
2815	{
2816	  use_zero_form = 1;
2817	  break;
2818	}
2819      /* Fall through.  */
2820    default:
2821      if (!REG_P (operands[3]))
2822	operands[3] = force_reg (<MODE>mode, operands[3]);
2823
2824      break;
2825    }
2826
2827  /* Pick the generator function; LT/LE (and UNLT/UNLE) become GT/GE with
2828     swapped operands unless the zero form applies.  */
2827  switch (code)
2828    {
2829    case LT:
2830      if (use_zero_form)
2831	{
2832	  comparison = gen_aarch64_cmlt<mode>;
2833	  break;
2834	}
2835      /* Fall through.  */
2836    case UNLT:
2837      std::swap (operands[2], operands[3]);
2838      /* Fall through.  */
2839    case UNGT:
2840    case GT:
2841      comparison = gen_aarch64_cmgt<mode>;
2842      break;
2843    case LE:
2844      if (use_zero_form)
2845	{
2846	  comparison = gen_aarch64_cmle<mode>;
2847	  break;
2848	}
2849      /* Fall through.  */
2850    case UNLE:
2851      std::swap (operands[2], operands[3]);
2852      /* Fall through.  */
2853    case UNGE:
2854    case GE:
2855      comparison = gen_aarch64_cmge<mode>;
2856      break;
2857    case NE:
2858    case EQ:
2859      comparison = gen_aarch64_cmeq<mode>;
2860      break;
2861    case UNEQ:
2862    case ORDERED:
2863    case UNORDERED:
2864    case LTGT:
2865      break;
2866    default:
2867      gcc_unreachable ();
2868    }
2869
2870  switch (code)
2871    {
2872    case UNGE:
2873    case UNGT:
2874    case UNLE:
2875    case UNLT:
2876      {
2877	/* All of the above must not raise any FP exceptions.  Thus we first
2878	   check each operand for NaNs and force any elements containing NaN to
2879	   zero before using them in the compare.
2880	   Example: UN<cc> (a, b) -> UNORDERED (a, b) |
2881				     (cm<cc> (isnan (a) ? 0.0 : a,
2882					      isnan (b) ? 0.0 : b))
2883	   We use the following transformations for doing the comparisons:
2884	   a UNGE b -> a GE b
2885	   a UNGT b -> a GT b
2886	   a UNLE b -> b GE a
2887	   a UNLT b -> b GT a.  */
2888
2889	rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
2890	rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
2891	rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
2892	emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2]));
2893	emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3]));
2894	emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1));
2895	emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0,
2896					  lowpart_subreg (<V_INT_EQUIV>mode,
2897							  operands[2],
2898							  <MODE>mode)));
2899	emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1,
2900					  lowpart_subreg (<V_INT_EQUIV>mode,
2901							  operands[3],
2902							  <MODE>mode)));
2903	gcc_assert (comparison != NULL);
2904	emit_insn (comparison (operands[0],
2905			       lowpart_subreg (<MODE>mode,
2906					       tmp0, <V_INT_EQUIV>mode),
2907			       lowpart_subreg (<MODE>mode,
2908					       tmp1, <V_INT_EQUIV>mode)));
2909	emit_insn (gen_orn<v_int_equiv>3 (operands[0], tmp2, operands[0]));
2910      }
2911      break;
2912
2913    case LT:
2914    case LE:
2915    case GT:
2916    case GE:
2917    case EQ:
2918    case NE:
2919      /* The easy case.  Here we emit one of FCMGE, FCMGT or FCMEQ.
2920	 As a LT b <=> b GE a && a LE b <=> b GT a.  Our transformations are:
2921	 a GE b -> a GE b
2922	 a GT b -> a GT b
2923	 a LE b -> b GE a
2924	 a LT b -> b GT a
2925	 a EQ b -> a EQ b
2926	 a NE b -> ~(a EQ b)  */
2927      gcc_assert (comparison != NULL);
2928      emit_insn (comparison (operands[0], operands[2], operands[3]));
2929      if (code == NE)
2930	emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2931      break;
2932
2933    case LTGT:
2934      /* LTGT is not guaranteed not to generate a FP exception.  So let's
2935	 go the faster way : ((a > b) || (b > a)).  */
2936      emit_insn (gen_aarch64_cmgt<mode> (operands[0],
2937					 operands[2], operands[3]));
2938      emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
2939      emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2940      break;
2941
2942    case ORDERED:
2943    case UNORDERED:
2944    case UNEQ:
2945      /* cmeq (a, a) & cmeq (b, b).  */
2946      emit_insn (gen_aarch64_cmeq<mode> (operands[0],
2947					 operands[2], operands[2]));
2948      emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3]));
2949      emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp));
2950
2951      if (code == UNORDERED)
2952	emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2953      else if (code == UNEQ)
2954	{
2955	  emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3]));
2956	  emit_insn (gen_orn<v_int_equiv>3 (operands[0], operands[0], tmp));
2957	}
2958      break;
2959
2960    default:
2961      gcc_unreachable ();
2962    }
2963
2964  DONE;
2965})
2966
;; Unsigned integer vector comparison.  The signed-named expander already
;; dispatches on the actual rtx code in operand 1 (GTU/GEU/LTU/LEU included),
;; so this simply forwards to vec_cmp<mode><mode>.
2967(define_expand "vec_cmpu<mode><mode>"
2968  [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2969	  (match_operator 1 "comparison_operator"
2970	    [(match_operand:VSDQ_I_DI 2 "register_operand")
2971	     (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2972  "TARGET_SIMD"
2973{
2974  emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
2975				      operands[2], operands[3]));
2976  DONE;
2977})
2978
;; vcond: op0 = (op4 <op3> op5) ? op1 : op2, expanded as a vec_cmp to build
;; the lane mask followed by a vcond_mask select.
2979(define_expand "vcond<mode><mode>"
2980  [(set (match_operand:VALLDI 0 "register_operand")
2981	(if_then_else:VALLDI
2982	  (match_operator 3 "comparison_operator"
2983	    [(match_operand:VALLDI 4 "register_operand")
2984	     (match_operand:VALLDI 5 "nonmemory_operand")])
2985	  (match_operand:VALLDI 1 "nonmemory_operand")
2986	  (match_operand:VALLDI 2 "nonmemory_operand")))]
2987  "TARGET_SIMD"
2988{
2989  rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2990  enum rtx_code code = GET_CODE (operands[3]);
2991
2992  /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2993     it as well as switch operands 1/2 in order to avoid the additional
2994     NOT instruction.  */
2995  if (code == NE)
2996    {
2997      operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2998				    operands[4], operands[5]);
2999      std::swap (operands[1], operands[2]);
3000    }
3001  emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
3002					     operands[4], operands[5]));
3003  emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3004						 operands[2], mask));
3005
3006  DONE;
3007})
3008
;; vcond with mixed modes: the comparison runs in an FP mode while the
;; selected data are in the equal-sized integer mode <V_cmp_mixed>.
;; Same mask-then-select expansion as vcond<mode><mode>.
3009(define_expand "vcond<v_cmp_mixed><mode>"
3010  [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
3011	(if_then_else:<V_cmp_mixed>
3012	  (match_operator 3 "comparison_operator"
3013	    [(match_operand:VDQF_COND 4 "register_operand")
3014	     (match_operand:VDQF_COND 5 "nonmemory_operand")])
3015	  (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
3016	  (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
3017  "TARGET_SIMD"
3018{
3019  rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3020  enum rtx_code code = GET_CODE (operands[3]);
3021
3022  /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
3023     it as well as switch operands 1/2 in order to avoid the additional
3024     NOT instruction.  */
3025  if (code == NE)
3026    {
3027      operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3028				    operands[4], operands[5]);
3029      std::swap (operands[1], operands[2]);
3030    }
3031  emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
3032					     operands[4], operands[5]));
3033  emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
3034						operands[0], operands[1],
3035						operands[2], mask));
3036
3037  DONE;
3038})
3039
;; Unsigned vcond on integer modes: same mask-then-select expansion; the
;; integer vec_cmp expander handles the unsigned comparison codes directly.
3040(define_expand "vcondu<mode><mode>"
3041  [(set (match_operand:VSDQ_I_DI 0 "register_operand")
3042	(if_then_else:VSDQ_I_DI
3043	  (match_operator 3 "comparison_operator"
3044	    [(match_operand:VSDQ_I_DI 4 "register_operand")
3045	     (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
3046	  (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
3047	  (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
3048  "TARGET_SIMD"
3049{
3050  rtx mask = gen_reg_rtx (<MODE>mode);
3051  enum rtx_code code = GET_CODE (operands[3]);
3052
3053  /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
3054     it as well as switch operands 1/2 in order to avoid the additional
3055     NOT instruction.  */
3056  if (code == NE)
3057    {
3058      operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3059				    operands[4], operands[5]);
3060      std::swap (operands[1], operands[2]);
3061    }
3062  emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
3063				      operands[4], operands[5]));
3064  emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3065						 operands[2], mask));
3066  DONE;
3067})
3068
;; Unsigned vcond with mixed modes: compare in the integer mode
;; <V_cmp_mixed> and select FP data in <MODE>.
3069(define_expand "vcondu<mode><v_cmp_mixed>"
3070  [(set (match_operand:VDQF 0 "register_operand")
3071	(if_then_else:VDQF
3072	  (match_operator 3 "comparison_operator"
3073	    [(match_operand:<V_cmp_mixed> 4 "register_operand")
3074	     (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
3075	  (match_operand:VDQF 1 "nonmemory_operand")
3076	  (match_operand:VDQF 2 "nonmemory_operand")))]
3077  "TARGET_SIMD"
3078{
3079  rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3080  enum rtx_code code = GET_CODE (operands[3]);
3081
3082  /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
3083     it as well as switch operands 1/2 in order to avoid the additional
3084     NOT instruction.  */
3085  if (code == NE)
3086    {
3087      operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3088				    operands[4], operands[5]);
3089      std::swap (operands[1], operands[2]);
3090    }
3091  emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
3092						  mask, operands[3],
3093						  operands[4], operands[5]));
3094  emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3095						 operands[2], mask));
3096  DONE;
3097})
3098
3099;; Patterns for AArch64 SIMD Intrinsics.
3100
3101;; Lane extraction with sign extension to general purpose register.
;; SMOV: extract one lane and sign-extend it into a W or X register.
;; The lane index is flipped for big-endian via aarch64_endian_lane_rtx.
3102(define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
3103  [(set (match_operand:GPI 0 "register_operand" "=r")
3104	(sign_extend:GPI
3105	  (vec_select:<VEL>
3106	    (match_operand:VDQQH 1 "register_operand" "w")
3107	    (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3108  "TARGET_SIMD"
3109  {
3110    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3111    return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
3112  }
3113  [(set_attr "type" "neon_to_gp<q>")]
3114)
3115
;; UMOV: extract one lane and zero-extend it.  Writing the W register
;; zeroes the upper bits, so %w0 is used even for a DImode destination.
3116(define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>"
3117  [(set (match_operand:GPI 0 "register_operand" "=r")
3118	(zero_extend:GPI
3119	  (vec_select:<VEL>
3120	    (match_operand:VDQQH 1 "register_operand" "w")
3121	    (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3122  "TARGET_SIMD"
3123  {
3124    operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
3125					   INTVAL (operands[2]));
3126    return "umov\\t%w0, %1.<Vetype>[%2]";
3127  }
3128  [(set_attr "type" "neon_to_gp<q>")]
3129)
3130
3131;; Lane extraction of a value, neither sign nor zero extension
3132;; is guaranteed so upper bits should be considered undefined.
3133;; RTL uses GCC vector extension indices throughout so flip only for assembly.
;; Extract a single lane.  Destination alternatives: general register
;; (UMOV), vector register (DUP to scalar), or a memory location (ST1 of
;; one lane).  Lane numbering is flipped for big-endian at output time.
3134(define_insn "aarch64_get_lane<mode>"
3135  [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
3136	(vec_select:<VEL>
3137	  (match_operand:VALL_F16 1 "register_operand" "w, w, w")
3138	  (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
3139  "TARGET_SIMD"
3140  {
3141    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3142    switch (which_alternative)
3143      {
3144	case 0:
3145	  return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
3146	case 1:
3147	  return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
3148	case 2:
3149	  return "st1\\t{%1.<Vetype>}[%2], %0";
3150	default:
3151	  gcc_unreachable ();
3152      }
3153  }
3154  [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
3155)
3156
;; Load two adjacent 64-bit values as the low and high halves of one
;; 128-bit register with a single LDR Q.  The condition checks that
;; operand 2's address is exactly operand 1's address plus 8 bytes, and
;; STRICT_ALIGNMENT is excluded because the Q-register load only needs
;; the alignment of the first half.
3157(define_insn "load_pair_lanes<mode>"
3158  [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3159	(vec_concat:<VDBL>
3160	   (match_operand:VDC 1 "memory_operand" "Utq")
3161	   (match_operand:VDC 2 "memory_operand" "m")))]
3162  "TARGET_SIMD && !STRICT_ALIGNMENT
3163   && rtx_equal_p (XEXP (operands[2], 0),
3164		   plus_constant (Pmode,
3165				  XEXP (operands[1], 0),
3166				  GET_MODE_SIZE (<MODE>mode)))"
3167  "ldr\\t%q0, %1"
3168  [(set_attr "type" "neon_load1_1reg_q")]
3169)
3170
;; Store a 128-bit vec_concat of two 64-bit values as an STP of either two
;; D registers or two X registers, depending on where the halves live.
3171(define_insn "store_pair_lanes<mode>"
3172  [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Umn, Umn")
3173	(vec_concat:<VDBL>
3174	   (match_operand:VDC 1 "register_operand" "w, r")
3175	   (match_operand:VDC 2 "register_operand" "w, r")))]
3176  "TARGET_SIMD"
3177  "@
3178   stp\\t%d1, %d2, %y0
3179   stp\\t%x1, %x2, %y0"
3180  [(set_attr "type" "neon_stp, store_16")]
3181)
3182
3183;; In this insn, operand 1 should be low, and operand 2 the high part of the
3184;; dest vector.
3185
;; Combine a 64-bit value with zero into a 128-bit register, little-endian:
;; the zero half is the high half, so a plain 64-bit move/fmov/load gives
;; the zero-extension for free (writes to a D register clear the top).
3186(define_insn "*aarch64_combinez<mode>"
3187  [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3188	(vec_concat:<VDBL>
3189	  (match_operand:VDC 1 "general_operand" "w,?r,m")
3190	  (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
3191  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3192  "@
3193   mov\\t%0.8b, %1.8b
3194   fmov\t%d0, %1
3195   ldr\\t%d0, %1"
3196  [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3197   (set_attr "arch" "simd,fp,simd")]
3198)
3199
;; Big-endian counterpart of *aarch64_combinez: the zero constant appears
;; as the first vec_concat element, but the emitted instructions are the
;; same 64-bit zero-extending moves/loads.
3200(define_insn "*aarch64_combinez_be<mode>"
3201  [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3202        (vec_concat:<VDBL>
3203	  (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
3204	  (match_operand:VDC 1 "general_operand" "w,?r,m")))]
3205  "TARGET_SIMD && BYTES_BIG_ENDIAN"
3206  "@
3207   mov\\t%0.8b, %1.8b
3208   fmov\t%d0, %1
3209   ldr\\t%d0, %1"
3210  [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3211   (set_attr "arch" "simd,fp,simd")]
3212)
3213
;; Combine two 64-bit vectors into one 128-bit vector; the endian-aware
;; splitting is done in aarch64_split_simd_combine.
3214(define_expand "aarch64_combine<mode>"
3215  [(match_operand:<VDBL> 0 "register_operand")
3216   (match_operand:VDC 1 "register_operand")
3217   (match_operand:VDC 2 "register_operand")]
3218  "TARGET_SIMD"
3219{
3220  aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
3221
3222  DONE;
3223}
3224)
3225
;; Build a 128-bit vector from two 64-bit halves by writing the low then
;; the high quadword of the destination separately.
3226(define_expand "@aarch64_simd_combine<mode>"
3227  [(match_operand:<VDBL> 0 "register_operand")
3228   (match_operand:VDC 1 "register_operand")
3229   (match_operand:VDC 2 "register_operand")]
3230  "TARGET_SIMD"
3231  {
3232    emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
3233    emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
3234    DONE;
3235  }
3236[(set_attr "type" "multiple")]
3237)
3238
3239;; <su><addsub>l<q>.
3240
;; Widening add/sub of the high halves of two quad vectors:
;; SADDL2/UADDL2/SSUBL2/USUBL2.  Operand 3 selects the high-half lanes.
3241(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
3242 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3243       (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3244			   (match_operand:VQW 1 "register_operand" "w")
3245			   (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3246		       (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3247			   (match_operand:VQW 2 "register_operand" "w")
3248			   (match_dup 3)))))]
3249  "TARGET_SIMD"
3250  "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3251  [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3252)
3253
;; Widening add/sub of the low halves of two quad vectors:
;; SADDL/UADDL/SSUBL/USUBL.  Operand 3 selects the low-half lanes.
3254(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
3255 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3256       (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3257                           (match_operand:VQW 1 "register_operand" "w")
3258                           (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3259                       (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3260                           (match_operand:VQW 2 "register_operand" "w")
3261                           (match_dup 3)))))]
3262  "TARGET_SIMD"
3263  "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
3264  [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3265)
3266
3267
;; SADDL2 intrinsic expander: build the high-half lane selector and forward
;; to the _hi_internal pattern.
3268(define_expand "aarch64_saddl2<mode>"
3269  [(match_operand:<VWIDE> 0 "register_operand" "=w")
3270   (match_operand:VQW 1 "register_operand" "w")
3271   (match_operand:VQW 2 "register_operand" "w")]
3272  "TARGET_SIMD"
3273{
3274  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3275  emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
3276                                                  operands[2], p));
3277  DONE;
3278})
3279
;; UADDL2 intrinsic expander: build the high-half lane selector and forward
;; to the _hi_internal pattern.
3280(define_expand "aarch64_uaddl2<mode>"
3281  [(match_operand:<VWIDE> 0 "register_operand" "=w")
3282   (match_operand:VQW 1 "register_operand" "w")
3283   (match_operand:VQW 2 "register_operand" "w")]
3284  "TARGET_SIMD"
3285{
3286  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3287  emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
3288                                                  operands[2], p));
3289  DONE;
3290})
3291
;; SSUBL2 intrinsic expander: build the high-half lane selector and forward
;; to the _hi_internal pattern.
3292(define_expand "aarch64_ssubl2<mode>"
3293  [(match_operand:<VWIDE> 0 "register_operand" "=w")
3294   (match_operand:VQW 1 "register_operand" "w")
3295   (match_operand:VQW 2 "register_operand" "w")]
3296  "TARGET_SIMD"
3297{
3298  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3299  emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
3300						operands[2], p));
3301  DONE;
3302})
3303
;; USUBL2 intrinsic expander: build the high-half lane selector and forward
;; to the _hi_internal pattern.
3304(define_expand "aarch64_usubl2<mode>"
3305  [(match_operand:<VWIDE> 0 "register_operand" "=w")
3306   (match_operand:VQW 1 "register_operand" "w")
3307   (match_operand:VQW 2 "register_operand" "w")]
3308  "TARGET_SIMD"
3309{
3310  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3311  emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
3312						operands[2], p));
3313  DONE;
3314})
3315
;; Widening add/sub of two whole 64-bit vectors (no half selection needed):
;; SADDL/UADDL/SSUBL/USUBL on D-register operands.
3316(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
3317 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3318       (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
3319			   (match_operand:VD_BHSI 1 "register_operand" "w"))
3320		       (ANY_EXTEND:<VWIDE>
3321			   (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3322  "TARGET_SIMD"
3323  "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3324  [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3325)
3326
3327;; <su><addsub>w<q>.
3328
;; Widening signed sum for a quad vector: accumulate the sign-extended low
;; half with SADDW, then the high half with SADDW2.
3329(define_expand "widen_ssum<mode>3"
3330  [(set (match_operand:<VDBLW> 0 "register_operand" "")
3331	(plus:<VDBLW> (sign_extend:<VDBLW>
3332		        (match_operand:VQW 1 "register_operand" ""))
3333		      (match_operand:<VDBLW> 2 "register_operand" "")))]
3334  "TARGET_SIMD"
3335  {
3336    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3337    rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3338
3339    emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
3340						operands[1], p));
3341    emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
3342    DONE;
3343  }
3344)
3345
;; Widening signed sum for a 64-bit vector: a single SADDW suffices.
3346(define_expand "widen_ssum<mode>3"
3347  [(set (match_operand:<VWIDE> 0 "register_operand" "")
3348	(plus:<VWIDE> (sign_extend:<VWIDE>
3349		        (match_operand:VD_BHSI 1 "register_operand" ""))
3350		      (match_operand:<VWIDE> 2 "register_operand" "")))]
3351  "TARGET_SIMD"
3352{
3353  emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
3354  DONE;
3355})
3356
;; Widening unsigned sum for a quad vector: accumulate the zero-extended
;; low half with UADDW, then the high half with UADDW2.
3357(define_expand "widen_usum<mode>3"
3358  [(set (match_operand:<VDBLW> 0 "register_operand" "")
3359	(plus:<VDBLW> (zero_extend:<VDBLW>
3360		        (match_operand:VQW 1 "register_operand" ""))
3361		      (match_operand:<VDBLW> 2 "register_operand" "")))]
3362  "TARGET_SIMD"
3363  {
3364    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3365    rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3366
3367    emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
3368						 operands[1], p));
3369    emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
3370    DONE;
3371  }
3372)
3373
;; Widening unsigned sum for a 64-bit vector: a single UADDW suffices.
3374(define_expand "widen_usum<mode>3"
3375  [(set (match_operand:<VWIDE> 0 "register_operand" "")
3376	(plus:<VWIDE> (zero_extend:<VWIDE>
3377		        (match_operand:VD_BHSI 1 "register_operand" ""))
3378		      (match_operand:<VWIDE> 2 "register_operand" "")))]
3379  "TARGET_SIMD"
3380{
3381  emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
3382  DONE;
3383})
3384
;; SSUBW/USUBW: subtract an extended 64-bit vector from a wide accumulator.
3385(define_insn "aarch64_<ANY_EXTEND:su>subw<mode>"
3386  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3387	(minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3388	  (ANY_EXTEND:<VWIDE>
3389	    (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3390  "TARGET_SIMD"
3391  "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3392  [(set_attr "type" "neon_sub_widen")]
3393)
3394
;; SSUBW/USUBW on the low half of a quad vector (operand 3 selects the
;; low-half lanes).
3395(define_insn "aarch64_<ANY_EXTEND:su>subw<mode>_internal"
3396  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3397	(minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3398	  (ANY_EXTEND:<VWIDE>
3399	    (vec_select:<VHALF>
3400	      (match_operand:VQW 2 "register_operand" "w")
3401	      (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
3402  "TARGET_SIMD"
3403  "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3404  [(set_attr "type" "neon_sub_widen")]
3405)
3406
;; SSUBW2/USUBW2 on the high half of a quad vector (operand 3 selects the
;; high-half lanes).
3407(define_insn "aarch64_<ANY_EXTEND:su>subw2<mode>_internal"
3408  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3409	(minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3410	  (ANY_EXTEND:<VWIDE>
3411	    (vec_select:<VHALF>
3412	      (match_operand:VQW 2 "register_operand" "w")
3413	      (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
3414  "TARGET_SIMD"
3415  "<ANY_EXTEND:su>subw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3416  [(set_attr "type" "neon_sub_widen")]
3417)
3418
;; SADDW/UADDW: add an extended 64-bit vector to a wide accumulator.
3419(define_insn "aarch64_<ANY_EXTEND:su>addw<mode>"
3420  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3421	(plus:<VWIDE>
3422	  (ANY_EXTEND:<VWIDE> (match_operand:VD_BHSI 2 "register_operand" "w"))
3423	  (match_operand:<VWIDE> 1 "register_operand" "w")))]
3424  "TARGET_SIMD"
3425  "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3426  [(set_attr "type" "neon_add_widen")]
3427)
3428
;; SADDW/UADDW on the low half of a quad vector (operand 3 selects the
;; low-half lanes).
3429(define_insn "aarch64_<ANY_EXTEND:su>addw<mode>_internal"
3430  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3431	(plus:<VWIDE>
3432	  (ANY_EXTEND:<VWIDE>
3433	    (vec_select:<VHALF>
3434	      (match_operand:VQW 2 "register_operand" "w")
3435	      (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3436	  (match_operand:<VWIDE> 1 "register_operand" "w")))]
3437  "TARGET_SIMD"
3438  "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3439  [(set_attr "type" "neon_add_widen")]
3440)
3441
;; SADDW2/UADDW2 on the high half of a quad vector (operand 3 selects the
;; high-half lanes).
3442(define_insn "aarch64_<ANY_EXTEND:su>addw2<mode>_internal"
3443  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3444	(plus:<VWIDE>
3445	  (ANY_EXTEND:<VWIDE>
3446	    (vec_select:<VHALF>
3447	      (match_operand:VQW 2 "register_operand" "w")
3448	      (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3449	  (match_operand:<VWIDE> 1 "register_operand" "w")))]
3450  "TARGET_SIMD"
3451  "<ANY_EXTEND:su>addw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3452  [(set_attr "type" "neon_add_widen")]
3453)
3454
;; SADDW2 intrinsic expander: build the high-half lane selector and forward
;; to the _internal pattern.
3455(define_expand "aarch64_saddw2<mode>"
3456  [(match_operand:<VWIDE> 0 "register_operand" "=w")
3457   (match_operand:<VWIDE> 1 "register_operand" "w")
3458   (match_operand:VQW 2 "register_operand" "w")]
3459  "TARGET_SIMD"
3460{
3461  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3462  emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
3463						operands[2], p));
3464  DONE;
3465})
3466
3467(define_expand "aarch64_uaddw2<mode>"
3468  [(match_operand:<VWIDE> 0 "register_operand" "=w")
3469   (match_operand:<VWIDE> 1 "register_operand" "w")
3470   (match_operand:VQW 2 "register_operand" "w")]
3471  "TARGET_SIMD"
3472{
3473  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3474  emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
3475						operands[2], p));
3476  DONE;
3477})
3478
3479
3480(define_expand "aarch64_ssubw2<mode>"
3481  [(match_operand:<VWIDE> 0 "register_operand" "=w")
3482   (match_operand:<VWIDE> 1 "register_operand" "w")
3483   (match_operand:VQW 2 "register_operand" "w")]
3484  "TARGET_SIMD"
3485{
3486  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3487  emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
3488						operands[2], p));
3489  DONE;
3490})
3491
3492(define_expand "aarch64_usubw2<mode>"
3493  [(match_operand:<VWIDE> 0 "register_operand" "=w")
3494   (match_operand:<VWIDE> 1 "register_operand" "w")
3495   (match_operand:VQW 2 "register_operand" "w")]
3496  "TARGET_SIMD"
3497{
3498  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3499  emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
3500						operands[2], p));
3501  DONE;
3502})
3503
3504;; <su><r>h<addsub>.
3505
;; Standard-name expander avg<mode>3_floor: halving add rounding toward
;; negative infinity; maps onto the HADD unspec variants below.
3506(define_expand "<u>avg<mode>3_floor"
3507  [(set (match_operand:VDQ_BHSI 0 "register_operand")
3508	(unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
3509			  (match_operand:VDQ_BHSI 2 "register_operand")]
3510			 HADD))]
3511  "TARGET_SIMD"
3512)
3513
;; avg<mode>3_ceil: rounding halving add (RHADD unspec variants).
3514(define_expand "<u>avg<mode>3_ceil"
3515  [(set (match_operand:VDQ_BHSI 0 "register_operand")
3516	(unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
3517			  (match_operand:VDQ_BHSI 2 "register_operand")]
3518			 RHADD))]
3519  "TARGET_SIMD"
3520)
3521
;; The actual [SU][R]H{ADD,SUB} insn; HADDSUB covers all of the halving
;; add/subtract unspecs iterated over by <sur> and <addsub>.
3522(define_insn "aarch64_<sur>h<addsub><mode>"
3523  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3524        (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
3525		      (match_operand:VDQ_BHSI 2 "register_operand" "w")]
3526		     HADDSUB))]
3527  "TARGET_SIMD"
3528  "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3529  [(set_attr "type" "neon_<addsub>_halve<q>")]
3530)
3531
3532;; <r><addsub>hn<q>.
3533
;; [R]{ADD,SUB}HN: narrowing high-half add/subtract of two full-width
;; vectors, producing the narrow result register.
3534(define_insn "aarch64_<sur><addsub>hn<mode>"
3535  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3536        (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
3537			    (match_operand:VQN 2 "register_operand" "w")]
3538                           ADDSUBHN))]
3539  "TARGET_SIMD"
3540  "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
3541  [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3542)
3543
;; HN2 form: operand 1 (tied to the output, constraint "0") supplies the
;; low half that is kept; the narrowed result lands in the high half.
3544(define_insn "aarch64_<sur><addsub>hn2<mode>"
3545  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3546        (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
3547			     (match_operand:VQN 2 "register_operand" "w")
3548			     (match_operand:VQN 3 "register_operand" "w")]
3549                            ADDSUBHN2))]
3550  "TARGET_SIMD"
3551  "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
3552  [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3553)
3554
3555;; pmul.
3556
;; PMUL: polynomial (carry-less) multiply on byte vectors (VB modes),
;; represented opaquely via UNSPEC_PMUL.
3557(define_insn "aarch64_pmul<mode>"
3558  [(set (match_operand:VB 0 "register_operand" "=w")
3559        (unspec:VB [(match_operand:VB 1 "register_operand" "w")
3560		    (match_operand:VB 2 "register_operand" "w")]
3561		   UNSPEC_PMUL))]
3562 "TARGET_SIMD"
3563 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3564  [(set_attr "type" "neon_mul_<Vetype><q>")]
3565)
3566
3567;; fmulx.
3568
;; FMULX: extended floating-point multiply, vector and scalar forms
;; (VHSDF_HSDF covers both); kept as an unspec.
3569(define_insn "aarch64_fmulx<mode>"
3570  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
3571	(unspec:VHSDF_HSDF
3572	  [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
3573	   (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
3574	   UNSPEC_FMULX))]
3575 "TARGET_SIMD"
3576 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3577 [(set_attr "type" "neon_fp_mul_<stype>")]
3578)
3579
3580;; vmulxq_lane_f32, and vmulx_laneq_f32
3581
;; FMULX by a lane taken from a vector of the opposite width
;; (vmulxq_lane_f32 / vmulx_laneq_f32).  The lane index is canonicalised
;; with aarch64_endian_lane_rtx before the template is printed.
3582(define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
3583  [(set (match_operand:VDQSF 0 "register_operand" "=w")
3584	(unspec:VDQSF
3585	 [(match_operand:VDQSF 1 "register_operand" "w")
3586	  (vec_duplicate:VDQSF
3587	   (vec_select:<VEL>
3588	    (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
3589	    (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3590	 UNSPEC_FMULX))]
3591  "TARGET_SIMD"
3592  {
3593    operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
3594    return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3595  }
3596  [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
3597)
3598
;; FMULX by a lane from a vector of the same width
;; (vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32).
3601(define_insn "*aarch64_mulx_elt<mode>"
3602  [(set (match_operand:VDQF 0 "register_operand" "=w")
3603	(unspec:VDQF
3604	 [(match_operand:VDQF 1 "register_operand" "w")
3605	  (vec_duplicate:VDQF
3606	   (vec_select:<VEL>
3607	    (match_operand:VDQF 2 "register_operand" "w")
3608	    (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3609	 UNSPEC_FMULX))]
3610  "TARGET_SIMD"
3611  {
3612    operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3613    return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3614  }
3615  [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
3616)
3617
3618;; vmulxq_lane
3619
;; FMULX against a broadcast scalar operand (vmulxq_lane): the scalar in
;; operand 2 is duplicated across all lanes, so lane 0 of its register is
;; referenced in the output template.
3620(define_insn "*aarch64_mulx_elt_from_dup<mode>"
3621  [(set (match_operand:VHSDF 0 "register_operand" "=w")
3622	(unspec:VHSDF
3623	 [(match_operand:VHSDF 1 "register_operand" "w")
3624	  (vec_duplicate:VHSDF
3625	    (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
3626	 UNSPEC_FMULX))]
3627  "TARGET_SIMD"
3628  "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]"
3629  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
3630)
3631
3632;; vmulxs_lane_f32, vmulxs_laneq_f32
3633;; vmulxd_lane_f64 ==  vmulx_lane_f64
3634;; vmulxd_laneq_f64 == vmulx_laneq_f64
3635
;; Scalar FMULX against a selected lane (vmulxs_lane_f32 etc.); result and
;; operand 1 are scalar <VEL> values, operand 2 supplies the lane.
3636(define_insn "*aarch64_vgetfmulx<mode>"
3637  [(set (match_operand:<VEL> 0 "register_operand" "=w")
3638	(unspec:<VEL>
3639	 [(match_operand:<VEL> 1 "register_operand" "w")
3640	  (vec_select:<VEL>
3641	   (match_operand:VDQF 2 "register_operand" "w")
3642	    (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3643	 UNSPEC_FMULX))]
3644  "TARGET_SIMD"
3645  {
3646    ;; Canonicalise the lane number for the target endianness.
3647    operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3648    return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3649  }
3650  [(set_attr "type" "fmul<Vetype>")]
3651)
3651;; <su>q<addsub>
3652
;; [SU]Q{ADD,SUB}: saturating add/subtract, modelled directly with the
;; ss_plus/us_plus/ss_minus/us_minus codes via the BINQOPS iterator.
3653(define_insn "aarch64_<su_optab><optab><mode>"
3654  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3655	(BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
3656			  (match_operand:VSDQ_I 2 "register_operand" "w")))]
3657  "TARGET_SIMD"
3658  "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3659  [(set_attr "type" "neon_<optab><q>")]
3660)
3661
;; SUQADD/USQADD: signed/unsigned saturating accumulate of the opposite
;; signedness; operand 1 is tied to the destination ("0") since the
;; instruction only takes two register operands.
3664(define_insn "aarch64_<sur>qadd<mode>"
3665  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3666	(unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
3667			(match_operand:VSDQ_I 2 "register_operand" "w")]
3668		       USSUQADD))]
3669  "TARGET_SIMD"
3670  "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
3671  [(set_attr "type" "neon_qadd<q>")]
3672)
3673
3674;; sqmovun
3675
;; SQXTUN: saturating extract-narrow, signed input to unsigned result.
3676(define_insn "aarch64_sqmovun<mode>"
3677  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3678	(unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3679                            UNSPEC_SQXTUN))]
3680   "TARGET_SIMD"
3681   "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3682   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3683)
3684
;; SQXTN/UQXTN: saturating extract-narrow keeping the signedness.
3687(define_insn "aarch64_<sur>qmovn<mode>"
3688  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3689	(unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3690                            SUQMOVN))]
3691  "TARGET_SIMD"
3692  "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3693   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3694)
3695
;; SQABS/SQNEG: saturating unary absolute value / negate (UNQOPS covers
;; the ss_abs and ss_neg codes).
3698(define_insn "aarch64_s<optab><mode>"
3699  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3700	(UNQOPS:VSDQ_I
3701	  (match_operand:VSDQ_I 1 "register_operand" "w")))]
3702  "TARGET_SIMD"
3703  "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
3704  [(set_attr "type" "neon_<optab><q>")]
3705)
3706
3707;; sq<r>dmulh.
3708
;; SQDMULH/SQRDMULH: saturating (rounding) doubling multiply returning the
;; high half; vector and scalar modes via VSDQ_HSI.
3709(define_insn "aarch64_sq<r>dmulh<mode>"
3710  [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3711	(unspec:VSDQ_HSI
3712	  [(match_operand:VSDQ_HSI 1 "register_operand" "w")
3713	   (match_operand:VSDQ_HSI 2 "register_operand" "w")]
3714	 VQDMULH))]
3715  "TARGET_SIMD"
3716  "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3717  [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
3718)
3719
3720;; sq<r>dmulh_lane
3721
;; Vector-by-lane form; operand 2 is a 64-bit (<VCOND>) lane container.
;; The lane number is canonicalised for endianness before printing.
3722(define_insn "aarch64_sq<r>dmulh_lane<mode>"
3723  [(set (match_operand:VDQHS 0 "register_operand" "=w")
3724        (unspec:VDQHS
3725	  [(match_operand:VDQHS 1 "register_operand" "w")
3726           (vec_select:<VEL>
3727             (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3728             (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3729	 VQDMULH))]
3730  "TARGET_SIMD"
3731  "*
3732   operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3733   return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3734  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3735)
3736
;; Same, but the lane comes from a 128-bit (<VCONQ>) container.
3737(define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3738  [(set (match_operand:VDQHS 0 "register_operand" "=w")
3739        (unspec:VDQHS
3740	  [(match_operand:VDQHS 1 "register_operand" "w")
3741           (vec_select:<VEL>
3742             (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3743             (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3744	 VQDMULH))]
3745  "TARGET_SIMD"
3746  "*
3747   operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3748   return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3749  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3750)
3751
;; Scalar-by-lane form (SD_HSI result) with a 64-bit lane container.
3752(define_insn "aarch64_sq<r>dmulh_lane<mode>"
3753  [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3754        (unspec:SD_HSI
3755	  [(match_operand:SD_HSI 1 "register_operand" "w")
3756           (vec_select:<VEL>
3757             (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3758             (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3759	 VQDMULH))]
3760  "TARGET_SIMD"
3761  "*
3762   operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3763   return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3764  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3765)
3766
;; Scalar-by-lane form with a 128-bit lane container.
3767(define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3768  [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3769        (unspec:SD_HSI
3770	  [(match_operand:SD_HSI 1 "register_operand" "w")
3771           (vec_select:<VEL>
3772             (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3773             (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3774	 VQDMULH))]
3775  "TARGET_SIMD"
3776  "*
3777   operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3778   return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3779  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3780)
3781
3782;; sqrdml[as]h.
3783
;; SQRDMLAH/SQRDMLSH (ARMv8.1-A RDMA): saturating rounding doubling
;; multiply accumulate/subtract; the accumulator (operand 1) is tied to
;; the destination.
3784(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
3785  [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3786	(unspec:VSDQ_HSI
3787	  [(match_operand:VSDQ_HSI 1 "register_operand" "0")
3788	   (match_operand:VSDQ_HSI 2 "register_operand" "w")
3789	   (match_operand:VSDQ_HSI 3 "register_operand" "w")]
3790	  SQRDMLH_AS))]
3791   "TARGET_SIMD_RDMA"
3792   "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3793   [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3794)
3795
3796;; sqrdml[as]h_lane.
3797
;; Vector-by-lane form; lane from a 64-bit (<VCOND>) container, index
;; canonicalised for endianness.
3798(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3799  [(set (match_operand:VDQHS 0 "register_operand" "=w")
3800	(unspec:VDQHS
3801	  [(match_operand:VDQHS 1 "register_operand" "0")
3802	   (match_operand:VDQHS 2 "register_operand" "w")
3803	   (vec_select:<VEL>
3804	     (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3805	     (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3806	  SQRDMLH_AS))]
3807   "TARGET_SIMD_RDMA"
3808   {
3809     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3810     return
3811      "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3812   }
3813   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3814)
3815
;; Scalar-by-lane form (SD_HSI) with a 64-bit lane container.
3816(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3817  [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3818	(unspec:SD_HSI
3819	  [(match_operand:SD_HSI 1 "register_operand" "0")
3820	   (match_operand:SD_HSI 2 "register_operand" "w")
3821	   (vec_select:<VEL>
3822	     (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3823	     (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3824	  SQRDMLH_AS))]
3825   "TARGET_SIMD_RDMA"
3826   {
3827     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3828     return
3829      "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3830   }
3831   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3832)
3833
3834;; sqrdml[as]h_laneq.
3835
;; Vector-by-lane form with a 128-bit (<VCONQ>) lane container.
3836(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3837  [(set (match_operand:VDQHS 0 "register_operand" "=w")
3838	(unspec:VDQHS
3839	  [(match_operand:VDQHS 1 "register_operand" "0")
3840	   (match_operand:VDQHS 2 "register_operand" "w")
3841	   (vec_select:<VEL>
3842	     (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3843	     (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3844	  SQRDMLH_AS))]
3845   "TARGET_SIMD_RDMA"
3846   {
3847     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3848     return
3849      "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3850   }
3851   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3852)
3853
;; Scalar-by-lane form with a 128-bit lane container.
3854(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3855  [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3856	(unspec:SD_HSI
3857	  [(match_operand:SD_HSI 1 "register_operand" "0")
3858	   (match_operand:SD_HSI 2 "register_operand" "w")
3859	   (vec_select:<VEL>
3860	     (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3861	     (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3862	  SQRDMLH_AS))]
3863   "TARGET_SIMD_RDMA"
3864   {
3865     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3866     return
3867      "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
3868   }
3869   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3870)
3871
3872;; vqdml[sa]l
3873
;; SQDMLAL/SQDMLSL: saturating doubling multiply-accumulate/subtract long.
;; The doubling is modelled as ss_ashift by 1 of the widened product;
;; the accumulator (operand 1) is tied to the destination.
3874(define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
3875  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3876        (SBINQOPS:<VWIDE>
3877	  (match_operand:<VWIDE> 1 "register_operand" "0")
3878	  (ss_ashift:<VWIDE>
3879	      (mult:<VWIDE>
3880		(sign_extend:<VWIDE>
3881		      (match_operand:VSD_HSI 2 "register_operand" "w"))
3882		(sign_extend:<VWIDE>
3883		      (match_operand:VSD_HSI 3 "register_operand" "w")))
3884	      (const_int 1))))]
3885  "TARGET_SIMD"
3886  "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3887  [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3888)
3889
3890;; vqdml[sa]l_lane
3891
;; SQDML[AS]L by-lane, vector (VD_HSI) input, 64-bit (<VCOND>) lane
;; container; lane index canonicalised for endianness.
3892(define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3893  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3894        (SBINQOPS:<VWIDE>
3895	  (match_operand:<VWIDE> 1 "register_operand" "0")
3896	  (ss_ashift:<VWIDE>
3897	    (mult:<VWIDE>
3898	      (sign_extend:<VWIDE>
3899		(match_operand:VD_HSI 2 "register_operand" "w"))
3900	      (sign_extend:<VWIDE>
3901		(vec_duplicate:VD_HSI
3902		  (vec_select:<VEL>
3903		    (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3904		    (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3905              ))
3906	    (const_int 1))))]
3907  "TARGET_SIMD"
3908  {
3909    operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3910    return
3911      "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3912  }
3913  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3914)
3915
;; Same, vector input with a 128-bit (<VCONQ>) lane container.
3916(define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3917  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3918        (SBINQOPS:<VWIDE>
3919	  (match_operand:<VWIDE> 1 "register_operand" "0")
3920	  (ss_ashift:<VWIDE>
3921	    (mult:<VWIDE>
3922	      (sign_extend:<VWIDE>
3923		(match_operand:VD_HSI 2 "register_operand" "w"))
3924	      (sign_extend:<VWIDE>
3925		(vec_duplicate:VD_HSI
3926		  (vec_select:<VEL>
3927		    (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3928		    (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3929              ))
3930	    (const_int 1))))]
3931  "TARGET_SIMD"
3932  {
3933    operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3934    return
3935      "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3936  }
3937  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3938)
3939
;; Scalar (SD_HSI) input, 64-bit lane container: no vec_duplicate, the
;; selected lane is used directly.
3940(define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3941  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3942        (SBINQOPS:<VWIDE>
3943	  (match_operand:<VWIDE> 1 "register_operand" "0")
3944	  (ss_ashift:<VWIDE>
3945	    (mult:<VWIDE>
3946	      (sign_extend:<VWIDE>
3947		(match_operand:SD_HSI 2 "register_operand" "w"))
3948	      (sign_extend:<VWIDE>
3949		(vec_select:<VEL>
3950		  (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3951		  (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3952              )
3953	    (const_int 1))))]
3954  "TARGET_SIMD"
3955  {
3956    operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3957    return
3958      "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3959  }
3960  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3961)
3962
;; Scalar input with a 128-bit lane container.
3963(define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3964  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3965        (SBINQOPS:<VWIDE>
3966	  (match_operand:<VWIDE> 1 "register_operand" "0")
3967	  (ss_ashift:<VWIDE>
3968	    (mult:<VWIDE>
3969	      (sign_extend:<VWIDE>
3970		(match_operand:SD_HSI 2 "register_operand" "w"))
3971	      (sign_extend:<VWIDE>
3972		(vec_select:<VEL>
3973		  (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3974		  (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3975              )
3976	    (const_int 1))))]
3977  "TARGET_SIMD"
3978  {
3979    operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3980    return
3981      "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3982  }
3983  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3984)
3985
3986;; vqdml[sa]l_n
3987
;; SQDML[AS]L by broadcast scalar (the _n intrinsic forms): operand 3 is
;; duplicated across lanes, so lane 0 of its register is printed.
3988(define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
3989  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3990        (SBINQOPS:<VWIDE>
3991	  (match_operand:<VWIDE> 1 "register_operand" "0")
3992	  (ss_ashift:<VWIDE>
3993	      (mult:<VWIDE>
3994		(sign_extend:<VWIDE>
3995		      (match_operand:VD_HSI 2 "register_operand" "w"))
3996		(sign_extend:<VWIDE>
3997		  (vec_duplicate:VD_HSI
3998		    (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3999	      (const_int 1))))]
4000  "TARGET_SIMD"
4001  "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4002  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4003)
4004
4005;; sqdml[as]l2
4006
;; SQDML[AS]L2: as the base pattern but both multiplicands are the high
;; halves of 128-bit inputs (shared hi-half selector in operand 4).
4007(define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
4008  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4009        (SBINQOPS:<VWIDE>
4010         (match_operand:<VWIDE> 1 "register_operand" "0")
4011         (ss_ashift:<VWIDE>
4012             (mult:<VWIDE>
4013               (sign_extend:<VWIDE>
4014                 (vec_select:<VHALF>
4015                     (match_operand:VQ_HSI 2 "register_operand" "w")
4016                     (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4017               (sign_extend:<VWIDE>
4018                 (vec_select:<VHALF>
4019                     (match_operand:VQ_HSI 3 "register_operand" "w")
4020                     (match_dup 4))))
4021             (const_int 1))))]
4022  "TARGET_SIMD"
4023  "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
4024  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4025)
4026
;; Expander for SQDMLAL2: builds the hi-half parallel and defers to the
;; *_internal insn above.
4027(define_expand "aarch64_sqdmlal2<mode>"
4028  [(match_operand:<VWIDE> 0 "register_operand" "=w")
4029   (match_operand:<VWIDE> 1 "register_operand" "w")
4030   (match_operand:VQ_HSI 2 "register_operand" "w")
4031   (match_operand:VQ_HSI 3 "register_operand" "w")]
4032  "TARGET_SIMD"
4033{
4034  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4035  emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
4036						  operands[2], operands[3], p));
4037  DONE;
4038})
4039
;; Expander for SQDMLSL2; identical shape to the SQDMLAL2 expander.
4040(define_expand "aarch64_sqdmlsl2<mode>"
4041  [(match_operand:<VWIDE> 0 "register_operand" "=w")
4042   (match_operand:<VWIDE> 1 "register_operand" "w")
4043   (match_operand:VQ_HSI 2 "register_operand" "w")
4044   (match_operand:VQ_HSI 3 "register_operand" "w")]
4045  "TARGET_SIMD"
4046{
4047  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4048  emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
4049						  operands[2], operands[3], p));
4050  DONE;
4051})
4052
4053;; vqdml[sa]l2_lane
4054
;; SQDML[AS]L2 by-lane: hi-half of operand 2 times a lane of operand 3
;; (64-bit <VCOND> container), broadcast across the half-width vector.
;; Lane index is canonicalised for endianness before printing.
4055(define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
4056  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4057        (SBINQOPS:<VWIDE>
4058	  (match_operand:<VWIDE> 1 "register_operand" "0")
4059	  (ss_ashift:<VWIDE>
4060	      (mult:<VWIDE>
4061		(sign_extend:<VWIDE>
4062                  (vec_select:<VHALF>
4063                    (match_operand:VQ_HSI 2 "register_operand" "w")
4064                    (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
4065		(sign_extend:<VWIDE>
4066                  (vec_duplicate:<VHALF>
4067		    (vec_select:<VEL>
4068		      (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4069		      (parallel [(match_operand:SI 4 "immediate_operand" "i")])
4070		    ))))
4071	      (const_int 1))))]
4072  "TARGET_SIMD"
4073  {
4074    operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
4075    return
4076     "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4077  }
4078  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4079)
4080
;; Same, with a 128-bit (<VCONQ>) lane container (laneq forms).
4081(define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
4082  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4083        (SBINQOPS:<VWIDE>
4084	  (match_operand:<VWIDE> 1 "register_operand" "0")
4085	  (ss_ashift:<VWIDE>
4086	      (mult:<VWIDE>
4087		(sign_extend:<VWIDE>
4088                  (vec_select:<VHALF>
4089                    (match_operand:VQ_HSI 2 "register_operand" "w")
4090                    (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
4091		(sign_extend:<VWIDE>
4092                  (vec_duplicate:<VHALF>
4093		    (vec_select:<VEL>
4094		      (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4095		      (parallel [(match_operand:SI 4 "immediate_operand" "i")])
4096		    ))))
4097	      (const_int 1))))]
4098  "TARGET_SIMD"
4099  {
4100    operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4101    return
4102     "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4103  }
4104  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4105)
4106
;; Expanders for the four SQDML{A,S}L2 by-lane intrinsics: each builds the
;; hi-half selector and defers to the matching *_internal insn.
4107(define_expand "aarch64_sqdmlal2_lane<mode>"
4108  [(match_operand:<VWIDE> 0 "register_operand" "=w")
4109   (match_operand:<VWIDE> 1 "register_operand" "w")
4110   (match_operand:VQ_HSI 2 "register_operand" "w")
4111   (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4112   (match_operand:SI 4 "immediate_operand" "i")]
4113  "TARGET_SIMD"
4114{
4115  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4116  emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
4117						       operands[2], operands[3],
4118						       operands[4], p));
4119  DONE;
4120})
4121
4122(define_expand "aarch64_sqdmlal2_laneq<mode>"
4123  [(match_operand:<VWIDE> 0 "register_operand" "=w")
4124   (match_operand:<VWIDE> 1 "register_operand" "w")
4125   (match_operand:VQ_HSI 2 "register_operand" "w")
4126   (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4127   (match_operand:SI 4 "immediate_operand" "i")]
4128  "TARGET_SIMD"
4129{
4130  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4131  emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
4132						       operands[2], operands[3],
4133						       operands[4], p));
4134  DONE;
4135})
4136
4137(define_expand "aarch64_sqdmlsl2_lane<mode>"
4138  [(match_operand:<VWIDE> 0 "register_operand" "=w")
4139   (match_operand:<VWIDE> 1 "register_operand" "w")
4140   (match_operand:VQ_HSI 2 "register_operand" "w")
4141   (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4142   (match_operand:SI 4 "immediate_operand" "i")]
4143  "TARGET_SIMD"
4144{
4145  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4146  emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
4147						       operands[2], operands[3],
4148						       operands[4], p));
4149  DONE;
4150})
4151
4152(define_expand "aarch64_sqdmlsl2_laneq<mode>"
4153  [(match_operand:<VWIDE> 0 "register_operand" "=w")
4154   (match_operand:<VWIDE> 1 "register_operand" "w")
4155   (match_operand:VQ_HSI 2 "register_operand" "w")
4156   (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4157   (match_operand:SI 4 "immediate_operand" "i")]
4158  "TARGET_SIMD"
4159{
4160  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4161  emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
4162						       operands[2], operands[3],
4163						       operands[4], p));
4164  DONE;
4165})
4166
;; SQDML[AS]L2 by broadcast scalar: hi-half of operand 2 times operand 3
;; duplicated across the half-width vector (lane 0 printed).
4167(define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
4168  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4169        (SBINQOPS:<VWIDE>
4170	  (match_operand:<VWIDE> 1 "register_operand" "0")
4171	  (ss_ashift:<VWIDE>
4172	    (mult:<VWIDE>
4173	      (sign_extend:<VWIDE>
4174                (vec_select:<VHALF>
4175                  (match_operand:VQ_HSI 2 "register_operand" "w")
4176                  (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4177	      (sign_extend:<VWIDE>
4178                (vec_duplicate:<VHALF>
4179		  (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4180	    (const_int 1))))]
4181  "TARGET_SIMD"
4182  "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4183  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4184)
4185
;; Expanders for the _n forms; build the hi-half selector and defer to
;; the *_internal insn above.
4186(define_expand "aarch64_sqdmlal2_n<mode>"
4187  [(match_operand:<VWIDE> 0 "register_operand" "=w")
4188   (match_operand:<VWIDE> 1 "register_operand" "w")
4189   (match_operand:VQ_HSI 2 "register_operand" "w")
4190   (match_operand:<VEL> 3 "register_operand" "w")]
4191  "TARGET_SIMD"
4192{
4193  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4194  emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
4195						    operands[2], operands[3],
4196						    p));
4197  DONE;
4198})
4199
4200(define_expand "aarch64_sqdmlsl2_n<mode>"
4201  [(match_operand:<VWIDE> 0 "register_operand" "=w")
4202   (match_operand:<VWIDE> 1 "register_operand" "w")
4203   (match_operand:VQ_HSI 2 "register_operand" "w")
4204   (match_operand:<VEL> 3 "register_operand" "w")]
4205  "TARGET_SIMD"
4206{
4207  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4208  emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
4209						    operands[2], operands[3],
4210						    p));
4211  DONE;
4212})
4213
4214;; vqdmull
4215
;; SQDMULL: saturating doubling multiply long; the doubling is modelled
;; as ss_ashift by 1 of the sign-extended product.
4216(define_insn "aarch64_sqdmull<mode>"
4217  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4218        (ss_ashift:<VWIDE>
4219	     (mult:<VWIDE>
4220	       (sign_extend:<VWIDE>
4221		     (match_operand:VSD_HSI 1 "register_operand" "w"))
4222	       (sign_extend:<VWIDE>
4223		     (match_operand:VSD_HSI 2 "register_operand" "w")))
4224	     (const_int 1)))]
4225  "TARGET_SIMD"
4226  "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4227  [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
4228)
4229
4230;; vqdmull_lane
4231
;; SQDMULL by-lane, vector (VD_HSI) input, 64-bit (<VCOND>) lane
;; container; lane index canonicalised for endianness before printing.
4232(define_insn "aarch64_sqdmull_lane<mode>"
4233  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4234        (ss_ashift:<VWIDE>
4235	     (mult:<VWIDE>
4236	       (sign_extend:<VWIDE>
4237		 (match_operand:VD_HSI 1 "register_operand" "w"))
4238	       (sign_extend:<VWIDE>
4239                 (vec_duplicate:VD_HSI
4240                   (vec_select:<VEL>
4241		     (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4242		     (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4243	       ))
4244	     (const_int 1)))]
4245  "TARGET_SIMD"
4246  {
4247    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4248    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4249  }
4250  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4251)
4252
;; Same, with a 128-bit (<VCONQ>) lane container (laneq form).
4253(define_insn "aarch64_sqdmull_laneq<mode>"
4254  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4255        (ss_ashift:<VWIDE>
4256	     (mult:<VWIDE>
4257	       (sign_extend:<VWIDE>
4258		 (match_operand:VD_HSI 1 "register_operand" "w"))
4259	       (sign_extend:<VWIDE>
4260                 (vec_duplicate:VD_HSI
4261                   (vec_select:<VEL>
4262		     (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4263		     (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4264	       ))
4265	     (const_int 1)))]
4266  "TARGET_SIMD"
4267  {
4268    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4269    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4270  }
4271  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4272)
4273
;; Scalar (SD_HSI) input, 64-bit lane container: the selected lane is
;; used directly, no vec_duplicate.
4274(define_insn "aarch64_sqdmull_lane<mode>"
4275  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4276        (ss_ashift:<VWIDE>
4277	     (mult:<VWIDE>
4278	       (sign_extend:<VWIDE>
4279		 (match_operand:SD_HSI 1 "register_operand" "w"))
4280	       (sign_extend:<VWIDE>
4281                 (vec_select:<VEL>
4282		   (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4283		   (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4284	       ))
4285	     (const_int 1)))]
4286  "TARGET_SIMD"
4287  {
4288    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4289    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4290  }
4291  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4292)
4293
;; Scalar input with a 128-bit lane container.
4294(define_insn "aarch64_sqdmull_laneq<mode>"
4295  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4296        (ss_ashift:<VWIDE>
4297	     (mult:<VWIDE>
4298	       (sign_extend:<VWIDE>
4299		 (match_operand:SD_HSI 1 "register_operand" "w"))
4300	       (sign_extend:<VWIDE>
4301                 (vec_select:<VEL>
4302		   (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4303		   (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4304	       ))
4305	     (const_int 1)))]
4306  "TARGET_SIMD"
4307  {
4308    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4309    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4310  }
4311  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4312)
4313
;; vqdmull_n

;; SQDMULL (by element), "_n" form: the scalar multiplier operand 2 is
;; broadcast (vec_duplicate) and always referenced as lane [0] in the
;; printed instruction.
(define_insn "aarch64_sqdmull_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (match_operand:VD_HSI 1 "register_operand" "w"))
	       (sign_extend:<VWIDE>
                 (vec_duplicate:VD_HSI
                   (match_operand:<VEL> 2 "register_operand" "<vwx>")))
	       )
	     (const_int 1)))]
  "TARGET_SIMD"
  "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)
4331
;; vqdmull2



;; SQDMULL2: saturating doubling multiply-long on the high halves
;; (vec_par_cnst_hi_half selection) of two 128-bit vectors.
(define_insn "aarch64_sqdmull2<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
                   (match_operand:VQ_HSI 1 "register_operand" "w")
                   (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
                   (match_operand:VQ_HSI 2 "register_operand" "w")
                   (match_dup 3)))
	       )
	     (const_int 1)))]
  "TARGET_SIMD"
  "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

;; Expander: builds the high-half selector parallel and defers to the
;; _internal insn above.
(define_expand "aarch64_sqdmull2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQ_HSI 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
						  operands[2], p));
  DONE;
})
4366
;; vqdmull2_lane

;; SQDMULL2 (by element): high half of operand 1 multiplied by lane
;; operands[3] of operand 2.  "_lane" takes the lane from a 64-bit
;; (<VCOND>) vector, "_laneq" from a 128-bit (<VCONQ>) vector.
(define_insn "aarch64_sqdmull2_lane<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
                   (match_operand:VQ_HSI 1 "register_operand" "w")
                   (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
	       (sign_extend:<VWIDE>
                 (vec_duplicate:<VHALF>
                   (vec_select:<VEL>
		     (match_operand:<VCOND> 2 "register_operand" "<vwx>")
		     (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
	       ))
	     (const_int 1)))]
  "TARGET_SIMD"
  {
    /* Flip the lane number for big-endian before printing.  */
    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
    return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

;; As above but the lane comes from a 128-bit vector.
(define_insn "aarch64_sqdmull2_laneq<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
                   (match_operand:VQ_HSI 1 "register_operand" "w")
                   (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
	       (sign_extend:<VWIDE>
                 (vec_duplicate:<VHALF>
                   (vec_select:<VEL>
		     (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
		     (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
	       ))
	     (const_int 1)))]
  "TARGET_SIMD"
  {
    /* Flip the lane number for big-endian before printing.  */
    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
    return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

;; Expander for the _lane form: supplies the high-half selector.
(define_expand "aarch64_sqdmull2_lane<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQ_HSI 1 "register_operand" "w")
   (match_operand:<VCOND> 2 "register_operand" "<vwx>")
   (match_operand:SI 3 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
						       operands[2], operands[3],
						       p));
  DONE;
})

;; Expander for the _laneq form: supplies the high-half selector.
(define_expand "aarch64_sqdmull2_laneq<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQ_HSI 1 "register_operand" "w")
   (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
   (match_operand:SI 3 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
						       operands[2], operands[3],
						       p));
  DONE;
})
4442
;; vqdmull2_n

;; SQDMULL2 (by element), "_n" form: the scalar operand 2 is broadcast and
;; always printed as lane [0].
(define_insn "aarch64_sqdmull2_n<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
                   (match_operand:VQ_HSI 1 "register_operand" "w")
                   (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
	       (sign_extend:<VWIDE>
                 (vec_duplicate:<VHALF>
                   (match_operand:<VEL> 2 "register_operand" "<vwx>")))
	       )
	     (const_int 1)))]
  "TARGET_SIMD"
  "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

;; Expander: supplies the high-half selector for the _internal insn.
(define_expand "aarch64_sqdmull2_n<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQ_HSI 1 "register_operand" "w")
   (match_operand:<VEL> 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
						    operands[2], p));
  DONE;
})
4474
;; vshl

;; Vector shift by a vector of shift amounts; <sur> expands the VSHL
;; unspec iterator into the s/u/sr/ur variants of the "shl" mnemonic.
;; Fix: dropped the stray ';' that followed the output-template string —
;; in .md read syntax it started an accidental (empty) comment.
(define_insn "aarch64_<sur>shl<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
        (unspec:VSDQ_I_DI
	  [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
           (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
         VSHL))]
  "TARGET_SIMD"
  "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)
4487

;; vqshl

;; Saturating (optionally rounding) vector shift by vector; <sur> and <r>
;; come from the VQSHL unspec iterator.
;; Fix: dropped the stray ';' that followed the output-template string —
;; in .md read syntax it started an accidental (empty) comment.
(define_insn "aarch64_<sur>q<r>shl<mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
        (unspec:VSDQ_I
	  [(match_operand:VSDQ_I 1 "register_operand" "w")
           (match_operand:VSDQ_I 2 "register_operand" "w")]
         VQSHL))]
  "TARGET_SIMD"
  "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_sat_shift_reg<q>")]
)
4501
;; vshll_n

;; Shift-left-long by immediate.  When the shift equals the element width
;; only the plain SHLL encoding exists, so that mnemonic is used; otherwise
;; the signed/unsigned <sur> variant is printed.
(define_insn "aarch64_<sur>shll_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
			 (match_operand:SI 2
			   "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
                         VSHLL))]
  "TARGET_SIMD"
  {
    if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
      return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
    else
      return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
  }
  [(set_attr "type" "neon_shift_imm_long")]
)

;; vshll_high_n

;; As above, operating on the high half (SHLL2 / <sur>SHLL2).
;; NOTE(review): operand 2 here is only "immediate_operand", unlike the
;; bitsize-checked predicate used by shll_n above — presumably callers
;; guarantee the range; confirm before tightening.
(define_insn "aarch64_<sur>shll2_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
			 (match_operand:SI 2 "immediate_operand" "i")]
                         VSHLL))]
  "TARGET_SIMD"
  {
    if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
      return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
    else
      return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
  }
  [(set_attr "type" "neon_shift_imm_long")]
)
4536
;; vrshr_n

;; (Rounding) shift right by immediate; variants come from the VRSHR_N
;; unspec iterator via <sur>.
(define_insn "aarch64_<sur>shr_n<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
        (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
			   (match_operand:SI 2
			     "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
			  VRSHR_N))]
  "TARGET_SIMD"
  "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)

;; v(r)sra_n

;; Shift-right-and-accumulate: operand 1 is tied to the output ("0"
;; constraint) as the accumulator; operand 2 is shifted by operand 3.
(define_insn "aarch64_<sur>sra_n<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
	(unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
		       (match_operand:VSDQ_I_DI 2 "register_operand" "w")
                       (match_operand:SI 3
			 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
                      VSRA))]
  "TARGET_SIMD"
  "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
  [(set_attr "type" "neon_shift_acc<q>")]
)
4563
;; vs<lr>i_n

;; Shift-and-insert (SLI/SRI): operand 1 is tied to the output and supplies
;; the bits that the shifted operand 2 does not overwrite.
(define_insn "aarch64_<sur>s<lr>i_n<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
	(unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
		       (match_operand:VSDQ_I_DI 2 "register_operand" "w")
                       (match_operand:SI 3
			 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
                      VSLRI))]
  "TARGET_SIMD"
  "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
  [(set_attr "type" "neon_shift_imm<q>")]
)

;; vqshl(u)

;; Saturating shift left by immediate (SQSHL/UQSHL/SQSHLU via <sur>/<u>).
(define_insn "aarch64_<sur>qshl<u>_n<mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
	(unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
		       (match_operand:SI 2
			 "aarch64_simd_shift_imm_<ve_mode>" "i")]
                      VQSHL_N))]
  "TARGET_SIMD"
  "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)
4590
4591
;; vq(r)shr(u)n_n

;; Saturating (rounding) shift-right-narrow by immediate: result is the
;; narrow mode <VNARROWQ> of the wide input.
(define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
        (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
			    (match_operand:SI 2
			      "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
			   VQSHRN_N))]
  "TARGET_SIMD"
  "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
4604
4605
;; cm(eq|ge|gt|lt|le)
;; Note, we have constraints for Dz and Z as different expanders
;; have different ideas of what should be passed to this pattern.

;; Integer vector compare producing an all-ones/all-zeros mask: the
;; (neg (comparison ...)) form turns the 0/1 comparison result into 0/-1.
;; Alternative 2 matches a zero operand 2 and uses the #0 immediate form.
(define_insn "aarch64_cm<optab><mode>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
	(neg:<V_INT_EQUIV>
	  (COMPARISONS:<V_INT_EQUIV>
	    (match_operand:VDQ_I 1 "register_operand" "w,w")
	    (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
	  )))]
  "TARGET_SIMD"
  "@
  cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
  cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
  [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
)
4623
;; DImode compare-to-mask.  Kept as "#" until after reload, then split:
;; if the operands landed in general registers the compare is done with a
;; flag-setting compare plus a negated cstore; otherwise it becomes the
;; clobber-free SIMD pattern below.
(define_insn_and_split "aarch64_cm<optab>di"
  [(set (match_operand:DI 0 "register_operand" "=w,w,r")
	(neg:DI
	  (COMPARISONS:DI
	    (match_operand:DI 1 "register_operand" "w,w,r")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
	  )))
     (clobber (reg:CC CC_REGNUM))]
  "TARGET_SIMD"
  "#"
  "&& reload_completed"
  [(set (match_operand:DI 0 "register_operand")
	(neg:DI
	  (COMPARISONS:DI
	    (match_operand:DI 1 "register_operand")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero")
	  )))]
  {
    /* If we are in the general purpose register file,
       we split to a sequence of comparison and store.  */
    if (GP_REGNUM_P (REGNO (operands[0]))
	&& GP_REGNUM_P (REGNO (operands[1])))
      {
	machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
	rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
	rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
	emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
	DONE;
      }
    /* Otherwise, we expand to a similar pattern which does not
       clobber CC_REGNUM.  */
  }
  [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
)

;; Post-reload SIMD-register form of the DImode compare (no CC clobber);
;; this is what the split above resolves to for "w" operands.
(define_insn "*aarch64_cm<optab>di"
  [(set (match_operand:DI 0 "register_operand" "=w,w")
	(neg:DI
	  (COMPARISONS:DI
	    (match_operand:DI 1 "register_operand" "w,w")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
	  )))]
  "TARGET_SIMD && reload_completed"
  "@
  cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
  cm<optab>\t%d0, %d1, #0"
  [(set_attr "type" "neon_compare, neon_compare_zero")]
)
4672
;; cm(hs|hi)

;; Unsigned vector compare to mask (CMHS/CMHI); no immediate-zero
;; alternative exists for the unsigned forms.
(define_insn "aarch64_cm<optab><mode>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
	(neg:<V_INT_EQUIV>
	  (UCOMPARISONS:<V_INT_EQUIV>
	    (match_operand:VDQ_I 1 "register_operand" "w")
	    (match_operand:VDQ_I 2 "register_operand" "w")
	  )))]
  "TARGET_SIMD"
  "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
  [(set_attr "type" "neon_compare<q>")]
)
4686
;; Unsigned DImode compare-to-mask, same split strategy as the signed
;; version: GP registers get compare + negated cstore, SIMD registers get
;; the clobber-free pattern below.
(define_insn_and_split "aarch64_cm<optab>di"
  [(set (match_operand:DI 0 "register_operand" "=w,r")
	(neg:DI
	  (UCOMPARISONS:DI
	    (match_operand:DI 1 "register_operand" "w,r")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
	  )))
    (clobber (reg:CC CC_REGNUM))]
  "TARGET_SIMD"
  "#"
  "&& reload_completed"
  [(set (match_operand:DI 0 "register_operand")
	(neg:DI
	  (UCOMPARISONS:DI
	    (match_operand:DI 1 "register_operand")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero")
	  )))]
  {
    /* If we are in the general purpose register file,
       we split to a sequence of comparison and store.  */
    if (GP_REGNUM_P (REGNO (operands[0]))
	&& GP_REGNUM_P (REGNO (operands[1])))
      {
	machine_mode mode = CCmode;
	rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
	rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
	emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
	DONE;
      }
    /* Otherwise, we expand to a similar pattern which does not
       clobber CC_REGNUM.  */
  }
  [(set_attr "type" "neon_compare,multiple")]
)

;; Post-reload SIMD-register form of the unsigned DImode compare.
(define_insn "*aarch64_cm<optab>di"
  [(set (match_operand:DI 0 "register_operand" "=w")
	(neg:DI
	  (UCOMPARISONS:DI
	    (match_operand:DI 1 "register_operand" "w")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
	  )))]
  "TARGET_SIMD && reload_completed"
  "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
  [(set_attr "type" "neon_compare")]
)
4733
;; cmtst

;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
;; we don't have any insns using ne, and aarch64_vcond outputs
;; not (neg (eq (and x y) 0))
;; which is rewritten by simplify_rtx as
;; plus (eq (and x y) 0) -1.

;; Hence this pattern matches the simplified plus/eq/-1 form rather than
;; the neg/ne form; operands 3 and 4 are the required zero and -1 vector
;; constants.
(define_insn "aarch64_cmtst<mode>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
	(plus:<V_INT_EQUIV>
	  (eq:<V_INT_EQUIV>
	    (and:VDQ_I
	      (match_operand:VDQ_I 1 "register_operand" "w")
	      (match_operand:VDQ_I 2 "register_operand" "w"))
	    (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
	  (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
  ]
  "TARGET_SIMD"
  "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_tst<q>")]
)
4756
;; DImode CMTST.  Like the cm<optab>di patterns: deferred with "#" until
;; after reload, then either split to and/compare/negated-cstore in the GP
;; file or resolved to the SIMD pattern below.
(define_insn_and_split "aarch64_cmtstdi"
  [(set (match_operand:DI 0 "register_operand" "=w,r")
	(neg:DI
	  (ne:DI
	    (and:DI
	      (match_operand:DI 1 "register_operand" "w,r")
	      (match_operand:DI 2 "register_operand" "w,r"))
	    (const_int 0))))
    (clobber (reg:CC CC_REGNUM))]
  "TARGET_SIMD"
  "#"
  "&& reload_completed"
  [(set (match_operand:DI 0 "register_operand")
	(neg:DI
	  (ne:DI
	    (and:DI
	      (match_operand:DI 1 "register_operand")
	      (match_operand:DI 2 "register_operand"))
	    (const_int 0))))]
  {
    /* If we are in the general purpose register file,
       we split to a sequence of comparison and store.  */
    if (GP_REGNUM_P (REGNO (operands[0]))
	&& GP_REGNUM_P (REGNO (operands[1])))
      {
	rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
	machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
	rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
	rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
	emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
	DONE;
      }
    /* Otherwise, we expand to a similar pattern which does not
       clobber CC_REGNUM.  */
  }
  [(set_attr "type" "neon_tst,multiple")]
)

;; SIMD-register form of the DImode CMTST (no CC clobber).
(define_insn "*aarch64_cmtstdi"
  [(set (match_operand:DI 0 "register_operand" "=w")
	(neg:DI
	  (ne:DI
	    (and:DI
	      (match_operand:DI 1 "register_operand" "w")
	      (match_operand:DI 2 "register_operand" "w"))
	    (const_int 0))))]
  "TARGET_SIMD"
  "cmtst\t%d0, %d1, %d2"
  [(set_attr "type" "neon_tst")]
)
4807
;; fcm(eq|ge|gt|le|lt)

;; Floating-point compare to integer mask (FCM*).  The second alternative
;; matches a floating-point zero (YDz) and prints the "#0"-style form.
(define_insn "aarch64_cm<optab><mode>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
	(neg:<V_INT_EQUIV>
	  (COMPARISONS:<V_INT_EQUIV>
	    (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
	    (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
	  )))]
  "TARGET_SIMD"
  "@
  fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
  fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
  [(set_attr "type" "neon_fp_compare_<stype><q>")]
)
4823
;; fac(ge|gt)
;; Note we can also handle what would be fac(le|lt) by
;; generating fac(ge|gt).

;; Absolute compare (FACGE/FACGT): both inputs are wrapped in abs before
;; the comparison; result is the usual 0/-1 mask via neg.
(define_insn "aarch64_fac<optab><mode>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
	(neg:<V_INT_EQUIV>
	  (FAC_COMPARISONS:<V_INT_EQUIV>
	    (abs:VHSDF_HSDF
	      (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
	    (abs:VHSDF_HSDF
	      (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
  )))]
  "TARGET_SIMD"
  "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
  [(set_attr "type" "neon_fp_compare_<stype><q>")]
)
4841
;; addp

;; Pairwise add of two 64-bit integer vectors (ADDP), modelled as an
;; opaque unspec.
(define_insn "aarch64_addp<mode>"
  [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
        (unspec:VD_BHSI
          [(match_operand:VD_BHSI 1 "register_operand" "w")
	   (match_operand:VD_BHSI 2 "register_operand" "w")]
          UNSPEC_ADDP))]
  "TARGET_SIMD"
  "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_reduc_add<q>")]
)

;; Scalar ADDP: adds the two doubleword lanes of a V2DI input into a
;; single DI result.
(define_insn "aarch64_addpdi"
  [(set (match_operand:DI 0 "register_operand" "=w")
        (unspec:DI
          [(match_operand:V2DI 1 "register_operand" "w")]
          UNSPEC_ADDP))]
  "TARGET_SIMD"
  "addp\t%d0, %1.2d"
  [(set_attr "type" "neon_reduc_add")]
)
4864
;; sqrt

;; Vector square root.  The expander first tries the approximate-sqrt
;; (Newton-Raphson) expansion; if that declines, the FSQRT insn below
;; matches the unchanged pattern.
(define_expand "sqrt<mode>2"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
  "TARGET_SIMD"
{
  if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
    DONE;
})

;; Direct FSQRT instruction form.
(define_insn "*sqrt<mode>2"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
)
4883
;; Patterns for vector struct loads and stores.

;; LD2: load a 2-register structure (OImode) from memory.  The inner
;; unspec on VQ only carries the element mode for iterator expansion.
(define_insn "aarch64_simd_ld2<mode>"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD2))]
  "TARGET_SIMD"
  "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
  [(set_attr "type" "neon_load2_2reg<q>")]
)

;; LD2R: load one 2-element structure and replicate it to all lanes.
(define_insn "aarch64_simd_ld2r<mode>"
  [(set (match_operand:OI 0 "register_operand" "=w")
       (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
                   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
                  UNSPEC_LD2_DUP))]
  "TARGET_SIMD"
  "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
  [(set_attr "type" "neon_load2_all_lanes<q>")]
)

;; LD2 (single lane): load into lane operands[3] of the tied register
;; pair operand 2; the lane number is flipped for big-endian.
(define_insn "aarch64_vec_load_lanesoi_lane<mode>"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (match_operand:OI 2 "register_operand" "0")
		    (match_operand:SI 3 "immediate_operand" "i")
		    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
		   UNSPEC_LD2_LANE))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
    return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
  }
  [(set_attr "type" "neon_load2_one_lane")]
)

;; Expander used by the vectorizer: on big-endian, load into a temporary
;; and permute the register list back to GCC lane ordering.
(define_expand "vec_load_lanesoi<mode>"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD2))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (OImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
      emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
      emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
    }
  else
    emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
  DONE;
})

;; ST2: store a 2-register structure to memory.
(define_insn "aarch64_simd_st2<mode>"
  [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:OI [(match_operand:OI 1 "register_operand" "w")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST2))]
  "TARGET_SIMD"
  "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
  [(set_attr "type" "neon_store2_2reg<q>")]
)

;; RTL uses GCC vector extension indices, so flip only for assembly.
(define_insn "aarch64_vec_store_lanesoi_lane<mode>"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:OI 1 "register_operand" "w")
		    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
		    (match_operand:SI 2 "immediate_operand" "i")]
		   UNSPEC_ST2_LANE))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
  }
  [(set_attr "type" "neon_store2_one_lane<q>")]
)

;; Big-endian mirror of vec_load_lanesoi: permute the register list first,
;; then store.
(define_expand "vec_store_lanesoi<mode>"
  [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:OI [(match_operand:OI 1 "register_operand" "w")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST2))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (OImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
      emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
      emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
    }
  else
    emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
  DONE;
})
4983
;; LD3: load a 3-register structure (CImode) from memory.
(define_insn "aarch64_simd_ld3<mode>"
  [(set (match_operand:CI 0 "register_operand" "=w")
	(unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD3))]
  "TARGET_SIMD"
  "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
  [(set_attr "type" "neon_load3_3reg<q>")]
)

;; LD3R: load one 3-element structure and replicate it to all lanes.
(define_insn "aarch64_simd_ld3r<mode>"
  [(set (match_operand:CI 0 "register_operand" "=w")
       (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
                   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
                  UNSPEC_LD3_DUP))]
  "TARGET_SIMD"
  "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
  [(set_attr "type" "neon_load3_all_lanes<q>")]
)

;; LD3 (single lane) into lane operands[3] of the tied triple operand 2;
;; the lane number is flipped for big-endian.
(define_insn "aarch64_vec_load_lanesci_lane<mode>"
  [(set (match_operand:CI 0 "register_operand" "=w")
	(unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (match_operand:CI 2 "register_operand" "0")
		    (match_operand:SI 3 "immediate_operand" "i")
		    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD3_LANE))]
  "TARGET_SIMD"
{
    operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
    return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
}
  [(set_attr "type" "neon_load3_one_lane")]
)

;; Vectorizer expander: permute the register list after loading on
;; big-endian so lanes match GCC ordering.
(define_expand "vec_load_lanesci<mode>"
  [(set (match_operand:CI 0 "register_operand" "=w")
	(unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD3))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (CImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
      emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
      emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
    }
  else
    emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
  DONE;
})

;; ST3: store a 3-register structure to memory.
(define_insn "aarch64_simd_st3<mode>"
  [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:CI [(match_operand:CI 1 "register_operand" "w")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST3))]
  "TARGET_SIMD"
  "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
  [(set_attr "type" "neon_store3_3reg<q>")]
)

;; RTL uses GCC vector extension indices, so flip only for assembly.
(define_insn "aarch64_vec_store_lanesci_lane<mode>"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:CI 1 "register_operand" "w")
		     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
		     (match_operand:SI 2 "immediate_operand" "i")]
		    UNSPEC_ST3_LANE))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
  }
  [(set_attr "type" "neon_store3_one_lane<q>")]
)

;; Big-endian mirror: permute the register list before storing.
(define_expand "vec_store_lanesci<mode>"
  [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:CI [(match_operand:CI 1 "register_operand" "w")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST3))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (CImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
      emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
      emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
    }
  else
    emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
  DONE;
})
5081
;; LD4: load a 4-register structure (XImode) from memory.
(define_insn "aarch64_simd_ld4<mode>"
  [(set (match_operand:XI 0 "register_operand" "=w")
	(unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD4))]
  "TARGET_SIMD"
  "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
  [(set_attr "type" "neon_load4_4reg<q>")]
)

;; LD4R: load one 4-element structure and replicate it to all lanes.
(define_insn "aarch64_simd_ld4r<mode>"
  [(set (match_operand:XI 0 "register_operand" "=w")
       (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
                   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
                  UNSPEC_LD4_DUP))]
  "TARGET_SIMD"
  "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
  [(set_attr "type" "neon_load4_all_lanes<q>")]
)

;; LD4 (single lane) into lane operands[3] of the tied quad operand 2;
;; the lane number is flipped for big-endian.
(define_insn "aarch64_vec_load_lanesxi_lane<mode>"
  [(set (match_operand:XI 0 "register_operand" "=w")
	(unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (match_operand:XI 2 "register_operand" "0")
		    (match_operand:SI 3 "immediate_operand" "i")
		    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD4_LANE))]
  "TARGET_SIMD"
{
    operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
    return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
}
  [(set_attr "type" "neon_load4_one_lane")]
)

;; Vectorizer expander: permute the register list after loading on
;; big-endian so lanes match GCC ordering.
(define_expand "vec_load_lanesxi<mode>"
  [(set (match_operand:XI 0 "register_operand" "=w")
	(unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD4))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (XImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
      emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
      emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
    }
  else
    emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
  DONE;
})

;; ST4: store a 4-register structure to memory.
(define_insn "aarch64_simd_st4<mode>"
  [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:XI [(match_operand:XI 1 "register_operand" "w")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST4))]
  "TARGET_SIMD"
  "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
  [(set_attr "type" "neon_store4_4reg<q>")]
)

;; RTL uses GCC vector extension indices, so flip only for assembly.
(define_insn "aarch64_vec_store_lanesxi_lane<mode>"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:XI 1 "register_operand" "w")
		     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
		     (match_operand:SI 2 "immediate_operand" "i")]
		    UNSPEC_ST4_LANE))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
  }
  [(set_attr "type" "neon_store4_one_lane<q>")]
)

;; Big-endian mirror: permute the register list before storing.
(define_expand "vec_store_lanesxi<mode>"
  [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:XI [(match_operand:XI 1 "register_operand" "w")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST4))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (XImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
      emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
      emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
    }
  else
    emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
  DONE;
})
5179
;; Byte-reverse every vector in a structure register list, used by the
;; big-endian vec_load/store_lanes expanders above.  Kept as a single
;; insn until after reload, then split into one TBL per 128-bit register
;; (operand 2 holds the byte-permutation mask).  The "=&w" earlyclobber
;; is required because the split reads operand 1 while writing operand 0.
5180(define_insn_and_split "aarch64_rev_reglist<mode>"
5181[(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
5182	(unspec:VSTRUCT
5183	           [(match_operand:VSTRUCT 1 "register_operand" "w")
5184		    (match_operand:V16QI 2 "register_operand" "w")]
5185                   UNSPEC_REV_REGLIST))]
5186  "TARGET_SIMD"
5187  "#"
5188  "&& reload_completed"
5189  [(const_int 0)]
5190{
5191  int i;
5192  int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
5193  for (i = 0; i < nregs; i++)
5194    {
5195      rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
5196      rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
5197      emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
5198    }
5199  DONE;
5200}
5201  [(set_attr "type" "neon_tbl1_q")
5202   (set_attr "length" "<insn_count>")]
5203)
5204
5205;; Reload patterns for AdvSIMD register list operands.
5206
;; Move expander for the opaque structure modes (OI/CI/XI).  Before
;; register allocation, force the source into a register unless the
;; destination is already a register, so the *aarch64_mov<mode> /
;; *aarch64_be_mov* insns below never see a mem-to-mem move.
5207(define_expand "mov<mode>"
5208  [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
5209	(match_operand:VSTRUCT 1 "general_operand" ""))]
5210  "TARGET_SIMD"
5211{
5212  if (can_create_pseudo_p ())
5213    {
5214      if (GET_CODE (operands[0]) != REG)
5215	operands[1] = force_reg (<MODE>mode, operands[1]);
5216    }
5217})
5218
5219
;; vld1*_x3 intrinsic support: operand 1 is the base address (DI),
;; wrapped here in a CImode MEM and handed to the LD1 three-register
;; insn below.  The VALLDIF unspec only selects the element mode.
5220(define_expand "aarch64_ld1x3<VALLDIF:mode>"
5221  [(match_operand:CI 0 "register_operand" "=w")
5222   (match_operand:DI 1 "register_operand" "r")
5223   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5224  "TARGET_SIMD"
5225{
5226  rtx mem = gen_rtx_MEM (CImode, operands[1]);
5227  emit_insn (gen_aarch64_ld1_x3_<VALLDIF:mode> (operands[0], mem));
5228  DONE;
5229})

;; LD1 of three consecutive vector registers (no de-interleaving,
;; unlike LD3).  %S0/%U0 name the first and third register of the list.
5231(define_insn "aarch64_ld1_x3_<mode>"
5232  [(set (match_operand:CI 0 "register_operand" "=w")
5233        (unspec:CI
5234	  [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
5235	   (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_LD1))]
5236  "TARGET_SIMD"
5237  "ld1\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5238  [(set_attr "type" "neon_load1_3reg<q>")]
5239)
5240
;; vst1*_x2 intrinsic support: operand 0 is the base address (DI),
;; operand 1 the OI register pair to store via the ST1 insn below.
5241(define_expand "aarch64_st1x2<VALLDIF:mode>"
5242  [(match_operand:DI 0 "register_operand" "")
5243   (match_operand:OI 1 "register_operand" "")
5244   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5245  "TARGET_SIMD"
5246{
5247  rtx mem = gen_rtx_MEM (OImode, operands[0]);
5248  emit_insn (gen_aarch64_st1_x2_<VALLDIF:mode> (mem, operands[1]));
5249  DONE;
5250})

;; ST1 of two consecutive vector registers (no interleaving, unlike ST2).
5252(define_insn "aarch64_st1_x2_<mode>"
5253   [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
5254	 (unspec:OI
5255	  [(match_operand:OI 1 "register_operand" "w")
5256          (unspec:VALLDIF [(const_int 2)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5257  "TARGET_SIMD"
5258  "st1\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5259  [(set_attr "type" "neon_store1_2reg<q>")]
5260)
5261
;; vst1*_x3 intrinsic support: wrap the DI base address in a CImode MEM
;; and emit the three-register ST1 insn below.
5262(define_expand "aarch64_st1x3<VALLDIF:mode>"
5263  [(match_operand:DI 0 "register_operand" "")
5264   (match_operand:CI 1 "register_operand" "")
5265   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5266  "TARGET_SIMD"
5267{
5268  rtx mem = gen_rtx_MEM (CImode, operands[0]);
5269  emit_insn (gen_aarch64_st1_x3_<VALLDIF:mode> (mem, operands[1]));
5270  DONE;
5271})

;; ST1 of three consecutive vector registers (no interleaving).
5273(define_insn "aarch64_st1_x3_<mode>"
5274   [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
5275	(unspec:CI
5276         [(match_operand:CI 1 "register_operand" "w")
5277	  (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5278  "TARGET_SIMD"
5279  "st1\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5280  [(set_attr "type" "neon_store1_3reg<q>")]
5281)
5282
;; Little-endian moves of structure modes: reg-reg moves are split later
;; (see the define_splits below, hence "#" and <insn_count>), while
;; memory transfers use one multi-register LD1/ST1.  Big-endian uses the
;; *aarch64_be_mov* patterns instead, where LD1/ST1's per-vector element
;; ordering would not match the in-register subreg layout.
5283(define_insn "*aarch64_mov<mode>"
5284  [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
5285	(match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
5286  "TARGET_SIMD && !BYTES_BIG_ENDIAN
5287   && (register_operand (operands[0], <MODE>mode)
5288       || register_operand (operands[1], <MODE>mode))"
5289  "@
5290   #
5291   st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
5292   ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
5293  [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
5294		     neon_load<nregs>_<nregs>reg_q")
5295   (set_attr "length" "<insn_count>,4,4")]
5296)
5297
;; Big-endian single-vector LD1/ST1, kept as unspecs so that the element
;; ordering is fixed to the architectural LD1/ST1 layout rather than
;; being subject to big-endian subreg reinterpretation.
5298(define_insn "aarch64_be_ld1<mode>"
5299  [(set (match_operand:VALLDI_F16 0	"register_operand" "=w")
5300	(unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
5301			     "aarch64_simd_struct_operand" "Utv")]
5302	UNSPEC_LD1))]
5303  "TARGET_SIMD"
5304  "ld1\\t{%0<Vmtype>}, %1"
5305  [(set_attr "type" "neon_load1_1reg<q>")]
5306)

;; Store counterpart of aarch64_be_ld1 above.
5308(define_insn "aarch64_be_st1<mode>"
5309  [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
5310	(unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
5311	UNSPEC_ST1))]
5312  "TARGET_SIMD"
5313  "st1\\t{%1<Vmtype>}, %0"
5314  [(set_attr "type" "neon_store1_1reg<q>")]
5315)
5316
;; Big-endian structure-mode moves.  OImode (2 vectors) fits one
;; STP/LDP of Q registers; CI (3) and XI (4) always emit "#" and are
;; decomposed by the define_splits below, hence the "o" (offsettable)
;; memory constraint so the split can address the pieces.  Reg-reg
;; alternatives are likewise split, which is why "length" lists
;; 4 bytes per component register for the first alternative.
5317(define_insn "*aarch64_be_movoi"
5318  [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
5319	(match_operand:OI 1 "general_operand"      " w,w,m"))]
5320  "TARGET_SIMD && BYTES_BIG_ENDIAN
5321   && (register_operand (operands[0], OImode)
5322       || register_operand (operands[1], OImode))"
5323  "@
5324   #
5325   stp\\t%q1, %R1, %0
5326   ldp\\t%q0, %R0, %1"
5327  [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
5328   (set_attr "length" "8,4,4")]
5329)

5331(define_insn "*aarch64_be_movci"
5332  [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
5333	(match_operand:CI 1 "general_operand"      " w,w,o"))]
5334  "TARGET_SIMD && BYTES_BIG_ENDIAN
5335   && (register_operand (operands[0], CImode)
5336       || register_operand (operands[1], CImode))"
5337  "#"
5338  [(set_attr "type" "multiple")
5339   (set_attr "length" "12,4,4")]
5340)

5342(define_insn "*aarch64_be_movxi"
5343  [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
5344	(match_operand:XI 1 "general_operand"      " w,w,o"))]
5345  "TARGET_SIMD && BYTES_BIG_ENDIAN
5346   && (register_operand (operands[0], XImode)
5347       || register_operand (operands[1], XImode))"
5348  "#"
5349  [(set_attr "type" "multiple")
5350   (set_attr "length" "16,4,4")]
5351)
5352
;; Post-reload splits for structure-mode moves.  Reg-reg moves become a
;; sequence of TImode register moves (aarch64_simd_emit_reg_reg_move
;; handles overlap direction).  For big-endian memory moves, CI/XI are
;; decomposed into OI/TI sub-moves at 32-byte offsets; the remaining
;; (little-endian memory) cases FAIL because *aarch64_mov<mode> already
;; handles them with LD1/ST1.
5353(define_split
5354  [(set (match_operand:OI 0 "register_operand")
5355	(match_operand:OI 1 "register_operand"))]
5356  "TARGET_SIMD && reload_completed"
5357  [(const_int 0)]
5358{
5359  aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
5360  DONE;
5361})

5363(define_split
5364  [(set (match_operand:CI 0 "nonimmediate_operand")
5365	(match_operand:CI 1 "general_operand"))]
5366  "TARGET_SIMD && reload_completed"
5367  [(const_int 0)]
5368{
5369  if (register_operand (operands[0], CImode)
5370      && register_operand (operands[1], CImode))
5371    {
5372      aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
5373      DONE;
5374    }
5375  else if (BYTES_BIG_ENDIAN)
5376    {
5377      emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
5378		      simplify_gen_subreg (OImode, operands[1], CImode, 0));
      /* The trailing TImode piece is moved as V16QI so the move uses a
	 vector-mode pattern rather than a TImode GPR move.  */
5379      emit_move_insn (gen_lowpart (V16QImode,
5380				   simplify_gen_subreg (TImode, operands[0],
5381							CImode, 32)),
5382		      gen_lowpart (V16QImode,
5383				   simplify_gen_subreg (TImode, operands[1],
5384							CImode, 32)));
5385      DONE;
5386    }
5387  else
5388    FAIL;
5389})

5391(define_split
5392  [(set (match_operand:XI 0 "nonimmediate_operand")
5393	(match_operand:XI 1 "general_operand"))]
5394  "TARGET_SIMD && reload_completed"
5395  [(const_int 0)]
5396{
5397  if (register_operand (operands[0], XImode)
5398      && register_operand (operands[1], XImode))
5399    {
5400      aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
5401      DONE;
5402    }
5403  else if (BYTES_BIG_ENDIAN)
5404    {
5405      emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
5406		      simplify_gen_subreg (OImode, operands[1], XImode, 0));
5407      emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
5408		      simplify_gen_subreg (OImode, operands[1], XImode, 32));
5409      DONE;
5410    }
5411  else
5412    FAIL;
5413})
5414
;; vld<n>_dup intrinsics: LD<n>R loads one structure and replicates it
;; to all lanes.  Operand 1 is the DI base address; the MEM is BLKmode
;; with an explicit size of one element per loaded register.
;; NOTE(review): the "w" constraint on the DI address operand looks like
;; it should be "r", but constraints on define_expand operands are
;; ignored by the generators, so this has no effect -- confirm upstream.
5415(define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
5416  [(match_operand:VSTRUCT 0 "register_operand" "=w")
5417   (match_operand:DI 1 "register_operand" "w")
5418   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5419  "TARGET_SIMD"
5420{
5421  rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5422  set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5423		     * <VSTRUCT:nregs>);
5424
5425  emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
5426								mem));
5427  DONE;
5428})
5429
;; LD2/LD3/LD4 into D-register lists.  Each size has two variants
;; distinguished by the element-mode unspec: VD (64-bit vectors) emits a
;; real de-interleaving LD<n>, while DX (DI/DF scalars, i.e. .1d) emits
;; a plain LD1 of consecutive registers, since with one element per
;; vector there is nothing to de-interleave.  The destination uses the
;; full Q-register-list mode (OI/CI/XI); only the low halves are written.
5430(define_insn "aarch64_ld2<mode>_dreg"
5431  [(set (match_operand:OI 0 "register_operand" "=w")
5432	(unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5433		    (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5434		   UNSPEC_LD2_DREG))]
5435  "TARGET_SIMD"
5436  "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5437  [(set_attr "type" "neon_load2_2reg<q>")]
5438)

5440(define_insn "aarch64_ld2<mode>_dreg"
5441  [(set (match_operand:OI 0 "register_operand" "=w")
5442	(unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5443		    (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5444		   UNSPEC_LD2_DREG))]
5445  "TARGET_SIMD"
5446  "ld1\\t{%S0.1d - %T0.1d}, %1"
5447  [(set_attr "type" "neon_load1_2reg<q>")]
5448)

5450(define_insn "aarch64_ld3<mode>_dreg"
5451  [(set (match_operand:CI 0 "register_operand" "=w")
5452	(unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5453		    (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5454		   UNSPEC_LD3_DREG))]
5455  "TARGET_SIMD"
5456  "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5457  [(set_attr "type" "neon_load3_3reg<q>")]
5458)

5460(define_insn "aarch64_ld3<mode>_dreg"
5461  [(set (match_operand:CI 0 "register_operand" "=w")
5462	(unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5463		    (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5464		   UNSPEC_LD3_DREG))]
5465  "TARGET_SIMD"
5466  "ld1\\t{%S0.1d - %U0.1d}, %1"
5467  [(set_attr "type" "neon_load1_3reg<q>")]
5468)

5470(define_insn "aarch64_ld4<mode>_dreg"
5471  [(set (match_operand:XI 0 "register_operand" "=w")
5472	(unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5473		    (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5474		   UNSPEC_LD4_DREG))]
5475  "TARGET_SIMD"
5476  "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5477  [(set_attr "type" "neon_load4_4reg<q>")]
5478)

5480(define_insn "aarch64_ld4<mode>_dreg"
5481  [(set (match_operand:XI 0 "register_operand" "=w")
5482	(unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5483		    (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5484		   UNSPEC_LD4_DREG))]
5485  "TARGET_SIMD"
5486  "ld1\\t{%S0.1d - %V0.1d}, %1"
5487  [(set_attr "type" "neon_load1_4reg<q>")]
5488)
5489
;; vld<n> intrinsics for 64-bit element vectors: build a BLKmode MEM of
;; <nregs> D registers (8 bytes each) and defer to the _dreg insns above.
5490(define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
5491 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5492  (match_operand:DI 1 "register_operand" "r")
5493  (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5494  "TARGET_SIMD"
5495{
5496  rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5497  set_mem_size (mem, <VSTRUCT:nregs> * 8);
5498
5499  emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
5500  DONE;
5501})
5502
;; vld1 intrinsic: on big-endian use the explicit LD1 unspec pattern to
;; pin the architectural element order; on little-endian a plain vector
;; move has the same semantics.
5503(define_expand "aarch64_ld1<VALL_F16:mode>"
5504 [(match_operand:VALL_F16 0 "register_operand")
5505  (match_operand:DI 1 "register_operand")]
5506  "TARGET_SIMD"
5507{
5508  machine_mode mode = <VALL_F16:MODE>mode;
5509  rtx mem = gen_rtx_MEM (mode, operands[1]);
5510
5511  if (BYTES_BIG_ENDIAN)
5512    emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
5513  else
5514    emit_move_insn (operands[0], mem);
5515  DONE;
5516})
5517
;; vld<n> intrinsics for 128-bit element vectors: wrap the address in a
;; structure-mode MEM and emit the corresponding LD<n> insn directly.
5518(define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
5519 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5520  (match_operand:DI 1 "register_operand" "r")
5521  (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5522  "TARGET_SIMD"
5523{
5524  machine_mode mode = <VSTRUCT:MODE>mode;
5525  rtx mem = gen_rtx_MEM (mode, operands[1]);
5526
5527  emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
5528  DONE;
5529})
5530
;; vld1*_x2 intrinsics: load two consecutive vector registers with LD1.
;; Separate expanders for 128-bit (VQ) and 64-bit (VDC) element vectors;
;; both produce an OImode register pair.
5531(define_expand "aarch64_ld1x2<VQ:mode>"
5532 [(match_operand:OI 0 "register_operand" "=w")
5533  (match_operand:DI 1 "register_operand" "r")
5534  (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5535  "TARGET_SIMD"
5536{
5537  machine_mode mode = OImode;
5538  rtx mem = gen_rtx_MEM (mode, operands[1]);
5539
5540  emit_insn (gen_aarch64_simd_ld1<VQ:mode>_x2 (operands[0], mem));
5541  DONE;
5542})

5544(define_expand "aarch64_ld1x2<VDC:mode>"
5545 [(match_operand:OI 0 "register_operand" "=w")
5546  (match_operand:DI 1 "register_operand" "r")
5547  (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5548  "TARGET_SIMD"
5549{
5550  machine_mode mode = OImode;
5551  rtx mem = gen_rtx_MEM (mode, operands[1]);
5552
5553  emit_insn (gen_aarch64_simd_ld1<VDC:mode>_x2 (operands[0], mem));
5554  DONE;
5555})
5556
5557
;; vld<n>_lane intrinsics: load one structure into lane 3 of the list in
;; operand 2 (the remaining lanes pass through).  The lane index is
;; range-checked here; endian conversion happens in the lane insn.
5558(define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5559  [(match_operand:VSTRUCT 0 "register_operand" "=w")
5560	(match_operand:DI 1 "register_operand" "w")
5561	(match_operand:VSTRUCT 2 "register_operand" "0")
5562	(match_operand:SI 3 "immediate_operand" "i")
5563	(unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5564  "TARGET_SIMD"
5565{
5566  rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5567  set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5568		     * <VSTRUCT:nregs>);
5569
5570  aarch64_simd_lane_bounds (operands[3], 0, <VALLDIF:nunits>, NULL);
5571  emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5572	operands[0], mem, operands[2], operands[3]));
5573  DONE;
5574})
5575
5576;; Expanders for builtins to extract vector registers from large
5577;; opaque integer modes.
5578
5579;; D-register list.
5580
;; Extract D-register number <part> from a structure-mode value.  Each
;; component occupies a 16-byte (Q-register) slot, so the subreg offset
;; is part * 16; the double-width temporary is then narrowed to the
;; 64-bit result mode with gen_lowpart.
5581(define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
5582 [(match_operand:VDC 0 "register_operand" "=w")
5583  (match_operand:VSTRUCT 1 "register_operand" "w")
5584  (match_operand:SI 2 "immediate_operand" "i")]
5585  "TARGET_SIMD"
5586{
5587  int part = INTVAL (operands[2]);
5588  rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
5589  int offset = part * 16;
5590
5591  emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
5592  emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
5593  DONE;
5594})
5595
5596;; Q-register list.
5597
;; Extract Q-register number <part> from a structure-mode value: a plain
;; subreg move at a 16-byte-per-register offset.
5598(define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
5599 [(match_operand:VQ 0 "register_operand" "=w")
5600  (match_operand:VSTRUCT 1 "register_operand" "w")
5601  (match_operand:SI 2 "immediate_operand" "i")]
5602  "TARGET_SIMD"
5603{
5604  int part = INTVAL (operands[2]);
5605  int offset = part * 16;
5606
5607  emit_move_insn (operands[0],
5608		  gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
5609  DONE;
5610})
5611
5612;; Permuted-store expanders for neon intrinsics.
5613
5614;; Permute instructions
5615
5616;; vec_perm support
5617
;; Standard-name variable permute (byte vectors only); all the work is
;; done in aarch64_expand_vec_perm, which emits TBL sequences.
5618(define_expand "vec_perm<mode>"
5619  [(match_operand:VB 0 "register_operand")
5620   (match_operand:VB 1 "register_operand")
5621   (match_operand:VB 2 "register_operand")
5622   (match_operand:VB 3 "register_operand")]
5623  "TARGET_SIMD"
5624{
5625  aarch64_expand_vec_perm (operands[0], operands[1],
5626			   operands[2], operands[3], <nunits>);
5627  DONE;
5628})
5629
;; Single-register TBL: table in operand 1 (always 16 bytes), per-byte
;; indices in operand 2; out-of-range indices yield zero bytes.
5630(define_insn "aarch64_tbl1<mode>"
5631  [(set (match_operand:VB 0 "register_operand" "=w")
5632	(unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
5633		    (match_operand:VB 2 "register_operand" "w")]
5634		   UNSPEC_TBL))]
5635  "TARGET_SIMD"
5636  "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
5637  [(set_attr "type" "neon_tbl1<q>")]
5638)
5639
5640;; Two source registers.
5641
;; Two-register TBL: the OImode operand supplies the {Vn, Vn+1} table
;; register pair (%S/%T select the first and second register).
5642(define_insn "aarch64_tbl2v16qi"
5643  [(set (match_operand:V16QI 0 "register_operand" "=w")
5644	(unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
5645		       (match_operand:V16QI 2 "register_operand" "w")]
5646		      UNSPEC_TBL))]
5647  "TARGET_SIMD"
5648  "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
5649  [(set_attr "type" "neon_tbl2_q")]
5650)

;; Two-register TBL with D- or Q-sized index/result vectors (vqtbl2
;; style intrinsics).
5652(define_insn "aarch64_tbl3<mode>"
5653  [(set (match_operand:VB 0 "register_operand" "=w")
5654	(unspec:VB [(match_operand:OI 1 "register_operand" "w")
5655		      (match_operand:VB 2 "register_operand" "w")]
5656		      UNSPEC_TBL))]
5657  "TARGET_SIMD"
5658  "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
5659  [(set_attr "type" "neon_tbl3")]
5660)
5661
;; Two-register TBX: like TBL but lanes with out-of-range indices keep
;; the destination's previous contents, hence operand 1 tied to "0".
5662(define_insn "aarch64_tbx4<mode>"
5663  [(set (match_operand:VB 0 "register_operand" "=w")
5664	(unspec:VB [(match_operand:VB 1 "register_operand" "0")
5665		      (match_operand:OI 2 "register_operand" "w")
5666		      (match_operand:VB 3 "register_operand" "w")]
5667		      UNSPEC_TBX))]
5668  "TARGET_SIMD"
5669  "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
5670  [(set_attr "type" "neon_tbl4")]
5671)
5672
5673;; Three source registers.
5674
;; Three-register table lookups (vqtbl3/vqtbx3): the CImode operand
;; supplies the {Vn .. Vn+2} table register triple.
5675(define_insn "aarch64_qtbl3<mode>"
5676  [(set (match_operand:VB 0 "register_operand" "=w")
5677	(unspec:VB [(match_operand:CI 1 "register_operand" "w")
5678		      (match_operand:VB 2 "register_operand" "w")]
5679		      UNSPEC_TBL))]
5680  "TARGET_SIMD"
5681  "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
5682  [(set_attr "type" "neon_tbl3")]
5683)

;; TBX variant: out-of-range lanes keep operand 1 (tied to output).
5685(define_insn "aarch64_qtbx3<mode>"
5686  [(set (match_operand:VB 0 "register_operand" "=w")
5687	(unspec:VB [(match_operand:VB 1 "register_operand" "0")
5688		      (match_operand:CI 2 "register_operand" "w")
5689		      (match_operand:VB 3 "register_operand" "w")]
5690		      UNSPEC_TBX))]
5691  "TARGET_SIMD"
5692  "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
5693  [(set_attr "type" "neon_tbl3")]
5694)
5695
5696;; Four source registers.
5697
;; Four-register table lookups (vqtbl4/vqtbx4): the XImode operand
;; supplies the {Vn .. Vn+3} table register quadruple.
5698(define_insn "aarch64_qtbl4<mode>"
5699  [(set (match_operand:VB 0 "register_operand" "=w")
5700	(unspec:VB [(match_operand:XI 1 "register_operand" "w")
5701		      (match_operand:VB 2 "register_operand" "w")]
5702		      UNSPEC_TBL))]
5703  "TARGET_SIMD"
5704  "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
5705  [(set_attr "type" "neon_tbl4")]
5706)

;; TBX variant: out-of-range lanes keep operand 1 (tied to output).
5708(define_insn "aarch64_qtbx4<mode>"
5709  [(set (match_operand:VB 0 "register_operand" "=w")
5710	(unspec:VB [(match_operand:VB 1 "register_operand" "0")
5711		      (match_operand:XI 2 "register_operand" "w")
5712		      (match_operand:VB 3 "register_operand" "w")]
5713		      UNSPEC_TBX))]
5714  "TARGET_SIMD"
5715  "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
5716  [(set_attr "type" "neon_tbl4")]
5717)
5718
;; Combine two V16QI registers into the OImode register pair expected by
;; aarch64_tbl2v16qi.  Emitted as "#" and split after reload, when hard
;; register numbers are known, into the moves chosen by
;; aarch64_split_combinev16qi.
5719(define_insn_and_split "aarch64_combinev16qi"
5720  [(set (match_operand:OI 0 "register_operand" "=w")
5721	(unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
5722		    (match_operand:V16QI 2 "register_operand" "w")]
5723		   UNSPEC_CONCAT))]
5724  "TARGET_SIMD"
5725  "#"
5726  "&& reload_completed"
5727  [(const_int 0)]
5728{
5729  aarch64_split_combinev16qi (operands);
5730  DONE;
5731}
5732[(set_attr "type" "multiple")]
5733)
5734
5735;; This instruction's pattern is generated directly by
5736;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5737;; need corresponding changes there.
;; This instruction's pattern is generated directly by
;; aarch64_expand_vec_perm_const, so any changes to the pattern would
;; need corresponding changes there.
;; Covers the fixed two-input permutes: TRN1/TRN2, ZIP1/ZIP2, UZP1/UZP2
;; (insn name and mnemonic both come from the PERMUTE iterator).
5738(define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
5739  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5740	(unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5741			  (match_operand:VALL_F16 2 "register_operand" "w")]
5742	 PERMUTE))]
5743  "TARGET_SIMD"
5744  "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5745  [(set_attr "type" "neon_permute<q>")]
5746)
5747
5748;; This instruction's pattern is generated directly by
5749;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5750;; need corresponding changes there.  Note that the immediate (third)
5751;; operand is a lane index not a byte index.
;; This instruction's pattern is generated directly by
;; aarch64_expand_vec_perm_const, so any changes to the pattern would
;; need corresponding changes there.  Note that the immediate (third)
;; operand is a lane index not a byte index.
;; EXT takes a byte offset, so the lane index is scaled by the element
;; size at output time.
5752(define_insn "aarch64_ext<mode>"
5753  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5754        (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5755			  (match_operand:VALL_F16 2 "register_operand" "w")
5756			  (match_operand:SI 3 "immediate_operand" "i")]
5757	 UNSPEC_EXT))]
5758  "TARGET_SIMD"
5759{
5760  operands[3] = GEN_INT (INTVAL (operands[3])
5761      * GET_MODE_UNIT_SIZE (<MODE>mode));
5762  return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
5763}
5764  [(set_attr "type" "neon_ext<q>")]
5765)
5766
5767;; This instruction's pattern is generated directly by
5768;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5769;; need corresponding changes there.
;; This instruction's pattern is generated directly by
;; aarch64_expand_vec_perm_const, so any changes to the pattern would
;; need corresponding changes there.
;; REV16/REV32/REV64 element-reversal within fixed-size containers; the
;; container width is encoded in the REVERSE iterator's rev_op.
5770(define_insn "aarch64_rev<REVERSE:rev_op><mode>"
5771  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5772	(unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
5773                    REVERSE))]
5774  "TARGET_SIMD"
5775  "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
5776  [(set_attr "type" "neon_rev<q>")]
5777)
5778
;; ST2/ST3/ST4 from D-register lists; mirror images of the ld<n>_dreg
;; patterns above.  VD variants emit an interleaving ST<n>; DX (.1d)
;; variants emit a plain multi-register ST1, since single-element
;; vectors need no interleaving.  The source register list uses the full
;; Q-register-list mode (OI/CI/XI); only the low halves are stored.
5779(define_insn "aarch64_st2<mode>_dreg"
5780  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5781	(unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5782                    (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5783                   UNSPEC_ST2))]
5784  "TARGET_SIMD"
5785  "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5786  [(set_attr "type" "neon_store2_2reg")]
5787)

5789(define_insn "aarch64_st2<mode>_dreg"
5790  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5791	(unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5792                    (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5793                   UNSPEC_ST2))]
5794  "TARGET_SIMD"
5795  "st1\\t{%S1.1d - %T1.1d}, %0"
5796  [(set_attr "type" "neon_store1_2reg")]
5797)

5799(define_insn "aarch64_st3<mode>_dreg"
5800  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5801	(unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5802                    (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5803                   UNSPEC_ST3))]
5804  "TARGET_SIMD"
5805  "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5806  [(set_attr "type" "neon_store3_3reg")]
5807)

5809(define_insn "aarch64_st3<mode>_dreg"
5810  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5811	(unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5812                    (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5813                   UNSPEC_ST3))]
5814  "TARGET_SIMD"
5815  "st1\\t{%S1.1d - %U1.1d}, %0"
5816  [(set_attr "type" "neon_store1_3reg")]
5817)

5819(define_insn "aarch64_st4<mode>_dreg"
5820  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5821	(unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5822                    (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5823                   UNSPEC_ST4))]
5824  "TARGET_SIMD"
5825  "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5826  [(set_attr "type" "neon_store4_4reg")]
5827)

5829(define_insn "aarch64_st4<mode>_dreg"
5830  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5831	(unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5832                    (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5833                   UNSPEC_ST4))]
5834  "TARGET_SIMD"
5835  "st1\\t{%S1.1d - %V1.1d}, %0"
5836  [(set_attr "type" "neon_store1_4reg")]
5837)
5838
;; vst<n> intrinsics for 64-bit element vectors: build a BLKmode MEM of
;; <nregs> D registers (8 bytes each) and defer to the _dreg insns above.
5839(define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
5840 [(match_operand:DI 0 "register_operand" "r")
5841  (match_operand:VSTRUCT 1 "register_operand" "w")
5842  (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5843  "TARGET_SIMD"
5844{
5845  rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5846  set_mem_size (mem, <VSTRUCT:nregs> * 8);
5847
5848  emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
5849  DONE;
5850})
5851
;; vst<n> intrinsics for 128-bit element vectors: wrap the address in a
;; structure-mode MEM and emit the corresponding ST<n> insn directly.
5852(define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
5853 [(match_operand:DI 0 "register_operand" "r")
5854  (match_operand:VSTRUCT 1 "register_operand" "w")
5855  (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5856  "TARGET_SIMD"
5857{
5858  machine_mode mode = <VSTRUCT:MODE>mode;
5859  rtx mem = gen_rtx_MEM (mode, operands[0]);
5860
5861  emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
5862  DONE;
5863})
5864
;; vst<n>_lane intrinsics: store one structure (lane 2 of each register
;; in the list).  The MEM is BLKmode sized to one element per register;
;; endian lane conversion happens in the lane store insn.
5865(define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5866 [(match_operand:DI 0 "register_operand" "r")
5867  (match_operand:VSTRUCT 1 "register_operand" "w")
5868  (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5869  (match_operand:SI 2 "immediate_operand")]
5870  "TARGET_SIMD"
5871{
5872  rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5873  set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5874		     * <VSTRUCT:nregs>);
5875
5876  emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5877		mem, operands[1], operands[2]));
5878  DONE;
5879})
5880
;; vst1 intrinsic: big-endian uses the explicit ST1 unspec pattern to
;; pin the architectural element order; little-endian a plain move.
5881(define_expand "aarch64_st1<VALL_F16:mode>"
5882 [(match_operand:DI 0 "register_operand")
5883  (match_operand:VALL_F16 1 "register_operand")]
5884  "TARGET_SIMD"
5885{
5886  machine_mode mode = <VALL_F16:MODE>mode;
5887  rtx mem = gen_rtx_MEM (mode, operands[0]);
5888
5889  if (BYTES_BIG_ENDIAN)
5890    emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
5891  else
5892    emit_move_insn (mem, operands[1]);
5893  DONE;
5894})
5895
5896;; Expander for builtins to insert vector registers into large
5897;; opaque integer modes.
5898
5899;; Q-register list.  We don't need a D-reg inserter as we zero
5900;; extend them in arm_neon.h and insert the resulting Q-regs.
5901
;; Insert a Q register into slot <part> of a structure-mode value: copy
;; the whole structure, then overwrite one 16-byte subreg slot.
5902(define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
5903 [(match_operand:VSTRUCT 0 "register_operand" "+w")
5904  (match_operand:VSTRUCT 1 "register_operand" "0")
5905  (match_operand:VQ 2 "register_operand" "w")
5906  (match_operand:SI 3 "immediate_operand" "i")]
5907  "TARGET_SIMD"
5908{
5909  int part = INTVAL (operands[3]);
5910  int offset = part * 16;
5911
5912  emit_move_insn (operands[0], operands[1]);
5913  emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
5914		  operands[2]);
5915  DONE;
5916})
5917
5918;; Standard pattern name vec_init<mode><Vel>.
5919
;; Standard pattern name vec_init<mode><Vel>.
;; Operand 1 is a PARALLEL of element values; all strategy selection
;; (dup, insert, constant pool) lives in aarch64_expand_vector_init.
5920(define_expand "vec_init<mode><Vel>"
5921  [(match_operand:VALL_F16 0 "register_operand" "")
5922   (match_operand 1 "" "")]
5923  "TARGET_SIMD"
5924{
5925  aarch64_expand_vector_init (operands[0], operands[1]);
5926  DONE;
5927})
5928
;; LD1R: load one scalar from memory and duplicate it to every lane,
;; matched when combine forms a vec_duplicate of a memory scalar.
5929(define_insn "*aarch64_simd_ld1r<mode>"
5930  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5931	(vec_duplicate:VALL_F16
5932	  (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
5933  "TARGET_SIMD"
5934  "ld1r\\t{%0.<Vtype>}, %1"
5935  [(set_attr "type" "neon_load1_all_lanes")]
5936)
5937
;; LD1 of two consecutive vector registers into an OImode pair; one
;; variant per element-mode family (VQ = 128-bit, VDC = 64-bit vectors),
;; distinguished only by the mode-carrying dummy unspec.
5938(define_insn "aarch64_simd_ld1<mode>_x2"
5939  [(set (match_operand:OI 0 "register_operand" "=w")
5940	(unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5941		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5942		   UNSPEC_LD1))]
5943  "TARGET_SIMD"
5944  "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5945  [(set_attr "type" "neon_load1_2reg<q>")]
5946)

5948(define_insn "aarch64_simd_ld1<mode>_x2"
5949  [(set (match_operand:OI 0 "register_operand" "=w")
5950	(unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5951		    (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5952		   UNSPEC_LD1))]
5953  "TARGET_SIMD"
5954  "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5955  [(set_attr "type" "neon_load1_2reg<q>")]
5956)
5957
5958
;; FRECPE: floating-point reciprocal estimate, vector and scalar forms
;; (the "@" prefix exposes a gen_aarch64_frecpe (mode, ...) interface).
5959(define_insn "@aarch64_frecpe<mode>"
5960  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
5961	(unspec:VHSDF_HSDF
5962	 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
5963	 UNSPEC_FRECPE))]
5964  "TARGET_SIMD"
5965  "frecpe\t%<v>0<Vmtype>, %<v>1<Vmtype>"
5966  [(set_attr "type" "neon_fp_recpe_<stype><q>")]
5967)
5968
;; FRECPX: floating-point reciprocal exponent (scalar only).
5969(define_insn "aarch64_frecpx<mode>"
5970  [(set (match_operand:GPF_F16 0 "register_operand" "=w")
5971	(unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
5972	 UNSPEC_FRECPX))]
5973  "TARGET_SIMD"
5974  "frecpx\t%<s>0, %<s>1"
5975  [(set_attr "type" "neon_fp_recpx_<GPF_F16:stype>")]
5976)
5977
;; FRECPS: Newton-Raphson reciprocal step, companion to FRECPE.
5978(define_insn "@aarch64_frecps<mode>"
5979  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
5980	(unspec:VHSDF_HSDF
5981	  [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
5982	  (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
5983	  UNSPEC_FRECPS))]
5984  "TARGET_SIMD"
5985  "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5986  [(set_attr "type" "neon_fp_recps_<stype><q>")]
5987)
5988
;; URECPE: unsigned integer reciprocal estimate on 32-bit lanes.
5989(define_insn "aarch64_urecpe<mode>"
5990  [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
5991        (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
5992                UNSPEC_URECPE))]
5993 "TARGET_SIMD"
5994 "urecpe\\t%0.<Vtype>, %1.<Vtype>"
5995  [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
5996
5997;; Standard pattern name vec_extract<mode><Vel>.
5998
5999(define_expand "vec_extract<mode><Vel>"
6000  [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
6001   (match_operand:VALL_F16 1 "register_operand" "")
6002   (match_operand:SI 2 "immediate_operand" "")]
6003  "TARGET_SIMD"
6004{
6005    emit_insn
6006      (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
6007    DONE;
6008})
6009
6010;; aes
6011
;; AESE/AESD round instruction.  The architectural form is destructive
;; (state register is both read and written), modelled by tying operand
;; 1 to the output; "%0" also marks operands 1 and 2 as commutative.
6012(define_insn "aarch64_crypto_aes<aes_op>v16qi"
6013  [(set (match_operand:V16QI 0 "register_operand" "=w")
6014	(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "%0")
6015		       (match_operand:V16QI 2 "register_operand" "w")]
6016         CRYPTO_AES))]
6017  "TARGET_SIMD && TARGET_AES"
6018  "aes<aes_op>\\t%0.16b, %2.16b"
6019  [(set_attr "type" "crypto_aese")]
6020)
6021
;; Combiner patterns: AESE/AESD begin with an AddRoundKey XOR, so
;; aes(x ^ key, 0) == aes(x, key) and the explicit XOR with a zero
;; second input can be folded into the AES instruction.  Two patterns
;; cover the two operand orders of the unspec argument list.
6022(define_insn "*aarch64_crypto_aes<aes_op>v16qi_xor_combine"
6023  [(set (match_operand:V16QI 0 "register_operand" "=w")
6024	(unspec:V16QI [(xor:V16QI
6025			(match_operand:V16QI 1 "register_operand" "%0")
6026			(match_operand:V16QI 2 "register_operand" "w"))
6027		       (match_operand:V16QI 3 "aarch64_simd_imm_zero" "")]
6028		       CRYPTO_AES))]
6029  "TARGET_SIMD && TARGET_AES"
6030  "aes<aes_op>\\t%0.16b, %2.16b"
6031  [(set_attr "type" "crypto_aese")]
6032)

6034(define_insn "*aarch64_crypto_aes<aes_op>v16qi_xor_combine"
6035  [(set (match_operand:V16QI 0 "register_operand" "=w")
6036	(unspec:V16QI [(match_operand:V16QI 3 "aarch64_simd_imm_zero" "")
6037	(xor:V16QI (match_operand:V16QI 1 "register_operand" "%0")
6038		   (match_operand:V16QI 2 "register_operand" "w"))]
6039	CRYPTO_AES))]
6040  "TARGET_SIMD && TARGET_AES"
6041  "aes<aes_op>\\t%0.16b, %2.16b"
6042  [(set_attr "type" "crypto_aese")]
6043)
6044
6045;; When AES/AESMC fusion is enabled we want the register allocation to
6046;; look like:
6047;;    AESE Vn, _
6048;;    AESMC Vn, Vn
6049;; So prefer to tie operand 1 to operand 0 when fusing.
6050
;; AESMC/AESIMC (MixColumns).  Two register alternatives: the tied "0"
;; alternative is only enabled when AES/AESMC fusion is active, steering
;; the register allocator toward the AESE Vn / AESMC Vn,Vn shape that
;; the cores fuse (see comment above).
6051(define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
6052  [(set (match_operand:V16QI 0 "register_operand" "=w,w")
6053	(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
6054	 CRYPTO_AESMC))]
6055  "TARGET_SIMD && TARGET_AES"
6056  "aes<aesmc_op>\\t%0.16b, %1.16b"
6057  [(set_attr "type" "crypto_aesmc")
6058   (set_attr_alternative "enabled"
6059     [(if_then_else (match_test
6060		       "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
6061		     (const_string "yes" )
6062		     (const_string "no"))
6063      (const_string "yes")])]
6064)
6065
6066;; When AESE/AESMC fusion is enabled we really want to keep the two together
6067;; and enforce the register dependency without scheduling or register
;; allocation messing up the order or introducing moves in between.
6069;;  Mash the two together during combine.
6070
;; Combined AESE+AESMC pair, formed by combine when fusion is enabled.
;; Emits both instructions back to back ("length" 8) so nothing can be
;; scheduled between them.  The earlyclobber ("=&w") keeps the output
;; distinct from operand 2, preserving the Vn/Vn register dependency.
(define_insn "*aarch64_crypto_aese_fused"
  [(set (match_operand:V16QI 0 "register_operand" "=&w")
	(unspec:V16QI
	  [(unspec:V16QI
	    [(match_operand:V16QI 1 "register_operand" "0")
	     (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESE)
	  ] UNSPEC_AESMC))]
  "TARGET_SIMD && TARGET_AES
   && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
  "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
  [(set_attr "type" "crypto_aese")
   (set_attr "length" "8")]
)
6084
6085;; When AESD/AESIMC fusion is enabled we really want to keep the two together
6086;; and enforce the register dependency without scheduling or register
;; allocation messing up the order or introducing moves in between.
6088;;  Mash the two together during combine.
6089
;; Decrypt analogue of the fused pattern above: AESD+AESIMC emitted as a
;; single two-instruction insn.  Note it reuses the AARCH64_FUSE_AES_AESMC
;; fusion flag, which covers both the encrypt and decrypt pairs.
(define_insn "*aarch64_crypto_aesd_fused"
  [(set (match_operand:V16QI 0 "register_operand" "=&w")
	(unspec:V16QI
	  [(unspec:V16QI
	    [(match_operand:V16QI 1 "register_operand" "0")
	     (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESD)
	  ] UNSPEC_AESIMC))]
  "TARGET_SIMD && TARGET_AES
   && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
  "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
  [(set_attr "type" "crypto_aese")
   (set_attr "length" "8")]
)
6103
6104;; sha1
6105
;; SHA1H fixed rotate of a single 32-bit value held in an FP/SIMD
;; register (scalar %s operand form).
(define_insn "aarch64_crypto_sha1hsi"
  [(set (match_operand:SI 0 "register_operand" "=w")
        (unspec:SI [(match_operand:SI 1
                       "register_operand" "w")]
         UNSPEC_SHA1H))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha1h\\t%s0, %s1"
  [(set_attr "type" "crypto_sha1_fast")]
)

;; SHA1H applied to element 0 of a V4SI vector; valid only on
;; little-endian, where lane 0 is the low 32 bits %s1 reads.
(define_insn "aarch64_crypto_sha1hv4si"
  [(set (match_operand:SI 0 "register_operand" "=w")
	(unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
		     (parallel [(const_int 0)]))]
	 UNSPEC_SHA1H))]
  "TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN"
  "sha1h\\t%s0, %s1"
  [(set_attr "type" "crypto_sha1_fast")]
)

;; Big-endian counterpart: the architecturally-low element is RTL lane 3
;; under big-endian lane numbering.
(define_insn "aarch64_be_crypto_sha1hv4si"
  [(set (match_operand:SI 0 "register_operand" "=w")
	(unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
		     (parallel [(const_int 3)]))]
	 UNSPEC_SHA1H))]
  "TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN"
  "sha1h\\t%s0, %s1"
  [(set_attr "type" "crypto_sha1_fast")]
)

;; SHA1SU1 schedule update, part 2.  Operand 1 is the accumulator tied
;; to the output; only operand 2 appears in the assembly.
(define_insn "aarch64_crypto_sha1su1v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")]
         UNSPEC_SHA1SU1))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha1su1\\t%0.4s, %2.4s"
  [(set_attr "type" "crypto_sha1_fast")]
)

;; SHA1C/SHA1M/SHA1P hash update (choose/majority/parity), selected by
;; CRYPTO_SHA1.  Operand 2 is the scalar hash value e, operand 3 the
;; schedule; the 128-bit accumulator (%q0) is tied to operand 1.
(define_insn "aarch64_crypto_sha1<sha1_op>v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
         CRYPTO_SHA1))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha1<sha1_op>\\t%q0, %s2, %3.4s"
  [(set_attr "type" "crypto_sha1_slow")]
)

;; SHA1SU0 schedule update, part 1.
(define_insn "aarch64_crypto_sha1su0v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
         UNSPEC_SHA1SU0))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha1su0\\t%0.4s, %2.4s, %3.4s"
  [(set_attr "type" "crypto_sha1_xor")]
)
6167
6168;; sha256
6169
;; SHA256H/SHA256H2 hash update, selected by CRYPTO_SHA256.  Operand 1
;; is the 128-bit accumulator tied to the output (%q0).
(define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
         CRYPTO_SHA256))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
  [(set_attr "type" "crypto_sha256_slow")]
)

;; SHA256SU0 message schedule update, part 1.
(define_insn "aarch64_crypto_sha256su0v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")]
         UNSPEC_SHA256SU0))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha256su0\\t%0.4s, %2.4s"
  [(set_attr "type" "crypto_sha256_fast")]
)

;; SHA256SU1 message schedule update, part 2.
(define_insn "aarch64_crypto_sha256su1v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
         UNSPEC_SHA256SU1))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha256su1\\t%0.4s, %2.4s, %3.4s"
  [(set_attr "type" "crypto_sha256_slow")]
)
6201
6202;; sha512
6203
;; SHA512H/SHA512H2 hash update on V2DI, selected by CRYPTO_SHA512.
;; Note these are gated on TARGET_SHA3 (the SHA-512/SHA-3 extension).
(define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w")
        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
                      (match_operand:V2DI 2 "register_operand" "w")
                      (match_operand:V2DI 3 "register_operand" "w")]
         CRYPTO_SHA512))]
  "TARGET_SIMD && TARGET_SHA3"
  "sha512h<sha512_op>\\t%q0, %q2, %3.2d"
  [(set_attr "type" "crypto_sha512")]
)

;; SHA512SU0 message schedule update, part 1.
(define_insn "aarch64_crypto_sha512su0qv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w")
        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
                      (match_operand:V2DI 2 "register_operand" "w")]
         UNSPEC_SHA512SU0))]
  "TARGET_SIMD && TARGET_SHA3"
  "sha512su0\\t%0.2d, %2.2d"
  [(set_attr "type" "crypto_sha512")]
)

;; SHA512SU1 message schedule update, part 2.
(define_insn "aarch64_crypto_sha512su1qv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w")
        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
                      (match_operand:V2DI 2 "register_operand" "w")
                      (match_operand:V2DI 3 "register_operand" "w")]
         UNSPEC_SHA512SU1))]
  "TARGET_SIMD && TARGET_SHA3"
  "sha512su1\\t%0.2d, %2.2d, %3.2d"
  [(set_attr "type" "crypto_sha512")]
)
6235
6236;; sha3
6237
;; EOR3: three-way exclusive OR, %0 = %1 ^ %2 ^ %3.  Expressed with
;; generic XOR rtl so combine can form it from plain XOR pairs.
(define_insn "eor3q<mode>4"
  [(set (match_operand:VQ_I 0 "register_operand" "=w")
	(xor:VQ_I
	 (xor:VQ_I
	  (match_operand:VQ_I 2 "register_operand" "w")
	  (match_operand:VQ_I 3 "register_operand" "w"))
	 (match_operand:VQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD && TARGET_SHA3"
  "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
  [(set_attr "type" "crypto_sha3")]
)

;; RAX1: rotate each 64-bit lane of operand 2 left by one, then XOR
;; with operand 1.
(define_insn "aarch64_rax1qv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w")
	(xor:V2DI
	 (rotate:V2DI
	  (match_operand:V2DI 2 "register_operand" "w")
	  (const_int 1))
	 (match_operand:V2DI 1 "register_operand" "w")))]
  "TARGET_SIMD && TARGET_SHA3"
  "rax1\\t%0.2d, %1.2d, %2.2d"
  [(set_attr "type" "crypto_sha3")]
)

;; XAR: XOR operands 1 and 2 (commutative, "%w"), then rotate each
;; 64-bit lane right by the immediate in operand 3.
(define_insn "aarch64_xarqv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w")
	(rotatert:V2DI
	 (xor:V2DI
	  (match_operand:V2DI 1 "register_operand" "%w")
	  (match_operand:V2DI 2 "register_operand" "w"))
	 (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
  "TARGET_SIMD && TARGET_SHA3"
  "xar\\t%0.2d, %1.2d, %2.2d, %3"
  [(set_attr "type" "crypto_sha3")]
)

;; BCAX: bit clear and XOR, %0 = %1 ^ (%2 & ~%3).
(define_insn "bcaxq<mode>4"
  [(set (match_operand:VQ_I 0 "register_operand" "=w")
	(xor:VQ_I
	 (and:VQ_I
	  (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
	  (match_operand:VQ_I 2 "register_operand" "w"))
	 (match_operand:VQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD && TARGET_SHA3"
  "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
  [(set_attr "type" "crypto_sha3")]
)
6285
6286;; SM3
6287
;; SM3SS1: SM3 rotating-XOR step on three V4SI inputs; the only SM3
;; pattern here without a tied accumulator operand.
(define_insn "aarch64_sm3ss1qv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
		      (match_operand:V4SI 2 "register_operand" "w")
		      (match_operand:V4SI 3 "register_operand" "w")]
	 UNSPEC_SM3SS1))]
  "TARGET_SIMD && TARGET_SM4"
  "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
  [(set_attr "type" "crypto_sm3")]
)


;; SM3TT1A/1B/2A/2B, selected by CRYPTO_SM3TT.  Operand 4 is a 2-bit
;; lane immediate (aarch64_imm2) selecting the element of operand 3.
(define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
		      (match_operand:V4SI 2 "register_operand" "w")
		      (match_operand:V4SI 3 "register_operand" "w")
		      (match_operand:SI 4 "aarch64_imm2" "Ui2")]
	 CRYPTO_SM3TT))]
  "TARGET_SIMD && TARGET_SM4"
  "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
  [(set_attr "type" "crypto_sm3")]
)

;; SM3PARTW1/SM3PARTW2 message expansion, selected by CRYPTO_SM3PART.
(define_insn "aarch64_sm3partw<sm3part_op>qv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
		      (match_operand:V4SI 2 "register_operand" "w")
		      (match_operand:V4SI 3 "register_operand" "w")]
	 CRYPTO_SM3PART))]
  "TARGET_SIMD && TARGET_SM4"
  "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
  [(set_attr "type" "crypto_sm3")]
)
6322
6323;; SM4
6324
;; SM4E encryption round: operand 1 is the data tied to the output,
;; operand 2 the round key.
(define_insn "aarch64_sm4eqv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
		      (match_operand:V4SI 2 "register_operand" "w")]
	 UNSPEC_SM4E))]
  "TARGET_SIMD && TARGET_SM4"
  "sm4e\\t%0.4s, %2.4s"
  [(set_attr "type" "crypto_sm4")]
)

;; SM4EKEY key schedule derivation; unlike SM4E, no operand is tied.
(define_insn "aarch64_sm4ekeyqv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
		      (match_operand:V4SI 2 "register_operand" "w")]
	 UNSPEC_SM4EKEY))]
  "TARGET_SIMD && TARGET_SM4"
  "sm4ekey\\t%0.4s, %1.4s, %2.4s"
  [(set_attr "type" "crypto_sm4")]
)
6344
6345;; fp16fml
6346
;; FMLAL/FMLSL (f16mac1 selects add vs subtract): widening fused
;; multiply-accumulate from FP16 into FP32.  This expander builds two
;; identical lo-half lane-select parallels and hands them to the
;; matching *_low insn below.
(define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(unspec:VDQSF
	 [(match_operand:VDQSF 1 "register_operand" "0")
	  (match_operand:<VFMLA_W> 2 "register_operand" "w")
	  (match_operand:<VFMLA_W> 3 "register_operand" "w")]
	 VFMLA16_LOW))]
  "TARGET_F16FML"
{
  /* Lo-half selectors for both multiplicands (the HF vectors have twice
     as many elements as the SF result, hence <nunits> * 2).  */
  rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
					    <nunits> * 2, false);
  rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
					    <nunits> * 2, false);

  emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],
								operands[1],
								operands[2],
								operands[3],
								p1, p2));
  DONE;

})

;; As above but selecting the high halves of the FP16 multiplicands
;; (third argument 'true'), mapping to FMLAL2/FMLSL2.
(define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(unspec:VDQSF
	 [(match_operand:VDQSF 1 "register_operand" "0")
	  (match_operand:<VFMLA_W> 2 "register_operand" "w")
	  (match_operand:<VFMLA_W> 3 "register_operand" "w")]
	 VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
  rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);

  emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],
								 operands[1],
								 operands[2],
								 operands[3],
								 p1, p2));
  DONE;
})

;; FMLAL (low): fma of the float-extended lo halves of operands 2 and 3
;; into accumulator operand 1 (tied to the output).  Operands 4 and 5
;; are the lo-half selectors built by the expander.
(define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(fma:VDQSF
	 (float_extend:VDQSF
	  (vec_select:<VFMLA_SEL_W>
	   (match_operand:<VFMLA_W> 2 "register_operand" "w")
	   (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
	 (float_extend:VDQSF
	  (vec_select:<VFMLA_SEL_W>
	   (match_operand:<VFMLA_W> 3 "register_operand" "w")
	   (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
	 (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; FMLSL (low): as FMLAL but the first multiplicand is negated, giving
;; a fused multiply-subtract.
(define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(fma:VDQSF
	 (float_extend:VDQSF
	  (neg:<VFMLA_SEL_W>
	   (vec_select:<VFMLA_SEL_W>
	    (match_operand:<VFMLA_W> 2 "register_operand" "w")
	    (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
	 (float_extend:VDQSF
	  (vec_select:<VFMLA_SEL_W>
	   (match_operand:<VFMLA_W> 3 "register_operand" "w")
	   (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
	 (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; FMLAL2 (high): hi-half selectors instead of lo-half.
(define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(fma:VDQSF
	 (float_extend:VDQSF
	  (vec_select:<VFMLA_SEL_W>
	   (match_operand:<VFMLA_W> 2 "register_operand" "w")
	   (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
	 (float_extend:VDQSF
	  (vec_select:<VFMLA_SEL_W>
	   (match_operand:<VFMLA_W> 3 "register_operand" "w")
	   (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
	 (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; FMLSL2 (high): hi-half variant of the multiply-subtract.
(define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(fma:VDQSF
	 (float_extend:VDQSF
	  (neg:<VFMLA_SEL_W>
	   (vec_select:<VFMLA_SEL_W>
	    (match_operand:<VFMLA_W> 2 "register_operand" "w")
	    (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
	 (float_extend:VDQSF
	  (vec_select:<VFMLA_SEL_W>
	   (match_operand:<VFMLA_W> 3 "register_operand" "w")
	   (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
	 (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
  [(set_attr "type" "neon_fp_mul_s")]
)
6459
;; By-element FMLAL/FMLSL, V2SF result from V4HF inputs: operand 3 is a
;; V4HF vector of which a single lane (operand 4, 2-bit immediate) is
;; used as the multiplier.  The expander builds the lo-half selector and
;; an endian-corrected lane rtx, then emits the matching insn.
(define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "")
	(unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
			   (match_operand:V4HF 2 "register_operand" "")
			   (match_operand:V4HF 3 "register_operand" "")
			   (match_operand:SI 4 "aarch64_imm2" "")]
	 VFMLA16_LOW))]
  "TARGET_F16FML"
{
    rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
    rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

    emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],
							    operands[1],
							    operands[2],
							    operands[3],
							    p1, lane));
    DONE;
}
)

;; High-half counterpart of the expander above (FMLAL2/FMLSL2 by
;; element).
(define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "")
	(unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
			   (match_operand:V4HF 2 "register_operand" "")
			   (match_operand:V4HF 3 "register_operand" "")
			   (match_operand:SI 4 "aarch64_imm2" "")]
	 VFMLA16_HIGH))]
  "TARGET_F16FML"
{
    rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
    rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

    emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],
							     operands[1],
							     operands[2],
							     operands[3],
							     p1, lane));
    DONE;
})

;; FMLAL by element (low): lane 5 of operand 3 is broadcast
;; (vec_duplicate) as the second multiplicand.  The "x" constraint
;; narrows operand 3 to the register subset the by-element FP16
;; encoding can address.
(define_insn "aarch64_simd_fmlal_lane_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	   (vec_select:V2HF
	    (match_operand:V4HF 2 "register_operand" "w")
	    (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
	 (float_extend:V2SF
	   (vec_duplicate:V2HF
	    (vec_select:HF
	     (match_operand:V4HF 3 "register_operand" "x")
	     (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; FMLSL by element (low): first multiplicand negated.
(define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	  (neg:V2HF
	   (vec_select:V2HF
	    (match_operand:V4HF 2 "register_operand" "w")
	    (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; FMLAL2 by element (high half of operand 2).
(define_insn "aarch64_simd_fmlal_lane_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	   (vec_select:V2HF
	    (match_operand:V4HF 2 "register_operand" "w")
	    (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
	 (float_extend:V2SF
	   (vec_duplicate:V2HF
	    (vec_select:HF
	     (match_operand:V4HF 3 "register_operand" "x")
	     (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; FMLSL2 by element (high half, negated multiplicand).
(define_insn "aarch64_simd_fmlsl_lane_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	   (neg:V2HF
	    (vec_select:V2HF
	     (match_operand:V4HF 2 "register_operand" "w")
	     (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
	 (float_extend:V2SF
	   (vec_duplicate:V2HF
	    (vec_select:HF
	     (match_operand:V4HF 3 "register_operand" "x")
	     (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
6574
;; Q-form by-element FMLAL/FMLSL, V4SF result with a V8HF lane operand:
;; lane index (operand 4) is 3 bits (aarch64_lane_imm3).  The expander
;; produces the lo-half selector and endian-corrected lane.
(define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "")
	(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
			   (match_operand:V8HF 2 "register_operand" "")
			   (match_operand:V8HF 3 "register_operand" "")
			   (match_operand:SI 4 "aarch64_lane_imm3" "")]
	 VFMLA16_LOW))]
  "TARGET_F16FML"
{
    rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
    rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

    emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],
							      operands[1],
							      operands[2],
							      operands[3],
							      p1, lane));
    DONE;
})

;; High-half counterpart (FMLAL2/FMLSL2 by element).
(define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "")
	(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
			   (match_operand:V8HF 2 "register_operand" "")
			   (match_operand:V8HF 3 "register_operand" "")
			   (match_operand:SI 4 "aarch64_lane_imm3" "")]
	 VFMLA16_HIGH))]
  "TARGET_F16FML"
{
    rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
    rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

    emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],
							       operands[1],
							       operands[2],
							       operands[3],
							       p1, lane));
    DONE;
})

;; FMLAL by element, Q form: low half of V8HF operand 2 times the
;; broadcast lane 5 of V8HF operand 3, accumulated into operand 1.
(define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (vec_select:V4HF
	    (match_operand:V8HF 2 "register_operand" "w")
	    (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; FMLSL by element, Q form (negated first multiplicand).
(define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	  (float_extend:V4SF
	   (neg:V4HF
	    (vec_select:V4HF
	     (match_operand:V8HF 2 "register_operand" "w")
	     (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; FMLAL2 by element, Q form (high half of operand 2).
(define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (vec_select:V4HF
	    (match_operand:V8HF 2 "register_operand" "w")
	    (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; FMLSL2 by element, Q form (high half, negated multiplicand).
(define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (neg:V4HF
	   (vec_select:V4HF
	    (match_operand:V8HF 2 "register_operand" "w")
	    (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
6688
;; Mixed-width by-element form: V2SF result, V4HF multiplicand vector,
;; but the lane operand (3) is a full V8HF register with a 3-bit lane
;; index.  Selector is built for V4HFmode (the multiplied halves), the
;; lane is endian-corrected against V8HFmode.
(define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "")
	(unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
		      (match_operand:V4HF 2 "register_operand" "")
		      (match_operand:V8HF 3 "register_operand" "")
		      (match_operand:SI 4 "aarch64_lane_imm3" "")]
	 VFMLA16_LOW))]
  "TARGET_F16FML"
{
    rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
    rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

    emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0],
							     operands[1],
							     operands[2],
							     operands[3],
							     p1, lane));
    DONE;

})

;; High-half counterpart of the expander above.
(define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "")
	(unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
		      (match_operand:V4HF 2 "register_operand" "")
		      (match_operand:V8HF 3 "register_operand" "")
		      (match_operand:SI 4 "aarch64_lane_imm3" "")]
	 VFMLA16_HIGH))]
  "TARGET_F16FML"
{
    rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
    rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

    emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0],
							      operands[1],
							      operands[2],
							      operands[3],
							      p1, lane));
    DONE;

})

;; FMLAL by element, laneq form: lo half of V4HF operand 2 times
;; broadcast lane 5 of V8HF operand 3 ("x" = restricted register set
;; required by the by-element encoding).
(define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	   (vec_select:V2HF
	    (match_operand:V4HF 2 "register_operand" "w")
	    (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; FMLSL by element, laneq form (negated first multiplicand).
(define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	  (neg:V2HF
	   (vec_select:V2HF
	    (match_operand:V4HF 2 "register_operand" "w")
	    (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; FMLAL2 by element, laneq form (high half of operand 2).
(define_insn "aarch64_simd_fmlal_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	   (vec_select:V2HF
	    (match_operand:V4HF 2 "register_operand" "w")
	    (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; FMLSL2 by element, laneq form (high half, negated multiplicand).
(define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	  (neg:V2HF
	   (vec_select:V2HF
	    (match_operand:V4HF 2 "register_operand" "w")
	    (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
6804
;; Opposite mixed-width form: V4SF result, V8HF multiplicand vector,
;; with a 64-bit V4HF lane operand and a 2-bit lane index.  Selector is
;; built for V8HFmode, the lane is corrected against V4HFmode.
(define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "")
	(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
		      (match_operand:V8HF 2 "register_operand" "")
		      (match_operand:V4HF 3 "register_operand" "")
		      (match_operand:SI 4 "aarch64_imm2" "")]
	 VFMLA16_LOW))]
  "TARGET_F16FML"
{
    rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
    rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

    emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0],
							     operands[1],
							     operands[2],
							     operands[3],
							     p1, lane));
    DONE;
})

;; High-half counterpart of the expander above.
(define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "")
	(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
		      (match_operand:V8HF 2 "register_operand" "")
		      (match_operand:V4HF 3 "register_operand" "")
		      (match_operand:SI 4 "aarch64_imm2" "")]
	 VFMLA16_HIGH))]
  "TARGET_F16FML"
{
    rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
    rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

    emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0],
							      operands[1],
							      operands[2],
							      operands[3],
							      p1, lane));
    DONE;
})

;; FMLAL by element, Q-result/D-lane form: low half of V8HF operand 2
;; times broadcast lane 5 of V4HF operand 3.
(define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (vec_select:V4HF
	   (match_operand:V8HF 2 "register_operand" "w")
	   (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; FMLSL by element, Q-result/D-lane form (negated first multiplicand).
(define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (neg:V4HF
	   (vec_select:V4HF
	    (match_operand:V8HF 2 "register_operand" "w")
	    (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; FMLAL2 by element, Q-result/D-lane form (high half of operand 2).
(define_insn "aarch64_simd_fmlalq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (vec_select:V4HF
	   (match_operand:V8HF 2 "register_operand" "w")
	   (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; FMLSL2 by element, Q-result/D-lane form (high half, negated).
(define_insn "aarch64_simd_fmlslq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (neg:V4HF
	   (vec_select:V4HF
	    (match_operand:V8HF 2 "register_operand" "w")
	    (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
6918
6919;; pmull
6920
;; PMULL: 64x64 -> 128-bit carry-less (polynomial) multiply of two DI
;; scalars, producing a TI result in a vector register.
(define_insn "aarch64_crypto_pmulldi"
  [(set (match_operand:TI 0 "register_operand" "=w")
        (unspec:TI  [(match_operand:DI 1 "register_operand" "w")
		     (match_operand:DI 2 "register_operand" "w")]
		    UNSPEC_PMULL))]
 "TARGET_SIMD && TARGET_AES"
 "pmull\\t%0.1q, %1.1d, %2.1d"
  [(set_attr "type" "crypto_pmull")]
)

;; PMULL2: same operation on the upper 64-bit lanes of two V2DI
;; vectors.
(define_insn "aarch64_crypto_pmullv2di"
 [(set (match_operand:TI 0 "register_operand" "=w")
       (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
		   (match_operand:V2DI 2 "register_operand" "w")]
		  UNSPEC_PMULL2))]
  "TARGET_SIMD && TARGET_AES"
  "pmull2\\t%0.1q, %1.2d, %2.2d"
  [(set_attr "type" "crypto_pmull")]
)
6940