xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/config/aarch64/aarch64-simd.md (revision e6c7e151de239c49d2e38720a061ed9d1fa99309)
1;; Machine description for AArch64 AdvSIMD architecture.
2;; Copyright (C) 2011-2017 Free Software Foundation, Inc.
3;; Contributed by ARM Ltd.
4;;
5;; This file is part of GCC.
6;;
7;; GCC is free software; you can redistribute it and/or modify it
8;; under the terms of the GNU General Public License as published by
9;; the Free Software Foundation; either version 3, or (at your option)
10;; any later version.
11;;
12;; GCC is distributed in the hope that it will be useful, but
13;; WITHOUT ANY WARRANTY; without even the implied warranty of
14;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15;; General Public License for more details.
16;;
17;; You should have received a copy of the GNU General Public License
18;; along with GCC; see the file COPYING3.  If not see
19;; <http://www.gnu.org/licenses/>.
20
;; Standard mov<mode> expander for all vector modes including FP16 vectors
;; (VALL_F16).  If the destination is memory, force the source into a
;; register first so the resulting insn is a plain store (mem-to-mem and
;; immediate-to-mem forms are not handled by the move insns below).
(define_expand "mov<mode>"
  [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
	(match_operand:VALL_F16 1 "general_operand" ""))]
  "TARGET_SIMD"
  "
    if (GET_CODE (operands[0]) == MEM)
      operands[1] = force_reg (<MODE>mode, operands[1]);
  "
)
30
;; Misaligned vector move expander; AdvSIMD loads/stores tolerate
;; misalignment, so this only has to legitimize the operands.
(define_expand "movmisalign<mode>"
  [(set (match_operand:VALL 0 "nonimmediate_operand" "")
        (match_operand:VALL 1 "general_operand" ""))]
  "TARGET_SIMD"
{
  /* This pattern is not permitted to fail during expansion: if both arguments
     are non-registers (e.g. memory := constant, which can be created by the
     auto-vectorizer), force operand 1 into a register.  */
  if (!register_operand (operands[0], <MODE>mode)
      && !register_operand (operands[1], <MODE>mode))
    operands[1] = force_reg (<MODE>mode, operands[1]);
})
43
;; Broadcast a scalar into every lane of an integer vector.  The source can
;; come from a general-purpose register (DUP from GP) or from lane 0 of a
;; SIMD register (DUP element).
(define_insn "aarch64_simd_dup<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
	(vec_duplicate:VDQ_I
	  (match_operand:<VEL> 1 "register_operand" "r, w")))]
  "TARGET_SIMD"
  "@
   dup\\t%0.<Vtype>, %<vw>1
   dup\\t%0.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon_from_gp<q>, neon_dup<q>")]
)
54
;; Floating-point variant of the broadcast above: the scalar must already be
;; in a SIMD register, so only the DUP-element form is provided.
(define_insn "aarch64_simd_dup<mode>"
  [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
	(vec_duplicate:VDQF_F16
	  (match_operand:<VEL> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon_dup<q>")]
)
63
;; Broadcast one selected lane of a vector into every lane of the result.
;; The lane number is remapped for big-endian lane numbering before output.
(define_insn "aarch64_dup_lane<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
	(vec_duplicate:VALL_F16
	  (vec_select:<VEL>
	    (match_operand:VALL_F16 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")])
          )))]
  "TARGET_SIMD"
  {
    operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
    return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_dup<q>")]
)
78
;; As aarch64_dup_lane, but the source vector has the opposite width
;; (<VSWAP_WIDTH>), so the lane index is remapped in the source's mode.
(define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
  [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
	(vec_duplicate:VALL_F16_NO_V2Q
	  (vec_select:<VEL>
	    (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")])
          )))]
  "TARGET_SIMD"
  {
    operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
					  INTVAL (operands[2])));
    return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_dup<q>")]
)
94
;; Fundamental move insn for 64-bit vector modes.  Alternatives cover
;; load (LDR), store (STR), SIMD-to-SIMD (ORR), SIMD-to-GP (UMOV),
;; GP-to-SIMD (FMOV), GP-to-GP (MOV) and vector immediates (Dn).
;; At least one operand must be a register.
(define_insn "*aarch64_simd_mov<mode>"
  [(set (match_operand:VD 0 "nonimmediate_operand"
		"=w, m,  w, ?r, ?w, ?r, w")
	(match_operand:VD 1 "general_operand"
		"m,  w,  w,  w,  r,  r, Dn"))]
  "TARGET_SIMD
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
   switch (which_alternative)
     {
     case 0: return "ldr\\t%d0, %1";
     case 1: return "str\\t%d1, %0";
     case 2: return "orr\t%0.<Vbtype>, %1.<Vbtype>, %1.<Vbtype>";
     case 3: return "umov\t%0, %1.d[0]";
     case 4: return "fmov\t%d0, %1";
     case 5: return "mov\t%0, %1";
     case 6:
	return aarch64_output_simd_mov_immediate (operands[1],
						  <MODE>mode, 64);
     default: gcc_unreachable ();
     }
}
  [(set_attr "type" "neon_load1_1reg<q>, neon_store1_1reg<q>,\
		     neon_logic<q>, neon_to_gp<q>, f_mcr,\
		     mov_reg, neon_move<q>")]
)
122
;; Fundamental move insn for 128-bit vector modes.  The GP<->SIMD and
;; GP-to-GP alternatives emit "#" and are split after reload into two
;; 64-bit moves (see the define_splits below), hence length 8 for them.
(define_insn "*aarch64_simd_mov<mode>"
  [(set (match_operand:VQ 0 "nonimmediate_operand"
		"=w, m,  w, ?r, ?w, ?r, w")
	(match_operand:VQ 1 "general_operand"
		"m,  w,  w,  w,  r,  r, Dn"))]
  "TARGET_SIMD
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  switch (which_alternative)
    {
    case 0:
	return "ldr\\t%q0, %1";
    case 1:
	return "str\\t%q1, %0";
    case 2:
	return "orr\t%0.<Vbtype>, %1.<Vbtype>, %1.<Vbtype>";
    case 3:
    case 4:
    case 5:
	return "#";
    case 6:
	return aarch64_output_simd_mov_immediate (operands[1], <MODE>mode, 128);
    default:
	gcc_unreachable ();
    }
}
  [(set_attr "type" "neon_load1_1reg<q>, neon_store1_1reg<q>,\
                     neon_logic<q>, multiple, multiple, multiple,\
                     neon_move<q>")
   (set_attr "length" "4,4,4,8,8,8,4")]
)
155
;; Load a pair of adjacent 64-bit vectors with a single LDP.  The condition
;; checks that the second address is exactly the first plus the mode size,
;; i.e. the two loads really are contiguous.
(define_insn "load_pair<mode>"
  [(set (match_operand:VD 0 "register_operand" "=w")
	(match_operand:VD 1 "aarch64_mem_pair_operand" "Ump"))
   (set (match_operand:VD 2 "register_operand" "=w")
	(match_operand:VD 3 "memory_operand" "m"))]
  "TARGET_SIMD
   && rtx_equal_p (XEXP (operands[3], 0),
		   plus_constant (Pmode,
				  XEXP (operands[1], 0),
				  GET_MODE_SIZE (<MODE>mode)))"
  "ldp\\t%d0, %d2, %1"
  [(set_attr "type" "neon_ldp")]
)
169
;; Store a pair of adjacent 64-bit vectors with a single STP; mirror image
;; of load_pair above, with the same contiguity check on the addresses.
(define_insn "store_pair<mode>"
  [(set (match_operand:VD 0 "aarch64_mem_pair_operand" "=Ump")
	(match_operand:VD 1 "register_operand" "w"))
   (set (match_operand:VD 2 "memory_operand" "=m")
	(match_operand:VD 3 "register_operand" "w"))]
  "TARGET_SIMD
   && rtx_equal_p (XEXP (operands[2], 0),
		   plus_constant (Pmode,
				  XEXP (operands[0], 0),
				  GET_MODE_SIZE (<MODE>mode)))"
  "stp\\t%d1, %d3, %0"
  [(set_attr "type" "neon_stp")]
)
183
;; After reload, split a 128-bit vector move between two general-purpose
;; register pairs into two DImode register moves.
(define_split
  [(set (match_operand:VQ 0 "register_operand" "")
      (match_operand:VQ 1 "register_operand" ""))]
  "TARGET_SIMD && reload_completed
   && GP_REGNUM_P (REGNO (operands[0]))
   && GP_REGNUM_P (REGNO (operands[1]))"
  [(const_int 0)]
{
  aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
  DONE;
})
195
;; After reload, split a 128-bit vector move that crosses between the FP
;; and general register files (either direction) via aarch64_split_simd_move.
(define_split
  [(set (match_operand:VQ 0 "register_operand" "")
        (match_operand:VQ 1 "register_operand" ""))]
  "TARGET_SIMD && reload_completed
   && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
       || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
  [(const_int 0)]
{
  aarch64_split_simd_move (operands[0], operands[1]);
  DONE;
})
207
;; Expand a 128-bit vector move in two 64-bit halves.  GP-to-SIMD goes via
;; move_lo_quad/move_hi_quad; SIMD-to-GP extracts the low and high halves
;; with vec_selects built from parallel-constant half masks.
(define_expand "aarch64_split_simd_mov<mode>"
  [(set (match_operand:VQ 0)
        (match_operand:VQ 1))]
  "TARGET_SIMD"
  {
    rtx dst = operands[0];
    rtx src = operands[1];

    if (GP_REGNUM_P (REGNO (src)))
      {
        /* Source in general registers: insert each 64-bit half into the
           SIMD destination.  */
        rtx src_low_part = gen_lowpart (<VHALF>mode, src);
        rtx src_high_part = gen_highpart (<VHALF>mode, src);

        emit_insn
          (gen_move_lo_quad_<mode> (dst, src_low_part));
        emit_insn
          (gen_move_hi_quad_<mode> (dst, src_high_part));
      }

    else
      {
        /* Source in a SIMD register: extract each half into the GP
           destination halves.  */
        rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
        rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
        rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
        rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);

        emit_insn
          (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
        emit_insn
          (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
      }
    DONE;
  }
)
242
;; Move the low 64-bit half of a 128-bit vector into a general register
;; (UMOV from element d[0]).  Only valid after reload.
(define_insn "aarch64_simd_mov_from_<mode>low"
  [(set (match_operand:<VHALF> 0 "register_operand" "=r")
        (vec_select:<VHALF>
          (match_operand:VQ 1 "register_operand" "w")
          (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
  "TARGET_SIMD && reload_completed"
  "umov\t%0, %1.d[0]"
  [(set_attr "type" "neon_to_gp<q>")
   (set_attr "length" "4")
  ])
253
;; Move the high 64-bit half of a 128-bit vector into a general register
;; (UMOV from element d[1]).  Only valid after reload.
(define_insn "aarch64_simd_mov_from_<mode>high"
  [(set (match_operand:<VHALF> 0 "register_operand" "=r")
        (vec_select:<VHALF>
          (match_operand:VQ 1 "register_operand" "w")
          (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
  "TARGET_SIMD && reload_completed"
  "umov\t%0, %1.d[1]"
  [(set_attr "type" "neon_to_gp<q>")
   (set_attr "length" "4")
  ])
264
;; Vector OR-NOT: op0 = op2 | ~op1.  Note the ORN instruction negates its
;; second source, so the operands are swapped in the output template.
(define_insn "orn<mode>3"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
		(match_operand:VDQ_I 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)
273
;; Vector bit-clear: op0 = op2 & ~op1.  As with ORN, BIC negates its second
;; source, hence the swapped operand order in the template.
(define_insn "bic<mode>3"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
		(match_operand:VDQ_I 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)
282
;; Vector integer addition (ADD) for all integer vector modes.
(define_insn "add<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		  (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_add<q>")]
)
291
;; Vector integer subtraction (SUB) for all integer vector modes.
(define_insn "sub<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		   (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_sub<q>")]
)
300
;; Vector integer multiply (MUL); byte/half/single element sizes only
;; (VDQ_BHSI) — there is no V2DI multiply instruction.
(define_insn "mul<mode>3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
		   (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mul_<Vetype><q>")]
)
309
;; Byte-swap each element using the REV instruction selected by the
;; element width (<Vrevsuff>).
(define_insn "bswap<mode>2"
  [(set (match_operand:VDQHSD 0 "register_operand" "=w")
        (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_rev<q>")]
)
317
;; Reverse the bits within each byte (RBIT); byte vectors only.
(define_insn "aarch64_rbit<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:VB 1 "register_operand" "w")]
		   UNSPEC_RBIT))]
  "TARGET_SIMD"
  "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_rbit")]
)
326
;; Count trailing zeros: byte-swap, then bit-reverse each byte (viewing the
;; vector as bytes), which reverses the full bit order of each element, so a
;; count-leading-zeros then yields the trailing-zero count.
(define_expand "ctz<mode>2"
  [(set (match_operand:VS 0 "register_operand")
        (ctz:VS (match_operand:VS 1 "register_operand")))]
  "TARGET_SIMD"
  {
     emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
     rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
					     <MODE>mode, 0);
     emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
     emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
     DONE;
  }
)
340
;; copysign(x, y): build a mask selecting only the sign bit of each element
;; (all-ones shifted left by bitsize-1), then bit-select the sign from
;; operand 2 and the remaining bits from operand 1 via BSL.
(define_expand "copysign<mode>3"
  [(match_operand:VHSDF 0 "register_operand")
   (match_operand:VHSDF 1 "register_operand")
   (match_operand:VHSDF 2 "register_operand")]
  "TARGET_FLOAT && TARGET_SIMD"
{
  rtx v_bitmask = gen_reg_rtx (<V_cmp_result>mode);
  int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;

  emit_move_insn (v_bitmask,
		  aarch64_simd_gen_const_vector_dup (<V_cmp_result>mode,
						     HOST_WIDE_INT_M1U << bits));
  emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
					 operands[2], operands[1]));
  DONE;
}
)
358
;; Multiply each lane of operand 3 by one selected lane of operand 1
;; (MUL/FMUL by element); the lane index is endian-adjusted before output.
(define_insn "*aarch64_mul3_elt<mode>"
 [(set (match_operand:VMUL 0 "register_operand" "=w")
    (mult:VMUL
      (vec_duplicate:VMUL
	  (vec_select:<VEL>
	    (match_operand:VMUL 1 "register_operand" "<h_con>")
	    (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VMUL 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
    return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)
374
;; Multiply by element where the element comes from a vector of the opposite
;; width (<VSWAP_WIDTH>); the lane is endian-adjusted in the source's mode.
(define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
  [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
     (mult:VMUL_CHANGE_NLANES
       (vec_duplicate:VMUL_CHANGE_NLANES
	  (vec_select:<VEL>
	    (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
	    (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
					  INTVAL (operands[2])));
    return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
)
391
;; Multiply each lane of operand 2 by a scalar broadcast from operand 1,
;; using the by-element form with lane 0.
;; Fix: removed a stray ';' that followed the output-template string.  The
;; md reader treats ';' as a comment-to-end-of-line, so it was harmless,
;; but it was clearly accidental punctuation left over from C-style editing.
(define_insn "*aarch64_mul3_elt_from_dup<mode>"
 [(set (match_operand:VMUL 0 "register_operand" "=w")
    (mult:VMUL
      (vec_duplicate:VMUL
	    (match_operand:<VEL> 1 "register_operand" "<h_con>"))
      (match_operand:VMUL 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)
402
;; Reciprocal square-root estimate (FRSQRTE), vector and scalar FP modes.
(define_insn "aarch64_rsqrte<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
	(unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
		     UNSPEC_RSQRTE))]
  "TARGET_SIMD"
  "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
410
;; Reciprocal square-root step (FRSQRTS), the Newton-iteration companion
;; of FRSQRTE above.
(define_insn "aarch64_rsqrts<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
	(unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
			    (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
	 UNSPEC_RSQRTS))]
  "TARGET_SIMD"
  "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
419
;; Expand 1/sqrt(x) via the iterative approximation sequence emitted by
;; aarch64_emit_approx_sqrt (recip = true).
(define_expand "rsqrt<mode>2"
  [(set (match_operand:VALLF 0 "register_operand" "=w")
	(unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
		     UNSPEC_RSQRT))]
  "TARGET_SIMD"
{
  aarch64_emit_approx_sqrt (operands[0], operands[1], true);
  DONE;
})
429
;; Scalar DF multiply by one selected lane of a V2DF vector
;; (FMUL by element); lane index endian-adjusted.
(define_insn "*aarch64_mul3_elt_to_64v2df"
  [(set (match_operand:DF 0 "register_operand" "=w")
     (mult:DF
       (vec_select:DF
	 (match_operand:V2DF 1 "register_operand" "w")
	 (parallel [(match_operand:SI 2 "immediate_operand")]))
       (match_operand:DF 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
    return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
  }
  [(set_attr "type" "neon_fp_mul_d_scalar_q")]
)
444
;; Vector integer negate (NEG).
(define_insn "neg<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
	(neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "neg\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_neg<q>")]
)
452
;; Vector integer absolute value (ABS), standard named pattern.
(define_insn "abs<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "abs\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_abs<q>")]
)
460
;; The intrinsic version of integer ABS must not be allowed to
;; combine with any operation with an integrated ABS step, such
;; as SABD.
;; Intrinsic ABS kept as an unspec (rather than an abs rtx) so combine
;; cannot fold it into instructions that embed an ABS step (see the
;; comment above this pattern).
(define_insn "aarch64_abs<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
	  (unspec:VSDQ_I_DI
	    [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
	   UNSPEC_ABS))]
  "TARGET_SIMD"
  "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_abs<q>")]
)
473
;; Signed absolute difference, abs(op1 - op2), mapped to SABD.
(define_insn "abd<mode>_3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(abs:VDQ_BHSI (minus:VDQ_BHSI
		       (match_operand:VDQ_BHSI 1 "register_operand" "w")
		       (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_abd<q>")]
)
483
;; Signed absolute difference and accumulate, op0 = op3 + abs(op1 - op2),
;; mapped to SABA (operand 3 tied to the destination via constraint "0").
(define_insn "aba<mode>_3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
			 (match_operand:VDQ_BHSI 1 "register_operand" "w")
			 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
		       (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_arith_acc<q>")]
)
494
;; Floating-point absolute difference, abs(op1 - op2), mapped to FABD;
;; covers vector and scalar FP modes.
(define_insn "fabd<mode>3"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
	(abs:VHSDF_HSDF
	  (minus:VHSDF_HSDF
	    (match_operand:VHSDF_HSDF 1 "register_operand" "w")
	    (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_abd_<stype><q>")]
)
505
;; Vector bitwise AND.
(define_insn "and<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		 (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)
514
;; Vector bitwise OR (ORR).
(define_insn "ior<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		 (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)
523
;; Vector bitwise XOR (EOR).
(define_insn "xor<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		 (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)
532
;; Vector bitwise complement (NOT).
(define_insn "one_cmpl<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "not\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)
540
;; Insert a scalar into one lane of an integer vector.  Operand 2 arrives
;; as a one-hot vec_merge mask; it is converted to an endian-adjusted lane
;; number (re-encoded one-hot so %p2 prints the lane).  Alternatives:
;; INS from GP reg, INS from SIMD lane 0, or LD1 of a single lane.
(define_insn "aarch64_simd_vec_set<mode>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w,w,w")
        (vec_merge:VDQ_BHSI
	    (vec_duplicate:VDQ_BHSI
		(match_operand:<VEL> 1 "aarch64_simd_general_operand" "r,w,Utv"))
	    (match_operand:VDQ_BHSI 3 "register_operand" "0,0,0")
	    (match_operand:SI 2 "immediate_operand" "i,i,i")))]
  "TARGET_SIMD"
  {
   int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
   operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
   switch (which_alternative)
     {
     case 0:
	return "ins\\t%0.<Vetype>[%p2], %w1";
     case 1:
	return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
     case 2:
        return "ld1\\t{%0.<Vetype>}[%p2], %1";
     default:
	gcc_unreachable ();
     }
  }
  [(set_attr "type" "neon_from_gp<q>, neon_ins<q>, neon_load1_1reg<q>")]
)
566
;; Copy one lane of operand 3 into one lane of operand 1 (INS element).
;; Operand 2 is the one-hot destination-lane mask, operand 4 the source
;; lane; both are endian-adjusted before output.
(define_insn "*aarch64_simd_vec_copy_lane<mode>"
  [(set (match_operand:VALL 0 "register_operand" "=w")
	(vec_merge:VALL
	    (vec_duplicate:VALL
	      (vec_select:<VEL>
		(match_operand:VALL 3 "register_operand" "w")
		(parallel
		  [(match_operand:SI 4 "immediate_operand" "i")])))
	    (match_operand:VALL 1 "register_operand" "0")
	    (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
    operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
    operands[4] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[4])));

    return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_ins<q>")]
)
587
;; As the lane-copy above, but the source vector is of the opposite width
;; (<VSWAP_WIDTH>); the source lane is endian-adjusted in that mode.
(define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
  [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
	(vec_merge:VALL_F16_NO_V2Q
	    (vec_duplicate:VALL_F16_NO_V2Q
	      (vec_select:<VEL>
		(match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
		(parallel
		  [(match_operand:SI 4 "immediate_operand" "i")])))
	    (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
	    (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
    operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
    operands[4] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
			   INTVAL (operands[4])));

    return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_ins<q>")]
)
609
;; Logical shift right by an immediate replicated across lanes (USHR);
;; operand 2 is a constant-duplicate vector matched by "Dr".
(define_insn "aarch64_simd_lshr<mode>"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		     (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
 "TARGET_SIMD"
 "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)
618
;; Arithmetic shift right by an immediate replicated across lanes (SSHR).
(define_insn "aarch64_simd_ashr<mode>"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		     (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
 "TARGET_SIMD"
 "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)
627
;; Shift left by an immediate replicated across lanes (SHL).
(define_insn "aarch64_simd_imm_shl<mode>"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		   (match_operand:VDQ_I  2 "aarch64_simd_lshift_imm" "Dl")))]
 "TARGET_SIMD"
  "shl\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)
636
;; Shift left by per-lane register amounts (SSHL).
(define_insn "aarch64_simd_reg_sshl<mode>"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		   (match_operand:VDQ_I 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)
645
;; Register-amount shift, unsigned flavour (USHL), kept as an unspec;
;; used by the right-shift expanders below with a negated shift count.
(define_insn "aarch64_simd_reg_shl<mode>_unsigned"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
		    (match_operand:VDQ_I 2 "register_operand" "w")]
		   UNSPEC_ASHIFT_UNSIGNED))]
 "TARGET_SIMD"
 "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)
655
;; Register-amount shift, signed flavour (SSHL), kept as an unspec;
;; used by the arithmetic right-shift expanders with a negated count.
(define_insn "aarch64_simd_reg_shl<mode>_signed"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
		    (match_operand:VDQ_I 2 "register_operand" "w")]
		   UNSPEC_ASHIFT_SIGNED))]
 "TARGET_SIMD"
 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)
665
;; Left shift of a vector by a single SImode amount.  In-range immediates
;; (0 .. bitwidth-1) use the SHL-immediate insn; anything else is forced
;; into a register, broadcast across a vector, and done with SSHL.
(define_expand "ashl<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:SI  2 "general_operand" "")]
 "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      /* SHL accepts immediates 0 .. element-width - 1.  */
      if (shift_amount >= 0 && shift_amount < bit_width)
        {
	  rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
						       shift_amount);
	  emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
						     operands[1],
						     tmp));
          DONE;
        }
      else
        {
          operands[2] = force_reg (SImode, operands[2]);
        }
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      /* Broadcast the scalar shift amount and use the register form.  */
      rtx tmp = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_aarch64_simd_dup<mode> (tmp,
					     convert_to_mode (<VEL>mode,
							      operands[2],
							      0)));
      emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
						  tmp));
      DONE;
    }
  else
    FAIL;
}
)
712
;; Logical right shift of a vector by a single SImode amount.  In-range
;; immediates (1 .. bitwidth, the USHR range) use the immediate insn;
;; otherwise the amount is negated, broadcast, and done with USHL (a
;; negative USHL count shifts right).
(define_expand "lshr<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:SI  2 "general_operand" "")]
 "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      if (shift_amount > 0 && shift_amount <= bit_width)
        {
	  rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
						       shift_amount);
          emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
						  operands[1],
						  tmp));
	  DONE;
	}
      else
        operands[2] = force_reg (SImode, operands[2]);
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      /* Negate, broadcast, and shift with USHL.  */
      rtx tmp = gen_reg_rtx (SImode);
      rtx tmp1 = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_negsi2 (tmp, operands[2]));
      emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
					     convert_to_mode (<VEL>mode,
							      tmp, 0)));
      emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
							  operands[1],
							  tmp1));
      DONE;
    }
  else
    FAIL;
}
)
759
;; Arithmetic right shift of a vector by a single SImode amount.  Same
;; structure as lshr<mode>3: immediates 1 .. bitwidth use SSHR, otherwise
;; the amount is negated, broadcast, and done with signed SSHL.
(define_expand "ashr<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:SI  2 "general_operand" "")]
 "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      if (shift_amount > 0 && shift_amount <= bit_width)
        {
	  rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
						       shift_amount);
          emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
						  operands[1],
						  tmp));
          DONE;
	}
      else
        operands[2] = force_reg (SImode, operands[2]);
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      /* Negate, broadcast, and shift with signed SSHL.  */
      rtx tmp = gen_reg_rtx (SImode);
      rtx tmp1 = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_negsi2 (tmp, operands[2]));
      emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
					     convert_to_mode (<VEL>mode,
							      tmp, 0)));
      emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
							operands[1],
							tmp1));
      DONE;
    }
  else
    FAIL;
}
)
806
;; Vector-by-vector left shift: maps directly onto SSHL.
(define_expand "vashl<mode>3"
 [(match_operand:VDQ_I 0 "register_operand" "")
  (match_operand:VDQ_I 1 "register_operand" "")
  (match_operand:VDQ_I 2 "register_operand" "")]
 "TARGET_SIMD"
{
  emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
					      operands[2]));
  DONE;
})
817
818;; Using mode VDQ_BHSI as there is no V2DImode neg!
819;; Negating individual lanes most certainly offsets the
820;; gain from vectorization.
;; Vector-by-vector arithmetic right shift: negate the per-lane counts,
;; then use signed SSHL (negative counts shift right).
(define_expand "vashr<mode>3"
 [(match_operand:VDQ_BHSI 0 "register_operand" "")
  (match_operand:VDQ_BHSI 1 "register_operand" "")
  (match_operand:VDQ_BHSI 2 "register_operand" "")]
 "TARGET_SIMD"
{
  rtx neg = gen_reg_rtx (<MODE>mode);
  emit (gen_neg<mode>2 (neg, operands[2]));
  emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
						    neg));
  DONE;
})
833
834;; DI vector shift
;; DImode arithmetic shift right that also accepts a count of 64: since
;; asr-by-64 fills with sign-bit copies just like asr-by-63, clamp and
;; defer to the standard ashrdi3 pattern.
(define_expand "aarch64_ashr_simddi"
  [(match_operand:DI 0 "register_operand" "=w")
   (match_operand:DI 1 "register_operand" "w")
   (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
  "TARGET_SIMD"
  {
    /* An arithmetic shift right by 64 fills the result with copies of the sign
       bit, just like asr by 63 - however the standard pattern does not handle
       a shift by 64.  */
    if (INTVAL (operands[2]) == 64)
      operands[2] = GEN_INT (63);
    emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
    DONE;
  }
)
850
;; Vector-by-vector logical right shift: negate the per-lane counts,
;; then use unsigned USHL (negative counts shift right).
(define_expand "vlshr<mode>3"
 [(match_operand:VDQ_BHSI 0 "register_operand" "")
  (match_operand:VDQ_BHSI 1 "register_operand" "")
  (match_operand:VDQ_BHSI 2 "register_operand" "")]
 "TARGET_SIMD"
{
  rtx neg = gen_reg_rtx (<MODE>mode);
  emit (gen_neg<mode>2 (neg, operands[2]));
  emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
						      neg));
  DONE;
})
863
;; DImode logical shift right that also accepts a count of 64, which
;; produces zero; otherwise defer to the standard lshrdi3 pattern.
(define_expand "aarch64_lshr_simddi"
  [(match_operand:DI 0 "register_operand" "=w")
   (match_operand:DI 1 "register_operand" "w")
   (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
  "TARGET_SIMD"
  {
    if (INTVAL (operands[2]) == 64)
      emit_move_insn (operands[0], const0_rtx);
    else
      emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
    DONE;
  }
)
877
;; Standard vec_set expander for integer vectors: convert the lane index
;; into the one-hot vec_merge mask expected by aarch64_simd_vec_set.
(define_expand "vec_set<mode>"
  [(match_operand:VDQ_BHSI 0 "register_operand")
   (match_operand:<VEL> 1 "register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_SIMD"
  {
    HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
    emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
					    GEN_INT (elem), operands[0]));
    DONE;
  }
)
890
891;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
;; Whole-vector shift right (element extraction helper) for 64-bit vectors,
;; done as a scalar 64-bit shift on the D register.  On big-endian the
;; lane order is reversed, so SHL moves data the required direction;
;; on little-endian USHR does.
(define_insn "vec_shr_<mode>"
  [(set (match_operand:VD 0 "register_operand" "=w")
        (unspec:VD [(match_operand:VD 1 "register_operand" "w")
		    (match_operand:SI 2 "immediate_operand" "i")]
		   UNSPEC_VEC_SHR))]
  "TARGET_SIMD"
  {
    if (BYTES_BIG_ENDIAN)
      return "shl %d0, %d1, %2";
    else
      return "ushr %d0, %d1, %2";
  }
  [(set_attr "type" "neon_shift_imm")]
)
906
;; V2DI lane insert: INS from a general register or from lane 0 of a SIMD
;; register.  The one-hot mask in operand 2 is endian-adjusted and
;; re-encoded so %p2 prints the lane number.
(define_insn "aarch64_simd_vec_setv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w,w")
        (vec_merge:V2DI
	    (vec_duplicate:V2DI
		(match_operand:DI 1 "register_operand" "r,w"))
	    (match_operand:V2DI 3 "register_operand" "0,0")
	    (match_operand:SI 2 "immediate_operand" "i,i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (V2DImode, exact_log2 (INTVAL (operands[2])));
    operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
    switch (which_alternative)
      {
      case 0:
	return "ins\\t%0.d[%p2], %1";
      case 1:
        return "ins\\t%0.d[%p2], %1.d[0]";
      default:
	gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_from_gp, neon_ins_q")]
)
930
;; Standard "vec_set" pattern for V2DI: convert the lane index in operands[2]
;; to the one-hot mask required by the vec_merge in aarch64_simd_vec_setv2di
;; and emit that insn, passing operands[0] as both destination and the vector
;; whose other lane is kept.
931(define_expand "vec_setv2di"
932  [(match_operand:V2DI 0 "register_operand")
933   (match_operand:DI 1 "register_operand")
934   (match_operand:SI 2 "immediate_operand")]
935  "TARGET_SIMD"
936  {
937    HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
938    emit_insn (gen_aarch64_simd_vec_setv2di (operands[0], operands[1],
939					  GEN_INT (elem), operands[0]));
940    DONE;
941  }
942)
943
;; Floating-point variant of lane insertion: set one lane of an FP vector from
;; lane 0 of a SIMD register (the source is constrained to "w" only).
;; Operand 2 is a one-hot lane mask; it is decoded with exact_log2, remapped
;; for endianness, and re-encoded so %p2 prints the lane number.
944(define_insn "aarch64_simd_vec_set<mode>"
945  [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
946	(vec_merge:VDQF_F16
947	    (vec_duplicate:VDQF_F16
948		(match_operand:<VEL> 1 "register_operand" "w"))
949	    (match_operand:VDQF_F16 3 "register_operand" "0")
950	    (match_operand:SI 2 "immediate_operand" "i")))]
951  "TARGET_SIMD"
952  {
953    int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
954
955    operands[2] = GEN_INT ((HOST_WIDE_INT)1 << elt);
956    return "ins\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
957  }
958  [(set_attr "type" "neon_ins<q>")]
959)
960
;; Standard "vec_set" pattern for FP vectors: convert the lane index to the
;; one-hot mask expected by aarch64_simd_vec_set<mode> and emit that insn,
;; with operands[0] doubling as the merged-in vector.
961(define_expand "vec_set<mode>"
962  [(match_operand:VDQF_F16 0 "register_operand" "+w")
963   (match_operand:<VEL> 1 "register_operand" "w")
964   (match_operand:SI 2 "immediate_operand" "")]
965  "TARGET_SIMD"
966  {
967    HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
968    emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
969					  GEN_INT (elem), operands[0]));
970    DONE;
971  }
972)
973
974
;; Integer multiply-accumulate: operand 0 = operand 1 + operand 2 * operand 3.
;; The accumulator (operand 1) is tied to the destination via constraint "0".
975(define_insn "aarch64_mla<mode>"
976 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
977       (plus:VDQ_BHSI (mult:VDQ_BHSI
978			(match_operand:VDQ_BHSI 2 "register_operand" "w")
979			(match_operand:VDQ_BHSI 3 "register_operand" "w"))
980		      (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
981 "TARGET_SIMD"
982 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
983  [(set_attr "type" "neon_mla_<Vetype><q>")]
984)
985
;; MLA with the multiplier broadcast from lane operands[2] of vector operand 1
;; (by-element form: mla ..., %1.<Vtype>[%2]).  The architectural lane number
;; is remapped with ENDIAN_LANE_N before the template is printed.
986(define_insn "*aarch64_mla_elt<mode>"
987 [(set (match_operand:VDQHS 0 "register_operand" "=w")
988       (plus:VDQHS
989	 (mult:VDQHS
990	   (vec_duplicate:VDQHS
991	      (vec_select:<VEL>
992		(match_operand:VDQHS 1 "register_operand" "<h_con>")
993		  (parallel [(match_operand:SI 2 "immediate_operand")])))
994	   (match_operand:VDQHS 3 "register_operand" "w"))
995	 (match_operand:VDQHS 4 "register_operand" "0")))]
996 "TARGET_SIMD"
997  {
998    operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
999    return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1000  }
1001  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1002)
1003
;; As *aarch64_mla_elt<mode>, but the selected lane comes from the vector of
;; the other register width (<VSWAP_WIDTH>: 64-bit <-> 128-bit counterpart),
;; so the lane remap uses <VSWAP_WIDTH>mode rather than <MODE>mode.
1004(define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
1005 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1006       (plus:VDQHS
1007	 (mult:VDQHS
1008	   (vec_duplicate:VDQHS
1009	      (vec_select:<VEL>
1010		(match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1011		  (parallel [(match_operand:SI 2 "immediate_operand")])))
1012	   (match_operand:VDQHS 3 "register_operand" "w"))
1013	 (match_operand:VDQHS 4 "register_operand" "0")))]
1014 "TARGET_SIMD"
1015  {
1016    operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
1017					  INTVAL (operands[2])));
1018    return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1019  }
1020  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1021)
1022
;; Integer multiply-subtract: operand 0 = operand 1 - operand 2 * operand 3.
;; The accumulator (operand 1) is tied to the destination via constraint "0".
1023(define_insn "aarch64_mls<mode>"
1024 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1025       (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
1026		   (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
1027			      (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
1028 "TARGET_SIMD"
1029 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1030  [(set_attr "type" "neon_mla_<Vetype><q>")]
1031)
1032
;; MLS with the multiplier broadcast from lane operands[2] of vector operand 1
;; (by-element form).  The lane number is remapped with ENDIAN_LANE_N before
;; the template is printed.
1033(define_insn "*aarch64_mls_elt<mode>"
1034 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1035       (minus:VDQHS
1036	 (match_operand:VDQHS 4 "register_operand" "0")
1037	 (mult:VDQHS
1038	   (vec_duplicate:VDQHS
1039	      (vec_select:<VEL>
1040		(match_operand:VDQHS 1 "register_operand" "<h_con>")
1041		  (parallel [(match_operand:SI 2 "immediate_operand")])))
1042	   (match_operand:VDQHS 3 "register_operand" "w"))))]
1043 "TARGET_SIMD"
1044  {
1045    operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
1046    return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1047  }
1048  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1049)
1050
;; As *aarch64_mls_elt<mode>, but the selected lane comes from the vector of
;; the other register width (<VSWAP_WIDTH>), so the endian lane remap uses
;; <VSWAP_WIDTH>mode.
1051(define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
1052 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1053       (minus:VDQHS
1054	 (match_operand:VDQHS 4 "register_operand" "0")
1055	 (mult:VDQHS
1056	   (vec_duplicate:VDQHS
1057	      (vec_select:<VEL>
1058		(match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1059		  (parallel [(match_operand:SI 2 "immediate_operand")])))
1060	   (match_operand:VDQHS 3 "register_operand" "w"))))]
1061 "TARGET_SIMD"
1062  {
1063    operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
1064					  INTVAL (operands[2])));
1065    return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1066  }
1067  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1068)
1069
1070;; Max/Min operations.
;; Standard signed/unsigned min/max patterns (smax/smin/umax/umin) for 8/16/32
;; bit element integer vectors, mapping directly to the s/u max/min insns.
1071(define_insn "<su><maxmin><mode>3"
1072 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1073       (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1074		    (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1075 "TARGET_SIMD"
1076 "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1077  [(set_attr "type" "neon_minmax<q>")]
1078)
1079
;; Min/max for V2DI has no direct AdvSIMD instruction, so synthesize it:
;; map the min/max code to the corresponding comparison (e.g. UMIN -> LTU),
;; then emit vcondv2div2di to select, per lane, operand 1 or operand 2 based
;; on that comparison of the same two operands.
1080(define_expand "<su><maxmin>v2di3"
1081 [(set (match_operand:V2DI 0 "register_operand" "")
1082       (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
1083                    (match_operand:V2DI 2 "register_operand" "")))]
1084 "TARGET_SIMD"
1085{
1086  enum rtx_code cmp_operator;
1087  rtx cmp_fmt;
1088
1089  switch (<CODE>)
1090    {
1091    case UMIN:
1092      cmp_operator = LTU;
1093      break;
1094    case SMIN:
1095      cmp_operator = LT;
1096      break;
1097    case UMAX:
1098      cmp_operator = GTU;
1099      break;
1100    case SMAX:
1101      cmp_operator = GT;
1102      break;
1103    default:
1104      gcc_unreachable ();
1105    }
1106
1107  cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1108  emit_insn (gen_vcondv2div2di (operands[0], operands[1],
1109              operands[2], cmp_fmt, operands[1], operands[2]));
1110  DONE;
1111})
1112
1113;; Pairwise Integer Max/Min operations.
;; Pairwise integer max/min ([su](max|min)p): combines adjacent element pairs
;; drawn from the concatenation of operands 1 and 2.  Modelled as an unspec
;; (MAXMINV) since the pairwise lane shuffle is not expressed in RTL.
1114(define_insn "aarch64_<maxmin_uns>p<mode>"
1115 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1116       (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1117			 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1118			MAXMINV))]
1119 "TARGET_SIMD"
1120 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1121  [(set_attr "type" "neon_minmax<q>")]
1122)
1123
1124;; Pairwise FP Max/Min operations.
;; Pairwise floating-point max/min (f(max|min)[nm]p) over half, single and
;; double element vectors, modelled as an unspec (FMAXMINV).
1125(define_insn "aarch64_<maxmin_uns>p<mode>"
1126 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1127       (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
1128		      (match_operand:VHSDF 2 "register_operand" "w")]
1129		      FMAXMINV))]
1130 "TARGET_SIMD"
1131 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1132  [(set_attr "type" "neon_minmax<q>")]
1133)
1134
1135;; vec_concat gives a new vector with the low elements from operand 1, and
1136;; the high elements from operand 2.  That is to say, given op1 = { a, b }
1137;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1138;; What that means, is that the RTL descriptions of the below patterns
1139;; need to change depending on endianness.
1140
1141;; Move to the low architectural bits of the register.
1142;; On little-endian this is { operand, zeroes }
1143;; On big-endian this is { zeroes, operand }
1144
;; Little-endian: write operand 1 into the low 64 bits of the 128-bit
;; destination and zero the high 64 bits ({ operand, zeroes }).  The zero half
;; is a vec_duplicate of 0 since <VHALF> is itself a vector mode here.
;; Alternatives: from a SIMD register (dup from d[0]), or from a general
;; register (fmov, or dup when only SIMD is available).
1145(define_insn "move_lo_quad_internal_<mode>"
1146  [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1147	(vec_concat:VQ_NO2E
1148	  (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1149	  (vec_duplicate:<VHALF> (const_int 0))))]
1150  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1151  "@
1152   dup\\t%d0, %1.d[0]
1153   fmov\\t%d0, %1
1154   dup\\t%d0, %1"
1155  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1156   (set_attr "simd" "yes,*,yes")
1157   (set_attr "fp" "*,yes,*")
1158   (set_attr "length" "4")]
1159)
1160
;; As the VQ_NO2E variant above, but for two-element modes (V2DI/V2DF) the
;; half mode <VHALF> is scalar, so the zeroed high half is a plain
;; (const_int 0) rather than a vec_duplicate.
1161(define_insn "move_lo_quad_internal_<mode>"
1162  [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1163	(vec_concat:VQ_2E
1164	  (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1165	  (const_int 0)))]
1166  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1167  "@
1168   dup\\t%d0, %1.d[0]
1169   fmov\\t%d0, %1
1170   dup\\t%d0, %1"
1171  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1172   (set_attr "simd" "yes,*,yes")
1173   (set_attr "fp" "*,yes,*")
1174   (set_attr "length" "4")]
1175)
1176
;; Big-endian counterpart: the RTL vec_concat order is reversed
;; ({ zeroes, operand }), but the emitted instructions are identical since the
;; architectural effect — operand in the low 64 bits, high bits zeroed — is
;; the same.
1177(define_insn "move_lo_quad_internal_be_<mode>"
1178  [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1179	(vec_concat:VQ_NO2E
1180	  (vec_duplicate:<VHALF> (const_int 0))
1181	  (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1182  "TARGET_SIMD && BYTES_BIG_ENDIAN"
1183  "@
1184   dup\\t%d0, %1.d[0]
1185   fmov\\t%d0, %1
1186   dup\\t%d0, %1"
1187  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1188   (set_attr "simd" "yes,*,yes")
1189   (set_attr "fp" "*,yes,*")
1190   (set_attr "length" "4")]
1191)
1192
;; Big-endian, two-element modes: reversed vec_concat order with a scalar
;; (const_int 0) high part; same instruction selection as the other variants.
1193(define_insn "move_lo_quad_internal_be_<mode>"
1194  [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1195	(vec_concat:VQ_2E
1196	  (const_int 0)
1197	  (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1198  "TARGET_SIMD && BYTES_BIG_ENDIAN"
1199  "@
1200   dup\\t%d0, %1.d[0]
1201   fmov\\t%d0, %1
1202   dup\\t%d0, %1"
1203  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1204   (set_attr "simd" "yes,*,yes")
1205   (set_attr "fp" "*,yes,*")
1206   (set_attr "length" "4")]
1207)
1208
;; Dispatch to the endian-specific move_lo_quad_internal pattern.  (Note that
;; operand 1 is declared with mode VQ here, while the internal patterns take a
;; <VHALF> source.)
1209(define_expand "move_lo_quad_<mode>"
1210  [(match_operand:VQ 0 "register_operand")
1211   (match_operand:VQ 1 "register_operand")]
1212  "TARGET_SIMD"
1213{
1214  if (BYTES_BIG_ENDIAN)
1215    emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
1216  else
1217    emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
1218  DONE;
1219}
1220)
1221
1222;; Move operand1 to the high architectural bits of the register, keeping
1223;; the low architectural bits of operand2.
1224;; For little-endian this is { operand2, operand1 }
1225;; For big-endian this is { operand1, operand2 }
1226
;; Little-endian: insert operand 1 into the high 64 bits (d[1]) of operand 0,
;; preserving the low half, which is modelled by vec_select of match_dup 0
;; over the low-half lane indices (operand 2, a vect_par_cnst_lo_half
;; parallel).  Source may be a SIMD register (lane 0) or a general register.
1227(define_insn "aarch64_simd_move_hi_quad_<mode>"
1228  [(set (match_operand:VQ 0 "register_operand" "+w,w")
1229        (vec_concat:VQ
1230          (vec_select:<VHALF>
1231                (match_dup 0)
1232                (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
1233	  (match_operand:<VHALF> 1 "register_operand" "w,r")))]
1234  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1235  "@
1236   ins\\t%0.d[1], %1.d[0]
1237   ins\\t%0.d[1], %1"
1238  [(set_attr "type" "neon_ins")]
1239)
1240
;; Big-endian counterpart: the vec_concat halves are swapped in the RTL, but
;; the emitted instruction is the same ins into d[1].
1241(define_insn "aarch64_simd_move_hi_quad_be_<mode>"
1242  [(set (match_operand:VQ 0 "register_operand" "+w,w")
1243        (vec_concat:VQ
1244	  (match_operand:<VHALF> 1 "register_operand" "w,r")
1245          (vec_select:<VHALF>
1246                (match_dup 0)
1247                (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
1248  "TARGET_SIMD && BYTES_BIG_ENDIAN"
1249  "@
1250   ins\\t%0.d[1], %1.d[0]
1251   ins\\t%0.d[1], %1"
1252  [(set_attr "type" "neon_ins")]
1253)
1254
;; Dispatch to the endian-specific move_hi_quad insn, supplying the parallel
;; of low-half lane indices (aarch64_simd_vect_par_cnst_half with high=false)
;; that the insns' vec_select requires.
1255(define_expand "move_hi_quad_<mode>"
1256 [(match_operand:VQ 0 "register_operand" "")
1257  (match_operand:<VHALF> 1 "register_operand" "")]
1258 "TARGET_SIMD"
1259{
1260  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1261  if (BYTES_BIG_ENDIAN)
1262    emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
1263		    operands[1], p));
1264  else
1265    emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
1266		    operands[1], p));
1267  DONE;
1268})
1269
1270;; Narrowing operations.
1271
1272;; For doubles.
;; Truncate each element of a 128-bit vector to half width, producing a
;; 64-bit vector (xtn).
1273(define_insn "aarch64_simd_vec_pack_trunc_<mode>"
1274 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1275       (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
1276 "TARGET_SIMD"
1277 "xtn\\t%0.<Vntype>, %1.<Vtype>"
1278  [(set_attr "type" "neon_shift_imm_narrow_q")]
1279)
1280
;; Pack-truncate two 64-bit vectors: concatenate them into one 128-bit
;; temporary via the move_lo/hi_quad expanders, then narrow with a single
;; xtn.  Which input goes into which half depends on endianness, hence the
;; lo/hi operand-index swap.
1281(define_expand "vec_pack_trunc_<mode>"
1282 [(match_operand:<VNARROWD> 0 "register_operand" "")
1283  (match_operand:VDN 1 "register_operand" "")
1284  (match_operand:VDN 2 "register_operand" "")]
1285 "TARGET_SIMD"
1286{
1287  rtx tempreg = gen_reg_rtx (<VDBL>mode);
1288  int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1289  int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1290
1291  emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1292  emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1293  emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
1294  DONE;
1295})
1296
1297;; For quads.
1298
;; Pack-truncate two 128-bit vectors into one, as an xtn/xtn2 pair whose
;; source order depends on endianness.  The destination is earlyclobber
;; ("=&w") because the first xtn writes operand 0 while the other source is
;; still needed by the following xtn2.
1299(define_insn "vec_pack_trunc_<mode>"
1300 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
1301       (vec_concat:<VNARROWQ2>
1302	 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1303	 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1304 "TARGET_SIMD"
1305 {
1306   if (BYTES_BIG_ENDIAN)
1307     return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
1308   else
1309     return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
1310 }
1311  [(set_attr "type" "multiple")
1312   (set_attr "length" "8")]
1313)
1314
1315;; Widening operations.
1316
;; Sign/zero-extend the low half of a 128-bit vector to full-width elements.
;; Implemented as a widening shift-left by zero ([su]shll #0), which performs
;; the extension; operand 2 is the parallel of low-half lane indices.
1317(define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1318  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1319        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1320			       (match_operand:VQW 1 "register_operand" "w")
1321			       (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1322			    )))]
1323  "TARGET_SIMD"
1324  "<su>shll\t%0.<Vwtype>, %1.<Vhalftype>, 0"
1325  [(set_attr "type" "neon_shift_imm_long")]
1326)
1327
;; Sign/zero-extend the high half of a 128-bit vector ([su]shll2 #0);
;; operand 2 is the parallel of high-half lane indices.
1328(define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1329  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1330        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1331			       (match_operand:VQW 1 "register_operand" "w")
1332			       (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1333			    )))]
1334  "TARGET_SIMD"
1335  "<su>shll2\t%0.<Vwtype>, %1.<Vtype>, 0"
1336  [(set_attr "type" "neon_shift_imm_long")]
1337)
1338
;; Standard vec_unpack hi pattern: build the high-half lane-index parallel
;; and emit the corresponding insn.
1339(define_expand "vec_unpack<su>_hi_<mode>"
1340  [(match_operand:<VWIDE> 0 "register_operand" "")
1341   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1342  "TARGET_SIMD"
1343  {
1344    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
1345    emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
1346							  operands[1], p));
1347    DONE;
1348  }
1349)
1350
;; Standard vec_unpack lo pattern: build the low-half lane-index parallel
;; and emit the corresponding insn.
1351(define_expand "vec_unpack<su>_lo_<mode>"
1352  [(match_operand:<VWIDE> 0 "register_operand" "")
1353   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
1354  "TARGET_SIMD"
1355  {
1356    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1357    emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1358							  operands[1], p));
1359    DONE;
1360  }
1361)
1362
1363;; Widening arithmetic.
1364
;; Widening multiply-accumulate on the low halves of two 128-bit vectors:
;; operand 0 = operand 1 + extend(lo(operand 2)) * extend(lo(operand 4)).
;; Both selects use the same low-half parallel (operand 3 / match_dup 3).
1365(define_insn "*aarch64_<su>mlal_lo<mode>"
1366  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1367        (plus:<VWIDE>
1368          (mult:<VWIDE>
1369              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1370                 (match_operand:VQW 2 "register_operand" "w")
1371                 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1372              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1373                 (match_operand:VQW 4 "register_operand" "w")
1374                 (match_dup 3))))
1375          (match_operand:<VWIDE> 1 "register_operand" "0")))]
1376  "TARGET_SIMD"
1377  "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1378  [(set_attr "type" "neon_mla_<Vetype>_long")]
1379)
1380
;; Widening multiply-accumulate on the high halves ([su]mlal2); identical in
;; structure to the _lo pattern but with a high-half lane parallel.
1381(define_insn "*aarch64_<su>mlal_hi<mode>"
1382  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1383        (plus:<VWIDE>
1384          (mult:<VWIDE>
1385              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1386                 (match_operand:VQW 2 "register_operand" "w")
1387                 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1388              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1389                 (match_operand:VQW 4 "register_operand" "w")
1390                 (match_dup 3))))
1391          (match_operand:<VWIDE> 1 "register_operand" "0")))]
1392  "TARGET_SIMD"
1393  "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1394  [(set_attr "type" "neon_mla_<Vetype>_long")]
1395)
1396
;; Widening multiply-subtract on the low halves ([su]mlsl):
;; operand 0 = operand 1 - extend(lo(operand 2)) * extend(lo(operand 4)).
1397(define_insn "*aarch64_<su>mlsl_lo<mode>"
1398  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1399        (minus:<VWIDE>
1400          (match_operand:<VWIDE> 1 "register_operand" "0")
1401          (mult:<VWIDE>
1402              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1403                 (match_operand:VQW 2 "register_operand" "w")
1404                 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1405              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1406                 (match_operand:VQW 4 "register_operand" "w")
1407                 (match_dup 3))))))]
1408  "TARGET_SIMD"
1409  "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1410  [(set_attr "type" "neon_mla_<Vetype>_long")]
1411)
1412
;; Widening multiply-subtract on the high halves ([su]mlsl2).
1413(define_insn "*aarch64_<su>mlsl_hi<mode>"
1414  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1415        (minus:<VWIDE>
1416          (match_operand:<VWIDE> 1 "register_operand" "0")
1417          (mult:<VWIDE>
1418              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1419                 (match_operand:VQW 2 "register_operand" "w")
1420                 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1421              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1422                 (match_operand:VQW 4 "register_operand" "w")
1423                 (match_dup 3))))))]
1424  "TARGET_SIMD"
1425  "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1426  [(set_attr "type" "neon_mla_<Vetype>_long")]
1427)
1428
;; Widening multiply-accumulate taking whole 64-bit vectors as sources:
;; operand 0 = operand 3 + extend(operand 1) * extend(operand 2).
1429(define_insn "*aarch64_<su>mlal<mode>"
1430  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1431        (plus:<VWIDE>
1432          (mult:<VWIDE>
1433            (ANY_EXTEND:<VWIDE>
1434              (match_operand:VD_BHSI 1 "register_operand" "w"))
1435            (ANY_EXTEND:<VWIDE>
1436              (match_operand:VD_BHSI 2 "register_operand" "w")))
1437          (match_operand:<VWIDE> 3 "register_operand" "0")))]
1438  "TARGET_SIMD"
1439  "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1440  [(set_attr "type" "neon_mla_<Vetype>_long")]
1441)
1442
;; Widening multiply-subtract taking whole 64-bit vectors as sources:
;; operand 0 = operand 1 - extend(operand 2) * extend(operand 3).
1443(define_insn "*aarch64_<su>mlsl<mode>"
1444  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1445        (minus:<VWIDE>
1446          (match_operand:<VWIDE> 1 "register_operand" "0")
1447          (mult:<VWIDE>
1448            (ANY_EXTEND:<VWIDE>
1449              (match_operand:VD_BHSI 2 "register_operand" "w"))
1450            (ANY_EXTEND:<VWIDE>
1451              (match_operand:VD_BHSI 3 "register_operand" "w")))))]
1452  "TARGET_SIMD"
1453  "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1454  [(set_attr "type" "neon_mla_<Vetype>_long")]
1455)
1456
;; Widening multiply of the low halves of two 128-bit vectors ([su]mull);
;; operand 3 is the shared low-half lane-index parallel.
1457(define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1458 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1459       (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1460			   (match_operand:VQW 1 "register_operand" "w")
1461                           (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1462		     (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1463                           (match_operand:VQW 2 "register_operand" "w")
1464                           (match_dup 3)))))]
1465  "TARGET_SIMD"
1466  "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1467  [(set_attr "type" "neon_mul_<Vetype>_long")]
1468)
1469
;; Standard widening-multiply lo pattern: build the low-half lane parallel
;; and emit aarch64_simd_vec_<su>mult_lo_<mode>.
1470(define_expand "vec_widen_<su>mult_lo_<mode>"
1471  [(match_operand:<VWIDE> 0 "register_operand" "")
1472   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1473   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1474 "TARGET_SIMD"
1475 {
1476   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1477   emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
1478						       operands[1],
1479						       operands[2], p));
1480   DONE;
1481 }
1482)
1483
;; Widening multiply of the high halves ([su]mull2); operand 3 is the shared
;; high-half lane-index parallel.
1484(define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1485 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1486      (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1487			    (match_operand:VQW 1 "register_operand" "w")
1488			    (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1489		    (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1490			    (match_operand:VQW 2 "register_operand" "w")
1491			    (match_dup 3)))))]
1492  "TARGET_SIMD"
1493  "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1494  [(set_attr "type" "neon_mul_<Vetype>_long")]
1495)
1496
;; Standard widening-multiply hi pattern: build the high-half lane parallel
;; and emit aarch64_simd_vec_<su>mult_hi_<mode>.
1497(define_expand "vec_widen_<su>mult_hi_<mode>"
1498  [(match_operand:<VWIDE> 0 "register_operand" "")
1499   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1500   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1501 "TARGET_SIMD"
1502 {
1503   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
1504   emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1505						       operands[1],
1506						       operands[2], p));
1507   DONE;
1508
1509 }
1510)
1511
1512;; FP vector operations.
1513;; AArch64 AdvSIMD supports single-precision (32-bit) and
1514;; double-precision (64-bit) floating-point data types and arithmetic as
1515;; defined by the IEEE 754-2008 standard.  This makes them vectorizable
1516;; without the need for -ffast-math or -funsafe-math-optimizations.
1517;;
1518;; Floating-point operations can raise an exception.  Vectorizing such
1519;; operations is safe for the reasons explained below.
1520;;
1521;; ARMv8 permits an extension to enable trapped floating-point
1522;; exception handling, however this is an optional feature.  In the
1523;; event of a floating-point exception being raised by vectorised
1524;; code then:
1525;; 1.  If trapped floating-point exceptions are available, then a trap
1526;;     will be taken when any lane raises an enabled exception.  A trap
1527;;     handler may determine which lane raised the exception.
1528;; 2.  Alternatively a sticky exception flag is set in the
1529;;     floating-point status register (FPSR).  Software may explicitly
1530;;     test the exception flags, in which case the tests will either
1531;;     prevent vectorisation, allowing precise identification of the
1532;;     failing operation, or if tested outside of vectorisable regions
1533;;     then the specific operation and lane are not of interest.
1534
1535;; FP arithmetic operations.
1536
;; Vector floating-point addition (fadd) over half/single/double element
;; vectors.
1537(define_insn "add<mode>3"
1538 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1539       (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1540		   (match_operand:VHSDF 2 "register_operand" "w")))]
1541 "TARGET_SIMD"
1542 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1543  [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1544)
1545
;; Vector floating-point subtraction (fsub).
1546(define_insn "sub<mode>3"
1547 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1548       (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1549		    (match_operand:VHSDF 2 "register_operand" "w")))]
1550 "TARGET_SIMD"
1551 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1552  [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1553)
1554
;; Vector floating-point multiplication (fmul).
1555(define_insn "mul<mode>3"
1556 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1557       (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1558		   (match_operand:VHSDF 2 "register_operand" "w")))]
1559 "TARGET_SIMD"
1560 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1561  [(set_attr "type" "neon_fp_mul_<stype><q>")]
1562)
1563
;; Vector floating-point division.  First try the reciprocal-approximation
;; sequence (aarch64_emit_approx_div); if that declines, fall through to the
;; *div<mode>3 fdiv insn, forcing operand 1 into a register on the way.
1564(define_expand "div<mode>3"
1565 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1566       (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1567		  (match_operand:VHSDF 2 "register_operand" "w")))]
1568 "TARGET_SIMD"
1569{
1570  if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
1571    DONE;
1572
1573  operands[1] = force_reg (<MODE>mode, operands[1]);
1574})
1575
;; The IEEE-accurate vector division instruction (fdiv), used when the
;; approximation path in the div<mode>3 expander is not taken.
1576(define_insn "*div<mode>3"
1577 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1578       (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1579		 (match_operand:VHSDF 2 "register_operand" "w")))]
1580 "TARGET_SIMD"
1581 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1582  [(set_attr "type" "neon_fp_div_<stype><q>")]
1583)
1584
;; Vector floating-point negation (fneg).
1585(define_insn "neg<mode>2"
1586 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1587       (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1588 "TARGET_SIMD"
1589 "fneg\\t%0.<Vtype>, %1.<Vtype>"
1590  [(set_attr "type" "neon_fp_neg_<stype><q>")]
1591)
1592
;; Vector floating-point absolute value (fabs).
1593(define_insn "abs<mode>2"
1594 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1595       (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1596 "TARGET_SIMD"
1597 "fabs\\t%0.<Vtype>, %1.<Vtype>"
1598  [(set_attr "type" "neon_fp_abs_<stype><q>")]
1599)
1600
;; Fused multiply-add: operand 0 = operand 1 * operand 2 + operand 3 (fmla).
;; The addend (operand 3) is tied to the destination via constraint "0".
1601(define_insn "fma<mode>4"
1602  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1603       (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1604		  (match_operand:VHSDF 2 "register_operand" "w")
1605		  (match_operand:VHSDF 3 "register_operand" "0")))]
1606  "TARGET_SIMD"
1607 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1608  [(set_attr "type" "neon_fp_mla_<stype><q>")]
1609)
1610
;; By-element fmla: one multiplicand is lane operands[2] of operand 1,
;; broadcast to all lanes.  The lane is remapped with ENDIAN_LANE_N before
;; printing.
1611(define_insn "*aarch64_fma4_elt<mode>"
1612  [(set (match_operand:VDQF 0 "register_operand" "=w")
1613    (fma:VDQF
1614      (vec_duplicate:VDQF
1615	(vec_select:<VEL>
1616	  (match_operand:VDQF 1 "register_operand" "<h_con>")
1617	  (parallel [(match_operand:SI 2 "immediate_operand")])))
1618      (match_operand:VDQF 3 "register_operand" "w")
1619      (match_operand:VDQF 4 "register_operand" "0")))]
1620  "TARGET_SIMD"
1621  {
1622    operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
1623    return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1624  }
1625  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1626)
1627
;; By-element fmla where the lane comes from the vector of the other register
;; width (<VSWAP_WIDTH>), so the endian lane remap uses <VSWAP_WIDTH>mode.
1628(define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1629  [(set (match_operand:VDQSF 0 "register_operand" "=w")
1630    (fma:VDQSF
1631      (vec_duplicate:VDQSF
1632	(vec_select:<VEL>
1633	  (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1634	  (parallel [(match_operand:SI 2 "immediate_operand")])))
1635      (match_operand:VDQSF 3 "register_operand" "w")
1636      (match_operand:VDQSF 4 "register_operand" "0")))]
1637  "TARGET_SIMD"
1638  {
1639    operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
1640					  INTVAL (operands[2])));
1641    return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1642  }
1643  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1644)
1645
;; fmla with one multiplicand broadcast from a scalar register, printed as a
;; by-element operand using lane 0.
1646(define_insn "*aarch64_fma4_elt_from_dup<mode>"
1647  [(set (match_operand:VMUL 0 "register_operand" "=w")
1648    (fma:VMUL
1649      (vec_duplicate:VMUL
1650	  (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1651      (match_operand:VMUL 2 "register_operand" "w")
1652      (match_operand:VMUL 3 "register_operand" "0")))]
1653  "TARGET_SIMD"
1654  "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1655  [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
1656)
1657
;; Scalar DF fused multiply-add whose first multiplicand is one lane of a
;; V2DF register, emitted as a by-element fmla on the .2d view; the lane is
;; endian-remapped first.
1658(define_insn "*aarch64_fma4_elt_to_64v2df"
1659  [(set (match_operand:DF 0 "register_operand" "=w")
1660    (fma:DF
1661	(vec_select:DF
1662	  (match_operand:V2DF 1 "register_operand" "w")
1663	  (parallel [(match_operand:SI 2 "immediate_operand")]))
1664      (match_operand:DF 3 "register_operand" "w")
1665      (match_operand:DF 4 "register_operand" "0")))]
1666  "TARGET_SIMD"
1667  {
1668    operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
1669    return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
1670  }
1671  [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1672)
1673
;; Fused multiply-subtract: operand 0 = operand 3 - operand 1 * operand 2,
;; expressed as fma(op1, -op2, op3) and emitted as fmls.  The addend
;; (operand 3) is tied to the destination.
1674(define_insn "fnma<mode>4"
1675  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1676	(fma:VHSDF
1677	  (match_operand:VHSDF 1 "register_operand" "w")
1678          (neg:VHSDF
1679	    (match_operand:VHSDF 2 "register_operand" "w"))
1680	  (match_operand:VHSDF 3 "register_operand" "0")))]
1681  "TARGET_SIMD"
1682  "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1683  [(set_attr "type" "neon_fp_mla_<stype><q>")]
1684)
1685
;; By-element fmls: the negated multiplicand is operand 3, the other comes
;; from lane operands[2] of operand 1 (endian-remapped before printing).
1686(define_insn "*aarch64_fnma4_elt<mode>"
1687  [(set (match_operand:VDQF 0 "register_operand" "=w")
1688    (fma:VDQF
1689      (neg:VDQF
1690        (match_operand:VDQF 3 "register_operand" "w"))
1691      (vec_duplicate:VDQF
1692	(vec_select:<VEL>
1693	  (match_operand:VDQF 1 "register_operand" "<h_con>")
1694	  (parallel [(match_operand:SI 2 "immediate_operand")])))
1695      (match_operand:VDQF 4 "register_operand" "0")))]
1696  "TARGET_SIMD"
1697  {
1698    operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
1699    return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1700  }
1701  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1702)
1703
;; By-element fmls where the lane comes from the vector of the other register
;; width (<VSWAP_WIDTH>), so the endian lane remap uses <VSWAP_WIDTH>mode.
1704(define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
1705  [(set (match_operand:VDQSF 0 "register_operand" "=w")
1706    (fma:VDQSF
1707      (neg:VDQSF
1708        (match_operand:VDQSF 3 "register_operand" "w"))
1709      (vec_duplicate:VDQSF
1710	(vec_select:<VEL>
1711	  (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1712	  (parallel [(match_operand:SI 2 "immediate_operand")])))
1713      (match_operand:VDQSF 4 "register_operand" "0")))]
1714  "TARGET_SIMD"
1715  {
1716    operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
1717					  INTVAL (operands[2])));
1718    return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1719  }
1720  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1721)
1722
;; fmls with one multiplicand broadcast from a scalar register, printed as a
;; by-element operand using lane 0.
1723(define_insn "*aarch64_fnma4_elt_from_dup<mode>"
1724  [(set (match_operand:VMUL 0 "register_operand" "=w")
1725    (fma:VMUL
1726      (neg:VMUL
1727        (match_operand:VMUL 2 "register_operand" "w"))
1728      (vec_duplicate:VMUL
1729	(match_operand:<VEL> 1 "register_operand" "<h_con>"))
1730      (match_operand:VMUL 3 "register_operand" "0")))]
1731  "TARGET_SIMD"
1732  "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1733  [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
1734)
1735
;; Scalar DF fused multiply-subtract whose lane multiplicand comes from a
;; V2DF register, emitted as a by-element fmls on the .2d view; the lane is
;; endian-remapped first.
1736(define_insn "*aarch64_fnma4_elt_to_64v2df"
1737  [(set (match_operand:DF 0 "register_operand" "=w")
1738    (fma:DF
1739      (vec_select:DF
1740	(match_operand:V2DF 1 "register_operand" "w")
1741	(parallel [(match_operand:SI 2 "immediate_operand")]))
1742      (neg:DF
1743        (match_operand:DF 3 "register_operand" "w"))
1744      (match_operand:DF 4 "register_operand" "0")))]
1745  "TARGET_SIMD"
1746  {
1747    operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
1748    return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
1749  }
1750  [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1751)
1752
1753;; Vector versions of the floating-point frint patterns.
1754;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
;; Vector round-to-integral in FP format: the FRINT unspec iterator expands
;; this one template to btrunc/ceil/floor/nearbyint/rint/round/frintn,
;; each mapping to the matching frint<suffix> instruction.
1755(define_insn "<frint_pattern><mode>2"
1756  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1757	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
1758		       FRINT))]
1759  "TARGET_SIMD"
1760  "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
1761  [(set_attr "type" "neon_fp_round_<stype><q>")]
1762)
1763
1764;; Vector versions of the fcvt standard patterns.
1765;; Expands to lbtrunc, lround, lceil, lfloor
;; Vector FP-to-integer conversion with an explicit rounding mode: the FCVT
;; unspec chooses the rounding (trunc/round/ceil/floor) and FIXUORS the
;; signedness, giving fcvt{z,a,p,m}{s,u}.
1766(define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
1767  [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1768	(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1769			       [(match_operand:VHSDF 1 "register_operand" "w")]
1770			       FCVT)))]
1771  "TARGET_SIMD"
1772  "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
1773  [(set_attr "type" "neon_fp_to_int_<stype><q>")]
1774)
1775
;; HF Scalar variants of related SIMD instructions.
;; Round a scalar HF value and convert it to a 16-bit integer; only
;; available with the ARMv8.2-A FP16 scalar instructions.
(define_insn "l<fcvt_pattern><su_optab>hfhi2"
  [(set (match_operand:HI 0 "register_operand" "=w")
	(FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
		      FCVT)))]
  "TARGET_SIMD_F16INST"
  "fcvt<frint_suffix><su>\t%h0, %h1"
  [(set_attr "type" "neon_fp_to_int_s")]
)
1785
;; Truncating (round-toward-zero) HF -> 16-bit integer conversion,
;; signed or unsigned according to FIXUORS.
(define_insn "<optab>_trunchfhi2"
  [(set (match_operand:HI 0 "register_operand" "=w")
	(FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
  "TARGET_SIMD_F16INST"
  "fcvtz<su>\t%h0, %h1"
  [(set_attr "type" "neon_fp_to_int_s")]
)
1793
;; 16-bit integer -> HF conversion, signed (scvtf) or unsigned (ucvtf)
;; according to FLOATUORS.
(define_insn "<optab>hihf2"
  [(set (match_operand:HF 0 "register_operand" "=w")
	(FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
  "TARGET_SIMD_F16INST"
  "<su_optab>cvtf\t%h0, %h1"
  [(set_attr "type" "neon_int_to_fp_s")]
)
1801
;; Combine a multiply by a power of two with a truncating convert into a
;; single fixed-point conversion: fcvtz[su] with #fbits, where fbits is
;; log2 of the constant multiplier.  The insn condition restricts fbits
;; to the range the instruction encoding accepts (1 .. element bits).
(define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
  [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
	(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
			       [(mult:VDQF
	 (match_operand:VDQF 1 "register_operand" "w")
	 (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
			       UNSPEC_FRINTZ)))]
  "TARGET_SIMD
   && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
		GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
  {
    int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
    char buf[64];
    snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
    output_asm_insn (buf, operands);
    return "";
  }
  [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
)
1821
;; Standard fix/fixuns pattern: FP -> integer, truncating toward zero
;; (UNSPEC_FRINTZ).  Matched directly by the l*2 insn above.
(define_expand "<optab><VHSDF:mode><fcvt_target>2"
  [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
	(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
			       [(match_operand:VHSDF 1 "register_operand")]
				UNSPEC_FRINTZ)))]
  "TARGET_SIMD"
  {})
1829
;; Standard fix_trunc/fixuns_trunc pattern; identical RTL to the plain
;; fix pattern above since the conversion already truncates toward zero.
(define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
  [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
	(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
			       [(match_operand:VHSDF 1 "register_operand")]
				UNSPEC_FRINTZ)))]
  "TARGET_SIMD"
  {})
1837
;; Standard ftrunc pattern: round each lane toward zero, staying in FP
;; (maps to the frintz vector insn above).
(define_expand "ftrunc<VHSDF:mode>2"
  [(set (match_operand:VHSDF 0 "register_operand")
	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
		       UNSPEC_FRINTZ))]
  "TARGET_SIMD"
  {})
1844
;; Standard float/floatuns pattern: integer vector -> FP vector,
;; signed (scvtf) or unsigned (ucvtf) per FLOATUORS.
(define_insn "<optab><fcvt_target><VHSDF:mode>2"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(FLOATUORS:VHSDF
	  (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_int_to_fp_<stype><q>")]
)
1853
1854;; Conversions between vectors of floats and doubles.
1855;; Contains a mix of patterns to match standard pattern names
1856;; and those for intrinsics.
1857
1858;; Float widening operations.
1859
;; Float-extend the low (architecturally lower-numbered) half of the
;; source vector; operand 2 is the parallel of low-half lane indices,
;; enforced by the vect_par_cnst_lo_half predicate.
(define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (float_extend:<VWIDE> (vec_select:<VHALF>
			       (match_operand:VQ_HSF 1 "register_operand" "w")
			       (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
			    )))]
  "TARGET_SIMD"
  "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
  [(set_attr "type" "neon_fp_cvt_widen_s")]
)
1870
1871;; Convert between fixed-point and floating-point (vector modes)
1872
;; FP -> fixed-point conversion; operand 2 is the immediate number of
;; fractional bits (#fbits) passed through to the instruction.
(define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
  [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
	(unspec:<VHSDF:FCVT_TARGET>
	  [(match_operand:VHSDF 1 "register_operand" "w")
	   (match_operand:SI 2 "immediate_operand" "i")]
	 FCVT_F2FIXED))]
  "TARGET_SIMD"
  "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
  [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
)
1883
;; Fixed-point -> FP conversion; operand 2 is the immediate number of
;; fractional bits (#fbits) passed through to the instruction.
(define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
  [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
	(unspec:<VDQ_HSDI:FCVT_TARGET>
	  [(match_operand:VDQ_HSDI 1 "register_operand" "w")
	   (match_operand:SI 2 "immediate_operand" "i")]
	 FCVT_FIXED2F))]
  "TARGET_SIMD"
  "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
  [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
)
1894
1895;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
1896;; is inconsistent with vector ordering elsewhere in the compiler, in that
1897;; the meaning of HI and LO changes depending on the target endianness.
1898;; While elsewhere we map the higher numbered elements of a vector to
1899;; the lower architectural lanes of the vector, for these patterns we want
1900;; to always treat "hi" as referring to the higher architectural lanes.
1901;; Consequently, while the patterns below look inconsistent with our
1902;; other big-endian patterns their behavior is as required.
1903
;; Expand the "vec_unpacks_lo" standard pattern: build the parallel of
;; low-half lane indices (second argument false = lo) and hand it to the
;; *_lo_ insn, whose operand 2 predicate expects exactly that parallel.
(define_expand "vec_unpacks_lo_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (match_operand:VQ_HSF 1 "register_operand" "")]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
    emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
						       operands[1], p));
    DONE;
  }
)
1915
;; Float-extend the high (architecturally higher-numbered) half of the
;; source vector; operand 2 is the parallel of high-half lane indices,
;; enforced by the vect_par_cnst_hi_half predicate.
(define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (float_extend:<VWIDE> (vec_select:<VHALF>
			       (match_operand:VQ_HSF 1 "register_operand" "w")
			       (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
			    )))]
  "TARGET_SIMD"
  "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_cvt_widen_s")]
)
1926
;; Expand the "vec_unpacks_hi" standard pattern: float-extend the
;; architecturally-high half of operand 1 into the wider operand 0.
;; The parallel built with aarch64_simd_vect_par_cnst_half (..., true)
;; selects the high-half lanes, so it must be consumed by the *_hi_
;; insn, whose operand 2 uses the vect_par_cnst_hi_half predicate; the
;; *_lo_ insn's vect_par_cnst_lo_half predicate would reject it (and
;; would extend the wrong half besides).
(define_expand "vec_unpacks_hi_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (match_operand:VQ_HSF 1 "register_operand" "")]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
    emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
						       operands[1], p));
    DONE;
  }
)
;; Widen a whole 64-bit FP vector (V2SF/V4HF) to its double-width mode;
;; fcvtl reads only the low 64 bits, so no lane selection is needed.
(define_insn "aarch64_float_extend_lo_<Vwide>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(float_extend:<VWIDE>
	  (match_operand:VDF 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
  [(set_attr "type" "neon_fp_cvt_widen_s")]
)
1946
1947;; Float narrowing operations.
1948
;; Narrow a 128-bit FP vector into a 64-bit result (fcvtn writes the
;; low half of the destination).
(define_insn "aarch64_float_truncate_lo_<mode>"
  [(set (match_operand:VDF 0 "register_operand" "=w")
      (float_truncate:VDF
	(match_operand:<VWIDE> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)
1957
;; Little-endian form of fcvtn2: keep operand 1 (tied to the output) in
;; the low half and write the narrowed operand 2 into the high half.
(define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w")
    (vec_concat:<VDBL>
      (match_operand:VDF 1 "register_operand" "0")
      (float_truncate:VDF
	(match_operand:<VWIDE> 2 "register_operand" "w"))))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)
1968
;; Big-endian form of fcvtn2: the vec_concat arms are swapped relative
;; to the _le pattern because GCC's lane numbering reverses on BE, but
;; the emitted instruction is identical.
(define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w")
    (vec_concat:<VDBL>
      (float_truncate:VDF
	(match_operand:<VWIDE> 2 "register_operand" "w"))
      (match_operand:VDF 1 "register_operand" "0")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)
1979
;; Dispatch to the endian-specific fcvtn2 pattern above; both emit the
;; same instruction, only the RTL vec_concat ordering differs.
(define_expand "aarch64_float_truncate_hi_<Vdbl>"
  [(match_operand:<VDBL> 0 "register_operand" "=w")
   (match_operand:VDF 1 "register_operand" "0")
   (match_operand:<VWIDE> 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
			     ? gen_aarch64_float_truncate_hi_<Vdbl>_be
			     : gen_aarch64_float_truncate_hi_<Vdbl>_le;
  emit_insn (gen (operands[0], operands[1], operands[2]));
  DONE;
}
)
1993
;; Pack two V2DF vectors into one V4SF: narrow one source into the low
;; half (fcvtn) and the other into the high half (fcvtn2).  The lo/hi
;; operand selection is swapped on big-endian so the vectorizer's
;; element-order expectations are met.
(define_expand "vec_pack_trunc_v2df"
  [(set (match_operand:V4SF 0 "register_operand")
      (vec_concat:V4SF
	(float_truncate:V2SF
	    (match_operand:V2DF 1 "register_operand"))
	(float_truncate:V2SF
	    (match_operand:V2DF 2 "register_operand"))
	  ))]
  "TARGET_SIMD"
  {
    rtx tmp = gen_reg_rtx (V2SFmode);
    int lo = BYTES_BIG_ENDIAN ? 2 : 1;
    int hi = BYTES_BIG_ENDIAN ? 1 : 2;

    emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
    emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
						   tmp, operands[hi]));
    DONE;
  }
)
2014
;; Pack two scalar DFs into a V2SF: assemble the two scalars into a V2DF
;; via the move_lo/hi_quad helpers, then narrow the pair with a single
;; fcvtn.  Endianness again decides which input lands in which half.
(define_expand "vec_pack_trunc_df"
  [(set (match_operand:V2SF 0 "register_operand")
      (vec_concat:V2SF
	(float_truncate:SF
	    (match_operand:DF 1 "register_operand"))
	(float_truncate:SF
	    (match_operand:DF 2 "register_operand"))
	  ))]
  "TARGET_SIMD"
  {
    rtx tmp = gen_reg_rtx (V2SFmode);
    int lo = BYTES_BIG_ENDIAN ? 2 : 1;
    int hi = BYTES_BIG_ENDIAN ? 1 : 2;

    emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
    emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
    emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
    DONE;
  }
)
2035
2036;; FP Max/Min
2037;; Max/Min are introduced by idiom recognition by GCC's mid-end.  An
2038;; expression like:
2039;;      a = (b < c) ? b : c;
2040;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only is enabled
2041;; either explicitly or indirectly via -ffast-math.
2042;;
2043;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2044;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2045;; operand will be returned when both operands are zero (i.e. they may not
2046;; honour signed zeroes), or when either operand is NaN.  Therefore GCC
2047;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
2048;; NaNs.
2049
;; smax/smin standard patterns for FP vectors.  We may use the NaN- and
;; signed-zero-agnostic fmaxnm/fminnm forms because GCC only generates
;; these patterns under -ffinite-math-only semantics (see above).
(define_insn "<su><maxmin><mode>3"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		       (match_operand:VHSDF 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_minmax_<stype><q>")]
)
2058
2059;; Vector forms for fmax, fmin, fmaxnm, fminnm.
2060;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
2061;; which implement the IEEE fmax ()/fmin () functions.
;; IEEE-semantics vector max/min (fmax/fmin/fmaxnm/fminnm), kept as an
;; unspec because the exact NaN behaviour is part of the contract and
;; must not be simplified by the RTL optimizers.
(define_insn "<maxmin_uns><mode>3"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
		      (match_operand:VHSDF 2 "register_operand" "w")]
		      FMAXMIN_UNS))]
  "TARGET_SIMD"
  "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_minmax_<stype><q>")]
)
2071
2072;; 'across lanes' add.
2073
;; Integer add reduction to a scalar: run the across-lanes add into a
;; vector scratch, then extract architectural lane 0 (remapped for
;; endianness) as the scalar result.
(define_expand "reduc_plus_scal_<mode>"
  [(match_operand:<VEL> 0 "register_operand" "=w")
   (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
	       UNSPEC_ADDV)]
  "TARGET_SIMD"
  {
    rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
    emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
    DONE;
  }
)
2087
;; Pairwise FP add of two vectors (faddp); used as the building block of
;; the FP add reductions below.
(define_insn "aarch64_faddp<mode>"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
		      (match_operand:VHSDF 2 "register_operand" "w")]
	UNSPEC_FADDV))]
 "TARGET_SIMD"
 "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
)
2097
;; Across-lanes integer add (addv/addp); the scalar sum lands in lane 0
;; of the vector-mode destination.
(define_insn "aarch64_reduc_plus_internal<mode>"
 [(set (match_operand:VDQV 0 "register_operand" "=w")
       (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
		    UNSPEC_ADDV))]
 "TARGET_SIMD"
 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_reduc_add<q>")]
)
2106
;; V2SI has no addv form; a single pairwise add of the vector with
;; itself produces the two-lane sum.
(define_insn "aarch64_reduc_plus_internalv2si"
 [(set (match_operand:V2SI 0 "register_operand" "=w")
       (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
		    UNSPEC_ADDV))]
 "TARGET_SIMD"
 "addp\\t%0.2s, %1.2s, %1.2s"
  [(set_attr "type" "neon_reduc_add")]
)
2115
;; FP add reduction for two-lane vectors (V2SF/V2DF): the scalar faddp
;; form sums the pair directly into a scalar register.
(define_insn "reduc_plus_scal_<mode>"
 [(set (match_operand:<VEL> 0 "register_operand" "=w")
       (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
		   UNSPEC_FADDV))]
 "TARGET_SIMD"
 "faddp\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
)
2124
;; V4SF add reduction: two rounds of pairwise adds leave the total in
;; every lane, then extract architectural lane 0.
(define_expand "reduc_plus_scal_v4sf"
 [(set (match_operand:SF 0 "register_operand")
       (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
		    UNSPEC_FADDV))]
 "TARGET_SIMD"
{
  rtx elt = GEN_INT (ENDIAN_LANE_N (V4SFmode, 0));
  rtx scratch = gen_reg_rtx (V4SFmode);
  emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
  emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
  emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
  DONE;
})
2138
;; Count leading redundant sign bits per lane (cls).
(define_insn "clrsb<mode>2"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "cls\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_cls<q>")]
)
2146
;; Count leading zeros per lane (clz).
(define_insn "clz<mode>2"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
 "TARGET_SIMD"
 "clz\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_cls<q>")]
)
2154
;; Population count per byte lane (cnt); only byte element sizes exist.
(define_insn "popcount<mode>2"
  [(set (match_operand:VB 0 "register_operand" "=w")
        (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_cnt<q>")]
)
2162
2163;; 'across lanes' max and min ops.
2164
2165;; Template for outputting a scalar, so we can create __builtins which can be
2166;; gimple_fold'd to the REDUC_(MAX|MIN)_EXPR tree code.  (This is FP smax/smin).
;; FP max/min reduction to a scalar: reduce into a vector scratch, then
;; extract architectural lane 0 (endian-remapped).
(define_expand "reduc_<maxmin_uns>_scal_<mode>"
  [(match_operand:<VEL> 0 "register_operand")
   (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
		  FMAXMINV)]
  "TARGET_SIMD"
  {
    rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
							      operands[1]));
    emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
    DONE;
  }
)
2181
2182;; Likewise for integer cases, signed and unsigned.
;; Likewise for integer cases, signed and unsigned: reduce into a
;; vector scratch, then extract architectural lane 0 (endian-remapped).
(define_expand "reduc_<maxmin_uns>_scal_<mode>"
  [(match_operand:<VEL> 0 "register_operand")
   (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
		    MAXMINV)]
  "TARGET_SIMD"
  {
    rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
							      operands[1]));
    emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
    DONE;
  }
)
2197
;; Across-lanes integer max/min ([su]maxv/[su]minv); result in lane 0 of
;; the vector-mode destination.
(define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
 [(set (match_operand:VDQV_S 0 "register_operand" "=w")
       (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
		    MAXMINV))]
 "TARGET_SIMD"
 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_reduc_minmax<q>")]
)
2206
;; V2SI has no across-lanes max/min form; the pairwise op of the vector
;; with itself reduces the two lanes.
(define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
 [(set (match_operand:V2SI 0 "register_operand" "=w")
       (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
		    MAXMINV))]
 "TARGET_SIMD"
 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
  [(set_attr "type" "neon_reduc_minmax")]
)
2215
;; Across-lanes FP max/min (fmax[nm]v/fmin[nm]v or their pairwise forms,
;; chosen by <vp>); result in lane 0 of the vector-mode destination.
(define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
		      FMAXMINV))]
 "TARGET_SIMD"
 "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
)
2224
2225;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2226;; allocation.
2227;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2228;; to select.
2229;;
2230;; Thus our BSL is of the form:
2231;;   op0 = bsl (mask, op2, op3)
2232;; We can use any of:
2233;;
;;   if (op0 = mask)
;;     bsl mask, op2, op3
;;   if (op0 = op3) (so 1-bits in mask choose bits from op2, else op0)
;;     bit op0, op2, mask
;;   if (op0 = op2) (so 0-bits in mask choose bits from op3, else op0)
;;     bif op0, op3, mask
2240;;
2241;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
2242;; Some forms of straight-line code may generate the equivalent form
2243;; in *aarch64_simd_bsl<mode>_alt.
2244
;; Bitwise select expressed canonically as ((op2 ^ op3) & mask) ^ op3.
;; The three constraint alternatives tie the output to the mask (bsl),
;; to operand 3 (bit) or to operand 2 (bif), so register allocation
;; picks whichever operand can be clobbered.
(define_insn "aarch64_simd_bsl<mode>_internal"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w,w,w")
	(xor:VSDQ_I_DI
	   (and:VSDQ_I_DI
	     (xor:VSDQ_I_DI
	       (match_operand:<V_cmp_result> 3 "register_operand" "w,0,w")
	       (match_operand:VSDQ_I_DI 2 "register_operand" "w,w,0"))
	     (match_operand:VSDQ_I_DI 1 "register_operand" "0,w,w"))
	  (match_dup:<V_cmp_result> 3)
	))]
  "TARGET_SIMD"
  "@
  bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
  bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
  bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_bsl<q>")]
)
2262
2263;; We need this form in addition to the above pattern to match the case
2264;; when combine tries merging three insns such that the second operand of
2265;; the outer XOR matches the second operand of the inner XOR rather than
2266;; the first.  The two are equivalent but since recog doesn't try all
2267;; permutations of commutative operations, we have to have a separate pattern.
2268
;; Same select, with the outer xor folded against operand 2 instead of
;; operand 3: ((op3 ^ op2) & mask) ^ op2.  Needed because recog does not
;; try both orders of the commutative inner xor (see comment above).
(define_insn "*aarch64_simd_bsl<mode>_alt"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w,w,w")
	(xor:VSDQ_I_DI
	   (and:VSDQ_I_DI
	     (xor:VSDQ_I_DI
	       (match_operand:VSDQ_I_DI 3 "register_operand" "w,w,0")
	       (match_operand:VSDQ_I_DI 2 "register_operand" "w,0,w"))
	      (match_operand:VSDQ_I_DI 1 "register_operand" "0,w,w"))
	  (match_dup:VSDQ_I_DI 2)))]
  "TARGET_SIMD"
  "@
  bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
  bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
  bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_bsl<q>")]
)
2285
;; Expander for bitwise select over any vector mode, including FP ones:
;; FP operands are punned to the equal-sized integer mode so the integer
;; _internal insn can be used, and the result is punned back.
(define_expand "aarch64_simd_bsl<mode>"
  [(match_operand:VALLDIF 0 "register_operand")
   (match_operand:<V_cmp_result> 1 "register_operand")
   (match_operand:VALLDIF 2 "register_operand")
   (match_operand:VALLDIF 3 "register_operand")]
 "TARGET_SIMD"
{
  /* We can't alias operands together if they have different modes.  */
  rtx tmp = operands[0];
  if (FLOAT_MODE_P (<MODE>mode))
    {
      operands[2] = gen_lowpart (<V_cmp_result>mode, operands[2]);
      operands[3] = gen_lowpart (<V_cmp_result>mode, operands[3]);
      tmp = gen_reg_rtx (<V_cmp_result>mode);
    }
  operands[1] = gen_lowpart (<V_cmp_result>mode, operands[1]);
  emit_insn (gen_aarch64_simd_bsl<v_cmp_result>_internal (tmp,
							  operands[1],
							  operands[2],
							  operands[3]));
  if (tmp != operands[0])
    emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));

  DONE;
})
2311
;; Standard vcond_mask pattern: operand 0 = operand 3 ? operand 1 :
;; operand 2, lanewise.  The -1/0 and 0/-1 selector cases collapse to a
;; move or a NOT of the mask; everything else goes through bsl.
(define_expand "vcond_mask_<mode><v_cmp_result>"
  [(match_operand:VALLDI 0 "register_operand")
   (match_operand:VALLDI 1 "nonmemory_operand")
   (match_operand:VALLDI 2 "nonmemory_operand")
   (match_operand:<V_cmp_result> 3 "register_operand")]
  "TARGET_SIMD"
{
  /* If we have (a = (P) ? -1 : 0);
     Then we can simply move the generated mask (result must be int).  */
  if (operands[1] == CONSTM1_RTX (<MODE>mode)
      && operands[2] == CONST0_RTX (<MODE>mode))
    emit_move_insn (operands[0], operands[3]);
  /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask.  */
  else if (operands[1] == CONST0_RTX (<MODE>mode)
	   && operands[2] == CONSTM1_RTX (<MODE>mode))
    emit_insn (gen_one_cmpl<v_cmp_result>2 (operands[0], operands[3]));
  else
    {
      if (!REG_P (operands[1]))
	operands[1] = force_reg (<MODE>mode, operands[1]);
      if (!REG_P (operands[2]))
	operands[2] = force_reg (<MODE>mode, operands[2]);
      emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
					     operands[1], operands[2]));
    }

  DONE;
})
2340
2341;; Patterns comparing two vectors to produce a mask.
2342
;; Integer vector compare producing an all-ones/all-zeros mask per lane.
;; The first switch keeps operand 3 as a constant only for comparisons
;; against zero (those have immediate-zero cm* forms); the second maps
;; each rtx code to a compare builder, swapping operands where AArch64
;; only provides one direction (LTU -> GTU, LEU -> GEU).
(define_expand "vec_cmp<mode><mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand")
	  (match_operator 1 "comparison_operator"
	    [(match_operand:VSDQ_I_DI 2 "register_operand")
	     (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
  "TARGET_SIMD"
{
  rtx mask = operands[0];
  enum rtx_code code = GET_CODE (operands[1]);

  switch (code)
    {
    case NE:
    case LE:
    case LT:
    case GE:
    case GT:
    case EQ:
      if (operands[3] == CONST0_RTX (<MODE>mode))
	break;

      /* Fall through.  */
    default:
      if (!REG_P (operands[3]))
	operands[3] = force_reg (<MODE>mode, operands[3]);

      break;
    }

  switch (code)
    {
    case LT:
      emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
      break;

    case GE:
      emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
      break;

    case LE:
      emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
      break;

    case GT:
      emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
      break;

    case LTU:
      emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
      break;

    case GEU:
      emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
      break;

    case LEU:
      emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
      break;

    case GTU:
      emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
      break;

    case NE:
      /* Handle NE as !EQ.  */
      emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
      emit_insn (gen_one_cmpl<v_cmp_result>2 (mask, mask));
      break;

    case EQ:
      emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
      break;

    default:
      gcc_unreachable ();
    }

  DONE;
})
2422
;; FP vector compare producing an integer-mode mask.  Ordered compares
;; map to FCMGE/FCMGT/FCMEQ (swapping operands for LT/LE); unordered
;; (UN*) compares are built from isnan masks so no FP exception can be
;; raised; LTGT, ORDERED, UNORDERED and UNEQ are composed from FCMGT and
;; FCMEQ self-compares.
(define_expand "vec_cmp<mode><v_cmp_result>"
  [(set (match_operand:<V_cmp_result> 0 "register_operand")
	(match_operator 1 "comparison_operator"
	    [(match_operand:VDQF 2 "register_operand")
	     (match_operand:VDQF 3 "nonmemory_operand")]))]
  "TARGET_SIMD"
{
  int use_zero_form = 0;
  enum rtx_code code = GET_CODE (operands[1]);
  rtx tmp = gen_reg_rtx (<V_cmp_result>mode);

  rtx (*comparison) (rtx, rtx, rtx) = NULL;

  /* Keep a zero operand 3 only where a compare-against-zero form
     exists; otherwise force it into a register.  */
  switch (code)
    {
    case LE:
    case LT:
    case GE:
    case GT:
    case EQ:
      if (operands[3] == CONST0_RTX (<MODE>mode))
	{
	  use_zero_form = 1;
	  break;
	}
      /* Fall through.  */
    default:
      if (!REG_P (operands[3]))
	operands[3] = force_reg (<MODE>mode, operands[3]);

      break;
    }

  /* Pick the base comparison builder; operand swaps turn LT/LE (and
     their unordered variants) into the GT/GE forms AArch64 provides.  */
  switch (code)
    {
    case LT:
      if (use_zero_form)
	{
	  comparison = gen_aarch64_cmlt<mode>;
	  break;
	}
      /* Fall through.  */
    case UNLT:
      std::swap (operands[2], operands[3]);
      /* Fall through.  */
    case UNGT:
    case GT:
      comparison = gen_aarch64_cmgt<mode>;
      break;
    case LE:
      if (use_zero_form)
	{
	  comparison = gen_aarch64_cmle<mode>;
	  break;
	}
      /* Fall through.  */
    case UNLE:
      std::swap (operands[2], operands[3]);
      /* Fall through.  */
    case UNGE:
    case GE:
      comparison = gen_aarch64_cmge<mode>;
      break;
    case NE:
    case EQ:
      comparison = gen_aarch64_cmeq<mode>;
      break;
    case UNEQ:
    case ORDERED:
    case UNORDERED:
    case LTGT:
      break;
    default:
      gcc_unreachable ();
    }

  switch (code)
    {
    case UNGE:
    case UNGT:
    case UNLE:
    case UNLT:
      {
	/* All of the above must not raise any FP exceptions.  Thus we first
	   check each operand for NaNs and force any elements containing NaN to
	   zero before using them in the compare.
	   Example: UN<cc> (a, b) -> UNORDERED (a, b) |
				     (cm<cc> (isnan (a) ? 0.0 : a,
					      isnan (b) ? 0.0 : b))
	   We use the following transformations for doing the comparisons:
	   a UNGE b -> a GE b
	   a UNGT b -> a GT b
	   a UNLE b -> b GE a
	   a UNLT b -> b GT a.  */

	rtx tmp0 = gen_reg_rtx (<V_cmp_result>mode);
	rtx tmp1 = gen_reg_rtx (<V_cmp_result>mode);
	rtx tmp2 = gen_reg_rtx (<V_cmp_result>mode);
	emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2]));
	emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3]));
	emit_insn (gen_and<v_cmp_result>3 (tmp2, tmp0, tmp1));
	emit_insn (gen_and<v_cmp_result>3 (tmp0, tmp0,
			lowpart_subreg (<V_cmp_result>mode, operands[2], <MODE>mode)));
	emit_insn (gen_and<v_cmp_result>3 (tmp1, tmp1,
			lowpart_subreg (<V_cmp_result>mode, operands[3], <MODE>mode)));
	gcc_assert (comparison != NULL);
	emit_insn (comparison (operands[0],
			       lowpart_subreg (<MODE>mode, tmp0, <V_cmp_result>mode),
			       lowpart_subreg (<MODE>mode, tmp1, <V_cmp_result>mode)));
	emit_insn (gen_orn<v_cmp_result>3 (operands[0], tmp2, operands[0]));
      }
      break;

    case LT:
    case LE:
    case GT:
    case GE:
    case EQ:
    case NE:
      /* The easy case.  Here we emit one of FCMGE, FCMGT or FCMEQ.
	 As a LT b <=> b GE a && a LE b <=> b GT a.  Our transformations are:
	 a GE b -> a GE b
	 a GT b -> a GT b
	 a LE b -> b GE a
	 a LT b -> b GT a
	 a EQ b -> a EQ b
	 a NE b -> ~(a EQ b)  */
      gcc_assert (comparison != NULL);
      emit_insn (comparison (operands[0], operands[2], operands[3]));
      if (code == NE)
	emit_insn (gen_one_cmpl<v_cmp_result>2 (operands[0], operands[0]));
      break;

    case LTGT:
      /* LTGT is not guaranteed to not generate a FP exception.  So let's
	 go the faster way : ((a > b) || (b > a)).  */
      emit_insn (gen_aarch64_cmgt<mode> (operands[0],
					 operands[2], operands[3]));
      emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
      emit_insn (gen_ior<v_cmp_result>3 (operands[0], operands[0], tmp));
      break;

    case ORDERED:
    case UNORDERED:
    case UNEQ:
      /* cmeq (a, a) & cmeq (b, b).  */
      emit_insn (gen_aarch64_cmeq<mode> (operands[0],
					 operands[2], operands[2]));
      emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3]));
      emit_insn (gen_and<v_cmp_result>3 (operands[0], operands[0], tmp));

      if (code == UNORDERED)
	emit_insn (gen_one_cmpl<v_cmp_result>2 (operands[0], operands[0]));
      else if (code == UNEQ)
	{
	  emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3]));
	  emit_insn (gen_orn<v_cmp_result>3 (operands[0], operands[0], tmp));
	}
      break;

    default:
      gcc_unreachable ();
    }

  DONE;
})
2589
;; Unsigned integer vector compare: identical to the signed expander,
;; since the signedness is already encoded in the rtx code (GTU etc.).
(define_expand "vec_cmpu<mode><mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand")
	  (match_operator 1 "comparison_operator"
	    [(match_operand:VSDQ_I_DI 2 "register_operand")
	     (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
  "TARGET_SIMD"
{
  emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
				      operands[2], operands[3]));
  DONE;
})
2601
;; Standard vcond pattern: compare operands 4/5, then select between
;; operands 1/2 with vcond_mask.  NE is rewritten as EQ with the select
;; arms swapped, saving the NOT that vec_cmp would otherwise emit.
(define_expand "vcond<mode><mode>"
  [(set (match_operand:VALLDI 0 "register_operand")
	(if_then_else:VALLDI
	  (match_operator 3 "comparison_operator"
	    [(match_operand:VALLDI 4 "register_operand")
	     (match_operand:VALLDI 5 "nonmemory_operand")])
	  (match_operand:VALLDI 1 "nonmemory_operand")
	  (match_operand:VALLDI 2 "nonmemory_operand")))]
  "TARGET_SIMD"
{
  rtx mask = gen_reg_rtx (<V_cmp_result>mode);
  enum rtx_code code = GET_CODE (operands[3]);

  /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
     it as well as switch operands 1/2 in order to avoid the additional
     NOT instruction.  */
  if (code == NE)
    {
      operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
				    operands[4], operands[5]);
      std::swap (operands[1], operands[2]);
    }
  emit_insn (gen_vec_cmp<mode><v_cmp_result> (mask, operands[3],
					      operands[4], operands[5]));
  emit_insn (gen_vcond_mask_<mode><v_cmp_result> (operands[0], operands[1],
						  operands[2], mask));

  DONE;
})
2631
;; Mixed-mode vcond: the comparison is on FP vectors while the selected
;; values are in the equal-sized integer mode.  Same NE -> EQ + swap
;; trick as the same-mode expander above.
(define_expand "vcond<v_cmp_mixed><mode>"
  [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
	(if_then_else:<V_cmp_mixed>
	  (match_operator 3 "comparison_operator"
	    [(match_operand:VDQF_COND 4 "register_operand")
	     (match_operand:VDQF_COND 5 "nonmemory_operand")])
	  (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
	  (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
  "TARGET_SIMD"
{
  rtx mask = gen_reg_rtx (<V_cmp_result>mode);
  enum rtx_code code = GET_CODE (operands[3]);

  /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
     it as well as switch operands 1/2 in order to avoid the additional
     NOT instruction.  */
  if (code == NE)
    {
      operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
				    operands[4], operands[5]);
      std::swap (operands[1], operands[2]);
    }
  emit_insn (gen_vec_cmp<mode><v_cmp_result> (mask, operands[3],
					      operands[4], operands[5]));
  emit_insn (gen_vcond_mask_<v_cmp_mixed><v_cmp_result> (
						operands[0], operands[1],
						operands[2], mask));

  DONE;
})
2662
;; Unsigned vcond where the data and comparison modes are the same
;; integer mode.  Expanded as vec_cmp (mask) + vcond_mask (select);
;; NE is rewritten as EQ with the select arms swapped to save a NOT.
(define_expand "vcondu<mode><mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand")
	(if_then_else:VSDQ_I_DI
	  (match_operator 3 "comparison_operator"
	    [(match_operand:VSDQ_I_DI 4 "register_operand")
	     (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
	  (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
	  (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
  "TARGET_SIMD"
{
  rtx mask = gen_reg_rtx (<MODE>mode);
  enum rtx_code code = GET_CODE (operands[3]);

  /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
     it as well as switch operands 1/2 in order to avoid the additional
     NOT instruction.  */
  if (code == NE)
    {
      operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
				    operands[4], operands[5]);
      std::swap (operands[1], operands[2]);
    }
  emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
				      operands[4], operands[5]));
  emit_insn (gen_vcond_mask_<mode><v_cmp_result> (operands[0], operands[1],
						  operands[2], mask));
  DONE;
})

;; Unsigned vcond with floating-point data (VDQF) selected by an
;; integer comparison in the equal-width mode <V_cmp_mixed>.  Same
;; vec_cmp + vcond_mask expansion and NE-as-swapped-EQ trick as above.
(define_expand "vcondu<mode><v_cmp_mixed>"
  [(set (match_operand:VDQF 0 "register_operand")
	(if_then_else:VDQF
	  (match_operator 3 "comparison_operator"
	    [(match_operand:<V_cmp_mixed> 4 "register_operand")
	     (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
	  (match_operand:VDQF 1 "nonmemory_operand")
	  (match_operand:VDQF 2 "nonmemory_operand")))]
  "TARGET_SIMD"
{
  rtx mask = gen_reg_rtx (<V_cmp_result>mode);
  enum rtx_code code = GET_CODE (operands[3]);

  /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
     it as well as switch operands 1/2 in order to avoid the additional
     NOT instruction.  */
  if (code == NE)
    {
      operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
				    operands[4], operands[5]);
      std::swap (operands[1], operands[2]);
    }
  emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
						  mask, operands[3],
						  operands[4], operands[5]));
  emit_insn (gen_vcond_mask_<mode><v_cmp_result> (operands[0], operands[1],
						  operands[2], mask));
  DONE;
})
2721
2722;; Patterns for AArch64 SIMD Intrinsics.
2723
2724;; Lane extraction with sign extension to general purpose register.
;; SMOV: select one lane of a B/H vector and sign-extend it into a
;; 32- or 64-bit general register.  The lane number in the RTL is in
;; GCC vector-extension (little-endian) order; ENDIAN_LANE_N flips it
;; for the assembly output on big-endian.
(define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
  [(set (match_operand:GPI 0 "register_operand" "=r")
	(sign_extend:GPI
	  (vec_select:<VEL>
	    (match_operand:VDQQH 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_SIMD"
  {
    operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
    return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
  }
  [(set_attr "type" "neon_to_gp<q>")]
)

;; UMOV: as above but zero-extending into a 32-bit general register.
(define_insn "*aarch64_get_lane_zero_extendsi<mode>"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(zero_extend:SI
	  (vec_select:<VEL>
	    (match_operand:VDQQH 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_SIMD"
  {
    operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
    return "umov\\t%w0, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_to_gp<q>")]
)
2752
2753;; Lane extraction of a value, neither sign nor zero extension
2754;; is guaranteed so upper bits should be considered undefined.
2755;; RTL uses GCC vector extension indices throughout so flip only for assembly.
;; Three alternatives for extracting a lane: UMOV to a general
;; register, DUP to a SIMD scalar register, or ST1 of a single lane
;; straight to memory.  Lane index is flipped for big-endian output.
(define_insn "aarch64_get_lane<mode>"
  [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
	(vec_select:<VEL>
	  (match_operand:VALL_F16 1 "register_operand" "w, w, w")
	  (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
  "TARGET_SIMD"
  {
    operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
    switch (which_alternative)
      {
	case 0:
	  return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
	case 1:
	  return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
	case 2:
	  return "st1\\t{%1.<Vetype>}[%2], %0";
	default:
	  gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
)
2778
2779;; In this insn, operand 1 should be low, and operand 2 the high part of the
2780;; dest vector.
2781
;; Combine a 64-bit vector with a zero upper half into a 128-bit
;; vector.  A plain D-register move/load zeroes the high half, so this
;; needs only one instruction; the source may be a SIMD register, a
;; general register (via FMOV), or memory (via LDR).
(define_insn "*aarch64_combinez<mode>"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
        (vec_concat:<VDBL>
	   (match_operand:VD_BHSI 1 "general_operand" "w,?r,m")
	   (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz,Dz,Dz")))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   mov\\t%0.8b, %1.8b
   fmov\t%d0, %1
   ldr\\t%d0, %1"
  [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")]
)

;; Big-endian variant of the pattern above: the zero half appears
;; first in the vec_concat, but the generated code is identical.
(define_insn "*aarch64_combinez_be<mode>"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
        (vec_concat:<VDBL>
	   (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz,Dz,Dz")
	   (match_operand:VD_BHSI 1 "general_operand" "w,?r,m")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   mov\\t%0.8b, %1.8b
   fmov\t%d0, %1
   ldr\\t%d0, %1"
  [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")]
)
2811
;; Combine two 64-bit vectors into one 128-bit vector.  The internal
;; pattern's vec_concat takes (low, high) in memory order, so the
;; operands are swapped for big-endian before emitting it.
(define_expand "aarch64_combine<mode>"
  [(match_operand:<VDBL> 0 "register_operand")
   (match_operand:VDC 1 "register_operand")
   (match_operand:VDC 2 "register_operand")]
  "TARGET_SIMD"
{
  rtx op1, op2;
  if (BYTES_BIG_ENDIAN)
    {
      op1 = operands[2];
      op2 = operands[1];
    }
  else
    {
      op1 = operands[1];
      op2 = operands[2];
    }
  emit_insn (gen_aarch64_combine_internal<mode> (operands[0], op1, op2));
  DONE;
}
)

;; Kept as a single insn until after reload (note the earlyclobber
;; "=&w" destination), then split into the two halves by
;; aarch64_split_simd_combine, again swapping for big-endian.
(define_insn_and_split "aarch64_combine_internal<mode>"
  [(set (match_operand:<VDBL> 0 "register_operand" "=&w")
        (vec_concat:<VDBL> (match_operand:VDC 1 "register_operand" "w")
			   (match_operand:VDC 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  if (BYTES_BIG_ENDIAN)
    aarch64_split_simd_combine (operands[0], operands[2], operands[1]);
  else
    aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
  DONE;
}
[(set_attr "type" "multiple")]
)

;; Intrinsics entry point: build the 128-bit result by moving operand 1
;; into the low quad and operand 2 into the high quad.
(define_expand "aarch64_simd_combine<mode>"
  [(match_operand:<VDBL> 0 "register_operand")
   (match_operand:VDC 1 "register_operand")
   (match_operand:VDC 2 "register_operand")]
  "TARGET_SIMD"
  {
    emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
    emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
    DONE;
  }
[(set_attr "type" "multiple")]
)
2864
2865;; <su><addsub>l<q>.
2866
;; Widening add/sub of the HIGH halves of two Q registers:
;; [SU]ADDL2 / [SU]SUBL2.  Operand 3 is the parallel selecting the
;; high-half lanes.
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
       (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			   (match_operand:VQW 1 "register_operand" "w")
			   (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
		       (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			   (match_operand:VQW 2 "register_operand" "w")
			   (match_dup 3)))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_long")]
)

;; Widening add/sub of the LOW halves: [SU]ADDL / [SU]SUBL.
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
       (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                           (match_operand:VQW 1 "register_operand" "w")
                           (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
                       (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                           (match_operand:VQW 2 "register_operand" "w")
                           (match_dup 3)))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_long")]
)
2892
2893
;; Intrinsics expanders for the "2" (high-half) widening forms: each
;; builds the high-half lane-selection parallel and forwards to the
;; corresponding *_hi_internal insn above.
(define_expand "aarch64_saddl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQW 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
                                                  operands[2], p));
  DONE;
})

;; UADDL2: unsigned high-half widening add.
(define_expand "aarch64_uaddl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQW 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
                                                  operands[2], p));
  DONE;
})

;; SSUBL2: signed high-half widening subtract.
(define_expand "aarch64_ssubl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQW 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
						operands[2], p));
  DONE;
})

;; USUBL2: unsigned high-half widening subtract.
(define_expand "aarch64_usubl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQW 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
						operands[2], p));
  DONE;
})
2941
;; [SU]ADDL / [SU]SUBL on whole 64-bit vectors: extend both inputs and
;; add/subtract into the double-width result.
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
       (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
			   (match_operand:VD_BHSI 1 "register_operand" "w"))
		       (ANY_EXTEND:<VWIDE>
			   (match_operand:VD_BHSI 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_long")]
)
2952
2953;; <su><addsub>w<q>.
2954
;; widen_ssum/widen_usum: add a vector, widened, into a double-width
;; accumulator.  Two expanders share each name, distinguished by mode
;; iterator: the VQW form splits a Q-register input into SADDW/UADDW of
;; the low half followed by SADDW2/UADDW2 of the high half; the
;; VD_BHSI form maps directly onto a single [SU]ADDW.
(define_expand "widen_ssum<mode>3"
  [(set (match_operand:<VDBLW> 0 "register_operand" "")
	(plus:<VDBLW> (sign_extend:<VDBLW>
		        (match_operand:VQW 1 "register_operand" ""))
		      (match_operand:<VDBLW> 2 "register_operand" "")))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
    rtx temp = gen_reg_rtx (GET_MODE (operands[0]));

    emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
						operands[1], p));
    emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
    DONE;
  }
)

;; Signed widening sum of a 64-bit vector: a single SADDW.
(define_expand "widen_ssum<mode>3"
  [(set (match_operand:<VWIDE> 0 "register_operand" "")
	(plus:<VWIDE> (sign_extend:<VWIDE>
		        (match_operand:VD_BHSI 1 "register_operand" ""))
		      (match_operand:<VWIDE> 2 "register_operand" "")))]
  "TARGET_SIMD"
{
  emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
  DONE;
})

;; Unsigned widening sum of a 128-bit vector: UADDW then UADDW2.
(define_expand "widen_usum<mode>3"
  [(set (match_operand:<VDBLW> 0 "register_operand" "")
	(plus:<VDBLW> (zero_extend:<VDBLW>
		        (match_operand:VQW 1 "register_operand" ""))
		      (match_operand:<VDBLW> 2 "register_operand" "")))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
    rtx temp = gen_reg_rtx (GET_MODE (operands[0]));

    emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
						 operands[1], p));
    emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
    DONE;
  }
)

;; Unsigned widening sum of a 64-bit vector: a single UADDW.
(define_expand "widen_usum<mode>3"
  [(set (match_operand:<VWIDE> 0 "register_operand" "")
	(plus:<VWIDE> (zero_extend:<VWIDE>
		        (match_operand:VD_BHSI 1 "register_operand" ""))
		      (match_operand:<VWIDE> 2 "register_operand" "")))]
  "TARGET_SIMD"
{
  emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
  DONE;
})
3010
;; [SU]ADDW / [SU]SUBW: wide operand 1 combined with a widened 64-bit
;; vector operand 2.
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
			(ANY_EXTEND:<VWIDE>
			  (match_operand:VD_BHSI 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
)

;; As above but operand 2 is the LOW half of a Q register.
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
			(ANY_EXTEND:<VWIDE>
			  (vec_select:<VHALF>
			   (match_operand:VQW 2 "register_operand" "w")
			   (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
)

;; [SU]ADDW2 / [SU]SUBW2: operand 2 is the HIGH half of a Q register.
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
			(ANY_EXTEND:<VWIDE>
			  (vec_select:<VHALF>
			   (match_operand:VQW 2 "register_operand" "w")
			   (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>w2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
)
3044
;; Intrinsics expanders for the "w2" forms: build the high-half lane
;; parallel and forward to the matching *w2_internal insn.
(define_expand "aarch64_saddw2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
						operands[2], p));
  DONE;
})

;; UADDW2.
(define_expand "aarch64_uaddw2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
						operands[2], p));
  DONE;
})


;; SSUBW2.
(define_expand "aarch64_ssubw2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
						operands[2], p));
  DONE;
})

;; USUBW2.
(define_expand "aarch64_usubw2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
						operands[2], p));
  DONE;
})
3093
3094;; <su><r>h<addsub>.
3095
;; Halving add/sub ([SU]HADD, [SU]RHADD, [SU]HSUB), represented via
;; the HADDSUB unspec.
(define_insn "aarch64_<sur>h<addsub><mode>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
		      (match_operand:VDQ_BHSI 2 "register_operand" "w")]
		     HADDSUB))]
  "TARGET_SIMD"
  "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<addsub>_halve<q>")]
)

;; Narrowing high-half add/sub: [R]ADDHN / [R]SUBHN.
(define_insn "aarch64_<sur><addsub>hn<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
        (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
			    (match_operand:VQN 2 "register_operand" "w")]
                           ADDSUBHN))]
  "TARGET_SIMD"
  "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
)

;; [R]ADDHN2 / [R]SUBHN2: narrow into the high half of the
;; destination; operand 1 (tied to the output, constraint "0") holds
;; the already-written low half.
(define_insn "aarch64_<sur><addsub>hn2<mode>"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
        (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
			     (match_operand:VQN 2 "register_operand" "w")
			     (match_operand:VQN 3 "register_operand" "w")]
                            ADDSUBHN2))]
  "TARGET_SIMD"
  "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
)
3128
3129;; pmul.
3130
;; PMUL: polynomial (carry-less) multiply on byte vectors.
(define_insn "aarch64_pmul<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
        (unspec:VB [(match_operand:VB 1 "register_operand" "w")
		    (match_operand:VB 2 "register_operand" "w")]
		   UNSPEC_PMUL))]
 "TARGET_SIMD"
 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mul_<Vetype><q>")]
)

;; FMULX, vector and scalar floating-point forms.
(define_insn "aarch64_fmulx<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
	(unspec:VHSDF_HSDF
	  [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
	   (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
	   UNSPEC_FMULX))]
 "TARGET_SIMD"
 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
 [(set_attr "type" "neon_fp_mul_<stype>")]
)
3153
3154;; vmulxq_lane_f32, and vmulx_laneq_f32
3155
;; FMULX by a broadcast lane taken from a vector of the OTHER width
;; (<VSWAP_WIDTH>); lane index flipped for big-endian assembly.
(define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(unspec:VDQSF
	 [(match_operand:VDQSF 1 "register_operand" "w")
	  (vec_duplicate:VDQSF
	   (vec_select:<VEL>
	    (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
	    (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
	 UNSPEC_FMULX))]
  "TARGET_SIMD"
  {
    operands[3] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
					  INTVAL (operands[3])));
    return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
)

;; FMULX by a broadcast lane from a vector of the SAME mode.
(define_insn "*aarch64_mulx_elt<mode>"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
	(unspec:VDQF
	 [(match_operand:VDQF 1 "register_operand" "w")
	  (vec_duplicate:VDQF
	   (vec_select:<VEL>
	    (match_operand:VDQF 2 "register_operand" "w")
	    (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
	 UNSPEC_FMULX))]
  "TARGET_SIMD"
  {
    operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
    return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
)

;; FMULX by a duplicated scalar register, emitted as lane 0.
(define_insn "*aarch64_mulx_elt_from_dup<mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(unspec:VHSDF
	 [(match_operand:VHSDF 1 "register_operand" "w")
	  (vec_duplicate:VHSDF
	    (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
	 UNSPEC_FMULX))]
  "TARGET_SIMD"
  "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)
3206
3207;; vmulxs_lane_f32, vmulxs_laneq_f32
3208;; vmulxd_lane_f64 ==  vmulx_lane_f64
3209;; vmulxd_laneq_f64 == vmulx_laneq_f64
3210
;; Scalar FMULX of operand 1 by one lane of vector operand 2; the
;; result is a scalar, lane index flipped for big-endian output.
(define_insn "*aarch64_vgetfmulx<mode>"
  [(set (match_operand:<VEL> 0 "register_operand" "=w")
	(unspec:<VEL>
	 [(match_operand:<VEL> 1 "register_operand" "w")
	  (vec_select:<VEL>
	   (match_operand:VDQF 2 "register_operand" "w")
	    (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
	 UNSPEC_FMULX))]
  "TARGET_SIMD"
  {
    operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
    return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "fmul<Vetype>")]
)
3226;; <su>q<addsub>
3227
;; Saturating add/sub: SQADD, UQADD, SQSUB, UQSUB (BINQOPS iterator).
(define_insn "aarch64_<su_optab><optab><mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
	(BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
			  (match_operand:VSDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_<optab><q>")]
)

;; SUQADD / USQADD accumulate into operand 1, which is tied to the
;; destination (constraint "0"), so only the addend appears in the
;; assembly template.
(define_insn "aarch64_<sur>qadd<mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
	(unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
			(match_operand:VSDQ_I 2 "register_operand" "w")]
		       USSUQADD))]
  "TARGET_SIMD"
  "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_qadd<q>")]
)
3248
3249;; sqmovun
3250
;; SQXTUN: saturating extract-narrow, signed source to unsigned result.
(define_insn "aarch64_sqmovun<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
	(unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
                            UNSPEC_SQXTUN))]
   "TARGET_SIMD"
   "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)

;; SQXTN / UQXTN: saturating extract-narrow (SUQMOVN iterator).
(define_insn "aarch64_<sur>qmovn<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
	(unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
                            SUQMOVN))]
  "TARGET_SIMD"
  "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)

;; SQABS / SQNEG: saturating unary ops (UNQOPS iterator).
(define_insn "aarch64_s<optab><mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
	(UNQOPS:VSDQ_I
	  (match_operand:VSDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_<optab><q>")]
)
3281
3282;; sq<r>dmulh.
3283
;; SQDMULH / SQRDMULH: saturating (rounding) doubling multiply
;; returning the high half.
(define_insn "aarch64_sq<r>dmulh<mode>"
  [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
	(unspec:VSDQ_HSI
	  [(match_operand:VSDQ_HSI 1 "register_operand" "w")
	   (match_operand:VSDQ_HSI 2 "register_operand" "w")]
	 VQDMULH))]
  "TARGET_SIMD"
  "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
)

;; sq<r>dmulh_lane

;; Vector-by-lane form; the lane comes from a 64-bit vector (<VCOND>)
;; and the index is flipped for big-endian in the output C fragment.
(define_insn "aarch64_sq<r>dmulh_lane<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (unspec:VDQHS
	  [(match_operand:VDQHS 1 "register_operand" "w")
           (vec_select:<VEL>
             (match_operand:<VCOND> 2 "register_operand" "<vwx>")
             (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
	 VQDMULH))]
  "TARGET_SIMD"
  "*
   operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
   return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)

;; As above, but the lane comes from a 128-bit vector (<VCONQ>).
(define_insn "aarch64_sq<r>dmulh_laneq<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (unspec:VDQHS
	  [(match_operand:VDQHS 1 "register_operand" "w")
           (vec_select:<VEL>
             (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
             (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
	 VQDMULH))]
  "TARGET_SIMD"
  "*
   operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
   return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)

;; Scalar (SD_HSI) by-lane form, lane from a 64-bit vector.
(define_insn "aarch64_sq<r>dmulh_lane<mode>"
  [(set (match_operand:SD_HSI 0 "register_operand" "=w")
        (unspec:SD_HSI
	  [(match_operand:SD_HSI 1 "register_operand" "w")
           (vec_select:<VEL>
             (match_operand:<VCOND> 2 "register_operand" "<vwx>")
             (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
	 VQDMULH))]
  "TARGET_SIMD"
  "*
   operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
   return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)

;; Scalar (SD_HSI) by-lane form, lane from a 128-bit vector.
(define_insn "aarch64_sq<r>dmulh_laneq<mode>"
  [(set (match_operand:SD_HSI 0 "register_operand" "=w")
        (unspec:SD_HSI
	  [(match_operand:SD_HSI 1 "register_operand" "w")
           (vec_select:<VEL>
             (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
             (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
	 VQDMULH))]
  "TARGET_SIMD"
  "*
   operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
   return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)
3356
3357;; sqrdml[as]h.
3358
;; SQRDMLAH / SQRDMLSH (ARMv8.1-A RDMA extension, hence
;; TARGET_SIMD_RDMA): rounding doubling multiply
;; accumulate/subtract; the accumulator (operand 1) is tied to the
;; destination with constraint "0".
(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
  [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
	(unspec:VSDQ_HSI
	  [(match_operand:VSDQ_HSI 1 "register_operand" "0")
	   (match_operand:VSDQ_HSI 2 "register_operand" "w")
	   (match_operand:VSDQ_HSI 3 "register_operand" "w")]
	  SQRDMLH_AS))]
   "TARGET_SIMD_RDMA"
   "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
   [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
)

;; sqrdml[as]h_lane.

;; Vector by-lane form; lane from a 64-bit vector (<VCOND>), index
;; flipped for big-endian.
(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
	(unspec:VDQHS
	  [(match_operand:VDQHS 1 "register_operand" "0")
	   (match_operand:VDQHS 2 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCOND> 3 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
	  SQRDMLH_AS))]
   "TARGET_SIMD_RDMA"
   {
     operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
     return
      "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
   }
   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; Scalar by-lane form; lane from a 64-bit vector.
(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
  [(set (match_operand:SD_HSI 0 "register_operand" "=w")
	(unspec:SD_HSI
	  [(match_operand:SD_HSI 1 "register_operand" "0")
	   (match_operand:SD_HSI 2 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCOND> 3 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
	  SQRDMLH_AS))]
   "TARGET_SIMD_RDMA"
   {
     operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
     return
      "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
   }
   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; sqrdml[as]h_laneq.

;; Vector by-lane form; lane from a 128-bit vector (<VCONQ>).
(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
	(unspec:VDQHS
	  [(match_operand:VDQHS 1 "register_operand" "0")
	   (match_operand:VDQHS 2 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
	  SQRDMLH_AS))]
   "TARGET_SIMD_RDMA"
   {
     operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
     return
      "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
   }
   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; Scalar by-lane form; lane from a 128-bit vector.
(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
  [(set (match_operand:SD_HSI 0 "register_operand" "=w")
	(unspec:SD_HSI
	  [(match_operand:SD_HSI 1 "register_operand" "0")
	   (match_operand:SD_HSI 2 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
	  SQRDMLH_AS))]
   "TARGET_SIMD_RDMA"
   {
     operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
     return
      "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
   }
   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
3446
3447;; vqdml[sa]l
3448
;; SQDMLAL / SQDMLSL: widening saturating doubling multiply
;; accumulate/subtract.  The doubling is modelled as a saturating
;; left shift by 1 (ss_ashift) of the widened product; the
;; accumulator (operand 1) is tied to the destination.
(define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	      (mult:<VWIDE>
		(sign_extend:<VWIDE>
		      (match_operand:VSD_HSI 2 "register_operand" "w"))
		(sign_extend:<VWIDE>
		      (match_operand:VSD_HSI 3 "register_operand" "w")))
	      (const_int 1))))]
  "TARGET_SIMD"
  "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
  [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
)
3464
3465;; vqdml[sa]l_lane
3466
3467(define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3468  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3469        (SBINQOPS:<VWIDE>
3470	  (match_operand:<VWIDE> 1 "register_operand" "0")
3471	  (ss_ashift:<VWIDE>
3472	    (mult:<VWIDE>
3473	      (sign_extend:<VWIDE>
3474		(match_operand:VD_HSI 2 "register_operand" "w"))
3475	      (sign_extend:<VWIDE>
3476		(vec_duplicate:VD_HSI
3477		  (vec_select:<VEL>
3478		    (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3479		    (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3480              ))
3481	    (const_int 1))))]
3482  "TARGET_SIMD"
3483  {
3484    operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3485    return
3486      "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3487  }
3488  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3489)
3490
;; As aarch64_sqdml<SBINQOPS:as>l_lane<mode>, but the multiplier lane is taken
;; from a 128-bit vector (<VCONQ>) — the "laneq" intrinsic forms.  The lane
;; index is remapped with ENDIAN_LANE_N for big-endian lane numbering.
(define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	    (mult:<VWIDE>
	      (sign_extend:<VWIDE>
		(match_operand:VD_HSI 2 "register_operand" "w"))
	      (sign_extend:<VWIDE>
		(vec_duplicate:VD_HSI
		  (vec_select:<VEL>
		    (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
		    (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
              ))
	    (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
    return
      "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
3514
;; Scalar (SD_HSI: HI/SI) variant of sqdml[as]l by element: operand 2 is a
;; single element, so the selected lane is used directly without a
;; vec_duplicate.  Same pattern name as the vector variant above; the mode
;; iterators keep the generated names distinct.
(define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	    (mult:<VWIDE>
	      (sign_extend:<VWIDE>
		(match_operand:SD_HSI 2 "register_operand" "w"))
	      (sign_extend:<VWIDE>
		(vec_select:<VEL>
		  (match_operand:<VCOND> 3 "register_operand" "<vwx>")
		  (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
              )
	    (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
    return
      "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
3537
;; Scalar (SD_HSI) "laneq" variant: as above but the lane comes from a
;; 128-bit vector (<VCONQ>).
(define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	    (mult:<VWIDE>
	      (sign_extend:<VWIDE>
		(match_operand:SD_HSI 2 "register_operand" "w"))
	      (sign_extend:<VWIDE>
		(vec_select:<VEL>
		  (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
		  (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
              )
	    (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
    return
      "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
3560
3561;; vqdml[sa]l_n
3562
;; vqdml[as]l_n: multiply every element of operand 2 by the scalar operand 3
;; (broadcast via vec_duplicate), then saturating-double, widen and
;; accumulate/subtract.  Emitted as a by-element instruction using lane [0]
;; of the register holding the scalar.
(define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	      (mult:<VWIDE>
		(sign_extend:<VWIDE>
		      (match_operand:VD_HSI 2 "register_operand" "w"))
		(sign_extend:<VWIDE>
		  (vec_duplicate:VD_HSI
		    (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
	      (const_int 1))))]
  "TARGET_SIMD"
  "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
3579
3580;; sqdml[as]l2
3581
;; SQDMLAL2/SQDMLSL2: as sqdml[as]l but operating on the high halves of the
;; 128-bit sources.  Operand 4 is a vect_par_cnst_hi_half parallel selecting
;; the high-half lanes; it is supplied by the expanders below, which is why
;; this pattern is "_internal".
;; NOTE(review): the "type" attribute uses the _scalar_long class although
;; both multiplicands here are vectors — confirm this is intentional.
(define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (SBINQOPS:<VWIDE>
         (match_operand:<VWIDE> 1 "register_operand" "0")
         (ss_ashift:<VWIDE>
             (mult:<VWIDE>
               (sign_extend:<VWIDE>
                 (vec_select:<VHALF>
                     (match_operand:VQ_HSI 2 "register_operand" "w")
                     (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
               (sign_extend:<VWIDE>
                 (vec_select:<VHALF>
                     (match_operand:VQ_HSI 3 "register_operand" "w")
                     (match_dup 4))))
             (const_int 1))))]
  "TARGET_SIMD"
  "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
3601
;; Expanders for the sqdmlal2/sqdmlsl2 intrinsics: build the high-half lane
;; selector with aarch64_simd_vect_par_cnst_half (hi = true) and emit the
;; corresponding _internal insn.  The template operands only declare the
;; interface; all RTL is generated in the C body.
(define_expand "aarch64_sqdmlal2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:VQ_HSI 3 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
						  operands[2], operands[3], p));
  DONE;
})

;; Same as above, for the subtracting (sqdmlsl2) form.
(define_expand "aarch64_sqdmlsl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:VQ_HSI 3 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
						  operands[2], operands[3], p));
  DONE;
})
3627
3628;; vqdml[sa]l2_lane
3629
;; SQDMLAL2/SQDMLSL2 by element: high half of operand 2 multiplied by lane
;; <4> of operand 3 (64-bit vector, <VCOND>).  Operand 5 is the
;; vect_par_cnst_hi_half selector from the expander; the lane index is
;; remapped with ENDIAN_LANE_N for big-endian.
(define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	      (mult:<VWIDE>
		(sign_extend:<VWIDE>
                  (vec_select:<VHALF>
                    (match_operand:VQ_HSI 2 "register_operand" "w")
                    (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
		(sign_extend:<VWIDE>
                  (vec_duplicate:<VHALF>
		    (vec_select:<VEL>
		      (match_operand:<VCOND> 3 "register_operand" "<vwx>")
		      (parallel [(match_operand:SI 4 "immediate_operand" "i")])
		    ))))
	      (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
    return
     "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
3655
;; As the _lane2_internal pattern above, but the multiplier lane comes from a
;; 128-bit vector (<VCONQ>) — the "laneq" forms.
(define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	      (mult:<VWIDE>
		(sign_extend:<VWIDE>
                  (vec_select:<VHALF>
                    (match_operand:VQ_HSI 2 "register_operand" "w")
                    (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
		(sign_extend:<VWIDE>
                  (vec_duplicate:<VHALF>
		    (vec_select:<VEL>
		      (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
		      (parallel [(match_operand:SI 4 "immediate_operand" "i")])
		    ))))
	      (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
    return
     "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
3681
;; Expanders for the four by-element high-half intrinsics
;; (sqdmlal2/sqdmlsl2 x lane/laneq).  Each builds the high-half selector
;; parallel and forwards to the matching _internal insn.
(define_expand "aarch64_sqdmlal2_lane<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:<VCOND> 3 "register_operand" "<vwx>")
   (match_operand:SI 4 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
						       operands[2], operands[3],
						       operands[4], p));
  DONE;
})

;; Lane taken from a 128-bit vector (<VCONQ>).
(define_expand "aarch64_sqdmlal2_laneq<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
   (match_operand:SI 4 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
						       operands[2], operands[3],
						       operands[4], p));
  DONE;
})

;; Subtracting (sqdmlsl2) form, lane from a 64-bit vector.
(define_expand "aarch64_sqdmlsl2_lane<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:<VCOND> 3 "register_operand" "<vwx>")
   (match_operand:SI 4 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
						       operands[2], operands[3],
						       operands[4], p));
  DONE;
})

;; Subtracting form, lane from a 128-bit vector.
(define_expand "aarch64_sqdmlsl2_laneq<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
   (match_operand:SI 4 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
						       operands[2], operands[3],
						       operands[4], p));
  DONE;
})
3741
;; High-half-by-scalar form: high half of operand 2 multiplied by the
;; broadcast scalar operand 3, emitted as a by-element instruction using
;; lane [0].  Operand 4 is the high-half selector from the expander.
(define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	    (mult:<VWIDE>
	      (sign_extend:<VWIDE>
                (vec_select:<VHALF>
                  (match_operand:VQ_HSI 2 "register_operand" "w")
                  (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
	      (sign_extend:<VWIDE>
                (vec_duplicate:<VHALF>
		  (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
	    (const_int 1))))]
  "TARGET_SIMD"
  "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
3760
;; Expanders for the _n (vector-by-scalar) high-half intrinsics: build the
;; high-half selector and forward to the _n_internal insn above.
(define_expand "aarch64_sqdmlal2_n<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:<VEL> 3 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
						    operands[2], operands[3],
						    p));
  DONE;
})

;; Subtracting (sqdmlsl2_n) form.
(define_expand "aarch64_sqdmlsl2_n<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:<VEL> 3 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
						    operands[2], operands[3],
						    p));
  DONE;
})
3788
3789;; vqdmull
3790
;; SQDMULL (vector and scalar forms): signed saturating doubling multiply
;; long — operands 1 and 2 are sign-extended to the wide mode, multiplied,
;; and the product is saturating-shifted left by one (the "doubling").
(define_insn "aarch64_sqdmull<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		     (match_operand:VSD_HSI 1 "register_operand" "w"))
	       (sign_extend:<VWIDE>
		     (match_operand:VSD_HSI 2 "register_operand" "w")))
	     (const_int 1)))]
  "TARGET_SIMD"
  "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
)
3804
3805;; vqdmull_lane
3806
;; SQDMULL by element (64-bit vector forms): operand 1 multiplied by lane <3>
;; of operand 2 (64-bit vector, <VCOND>), lane index remapped with
;; ENDIAN_LANE_N for big-endian.
(define_insn "aarch64_sqdmull_lane<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (match_operand:VD_HSI 1 "register_operand" "w"))
	       (sign_extend:<VWIDE>
                 (vec_duplicate:VD_HSI
                   (vec_select:<VEL>
		     (match_operand:<VCOND> 2 "register_operand" "<vwx>")
		     (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
	       ))
	     (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)
3827
;; As aarch64_sqdmull_lane<mode>, but the lane is taken from a 128-bit
;; vector (<VCONQ>) — the "laneq" intrinsic forms.
(define_insn "aarch64_sqdmull_laneq<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (match_operand:VD_HSI 1 "register_operand" "w"))
	       (sign_extend:<VWIDE>
                 (vec_duplicate:VD_HSI
                   (vec_select:<VEL>
		     (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
		     (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
	       ))
	     (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)
3848
;; Scalar (SD_HSI) sqdmull by element: the selected lane is used directly,
;; with no vec_duplicate.  Distinguished from the vector variant of the same
;; name by the mode iterator.
(define_insn "aarch64_sqdmull_lane<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (match_operand:SD_HSI 1 "register_operand" "w"))
	       (sign_extend:<VWIDE>
                 (vec_select:<VEL>
		   (match_operand:<VCOND> 2 "register_operand" "<vwx>")
		   (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
	       ))
	     (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)
3868
;; Scalar (SD_HSI) "laneq" variant: lane taken from a 128-bit vector
;; (<VCONQ>).
(define_insn "aarch64_sqdmull_laneq<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (match_operand:SD_HSI 1 "register_operand" "w"))
	       (sign_extend:<VWIDE>
                 (vec_select:<VEL>
		   (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
		   (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
	       ))
	     (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)
3888
3889;; vqdmull_n
3890
;; vqdmull_n: multiply every element of operand 1 by the broadcast scalar
;; operand 2; emitted as a by-element instruction using lane [0].
(define_insn "aarch64_sqdmull_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (match_operand:VD_HSI 1 "register_operand" "w"))
	       (sign_extend:<VWIDE>
                 (vec_duplicate:VD_HSI
                   (match_operand:<VEL> 2 "register_operand" "<vwx>")))
	       )
	     (const_int 1)))]
  "TARGET_SIMD"
  "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)
3906
3907;; vqdmull2
3908
3909
3910
;; SQDMULL2: saturating doubling multiply long of the high halves of two
;; 128-bit vectors.  Operand 3 is the vect_par_cnst_hi_half selector
;; supplied by the expander below.
(define_insn "aarch64_sqdmull2<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
                   (match_operand:VQ_HSI 1 "register_operand" "w")
                   (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
                   (match_operand:VQ_HSI 2 "register_operand" "w")
                   (match_dup 3)))
	       )
	     (const_int 1)))]
  "TARGET_SIMD"
  "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)
3929
;; Expander for sqdmull2: build the high-half lane selector and emit the
;; _internal insn above.
(define_expand "aarch64_sqdmull2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQ_HSI 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
						  operands[2], p));
  DONE;
})
3941
3942;; vqdmull2_lane
3943
;; SQDMULL2 by element: high half of operand 1 multiplied by lane <3> of
;; operand 2 (64-bit vector, <VCOND>).  Operand 4 is the high-half selector;
;; the lane index is remapped with ENDIAN_LANE_N for big-endian.
(define_insn "aarch64_sqdmull2_lane<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
                   (match_operand:VQ_HSI 1 "register_operand" "w")
                   (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
	       (sign_extend:<VWIDE>
                 (vec_duplicate:<VHALF>
                   (vec_select:<VEL>
		     (match_operand:<VCOND> 2 "register_operand" "<vwx>")
		     (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
	       ))
	     (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
    return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)
3966
;; As aarch64_sqdmull2_lane<mode>_internal, but the lane comes from a
;; 128-bit vector (<VCONQ>) — the "laneq" forms.
(define_insn "aarch64_sqdmull2_laneq<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
                   (match_operand:VQ_HSI 1 "register_operand" "w")
                   (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
	       (sign_extend:<VWIDE>
                 (vec_duplicate:<VHALF>
                   (vec_select:<VEL>
		     (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
		     (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
	       ))
	     (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
    return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)
3989
;; Expanders for the by-element sqdmull2 intrinsics: build the high-half
;; selector and forward to the matching _internal insn.
(define_expand "aarch64_sqdmull2_lane<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQ_HSI 1 "register_operand" "w")
   (match_operand:<VCOND> 2 "register_operand" "<vwx>")
   (match_operand:SI 3 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
						       operands[2], operands[3],
						       p));
  DONE;
})

;; Lane taken from a 128-bit vector (<VCONQ>).
(define_expand "aarch64_sqdmull2_laneq<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQ_HSI 1 "register_operand" "w")
   (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
   (match_operand:SI 3 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
						       operands[2], operands[3],
						       p));
  DONE;
})
4017
4018;; vqdmull2_n
4019
;; SQDMULL2 by scalar: high half of operand 1 multiplied by the broadcast
;; scalar operand 2; emitted as a by-element instruction using lane [0].
;; Operand 3 is the high-half selector from the expander.
(define_insn "aarch64_sqdmull2_n<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
                   (match_operand:VQ_HSI 1 "register_operand" "w")
                   (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
	       (sign_extend:<VWIDE>
                 (vec_duplicate:<VHALF>
                   (match_operand:<VEL> 2 "register_operand" "<vwx>")))
	       )
	     (const_int 1)))]
  "TARGET_SIMD"
  "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)
4037
;; Expander for sqdmull2_n: build the high-half selector and emit the
;; _n_internal insn above.
(define_expand "aarch64_sqdmull2_n<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQ_HSI 1 "register_operand" "w")
   (match_operand:<VEL> 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
						    operands[2], p));
  DONE;
})
4049
4050;; vshl
4051
;; Shift by register: [su]shl / [su]rshl — element-wise shift of operand 1
;; by the (per-element, possibly negative) amounts in operand 2, with
;; optional rounding, selected by the VSHL unspec iterator.
;; Fix: dropped the stray ';' that followed the output-template string.  In
;; md syntax ';' starts a comment so it was inert, but it was a typo (a
;; C-style statement terminator) and has been removed upstream.
(define_insn "aarch64_<sur>shl<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
        (unspec:VSDQ_I_DI
	  [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
           (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
         VSHL))]
  "TARGET_SIMD"
  "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)
4062
4063
4064;; vqshl
4065
;; Saturating shift by register: [su]qshl / [su]qrshl — as the pattern
;; above but saturating, with optional rounding, selected by VQSHL.
;; Fix: dropped the stray ';' that followed the output-template string
;; (inert md comment introducer, but a typo — removed upstream).
(define_insn "aarch64_<sur>q<r>shl<mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
        (unspec:VSDQ_I
	  [(match_operand:VSDQ_I 1 "register_operand" "w")
           (match_operand:VSDQ_I 2 "register_operand" "w")]
         VQSHL))]
  "TARGET_SIMD"
  "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_sat_shift_reg<q>")]
)
4076
4077;; vshll_n
4078
;; Shift left long by immediate ([us]shll).  When the shift equals the
;; element bit size, [us]shll cannot encode it, so the dedicated SHLL form
;; (which shifts by exactly the element size) is emitted instead.
(define_insn "aarch64_<sur>shll_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
			 (match_operand:SI 2
			   "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
                         VSHLL))]
  "TARGET_SIMD"
  {
    if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
      return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
    else
      return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
  }
  [(set_attr "type" "neon_shift_imm_long")]
)
4094
4095;; vshll_high_n
4096
;; Shift left long by immediate, high half ([us]shll2 / shll2).  Same
;; element-bitsize special case as the shll_n pattern above.
;; NOTE(review): operand 2 uses plain immediate_operand here, whereas the
;; shll_n pattern range-checks it with aarch64_simd_shift_imm_bitsize —
;; confirm whether the looser predicate is intentional.
(define_insn "aarch64_<sur>shll2_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
			 (match_operand:SI 2 "immediate_operand" "i")]
                         VSHLL))]
  "TARGET_SIMD"
  {
    if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
      return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
    else
      return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
  }
  [(set_attr "type" "neon_shift_imm_long")]
)
4111
4112;; vrshr_n
4113
;; Shift right by immediate, plain or rounding ([su]shr / [su]rshr), selected
;; by the VRSHR_N unspec iterator.  The predicate restricts the shift amount
;; to the valid 1..element-size range.
(define_insn "aarch64_<sur>shr_n<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
        (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
			   (match_operand:SI 2
			     "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
			  VRSHR_N))]
  "TARGET_SIMD"
  "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)
4124
4125;; v(r)sra_n
4126
;; Shift right and accumulate ([su]sra / [su]rsra): operand 2 shifted right
;; by immediate operand 3 and added into operand 1, which is tied to the
;; destination (constraint "0").
(define_insn "aarch64_<sur>sra_n<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
	(unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
		       (match_operand:VSDQ_I_DI 2 "register_operand" "w")
                       (match_operand:SI 3
			 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
                      VSRA))]
  "TARGET_SIMD"
  "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
  [(set_attr "type" "neon_shift_acc<q>")]
)
4138
4139;; vs<lr>i_n
4140
;; Shift and insert (sli / sri): operand 2 shifted by immediate operand 3
;; and inserted into operand 1 (tied to the destination).  The
;; <offsetlr>-parameterised predicate gives each direction its legal
;; immediate range.
(define_insn "aarch64_<sur>s<lr>i_n<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
	(unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
		       (match_operand:VSDQ_I_DI 2 "register_operand" "w")
                       (match_operand:SI 3
			 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
                      VSLRI))]
  "TARGET_SIMD"
  "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
  [(set_attr "type" "neon_shift_imm<q>")]
)
4152
4153;; vqshl(u)
4154
;; Saturating shift left by immediate ([su]qshl / sqshlu), selected by the
;; VQSHL_N unspec iterator.
(define_insn "aarch64_<sur>qshl<u>_n<mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
	(unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
		       (match_operand:SI 2
			 "aarch64_simd_shift_imm_<ve_mode>" "i")]
                      VQSHL_N))]
  "TARGET_SIMD"
  "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)
4165
4166
4167;; vq(r)shr(u)n_n
4168
;; Saturating shift right narrow by immediate, with optional rounding and
;; unsigned-saturation variants ([su]q[r]shr[u]n), selected by VQSHRN_N.
;; The result is the narrow mode <VNARROWQ>.
(define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
        (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
			    (match_operand:SI 2
			      "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
			   VQSHRN_N))]
  "TARGET_SIMD"
  "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
4179
4180
4181;; cm(eq|ge|gt|lt|le)
4182;; Note, we have constraints for Dz and Z as different expanders
4183;; have different ideas of what should be passed to this pattern.
4184
;; Integer vector compares producing all-ones/all-zeros masks
;; (neg of the boolean compare).  Alternative 1 compares two registers
;; (cmp_1/cmp_2 may swap the operands so only one mnemonic per relation is
;; needed); alternative 2 compares against the zero constant (ZDz) and uses
;; the #0 form.
(define_insn "aarch64_cm<optab><mode>"
  [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
	(neg:<V_cmp_result>
	  (COMPARISONS:<V_cmp_result>
	    (match_operand:VDQ_I 1 "register_operand" "w,w")
	    (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
	  )))]
  "TARGET_SIMD"
  "@
  cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
  cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
  [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
)
4198
;; DImode signed compare-to-mask.  Kept as "#" until after reload so the
;; register allocator may place it in either file: if operands landed in
;; general registers, split to a flags-setting compare plus a negated
;; cstore; otherwise fall through to the post-reload *aarch64_cm<optab>di
;; SIMD pattern below (which does not clobber CC).
(define_insn_and_split "aarch64_cm<optab>di"
  [(set (match_operand:DI 0 "register_operand" "=w,w,r")
	(neg:DI
	  (COMPARISONS:DI
	    (match_operand:DI 1 "register_operand" "w,w,r")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
	  )))
     (clobber (reg:CC CC_REGNUM))]
  "TARGET_SIMD"
  "#"
  "reload_completed"
  [(set (match_operand:DI 0 "register_operand")
	(neg:DI
	  (COMPARISONS:DI
	    (match_operand:DI 1 "register_operand")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero")
	  )))]
  {
    /* If we are in the general purpose register file,
       we split to a sequence of comparison and store.  */
    if (GP_REGNUM_P (REGNO (operands[0]))
	&& GP_REGNUM_P (REGNO (operands[1])))
      {
	machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
	rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
	rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
	emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
	DONE;
      }
    /* Otherwise, we expand to a similar pattern which does not
       clobber CC_REGNUM.  */
  }
  [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
)
4233
;; Post-reload SIMD form of the DImode signed compare-to-mask; no CC
;; clobber.  Target of the split above when the operands stayed in FP/SIMD
;; registers.
(define_insn "*aarch64_cm<optab>di"
  [(set (match_operand:DI 0 "register_operand" "=w,w")
	(neg:DI
	  (COMPARISONS:DI
	    (match_operand:DI 1 "register_operand" "w,w")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
	  )))]
  "TARGET_SIMD && reload_completed"
  "@
  cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
  cm<optab>\t%d0, %d1, #0"
  [(set_attr "type" "neon_compare, neon_compare_zero")]
)
4247
4248;; cm(hs|hi)
4249
;; Unsigned vector compares (cmhs/cmhi).  Unlike the signed patterns there
;; is no compare-against-zero alternative here.
(define_insn "aarch64_cm<optab><mode>"
  [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
	(neg:<V_cmp_result>
	  (UCOMPARISONS:<V_cmp_result>
	    (match_operand:VDQ_I 1 "register_operand" "w")
	    (match_operand:VDQ_I 2 "register_operand" "w")
	  )))]
  "TARGET_SIMD"
  "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
  [(set_attr "type" "neon_compare<q>")]
)
4261
;; DImode unsigned compare-to-mask, same two-register-file strategy as the
;; signed version: split post-reload either to compare + negated cstore
;; (general registers; unsigned compares use plain CCmode) or to the
;; CC-clobber-free SIMD pattern below.
(define_insn_and_split "aarch64_cm<optab>di"
  [(set (match_operand:DI 0 "register_operand" "=w,r")
	(neg:DI
	  (UCOMPARISONS:DI
	    (match_operand:DI 1 "register_operand" "w,r")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
	  )))
    (clobber (reg:CC CC_REGNUM))]
  "TARGET_SIMD"
  "#"
  "reload_completed"
  [(set (match_operand:DI 0 "register_operand")
	(neg:DI
	  (UCOMPARISONS:DI
	    (match_operand:DI 1 "register_operand")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero")
	  )))]
  {
    /* If we are in the general purpose register file,
       we split to a sequence of comparison and store.  */
    if (GP_REGNUM_P (REGNO (operands[0]))
	&& GP_REGNUM_P (REGNO (operands[1])))
      {
	machine_mode mode = CCmode;
	rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
	rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
	emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
	DONE;
      }
    /* Otherwise, we expand to a similar pattern which does not
       clobber CC_REGNUM.  */
  }
  [(set_attr "type" "neon_compare,multiple")]
)
4296
;; Post-reload SIMD form of the DImode unsigned compare-to-mask; no CC
;; clobber.
(define_insn "*aarch64_cm<optab>di"
  [(set (match_operand:DI 0 "register_operand" "=w")
	(neg:DI
	  (UCOMPARISONS:DI
	    (match_operand:DI 1 "register_operand" "w")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
	  )))]
  "TARGET_SIMD && reload_completed"
  "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
  [(set_attr "type" "neon_compare")]
)
4308
4309;; cmtst
4310
4311;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
4312;; we don't have any insns using ne, and aarch64_vcond outputs
4313;; not (neg (eq (and x y) 0))
4314;; which is rewritten by simplify_rtx as
4315;; plus (eq (and x y) 0) -1.
4316
;; CMTST: match the canonical RTL that simplify_rtx produces for
;; "test bits and produce a mask" — (plus (eq (and x y) 0) -1) — as
;; explained in the comment block above this pattern.
(define_insn "aarch64_cmtst<mode>"
  [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
	(plus:<V_cmp_result>
	  (eq:<V_cmp_result>
	    (and:VDQ_I
	      (match_operand:VDQ_I 1 "register_operand" "w")
	      (match_operand:VDQ_I 2 "register_operand" "w"))
	    (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
	  (match_operand:<V_cmp_result> 4 "aarch64_simd_imm_minus_one")))
  ]
  "TARGET_SIMD"
  "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_tst<q>")]
)
4331
;; DImode CMTST, same two-register-file strategy as the compare patterns:
;; after reload, general-register operands split to tst (AND-based compare)
;; plus a negated cstore; SIMD-register operands fall through to the
;; CC-clobber-free *aarch64_cmtstdi pattern below.
(define_insn_and_split "aarch64_cmtstdi"
  [(set (match_operand:DI 0 "register_operand" "=w,r")
	(neg:DI
	  (ne:DI
	    (and:DI
	      (match_operand:DI 1 "register_operand" "w,r")
	      (match_operand:DI 2 "register_operand" "w,r"))
	    (const_int 0))))
    (clobber (reg:CC CC_REGNUM))]
  "TARGET_SIMD"
  "#"
  "reload_completed"
  [(set (match_operand:DI 0 "register_operand")
	(neg:DI
	  (ne:DI
	    (and:DI
	      (match_operand:DI 1 "register_operand")
	      (match_operand:DI 2 "register_operand"))
	    (const_int 0))))]
  {
    /* If we are in the general purpose register file,
       we split to a sequence of comparison and store.  */
    if (GP_REGNUM_P (REGNO (operands[0]))
	&& GP_REGNUM_P (REGNO (operands[1])))
      {
	rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
	machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
	rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
	rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
	emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
	DONE;
      }
    /* Otherwise, we expand to a similar pattern which does not
       clobber CC_REGNUM.  */
  }
  [(set_attr "type" "neon_tst,multiple")]
)
4369
;; FP-register-only DImode CMTST without the CC clobber; the split of
;; aarch64_cmtstdi above falls through to this pattern.
4370(define_insn "*aarch64_cmtstdi"
4371  [(set (match_operand:DI 0 "register_operand" "=w")
4372	(neg:DI
4373	  (ne:DI
4374	    (and:DI
4375	      (match_operand:DI 1 "register_operand" "w")
4376	      (match_operand:DI 2 "register_operand" "w"))
4377	    (const_int 0))))]
4378  "TARGET_SIMD"
4379  "cmtst\t%d0, %d1, %d2"
4380  [(set_attr "type" "neon_tst")]
4381)
4382
4383;; fcm(eq|ge|gt|le|lt)
4384
;; Floating-point compares producing an all-ones/all-zeros mask.  The second
;; alternative compares against zero (YDz) using the immediate-zero form;
;; <cmp_1>/<cmp_2> swap the operand order for comparisons that only exist in
;; the opposite sense (<n_optab>).
4385(define_insn "aarch64_cm<optab><mode>"
4386  [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
4387	(neg:<V_cmp_result>
4388	  (COMPARISONS:<V_cmp_result>
4389	    (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
4390	    (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
4391	  )))]
4392  "TARGET_SIMD"
4393  "@
4394  fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4395  fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
4396  [(set_attr "type" "neon_fp_compare_<stype><q>")]
4397)
4398
4399;; fac(ge|gt)
4400;; Note we can also handle what would be fac(le|lt) by
4401;; generating fac(ge|gt).
4402
;; Absolute-value compares: |op1| cmp |op2|.  Only FACGE/FACGT exist in
;; hardware; the le/lt forms are handled by swapping operands via
;; <cmp_1>/<cmp_2> and emitting <n_optab>.
4403(define_insn "aarch64_fac<optab><mode>"
4404  [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
4405	(neg:<V_cmp_result>
4406	  (FAC_COMPARISONS:<V_cmp_result>
4407	    (abs:VHSDF_HSDF
4408	      (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
4409	    (abs:VHSDF_HSDF
4410	      (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
4411  )))]
4412  "TARGET_SIMD"
4413  "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4414  [(set_attr "type" "neon_fp_compare_<stype><q>")]
4415)
4416
4417;; addp
4418
;; Pairwise add across two 64-bit integer vectors (unspec: exact lane
;; semantics are opaque to the optimizers).
4419(define_insn "aarch64_addp<mode>"
4420  [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
4421        (unspec:VD_BHSI
4422          [(match_operand:VD_BHSI 1 "register_operand" "w")
4423	   (match_operand:VD_BHSI 2 "register_operand" "w")]
4424          UNSPEC_ADDP))]
4425  "TARGET_SIMD"
4426  "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4427  [(set_attr "type" "neon_reduc_add<q>")]
4428)
4429
;; Scalar ADDP: sum the two DI lanes of a V2DI into a D register.
4430(define_insn "aarch64_addpdi"
4431  [(set (match_operand:DI 0 "register_operand" "=w")
4432        (unspec:DI
4433          [(match_operand:V2DI 1 "register_operand" "w")]
4434          UNSPEC_ADDP))]
4435  "TARGET_SIMD"
4436  "addp\t%d0, %1.2d"
4437  [(set_attr "type" "neon_reduc_add")]
4438)
4439
4440;; sqrt
4441
;; sqrt expander: tries the Newton-series approximation first (subject to
;; -mlow-precision-sqrt / tuning); otherwise falls through to the FSQRT insn.
4442(define_expand "sqrt<mode>2"
4443  [(set (match_operand:VHSDF 0 "register_operand" "=w")
4444	(sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4445  "TARGET_SIMD"
4446{
4447  if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
4448    DONE;
4449})
4450
;; Exact vector FSQRT.
4451(define_insn "*sqrt<mode>2"
4452  [(set (match_operand:VHSDF 0 "register_operand" "=w")
4453	(sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4454  "TARGET_SIMD"
4455  "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
4456  [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
4457)
4458
4459;; Patterns for vector struct loads and stores.
4460
;; LD2 of a Q-register pair (OImode holds the 2-vector tuple); the inner
;; unspec only carries the element mode VQ for iterator purposes.
4461(define_insn "aarch64_simd_ld2<mode>"
4462  [(set (match_operand:OI 0 "register_operand" "=w")
4463	(unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4464		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4465		   UNSPEC_LD2))]
4466  "TARGET_SIMD"
4467  "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4468  [(set_attr "type" "neon_load2_2reg<q>")]
4469)
4470
;; LD2R: load one structure and replicate it to all lanes of both registers.
;; The memory operand is BLKmode since only two elements are read.
4471(define_insn "aarch64_simd_ld2r<mode>"
4472  [(set (match_operand:OI 0 "register_operand" "=w")
4473       (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4474                   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4475                  UNSPEC_LD2_DUP))]
4476  "TARGET_SIMD"
4477  "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4478  [(set_attr "type" "neon_load2_all_lanes<q>")]
4479)
4480
;; LD2 into a single lane; operand 2 provides the untouched lanes ("0"
;; ties it to the output).  The lane number is converted to the
;; architectural (endian-adjusted) numbering at output time.
4481(define_insn "aarch64_vec_load_lanesoi_lane<mode>"
4482  [(set (match_operand:OI 0 "register_operand" "=w")
4483	(unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4484		    (match_operand:OI 2 "register_operand" "0")
4485		    (match_operand:SI 3 "immediate_operand" "i")
4486		    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4487		   UNSPEC_LD2_LANE))]
4488  "TARGET_SIMD"
4489  {
4490    operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
4491    return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
4492  }
4493  [(set_attr "type" "neon_load2_one_lane")]
4494)
4495
;; Standard-name expander for 2-element interleaved loads.  On big-endian
;; the loaded register list must be reversed (via TBL) to match GCC's
;; lane numbering.
4496(define_expand "vec_load_lanesoi<mode>"
4497  [(set (match_operand:OI 0 "register_operand" "=w")
4498	(unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4499		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4500		   UNSPEC_LD2))]
4501  "TARGET_SIMD"
4502{
4503  if (BYTES_BIG_ENDIAN)
4504    {
4505      rtx tmp = gen_reg_rtx (OImode);
4506      rtx mask = aarch64_reverse_mask (<MODE>mode);
4507      emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
4508      emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
4509    }
4510  else
4511    emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
4512  DONE;
4513})
4514
;; ST2 of a Q-register pair (mirror of aarch64_simd_ld2).
4515(define_insn "aarch64_simd_st2<mode>"
4516  [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4517	(unspec:OI [(match_operand:OI 1 "register_operand" "w")
4518                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4519                   UNSPEC_ST2))]
4520  "TARGET_SIMD"
4521  "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
4522  [(set_attr "type" "neon_store2_2reg<q>")]
4523)
4524
4525;; RTL uses GCC vector extension indices, so flip only for assembly.
;; ST2 of a single lane; lane index is endian-adjusted at output time only
;; (the RTL keeps GCC's numbering, as the comment above notes).
4526(define_insn "aarch64_vec_store_lanesoi_lane<mode>"
4527  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4528	(unspec:BLK [(match_operand:OI 1 "register_operand" "w")
4529		    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4530		    (match_operand:SI 2 "immediate_operand" "i")]
4531		   UNSPEC_ST2_LANE))]
4532  "TARGET_SIMD"
4533  {
4534    operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
4535    return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
4536  }
4537  [(set_attr "type" "neon_store2_one_lane<q>")]
4538)
4539
;; Standard-name expander for 2-element interleaved stores; big-endian
;; reverses the register list before the ST2.
4540(define_expand "vec_store_lanesoi<mode>"
4541  [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4542	(unspec:OI [(match_operand:OI 1 "register_operand" "w")
4543                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4544                   UNSPEC_ST2))]
4545  "TARGET_SIMD"
4546{
4547  if (BYTES_BIG_ENDIAN)
4548    {
4549      rtx tmp = gen_reg_rtx (OImode);
4550      rtx mask = aarch64_reverse_mask (<MODE>mode);
4551      emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
4552      emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
4553    }
4554  else
4555    emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
4556  DONE;
4557})
4558
;; LD3 of a Q-register triple (CImode tuple).
4559(define_insn "aarch64_simd_ld3<mode>"
4560  [(set (match_operand:CI 0 "register_operand" "=w")
4561	(unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4562		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4563		   UNSPEC_LD3))]
4564  "TARGET_SIMD"
4565  "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4566  [(set_attr "type" "neon_load3_3reg<q>")]
4567)
4568
;; LD3R: load one 3-element structure, replicated to all lanes.
4569(define_insn "aarch64_simd_ld3r<mode>"
4570  [(set (match_operand:CI 0 "register_operand" "=w")
4571       (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4572                   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4573                  UNSPEC_LD3_DUP))]
4574  "TARGET_SIMD"
4575  "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4576  [(set_attr "type" "neon_load3_all_lanes<q>")]
4577)
4578
;; LD3 into a single lane; operand 2 supplies the preserved lanes and the
;; lane number is endian-adjusted at output time.
4579(define_insn "aarch64_vec_load_lanesci_lane<mode>"
4580  [(set (match_operand:CI 0 "register_operand" "=w")
4581	(unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4582		    (match_operand:CI 2 "register_operand" "0")
4583		    (match_operand:SI 3 "immediate_operand" "i")
4584		    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4585		   UNSPEC_LD3_LANE))]
4586  "TARGET_SIMD"
4587{
4588    operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
4589    return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
4590}
4591  [(set_attr "type" "neon_load3_one_lane")]
4592)
4593
;; Standard-name expander for 3-element interleaved loads; big-endian
;; reverses the loaded register list.
4594(define_expand "vec_load_lanesci<mode>"
4595  [(set (match_operand:CI 0 "register_operand" "=w")
4596	(unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4597		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4598		   UNSPEC_LD3))]
4599  "TARGET_SIMD"
4600{
4601  if (BYTES_BIG_ENDIAN)
4602    {
4603      rtx tmp = gen_reg_rtx (CImode);
4604      rtx mask = aarch64_reverse_mask (<MODE>mode);
4605      emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
4606      emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
4607    }
4608  else
4609    emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
4610  DONE;
4611})
4612
;; ST3 of a Q-register triple (mirror of aarch64_simd_ld3).
4613(define_insn "aarch64_simd_st3<mode>"
4614  [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4615	(unspec:CI [(match_operand:CI 1 "register_operand" "w")
4616                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4617                   UNSPEC_ST3))]
4618  "TARGET_SIMD"
4619  "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
4620  [(set_attr "type" "neon_store3_3reg<q>")]
4621)
4622
4623;; RTL uses GCC vector extension indices, so flip only for assembly.
;; ST3 of a single lane; lane index endian-adjusted at output time only.
4624(define_insn "aarch64_vec_store_lanesci_lane<mode>"
4625  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4626	(unspec:BLK [(match_operand:CI 1 "register_operand" "w")
4627		     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4628		     (match_operand:SI 2 "immediate_operand" "i")]
4629		    UNSPEC_ST3_LANE))]
4630  "TARGET_SIMD"
4631  {
4632    operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
4633    return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
4634  }
4635  [(set_attr "type" "neon_store3_one_lane<q>")]
4636)
4637
;; Standard-name expander for 3-element interleaved stores; big-endian
;; reverses the register list before the ST3.
4638(define_expand "vec_store_lanesci<mode>"
4639  [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4640	(unspec:CI [(match_operand:CI 1 "register_operand" "w")
4641                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4642                   UNSPEC_ST3))]
4643  "TARGET_SIMD"
4644{
4645  if (BYTES_BIG_ENDIAN)
4646    {
4647      rtx tmp = gen_reg_rtx (CImode);
4648      rtx mask = aarch64_reverse_mask (<MODE>mode);
4649      emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
4650      emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
4651    }
4652  else
4653    emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
4654  DONE;
4655})
4656
;; LD4 of a Q-register quadruple (XImode tuple).
4657(define_insn "aarch64_simd_ld4<mode>"
4658  [(set (match_operand:XI 0 "register_operand" "=w")
4659	(unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4660		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4661		   UNSPEC_LD4))]
4662  "TARGET_SIMD"
4663  "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4664  [(set_attr "type" "neon_load4_4reg<q>")]
4665)
4666
;; LD4R: load one 4-element structure, replicated to all lanes.
4667(define_insn "aarch64_simd_ld4r<mode>"
4668  [(set (match_operand:XI 0 "register_operand" "=w")
4669       (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4670                   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4671                  UNSPEC_LD4_DUP))]
4672  "TARGET_SIMD"
4673  "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4674  [(set_attr "type" "neon_load4_all_lanes<q>")]
4675)
4676
;; LD4 into a single lane; operand 2 supplies the preserved lanes and the
;; lane number is endian-adjusted at output time.
4677(define_insn "aarch64_vec_load_lanesxi_lane<mode>"
4678  [(set (match_operand:XI 0 "register_operand" "=w")
4679	(unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4680		    (match_operand:XI 2 "register_operand" "0")
4681		    (match_operand:SI 3 "immediate_operand" "i")
4682		    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4683		   UNSPEC_LD4_LANE))]
4684  "TARGET_SIMD"
4685{
4686    operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
4687    return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
4688}
4689  [(set_attr "type" "neon_load4_one_lane")]
4690)
4691
;; Standard-name expander for 4-element interleaved loads; big-endian
;; reverses the loaded register list.
4692(define_expand "vec_load_lanesxi<mode>"
4693  [(set (match_operand:XI 0 "register_operand" "=w")
4694	(unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4695		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4696		   UNSPEC_LD4))]
4697  "TARGET_SIMD"
4698{
4699  if (BYTES_BIG_ENDIAN)
4700    {
4701      rtx tmp = gen_reg_rtx (XImode);
4702      rtx mask = aarch64_reverse_mask (<MODE>mode);
4703      emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
4704      emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
4705    }
4706  else
4707    emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
4708  DONE;
4709})
4710
;; ST4 of a Q-register quadruple (mirror of aarch64_simd_ld4).
4711(define_insn "aarch64_simd_st4<mode>"
4712  [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4713	(unspec:XI [(match_operand:XI 1 "register_operand" "w")
4714                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4715                   UNSPEC_ST4))]
4716  "TARGET_SIMD"
4717  "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
4718  [(set_attr "type" "neon_store4_4reg<q>")]
4719)
4720
4721;; RTL uses GCC vector extension indices, so flip only for assembly.
;; ST4 of a single lane; lane index endian-adjusted at output time only.
4722(define_insn "aarch64_vec_store_lanesxi_lane<mode>"
4723  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4724	(unspec:BLK [(match_operand:XI 1 "register_operand" "w")
4725		     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4726		     (match_operand:SI 2 "immediate_operand" "i")]
4727		    UNSPEC_ST4_LANE))]
4728  "TARGET_SIMD"
4729  {
4730    operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
4731    return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
4732  }
4733  [(set_attr "type" "neon_store4_one_lane<q>")]
4734)
4735
;; Standard-name expander for 4-element interleaved stores; big-endian
;; reverses the register list before the ST4.
4736(define_expand "vec_store_lanesxi<mode>"
4737  [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4738	(unspec:XI [(match_operand:XI 1 "register_operand" "w")
4739                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4740                   UNSPEC_ST4))]
4741  "TARGET_SIMD"
4742{
4743  if (BYTES_BIG_ENDIAN)
4744    {
4745      rtx tmp = gen_reg_rtx (XImode);
4746      rtx mask = aarch64_reverse_mask (<MODE>mode);
4747      emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
4748      emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
4749    }
4750  else
4751    emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
4752  DONE;
4753})
4754
;; Reverse the byte order of every register in a struct register list using
;; one TBL per constituent Q register (operand 2 is the byte-permute mask
;; from aarch64_reverse_mask).  Output is earlyclobber since op0/op1 lists
;; may otherwise overlap partially.
4755(define_insn_and_split "aarch64_rev_reglist<mode>"
4756[(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
4757	(unspec:VSTRUCT
4758	           [(match_operand:VSTRUCT 1 "register_operand" "w")
4759		    (match_operand:V16QI 2 "register_operand" "w")]
4760                   UNSPEC_REV_REGLIST))]
4761  "TARGET_SIMD"
4762  "#"
4763  "&& reload_completed"
4764  [(const_int 0)]
4765{
4766  int i;
4767  int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
4768  for (i = 0; i < nregs; i++)
4769    {
4770      rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
4771      rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
4772      emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
4773    }
4774  DONE;
4775}
4776  [(set_attr "type" "neon_tbl1_q")
4777   (set_attr "length" "<insn_count>")]
4778)
4779
4780;; Reload patterns for AdvSIMD register list operands.
4781
;; Move expander for struct register-list modes (OI/CI/XI): before reload,
;; force the source into a register when storing to memory so that at most
;; one operand is a MEM.
4782(define_expand "mov<mode>"
4783  [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
4784	(match_operand:VSTRUCT 1 "general_operand" ""))]
4785  "TARGET_SIMD"
4786{
4787  if (can_create_pseudo_p ())
4788    {
4789      if (GET_CODE (operands[0]) != REG)
4790	operands[1] = force_reg (<MODE>mode, operands[1]);
4791    }
4792})
4793
;; Little-endian struct moves: reg-reg splits later (alternative 0, "#");
;; memory transfers use ST1/LD1 multi-register forms.  Big-endian uses the
;; *aarch64_be_mov patterns below instead.
4794(define_insn "*aarch64_mov<mode>"
4795  [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
4796	(match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
4797  "TARGET_SIMD && !BYTES_BIG_ENDIAN
4798   && (register_operand (operands[0], <MODE>mode)
4799       || register_operand (operands[1], <MODE>mode))"
4800  "@
4801   #
4802   st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
4803   ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
4804  [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
4805		     neon_load<nregs>_<nregs>reg_q")
4806   (set_attr "length" "<insn_count>,4,4")]
4807)
4808
;; Element-ordered LD1, used on big-endian where a plain LDR would give the
;; wrong lane layout.
4809(define_insn "aarch64_be_ld1<mode>"
4810  [(set (match_operand:VALLDI_F16 0	"register_operand" "=w")
4811	(unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
4812			     "aarch64_simd_struct_operand" "Utv")]
4813	UNSPEC_LD1))]
4814  "TARGET_SIMD"
4815  "ld1\\t{%0<Vmtype>}, %1"
4816  [(set_attr "type" "neon_load1_1reg<q>")]
4817)
4818
;; Element-ordered ST1 (big-endian counterpart of aarch64_be_ld1).
4819(define_insn "aarch64_be_st1<mode>"
4820  [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
4821	(unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
4822	UNSPEC_ST1))]
4823  "TARGET_SIMD"
4824  "st1\\t{%1<Vmtype>}, %0"
4825  [(set_attr "type" "neon_store1_1reg<q>")]
4826)
4827
;; Big-endian OImode (2-register struct) moves: reg-reg splits ("#");
;; memory transfers fit a single STP/LDP of Q registers.
4828(define_insn "*aarch64_be_movoi"
4829  [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
4830	(match_operand:OI 1 "general_operand"      " w,w,m"))]
4831  "TARGET_SIMD && BYTES_BIG_ENDIAN
4832   && (register_operand (operands[0], OImode)
4833       || register_operand (operands[1], OImode))"
4834  "@
4835   #
4836   stp\\t%q1, %R1, %0
4837   ldp\\t%q0, %R0, %1"
4838  [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
4839   (set_attr "length" "8,4,4")]
4840)
4841
;; Big-endian CImode (3-register struct) moves: no single instruction can
;; transfer three Q registers, so every alternative splits ("#", see the
;; CImode define_split below).  "o" requires an offsettable address.
4842(define_insn "*aarch64_be_movci"
4843  [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
4844	(match_operand:CI 1 "general_operand"      " w,w,o"))]
4845  "TARGET_SIMD && BYTES_BIG_ENDIAN
4846   && (register_operand (operands[0], CImode)
4847       || register_operand (operands[1], CImode))"
4848  "#"
4849  [(set_attr "type" "multiple")
4850   (set_attr "length" "12,4,4")]
4851)
4852
;; Big-endian XImode (4-register struct) moves: always split ("#", see the
;; XImode define_split below, which uses two OImode moves).
4853(define_insn "*aarch64_be_movxi"
4854  [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
4855	(match_operand:XI 1 "general_operand"      " w,w,o"))]
4856  "TARGET_SIMD && BYTES_BIG_ENDIAN
4857   && (register_operand (operands[0], XImode)
4858       || register_operand (operands[1], XImode))"
4859  "#"
4860  [(set_attr "type" "multiple")
4861   (set_attr "length" "16,4,4")]
4862)
4863
;; After reload, split an OImode reg-reg copy into two TImode register
;; moves (handles overlap via aarch64_simd_emit_reg_reg_move).
4864(define_split
4865  [(set (match_operand:OI 0 "register_operand")
4866	(match_operand:OI 1 "register_operand"))]
4867  "TARGET_SIMD && reload_completed"
4868  [(const_int 0)]
4869{
4870  aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
4871  DONE;
4872})
4873
;; After reload, split CImode moves: reg-reg becomes three TImode moves;
;; on big-endian a memory move becomes an OImode move for the first 32
;; bytes plus a V16QI move for the trailing 16 bytes.  Little-endian
;; memory moves are handled elsewhere, so FAIL.
4874(define_split
4875  [(set (match_operand:CI 0 "nonimmediate_operand")
4876	(match_operand:CI 1 "general_operand"))]
4877  "TARGET_SIMD && reload_completed"
4878  [(const_int 0)]
4879{
4880  if (register_operand (operands[0], CImode)
4881      && register_operand (operands[1], CImode))
4882    {
4883      aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
4884      DONE;
4885    }
4886  else if (BYTES_BIG_ENDIAN)
4887    {
4888      emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
4889		      simplify_gen_subreg (OImode, operands[1], CImode, 0));
4890      emit_move_insn (gen_lowpart (V16QImode,
4891				   simplify_gen_subreg (TImode, operands[0],
4892							CImode, 32)),
4893		      gen_lowpart (V16QImode,
4894				   simplify_gen_subreg (TImode, operands[1],
4895							CImode, 32)));
4896      DONE;
4897    }
4898  else
4899    FAIL;
4900})
4901
;; After reload, split XImode moves: reg-reg becomes four TImode moves;
;; on big-endian a memory move becomes two OImode moves (offsets 0 and 32).
4902(define_split
4903  [(set (match_operand:XI 0 "nonimmediate_operand")
4904	(match_operand:XI 1 "general_operand"))]
4905  "TARGET_SIMD && reload_completed"
4906  [(const_int 0)]
4907{
4908  if (register_operand (operands[0], XImode)
4909      && register_operand (operands[1], XImode))
4910    {
4911      aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
4912      DONE;
4913    }
4914  else if (BYTES_BIG_ENDIAN)
4915    {
4916      emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
4917		      simplify_gen_subreg (OImode, operands[1], XImode, 0));
4918      emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
4919		      simplify_gen_subreg (OImode, operands[1], XImode, 32));
4920      DONE;
4921    }
4922  else
4923    FAIL;
4924})
4925
;; Builtin expander for LDnR: wrap the pointer in a BLKmode MEM sized to
;; nregs structure elements, then emit the matching ld<n>r insn.
4926(define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
4927  [(match_operand:VSTRUCT 0 "register_operand" "=w")
4928   (match_operand:DI 1 "register_operand" "w")
4929   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4930  "TARGET_SIMD"
4931{
4932  rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
4933  set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
4934		     * <VSTRUCT:nregs>);
4935
4936  emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
4937								mem));
4938  DONE;
4939})
4940
;; LD2 of 64-bit vectors into an OImode tuple, little-endian: each D-reg
;; result is zero-extended into the low half of its Q register.
4941(define_insn "aarch64_ld2<mode>_dreg_le"
4942  [(set (match_operand:OI 0 "register_operand" "=w")
4943	(subreg:OI
4944	  (vec_concat:<VRL2>
4945	    (vec_concat:<VDBL>
4946	     (unspec:VD
4947		[(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4948		UNSPEC_LD2)
4949	     (vec_duplicate:VD (const_int 0)))
4950	    (vec_concat:<VDBL>
4951	     (unspec:VD [(match_dup 1)]
4952			UNSPEC_LD2)
4953	     (vec_duplicate:VD (const_int 0)))) 0))]
4954  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
4955  "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4956  [(set_attr "type" "neon_load2_2reg<q>")]
4957)
4958
;; Big-endian variant of aarch64_ld2<mode>_dreg_le: the zero padding sits
;; in the opposite half of each doubled vector.
4959(define_insn "aarch64_ld2<mode>_dreg_be"
4960  [(set (match_operand:OI 0 "register_operand" "=w")
4961	(subreg:OI
4962	  (vec_concat:<VRL2>
4963	    (vec_concat:<VDBL>
4964	     (vec_duplicate:VD (const_int 0))
4965	     (unspec:VD
4966		[(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4967		UNSPEC_LD2))
4968	    (vec_concat:<VDBL>
4969	     (vec_duplicate:VD (const_int 0))
4970	     (unspec:VD [(match_dup 1)]
4971			UNSPEC_LD2))) 0))]
4972  "TARGET_SIMD && BYTES_BIG_ENDIAN"
4973  "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4974  [(set_attr "type" "neon_load2_2reg<q>")]
4975)
4976
;; Scalar-64-bit (DI/DF) variant: there is no LD2 for .1d structures, so a
;; two-register LD1 gives the same de-interleave for single-lane elements.
4977(define_insn "aarch64_ld2<mode>_dreg_le"
4978  [(set (match_operand:OI 0 "register_operand" "=w")
4979	(subreg:OI
4980	  (vec_concat:<VRL2>
4981	    (vec_concat:<VDBL>
4982	     (unspec:DX
4983		[(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4984		UNSPEC_LD2)
4985	     (const_int 0))
4986	    (vec_concat:<VDBL>
4987	     (unspec:DX [(match_dup 1)]
4988			UNSPEC_LD2)
4989	     (const_int 0))) 0))]
4990  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
4991  "ld1\\t{%S0.1d - %T0.1d}, %1"
4992  [(set_attr "type" "neon_load1_2reg<q>")]
4993)
4994
;; Big-endian scalar-64-bit variant of aarch64_ld2<mode>_dreg_le.
4995(define_insn "aarch64_ld2<mode>_dreg_be"
4996  [(set (match_operand:OI 0 "register_operand" "=w")
4997	(subreg:OI
4998	  (vec_concat:<VRL2>
4999	    (vec_concat:<VDBL>
5000	     (const_int 0)
5001	     (unspec:DX
5002		[(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
5003		UNSPEC_LD2))
5004	    (vec_concat:<VDBL>
5005	     (const_int 0)
5006	     (unspec:DX [(match_dup 1)]
5007			UNSPEC_LD2))) 0))]
5008  "TARGET_SIMD && BYTES_BIG_ENDIAN"
5009  "ld1\\t{%S0.1d - %T0.1d}, %1"
5010  [(set_attr "type" "neon_load1_2reg<q>")]
5011)
5012
;; LD3 of 64-bit vectors into a CImode tuple, little-endian; each D result
;; is zero-extended into the low half of its Q register.
5013(define_insn "aarch64_ld3<mode>_dreg_le"
5014  [(set (match_operand:CI 0 "register_operand" "=w")
5015	(subreg:CI
5016	 (vec_concat:<VRL3>
5017	  (vec_concat:<VRL2>
5018	    (vec_concat:<VDBL>
5019	     (unspec:VD
5020		[(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
5021		UNSPEC_LD3)
5022	     (vec_duplicate:VD (const_int 0)))
5023	    (vec_concat:<VDBL>
5024	     (unspec:VD [(match_dup 1)]
5025			UNSPEC_LD3)
5026	     (vec_duplicate:VD (const_int 0))))
5027	  (vec_concat:<VDBL>
5028	     (unspec:VD [(match_dup 1)]
5029			UNSPEC_LD3)
5030	     (vec_duplicate:VD (const_int 0)))) 0))]
5031  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
5032  "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5033  [(set_attr "type" "neon_load3_3reg<q>")]
5034)
5035
;; Big-endian variant of aarch64_ld3<mode>_dreg_le (zero padding swapped).
5036(define_insn "aarch64_ld3<mode>_dreg_be"
5037  [(set (match_operand:CI 0 "register_operand" "=w")
5038	(subreg:CI
5039	 (vec_concat:<VRL3>
5040	  (vec_concat:<VRL2>
5041	    (vec_concat:<VDBL>
5042	     (vec_duplicate:VD (const_int 0))
5043	     (unspec:VD
5044		[(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
5045		UNSPEC_LD3))
5046	    (vec_concat:<VDBL>
5047	     (vec_duplicate:VD (const_int 0))
5048	     (unspec:VD [(match_dup 1)]
5049			UNSPEC_LD3)))
5050	  (vec_concat:<VDBL>
5051	     (vec_duplicate:VD (const_int 0))
5052	     (unspec:VD [(match_dup 1)]
5053			UNSPEC_LD3))) 0))]
5054  "TARGET_SIMD && BYTES_BIG_ENDIAN"
5055  "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5056  [(set_attr "type" "neon_load3_3reg<q>")]
5057)
5058
;; Scalar-64-bit LD3 variant: uses a three-register LD1 since LD3 on .1d
;; structures is equivalent to consecutive loads.
5059(define_insn "aarch64_ld3<mode>_dreg_le"
5060  [(set (match_operand:CI 0 "register_operand" "=w")
5061	(subreg:CI
5062	 (vec_concat:<VRL3>
5063	  (vec_concat:<VRL2>
5064	    (vec_concat:<VDBL>
5065	     (unspec:DX
5066		[(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
5067		UNSPEC_LD3)
5068	     (const_int 0))
5069	    (vec_concat:<VDBL>
5070	     (unspec:DX [(match_dup 1)]
5071			UNSPEC_LD3)
5072	     (const_int 0)))
5073	  (vec_concat:<VDBL>
5074	     (unspec:DX [(match_dup 1)]
5075			UNSPEC_LD3)
5076	     (const_int 0))) 0))]
5077  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
5078  "ld1\\t{%S0.1d - %U0.1d}, %1"
5079  [(set_attr "type" "neon_load1_3reg<q>")]
5080)
5081
;; Big-endian scalar-64-bit variant of aarch64_ld3<mode>_dreg_le.
5082(define_insn "aarch64_ld3<mode>_dreg_be"
5083  [(set (match_operand:CI 0 "register_operand" "=w")
5084	(subreg:CI
5085	 (vec_concat:<VRL3>
5086	  (vec_concat:<VRL2>
5087	    (vec_concat:<VDBL>
5088	     (const_int 0)
5089	     (unspec:DX
5090		[(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
5091		UNSPEC_LD3))
5092	    (vec_concat:<VDBL>
5093	     (const_int 0)
5094	     (unspec:DX [(match_dup 1)]
5095			UNSPEC_LD3)))
5096	  (vec_concat:<VDBL>
5097	     (const_int 0)
5098	     (unspec:DX [(match_dup 1)]
5099			UNSPEC_LD3))) 0))]
5100  "TARGET_SIMD && BYTES_BIG_ENDIAN"
5101  "ld1\\t{%S0.1d - %U0.1d}, %1"
5102  [(set_attr "type" "neon_load1_3reg<q>")]
5103)
5104
;; LD4 of 64-bit vectors into an XImode tuple, little-endian; each D result
;; is zero-extended into the low half of its Q register.
5105(define_insn "aarch64_ld4<mode>_dreg_le"
5106  [(set (match_operand:XI 0 "register_operand" "=w")
5107	(subreg:XI
5108	 (vec_concat:<VRL4>
5109	   (vec_concat:<VRL2>
5110	     (vec_concat:<VDBL>
5111	       (unspec:VD
5112		[(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
5113		UNSPEC_LD4)
5114	       (vec_duplicate:VD (const_int 0)))
5115	      (vec_concat:<VDBL>
5116		(unspec:VD [(match_dup 1)]
5117			UNSPEC_LD4)
5118		(vec_duplicate:VD (const_int 0))))
5119	   (vec_concat:<VRL2>
5120	     (vec_concat:<VDBL>
5121	       (unspec:VD [(match_dup 1)]
5122			UNSPEC_LD4)
5123	       (vec_duplicate:VD (const_int 0)))
5124	     (vec_concat:<VDBL>
5125	       (unspec:VD [(match_dup 1)]
5126			UNSPEC_LD4)
5127	       (vec_duplicate:VD (const_int 0))))) 0))]
5128  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
5129  "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5130  [(set_attr "type" "neon_load4_4reg<q>")]
5131)
5132
;; Big-endian variant of aarch64_ld4<mode>_dreg_le (zero padding swapped).
5133(define_insn "aarch64_ld4<mode>_dreg_be"
5134  [(set (match_operand:XI 0 "register_operand" "=w")
5135	(subreg:XI
5136	 (vec_concat:<VRL4>
5137	   (vec_concat:<VRL2>
5138	     (vec_concat:<VDBL>
5139	       (vec_duplicate:VD (const_int 0))
5140	       (unspec:VD
5141		[(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
5142		UNSPEC_LD4))
5143	      (vec_concat:<VDBL>
5144		(vec_duplicate:VD (const_int 0))
5145		(unspec:VD [(match_dup 1)]
5146			UNSPEC_LD4)))
5147	   (vec_concat:<VRL2>
5148	     (vec_concat:<VDBL>
5149	       (vec_duplicate:VD (const_int 0))
5150	       (unspec:VD [(match_dup 1)]
5151			UNSPEC_LD4))
5152	     (vec_concat:<VDBL>
5153	       (vec_duplicate:VD (const_int 0))
5154	       (unspec:VD [(match_dup 1)]
5155			UNSPEC_LD4)))) 0))]
5156  "TARGET_SIMD && BYTES_BIG_ENDIAN"
5157  "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5158  [(set_attr "type" "neon_load4_4reg<q>")]
5159)
5160
;; Scalar-64-bit LD4 variant: uses a four-register LD1 (LD4 on .1d
;; structures is equivalent to consecutive loads).
5161(define_insn "aarch64_ld4<mode>_dreg_le"
5162  [(set (match_operand:XI 0 "register_operand" "=w")
5163	(subreg:XI
5164	 (vec_concat:<VRL4>
5165	   (vec_concat:<VRL2>
5166	     (vec_concat:<VDBL>
5167	       (unspec:DX
5168		[(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
5169		UNSPEC_LD4)
5170	       (const_int 0))
5171	      (vec_concat:<VDBL>
5172	        (unspec:DX [(match_dup 1)]
5173			UNSPEC_LD4)
5174		(const_int 0)))
5175	   (vec_concat:<VRL2>
5176	     (vec_concat:<VDBL>
5177	       (unspec:DX [(match_dup 1)]
5178			UNSPEC_LD4)
5179	       (const_int 0))
5180	     (vec_concat:<VDBL>
5181	       (unspec:DX [(match_dup 1)]
5182			UNSPEC_LD4)
5183	       (const_int 0)))) 0))]
5184  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
5185  "ld1\\t{%S0.1d - %V0.1d}, %1"
5186  [(set_attr "type" "neon_load1_4reg<q>")]
5187)
5188
;; Big-endian scalar-64-bit variant of aarch64_ld4<mode>_dreg_le.
5189(define_insn "aarch64_ld4<mode>_dreg_be"
5190  [(set (match_operand:XI 0 "register_operand" "=w")
5191	(subreg:XI
5192	 (vec_concat:<VRL4>
5193	   (vec_concat:<VRL2>
5194	     (vec_concat:<VDBL>
5195	       (const_int 0)
5196	       (unspec:DX
5197		[(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
5198		UNSPEC_LD4))
5199	      (vec_concat:<VDBL>
5200		(const_int 0)
5201		(unspec:DX [(match_dup 1)]
5202			UNSPEC_LD4)))
5203	   (vec_concat:<VRL2>
5204	     (vec_concat:<VDBL>
5205	       (const_int 0)
5206	       (unspec:DX [(match_dup 1)]
5207			UNSPEC_LD4))
5208	     (vec_concat:<VDBL>
5209	       (const_int 0)
5210	       (unspec:DX [(match_dup 1)]
5211			UNSPEC_LD4)))) 0))]
5212  "TARGET_SIMD && BYTES_BIG_ENDIAN"
5213  "ld1\\t{%S0.1d - %V0.1d}, %1"
5214  [(set_attr "type" "neon_load1_4reg<q>")]
5215)
5216
;; Builtin expander for LDn of 64-bit element vectors: build a BLKmode MEM
;; of nregs * 8 bytes and dispatch to the endian-specific _dreg pattern.
5217(define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
5218 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5219  (match_operand:DI 1 "register_operand" "r")
5220  (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5221  "TARGET_SIMD"
5222{
5223  rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5224  set_mem_size (mem, <VSTRUCT:nregs> * 8);
5225
5226  if (BYTES_BIG_ENDIAN)
5227    emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg_be (operands[0],
5228								mem));
5229  else
5230    emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg_le (operands[0],
5231								mem));
5232  DONE;
5233})
5234
;; Builtin expander for LD1: big-endian must use the element-ordered LD1
;; pattern; little-endian can use an ordinary move (LDR).
5235(define_expand "aarch64_ld1<VALL_F16:mode>"
5236 [(match_operand:VALL_F16 0 "register_operand")
5237  (match_operand:DI 1 "register_operand")]
5238  "TARGET_SIMD"
5239{
5240  machine_mode mode = <VALL_F16:MODE>mode;
5241  rtx mem = gen_rtx_MEM (mode, operands[1]);
5242
5243  if (BYTES_BIG_ENDIAN)
5244    emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
5245  else
5246    emit_move_insn (operands[0], mem);
5247  DONE;
5248})
5249
;; Builtin expander for LDn of 128-bit element vectors: wrap the pointer in
;; a MEM of the struct tuple mode and emit the LDn insn directly.
5250(define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
5251 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5252  (match_operand:DI 1 "register_operand" "r")
5253  (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5254  "TARGET_SIMD"
5255{
5256  machine_mode mode = <VSTRUCT:MODE>mode;
5257  rtx mem = gen_rtx_MEM (mode, operands[1]);
5258
5259  emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
5260  DONE;
5261})
5262
;; Builtin expander for LDn-to-lane: build the BLKmode MEM, range-check the
;; lane index at expand time, then emit the matching load-lane insn.
5263(define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5264  [(match_operand:VSTRUCT 0 "register_operand" "=w")
5265	(match_operand:DI 1 "register_operand" "w")
5266	(match_operand:VSTRUCT 2 "register_operand" "0")
5267	(match_operand:SI 3 "immediate_operand" "i")
5268	(unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5269  "TARGET_SIMD"
5270{
5271  rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5272  set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5273		     * <VSTRUCT:nregs>);
5274
5275  aarch64_simd_lane_bounds (operands[3], 0,
5276			    GET_MODE_NUNITS (<VALLDIF:MODE>mode),
5277			    NULL);
5278  emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5279	operands[0], mem, operands[2], operands[3]));
5280  DONE;
5281})
5282
5283;; Expanders for builtins to extract vector registers from large
5284;; opaque integer modes.
5285
5286;; D-register list.
5287
;; Extract D-register number <part> (constant operand 2) from the large
;; opaque structure mode in operand 1.  A 16-byte subreg at offset part*16 is
;; copied to a temporary in the doubled mode, then the low half is moved to
;; the D-sized destination.
(define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
 [(match_operand:VDC 0 "register_operand" "=w")
  (match_operand:VSTRUCT 1 "register_operand" "w")
  (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  int part = INTVAL (operands[2]);
  rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
  ;; Structure registers are laid out at 16-byte (Q-register) granularity.
  int offset = part * 16;

  emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
  emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
  DONE;
})
5302
5303;; Q-register list.
5304
;; Extract Q-register number <part> (constant operand 2) from the large
;; opaque structure mode in operand 1, as a 16-byte subreg at offset part*16.
(define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
 [(match_operand:VQ 0 "register_operand" "=w")
  (match_operand:VSTRUCT 1 "register_operand" "w")
  (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  int part = INTVAL (operands[2]);
  int offset = part * 16;	;; Q registers are 16 bytes apart.

  emit_move_insn (operands[0],
		  gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
  DONE;
})
5318
5319;; Permuted-store expanders for neon intrinsics.
5320
5321;; Permute instructions
5322
5323;; vec_perm support
5324
;; Standard vec_perm_const pattern: permute operands 1 and 2 under the
;; constant selector operand 3.  Succeeds only if the target code can map the
;; selector onto a concrete permute sequence; otherwise FAIL lets the
;; middle-end fall back to another strategy.
(define_expand "vec_perm_const<mode>"
  [(match_operand:VALL_F16 0 "register_operand")
   (match_operand:VALL_F16 1 "register_operand")
   (match_operand:VALL_F16 2 "register_operand")
   (match_operand:<V_cmp_result> 3)]
  "TARGET_SIMD"
{
  if (aarch64_expand_vec_perm_const (operands[0], operands[1],
				     operands[2], operands[3]))
    DONE;
  else
    FAIL;
})
5338
;; Standard vec_perm pattern with a variable selector (operand 3); only byte
;; vector modes (VB) are supported.  Always expands via the helper.
(define_expand "vec_perm<mode>"
  [(match_operand:VB 0 "register_operand")
   (match_operand:VB 1 "register_operand")
   (match_operand:VB 2 "register_operand")
   (match_operand:VB 3 "register_operand")]
  "TARGET_SIMD"
{
  aarch64_expand_vec_perm (operands[0], operands[1],
			   operands[2], operands[3]);
  DONE;
})
5350
;; Single-register TBL: table lookup into the one V16QI table register
;; (operand 1) using the index vector in operand 2.
(define_insn "aarch64_tbl1<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
		    (match_operand:VB 2 "register_operand" "w")]
		   UNSPEC_TBL))]
  "TARGET_SIMD"
  "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
  [(set_attr "type" "neon_tbl1<q>")]
)
5360
5361;; Two source registers.
5362
;; Two-register TBL: the OI-mode operand 1 supplies a pair of consecutive
;; table registers (%S1/%T1 print the first and second of the pair).
(define_insn "aarch64_tbl2v16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=w")
	(unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
		       (match_operand:V16QI 2 "register_operand" "w")]
		      UNSPEC_TBL))]
  "TARGET_SIMD"
  "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
  [(set_attr "type" "neon_tbl2_q")]
)
5372
;; Two-register TBL with a VB-mode (8- or 16-byte) index/result; the OI-mode
;; operand 1 supplies the two-register table list.
(define_insn "aarch64_tbl3<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:OI 1 "register_operand" "w")
		      (match_operand:VB 2 "register_operand" "w")]
		      UNSPEC_TBL))]
  "TARGET_SIMD"
  "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
  [(set_attr "type" "neon_tbl3")]
)
5382
;; Two-register TBX: like TBL but out-of-range indices leave the destination
;; lane unchanged, hence operand 1 is tied to the output ("0").
(define_insn "aarch64_tbx4<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:VB 1 "register_operand" "0")
		      (match_operand:OI 2 "register_operand" "w")
		      (match_operand:VB 3 "register_operand" "w")]
		      UNSPEC_TBX))]
  "TARGET_SIMD"
  "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
  [(set_attr "type" "neon_tbl4")]
)
5393
5394;; Three source registers.
5395
;; Three-register TBL: the CI-mode operand 1 supplies three consecutive table
;; registers (%S1 through %U1).
(define_insn "aarch64_qtbl3<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:CI 1 "register_operand" "w")
		      (match_operand:VB 2 "register_operand" "w")]
		      UNSPEC_TBL))]
  "TARGET_SIMD"
  "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
  [(set_attr "type" "neon_tbl3")]
)
5405
;; Three-register TBX variant; operand 1 is tied to the output since TBX
;; preserves destination lanes for out-of-range indices.
(define_insn "aarch64_qtbx3<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:VB 1 "register_operand" "0")
		      (match_operand:CI 2 "register_operand" "w")
		      (match_operand:VB 3 "register_operand" "w")]
		      UNSPEC_TBX))]
  "TARGET_SIMD"
  "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
  [(set_attr "type" "neon_tbl3")]
)
5416
5417;; Four source registers.
5418
;; Four-register TBL: the XI-mode operand 1 supplies four consecutive table
;; registers (%S1 through %V1).
(define_insn "aarch64_qtbl4<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:XI 1 "register_operand" "w")
		      (match_operand:VB 2 "register_operand" "w")]
		      UNSPEC_TBL))]
  "TARGET_SIMD"
  "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
  [(set_attr "type" "neon_tbl4")]
)
5428
;; Four-register TBX variant; operand 1 tied to the output as for the other
;; TBX patterns.
(define_insn "aarch64_qtbx4<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:VB 1 "register_operand" "0")
		      (match_operand:XI 2 "register_operand" "w")
		      (match_operand:VB 3 "register_operand" "w")]
		      UNSPEC_TBX))]
  "TARGET_SIMD"
  "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
  [(set_attr "type" "neon_tbl4")]
)
5439
;; Concatenate two V16QI registers into an OI-mode pair for use as a TBL
;; table.  Emitted as "#" (no real instruction) and split after register
;; allocation, when the helper can see which hard registers were chosen.
(define_insn_and_split "aarch64_combinev16qi"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
		    (match_operand:V16QI 2 "register_operand" "w")]
		   UNSPEC_CONCAT))]
  "TARGET_SIMD"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  aarch64_split_combinev16qi (operands);
  DONE;
}
[(set_attr "type" "multiple")]
)
5455
;; Generic two-source permute instruction; the PERMUTE iterator expands this
;; to the zip/uzp/trn 1/2 family via perm_insn and perm_hilo.
(define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
	(unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
			  (match_operand:VALL_F16 2 "register_operand" "w")]
	 PERMUTE))]
  "TARGET_SIMD"
  "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_permute<q>")]
)
5465
;; Note immediate (third) operand is lane index not byte index.  It is
;; converted to the byte offset the EXT instruction expects (lane index
;; times element size) in the output statement below.
(define_insn "aarch64_ext<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
        (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
			  (match_operand:VALL_F16 2 "register_operand" "w")
			  (match_operand:SI 3 "immediate_operand" "i")]
	 UNSPEC_EXT))]
  "TARGET_SIMD"
{
  ;; Scale the lane index to a byte offset for the #imm operand of EXT.
  operands[3] = GEN_INT (INTVAL (operands[3])
      * GET_MODE_UNIT_SIZE (<MODE>mode));
  return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
}
  [(set_attr "type" "neon_ext<q>")]
)
5481
;; Element-reversal family (REV16/REV32/REV64 via the REVERSE iterator).
(define_insn "aarch64_rev<REVERSE:rev_op><mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
	(unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
                    REVERSE))]
  "TARGET_SIMD"
  "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_rev<q>")]
)
5490
;; ST2 of a D-register pair (vector element modes, VD): interleaving store
;; of the two registers held in OI-mode operand 1.
(define_insn "aarch64_st2<mode>_dreg"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:OI 1 "register_operand" "w")
                    (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST2))]
  "TARGET_SIMD"
  "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
  [(set_attr "type" "neon_store2_2reg")]
)
5500
;; ST2 counterpart for 64-bit scalar element modes (DX): emitted as ST1 of
;; two .1d registers, since single-element "pairs" need no interleaving.
(define_insn "aarch64_st2<mode>_dreg"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:OI 1 "register_operand" "w")
                    (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST2))]
  "TARGET_SIMD"
  "st1\\t{%S1.1d - %T1.1d}, %0"
  [(set_attr "type" "neon_store1_2reg")]
)
5510
;; ST3 of a D-register triple (vector element modes, VD) from CI-mode
;; operand 1.
(define_insn "aarch64_st3<mode>_dreg"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:CI 1 "register_operand" "w")
                    (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST3))]
  "TARGET_SIMD"
  "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
  [(set_attr "type" "neon_store3_3reg")]
)
5520
;; ST3 counterpart for 64-bit scalar element modes (DX): emitted as ST1 of
;; three .1d registers (no interleaving needed).
(define_insn "aarch64_st3<mode>_dreg"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:CI 1 "register_operand" "w")
                    (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST3))]
  "TARGET_SIMD"
  "st1\\t{%S1.1d - %U1.1d}, %0"
  [(set_attr "type" "neon_store1_3reg")]
)
5530
;; ST4 of a D-register quad (vector element modes, VD) from XI-mode
;; operand 1.
(define_insn "aarch64_st4<mode>_dreg"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:XI 1 "register_operand" "w")
                    (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST4))]
  "TARGET_SIMD"
  "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
  [(set_attr "type" "neon_store4_4reg")]
)
5540
;; ST4 counterpart for 64-bit scalar element modes (DX): emitted as ST1 of
;; four .1d registers (no interleaving needed).
(define_insn "aarch64_st4<mode>_dreg"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:XI 1 "register_operand" "w")
                    (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST4))]
  "TARGET_SIMD"
  "st1\\t{%S1.1d - %V1.1d}, %0"
  [(set_attr "type" "neon_store1_4reg")]
)
5550
;; Expand an stN builtin for D-register element modes: store the structure in
;; operand 1 to the address in operand 0 via the _dreg insn above, using a
;; BLKmode MEM sized to <nregs> 8-byte D registers.
(define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
 [(match_operand:DI 0 "register_operand" "r")
  (match_operand:VSTRUCT 1 "register_operand" "w")
  (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
  set_mem_size (mem, <VSTRUCT:nregs> * 8);	;; nregs D regs of 8 bytes.

  emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
  DONE;
})
5563
;; Expand an stN builtin for Q-register element modes: delegate to the
;; aarch64_simd_st<nregs> insn with a MEM of the full structure mode.
(define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
 [(match_operand:DI 0 "register_operand" "r")
  (match_operand:VSTRUCT 1 "register_operand" "w")
  (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  machine_mode mode = <VSTRUCT:MODE>mode;
  rtx mem = gen_rtx_MEM (mode, operands[0]);

  emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
  DONE;
})
5576
;; Expand an stN_lane builtin: store lane <operand 2> of each register of
;; structure operand 1 to the address in operand 0.  The MEM is BLKmode,
;; sized to one element per structure register.  Note: unlike the matching
;; ld_lane expander, no aarch64_simd_lane_bounds check is done here --
;; presumably the called pattern validates the index; TODO confirm.
(define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
 [(match_operand:DI 0 "register_operand" "r")
  (match_operand:VSTRUCT 1 "register_operand" "w")
  (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
  (match_operand:SI 2 "immediate_operand")]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
  set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
		     * <VSTRUCT:nregs>);

  emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
		mem, operands[1], operands[2]));
  DONE;
})
5592
;; Expand the st1 builtin: store vector operand 1 to the address in operand 0.
;; Little-endian uses a plain move; big-endian goes through the dedicated
;; aarch64_be_st1 pattern, mirroring the aarch64_ld1 expander above.
(define_expand "aarch64_st1<VALL_F16:mode>"
 [(match_operand:DI 0 "register_operand")
  (match_operand:VALL_F16 1 "register_operand")]
  "TARGET_SIMD"
{
  machine_mode mode = <VALL_F16:MODE>mode;
  rtx mem = gen_rtx_MEM (mode, operands[0]);

  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
  else
    emit_move_insn (mem, operands[1]);
  DONE;
})
5607
5608;; Expander for builtins to insert vector registers into large
5609;; opaque integer modes.
5610
5611;; Q-register list.  We don't need a D-reg inserter as we zero
5612;; extend them in arm_neon.h and insert the resulting Q-regs.
5613
;; Insert Q-register operand 2 at position <part> (constant operand 3) of
;; structure operand 1, producing operand 0: copy the whole structure first,
;; then overwrite the 16-byte subreg at offset part*16.
(define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
 [(match_operand:VSTRUCT 0 "register_operand" "+w")
  (match_operand:VSTRUCT 1 "register_operand" "0")
  (match_operand:VQ 2 "register_operand" "w")
  (match_operand:SI 3 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  int part = INTVAL (operands[3]);
  int offset = part * 16;	;; Q registers are 16 bytes apart.

  emit_move_insn (operands[0], operands[1]);
  emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
		  operands[2]);
  DONE;
})
5629
5630;; Standard pattern name vec_init<mode>.
5631
;; Standard vec_init pattern: build a vector from the PARALLEL of element
;; values in operand 1; all strategy selection lives in the target helper.
(define_expand "vec_init<mode>"
  [(match_operand:VALL_F16 0 "register_operand" "")
   (match_operand 1 "" "")]
  "TARGET_SIMD"
{
  aarch64_expand_vector_init (operands[0], operands[1]);
  DONE;
})
5640
;; Load-and-replicate: combine a scalar load with vec_duplicate into a single
;; LD1R that broadcasts the memory element to every lane.
(define_insn "*aarch64_simd_ld1r<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
	(vec_duplicate:VALL_F16
	  (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
  "TARGET_SIMD"
  "ld1r\\t{%0.<Vtype>}, %1"
  [(set_attr "type" "neon_load1_all_lanes")]
)
5649
;; FRECPE: floating-point reciprocal estimate, vector forms.
(define_insn "aarch64_frecpe<mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
	 UNSPEC_FRECPE))]
  "TARGET_SIMD"
  "frecpe\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_recpe_<stype><q>")]
)
5658
;; Scalar FRECPE/FRECPX (selected by the FRECP iterator's frecp_suffix) on
;; HF/SF/DF values.
(define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
  [(set (match_operand:GPF_F16 0 "register_operand" "=w")
	(unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
	 FRECP))]
  "TARGET_SIMD"
  "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1"
  [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF_F16:stype>")]
)
5667
;; FRECPS: floating-point reciprocal step (Newton-Raphson refinement step for
;; the reciprocal estimate), vector and scalar forms.
(define_insn "aarch64_frecps<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
	(unspec:VHSDF_HSDF
	  [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
	  (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
	  UNSPEC_FRECPS))]
  "TARGET_SIMD"
  "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_recps_<stype><q>")]
)
5678
;; URECPE: unsigned integer reciprocal estimate on 32-bit element vectors.
(define_insn "aarch64_urecpe<mode>"
  [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
        (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
                UNSPEC_URECPE))]
 "TARGET_SIMD"
 "urecpe\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
5686
5687;; Standard pattern name vec_extract<mode>.
5688
;; Standard vec_extract pattern: extract element <operand 2> of vector
;; operand 1 into scalar operand 0, via the get_lane pattern.
(define_expand "vec_extract<mode>"
  [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
   (match_operand:VALL_F16 1 "register_operand" "")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_SIMD"
{
    emit_insn
      (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
    DONE;
})
5699
5700;; aes
5701
;; AESE/AESD (selected by CRYPTO_AES/aes_op): single-round AES operation.
;; The instruction reads and writes its first register, so operand 1 is tied
;; to the output.
(define_insn "aarch64_crypto_aes<aes_op>v16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=w")
        (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
		       (match_operand:V16QI 2 "register_operand" "w")]
         CRYPTO_AES))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "aes<aes_op>\\t%0.16b, %2.16b"
  [(set_attr "type" "crypto_aese")]
)
5711
5712;; When AES/AESMC fusion is enabled we want the register allocation to
5713;; look like:
5714;;    AESE Vn, _
5715;;    AESMC Vn, Vn
5716;; So prefer to tie operand 1 to operand 0 when fusing.
5717
;; AESMC/AESIMC.  Two alternatives: the first ties input to output and is
;; enabled only when AES/AESMC fusion is on (see the comment above), steering
;; the register allocator toward the fusible "AESMC Vn, Vn" form; the second
;; is the unconstrained fallback, always enabled.
(define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=w,w")
	(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
	 CRYPTO_AESMC))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "aes<aesmc_op>\\t%0.16b, %1.16b"
  [(set_attr "type" "crypto_aesmc")
   (set_attr_alternative "enabled"
     [(if_then_else (match_test
		       "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
		     (const_string "yes" )
		     (const_string "no"))
      (const_string "yes")])]
)
5732
5733;; sha1
5734
;; SHA1H: fixed-rotate of a 32-bit value, scalar SI form.
(define_insn "aarch64_crypto_sha1hsi"
  [(set (match_operand:SI 0 "register_operand" "=w")
        (unspec:SI [(match_operand:SI 1
                       "register_operand" "w")]
         UNSPEC_SHA1H))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "sha1h\\t%s0, %s1"
  [(set_attr "type" "crypto_sha1_fast")]
)
5744
;; SHA1H applied to lane 0 of a V4SI vector — little-endian form (the
;; memory-order first element is lane 0).
(define_insn "aarch64_crypto_sha1hv4si"
  [(set (match_operand:SI 0 "register_operand" "=w")
	(unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
		     (parallel [(const_int 0)]))]
	 UNSPEC_SHA1H))]
  "TARGET_SIMD && TARGET_CRYPTO && !BYTES_BIG_ENDIAN"
  "sha1h\\t%s0, %s1"
  [(set_attr "type" "crypto_sha1_fast")]
)
5754
;; Big-endian counterpart of the pattern above: the same architectural
;; element is GCC lane 3 under big-endian lane numbering.
(define_insn "aarch64_be_crypto_sha1hv4si"
  [(set (match_operand:SI 0 "register_operand" "=w")
	(unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
		     (parallel [(const_int 3)]))]
	 UNSPEC_SHA1H))]
  "TARGET_SIMD && TARGET_CRYPTO && BYTES_BIG_ENDIAN"
  "sha1h\\t%s0, %s1"
  [(set_attr "type" "crypto_sha1_fast")]
)
5764
;; SHA1SU1: second half of the SHA1 schedule update; operand 1 is tied to
;; the output (instruction reads and writes its first register).
(define_insn "aarch64_crypto_sha1su1v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")]
         UNSPEC_SHA1SU1))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "sha1su1\\t%0.4s, %2.4s"
  [(set_attr "type" "crypto_sha1_fast")]
)
5774
;; SHA1C/SHA1M/SHA1P (selected by CRYPTO_SHA1/sha1_op): SHA1 hash-update
;; round, combining the tied 128-bit state (operand 1), a 32-bit hash word
;; (operand 2) and a schedule vector (operand 3).
(define_insn "aarch64_crypto_sha1<sha1_op>v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
         CRYPTO_SHA1))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "sha1<sha1_op>\\t%q0, %s2, %3.4s"
  [(set_attr "type" "crypto_sha1_slow")]
)
5785
;; SHA1SU0: first half of the SHA1 schedule update; operand 1 is tied to
;; the output.
(define_insn "aarch64_crypto_sha1su0v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
         UNSPEC_SHA1SU0))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "sha1su0\\t%0.4s, %2.4s, %3.4s"
  [(set_attr "type" "crypto_sha1_xor")]
)
5796
5797;; sha256
5798
;; SHA256H/SHA256H2 (selected by CRYPTO_SHA256/sha256_op): SHA256 hash-update
;; round; operand 1 is tied to the output.
(define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
         CRYPTO_SHA256))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
  [(set_attr "type" "crypto_sha256_slow")]
)
5809
;; SHA256SU0: first half of the SHA256 schedule update; operand 1 is tied to
;; the output.
(define_insn "aarch64_crypto_sha256su0v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")]
         UNSPEC_SHA256SU0))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "sha256su0\\t%0.4s, %2.4s"
  [(set_attr "type" "crypto_sha256_fast")]
)
5819
;; SHA256SU1: second half of the SHA256 schedule update; operand 1 is tied
;; to the output.
(define_insn "aarch64_crypto_sha256su1v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
         UNSPEC_SHA256SU1))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "sha256su1\\t%0.4s, %2.4s, %3.4s"
  [(set_attr "type" "crypto_sha256_slow")]
)
5830
5831;; pmull
5832
;; PMULL: 64x64 -> 128-bit polynomial (carry-less) multiply of two DI-mode
;; scalars, producing a TI-mode result.
(define_insn "aarch64_crypto_pmulldi"
  [(set (match_operand:TI 0 "register_operand" "=w")
        (unspec:TI  [(match_operand:DI 1 "register_operand" "w")
		     (match_operand:DI 2 "register_operand" "w")]
		    UNSPEC_PMULL))]
 "TARGET_SIMD && TARGET_CRYPTO"
 "pmull\\t%0.1q, %1.1d, %2.1d"
  [(set_attr "type" "neon_mul_d_long")]
)
5842
;; PMULL2: polynomial multiply of the high 64-bit halves of two V2DI
;; vectors, producing a TI-mode result.
(define_insn "aarch64_crypto_pmullv2di"
 [(set (match_operand:TI 0 "register_operand" "=w")
       (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
		   (match_operand:V2DI 2 "register_operand" "w")]
		  UNSPEC_PMULL2))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "pmull2\\t%0.1q, %1.2d, %2.2d"
  [(set_attr "type" "neon_mul_d_long")]
)
5852