xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/config/i386/sse.md (revision fa28c6faa16e0b00edee7acdcaf4899797043def)
1;; GCC machine description for SSE instructions
2;; Copyright (C) 2005-2013 Free Software Foundation, Inc.
3;;
4;; This file is part of GCC.
5;;
6;; GCC is free software; you can redistribute it and/or modify
7;; it under the terms of the GNU General Public License as published by
8;; the Free Software Foundation; either version 3, or (at your option)
9;; any later version.
10;;
11;; GCC is distributed in the hope that it will be useful,
12;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14;; GNU General Public License for more details.
15;;
16;; You should have received a copy of the GNU General Public License
17;; along with GCC; see the file COPYING3.  If not see
18;; <http://www.gnu.org/licenses/>.
19
;; Codes added to the "unspec" C enum for the SSE/AVX patterns.
;; The entries are grouped by the ISA extension named in each
;; sub-heading comment.
20(define_c_enum "unspec" [
21  ;; SSE
22  UNSPEC_MOVNT
23  UNSPEC_LOADU
24  UNSPEC_STOREU
25
26  ;; SSE3
27  UNSPEC_LDDQU
28
29  ;; SSSE3
30  UNSPEC_PSHUFB
31  UNSPEC_PSIGN
32  UNSPEC_PALIGNR
33
34  ;; For SSE4A support
35  UNSPEC_EXTRQI
36  UNSPEC_EXTRQ
37  UNSPEC_INSERTQI
38  UNSPEC_INSERTQ
39
40  ;; For SSE4.1 support
41  UNSPEC_BLENDV
42  UNSPEC_INSERTPS
43  UNSPEC_DP
44  UNSPEC_MOVNTDQA
45  UNSPEC_MPSADBW
46  UNSPEC_PHMINPOSUW
47  UNSPEC_PTEST
48
49  ;; For SSE4.2 support
50  UNSPEC_PCMPESTR
51  UNSPEC_PCMPISTR
52
53  ;; For FMA4 support
54  UNSPEC_FMADDSUB
55  UNSPEC_XOP_UNSIGNED_CMP
56  UNSPEC_XOP_TRUEFALSE
57  UNSPEC_XOP_PERMUTE
58  UNSPEC_FRCZ
59
60  ;; For AES support
61  UNSPEC_AESENC
62  UNSPEC_AESENCLAST
63  UNSPEC_AESDEC
64  UNSPEC_AESDECLAST
65  UNSPEC_AESIMC
66  UNSPEC_AESKEYGENASSIST
67
68  ;; For PCLMUL support
69  UNSPEC_PCLMUL
70
71  ;; For AVX support
72  UNSPEC_PCMP
73  UNSPEC_VPERMIL
74  UNSPEC_VPERMIL2
75  UNSPEC_VPERMIL2F128
76  UNSPEC_CAST
77  UNSPEC_VTESTP
78  UNSPEC_VCVTPH2PS
79  UNSPEC_VCVTPS2PH
80
81  ;; For AVX2 support
82  UNSPEC_VPERMVAR
83  UNSPEC_VPERMTI
84  UNSPEC_GATHER
85  UNSPEC_VSIBADDR
86])
87
;; Codes added to the "unspecv" C enum (used with unspec_volatile).
;; NOTE(review): none of these codes is referenced in the visible part
;; of the file; their users are presumably further down.
88(define_c_enum "unspecv" [
89  UNSPECV_LDMXCSR
90  UNSPECV_STMXCSR
91  UNSPECV_CLFLUSH
92  UNSPECV_MONITOR
93  UNSPECV_MWAIT
94  UNSPECV_VZEROALL
95  UNSPECV_VZEROUPPER
96])
97
98;; All vector modes including V?TImode, used in move patterns.
;; The 256-bit member of each pair is enabled only under TARGET_AVX;
;; the 128-bit members carry no extra condition.
99(define_mode_iterator V16
100  [(V32QI "TARGET_AVX") V16QI
101   (V16HI "TARGET_AVX") V8HI
102   (V8SI "TARGET_AVX") V4SI
103   (V4DI "TARGET_AVX") V2DI
104   (V2TI "TARGET_AVX") V1TI
105   (V8SF "TARGET_AVX") V4SF
106   (V4DF "TARGET_AVX") V2DF])
107
108;; All vector modes
;; Unlike V16 this list has no V2TI/V1TI entries, and V2DF is
;; additionally guarded by TARGET_SSE2.
109(define_mode_iterator V
110  [(V32QI "TARGET_AVX") V16QI
111   (V16HI "TARGET_AVX") V8HI
112   (V8SI "TARGET_AVX") V4SI
113   (V4DI "TARGET_AVX") V2DI
114   (V8SF "TARGET_AVX") V4SF
115   (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
116
117;; All 128bit vector modes
;; Only V2DF carries a condition (TARGET_SSE2).
118(define_mode_iterator V_128
119  [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
120
121;; All 256bit vector modes
;; No per-mode conditions here; any ISA gating has to come from the
;; condition of the pattern that uses the iterator.
122(define_mode_iterator V_256
123  [V32QI V16HI V8SI V4DI V8SF V4DF])
124
125;; All vector float modes
;; 256-bit modes need TARGET_AVX; V2DF needs TARGET_SSE2.
126(define_mode_iterator VF
127  [(V8SF "TARGET_AVX") V4SF
128   (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
129
130;; All SFmode vector float modes
131(define_mode_iterator VF1
132  [(V8SF "TARGET_AVX") V4SF])
133
134;; All DFmode vector float modes
;; V2DF is unconditional here, so users must test TARGET_SSE2
;; themselves (the VF2 expanders visible in this file do).
135(define_mode_iterator VF2
136  [(V4DF "TARGET_AVX") V2DF])
137
138;; All 128bit vector float modes
139(define_mode_iterator VF_128
140  [V4SF (V2DF "TARGET_SSE2")])
141
142;; All 256bit vector float modes
143(define_mode_iterator VF_256
144  [V8SF V4DF])
145
146;; All vector integer modes
;; 256-bit modes are enabled under TARGET_AVX.
147(define_mode_iterator VI
148  [(V32QI "TARGET_AVX") V16QI
149   (V16HI "TARGET_AVX") V8HI
150   (V8SI "TARGET_AVX") V4SI
151   (V4DI "TARGET_AVX") V2DI])
152
;; Same mode list as VI, but the 256-bit modes require TARGET_AVX2
;; instead of TARGET_AVX.
153(define_mode_iterator VI_AVX2
154  [(V32QI "TARGET_AVX2") V16QI
155   (V16HI "TARGET_AVX2") V8HI
156   (V8SI "TARGET_AVX2") V4SI
157   (V4DI "TARGET_AVX2") V2DI])
158
159;; All QImode vector integer modes
160(define_mode_iterator VI1
161  [(V32QI "TARGET_AVX") V16QI])
162
163;; All DImode vector integer modes
164(define_mode_iterator VI8
165  [(V4DI "TARGET_AVX") V2DI])
166
;; Per-element-size integer iterators.  The digit in the name is the
;; element size in bytes (1 = QI, 2 = HI, 4 = SI, 8 = DI), and the
;; 256-bit mode of each pair is enabled only under TARGET_AVX2.
167(define_mode_iterator VI1_AVX2
168  [(V32QI "TARGET_AVX2") V16QI])
169
170(define_mode_iterator VI2_AVX2
171  [(V16HI "TARGET_AVX2") V8HI])
172
173(define_mode_iterator VI4_AVX2
174  [(V8SI "TARGET_AVX2") V4SI])
175
176(define_mode_iterator VI8_AVX2
177  [(V4DI "TARGET_AVX2") V2DI])
178
179;; ??? We should probably use TImode instead.
;; Whole-register integer modes: V1TI, plus V2TI under TARGET_AVX2.
180(define_mode_iterator VIMAX_AVX2
181  [(V2TI "TARGET_AVX2") V1TI])
182
183;; ??? This should probably be dropped in favor of VIMAX_AVX2.
;; Differs from VIMAX_AVX2 only in using scalar TI rather than V1TI
;; for the 128-bit case.
184(define_mode_iterator SSESCALARMODE
185  [(V2TI "TARGET_AVX2") TI])
186
;; Combinations of integer element sizes.  The digits in each name
;; list the element sizes in bytes that are covered; the 256-bit mode
;; of every pair requires TARGET_AVX2.
187(define_mode_iterator VI12_AVX2
188  [(V32QI "TARGET_AVX2") V16QI
189   (V16HI "TARGET_AVX2") V8HI])
190
191(define_mode_iterator VI24_AVX2
192  [(V16HI "TARGET_AVX2") V8HI
193   (V8SI "TARGET_AVX2") V4SI])
194
195(define_mode_iterator VI124_AVX2
196  [(V32QI "TARGET_AVX2") V16QI
197   (V16HI "TARGET_AVX2") V8HI
198   (V8SI "TARGET_AVX2") V4SI])
199
200(define_mode_iterator VI248_AVX2
201  [(V16HI "TARGET_AVX2") V8HI
202   (V8SI "TARGET_AVX2") V4SI
203   (V4DI "TARGET_AVX2") V2DI])
204
205(define_mode_iterator VI48_AVX2
206  [(V8SI "TARGET_AVX2") V4SI
207   (V4DI "TARGET_AVX2") V2DI])
208
;; 4- and 8-byte element modes, float and integer.  The float modes are
;; unconditional; every integer mode (128-bit ones included) requires
;; TARGET_AVX2.
209(define_mode_iterator V48_AVX2
210  [V4SF V2DF
211   V8SF V4DF
212   (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
213   (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
214
;; Mode attributes mapping a vector mode to an ISA-name string.
;; NOTE(review): no use is visible in this part of the file; these are
;; presumably substituted into pattern names the same way <sse> and
;; <sse2> are in the move patterns below -- confirm.

;; "sse2" for 128-bit integer modes, "avx2" for 256-bit ones.
215(define_mode_attr sse2_avx2
216  [(V16QI "sse2") (V32QI "avx2")
217   (V8HI "sse2") (V16HI "avx2")
218   (V4SI "sse2") (V8SI "avx2")
219   (V2DI "sse2") (V4DI "avx2")
220   (V1TI "sse2") (V2TI "avx2")])
221
;; "ssse3" for 64/128-bit modes (V4HI and scalar TI included), "avx2"
;; for 256-bit ones.
222(define_mode_attr ssse3_avx2
223   [(V16QI "ssse3") (V32QI "avx2")
224    (V4HI "ssse3") (V8HI "ssse3") (V16HI "avx2")
225    (V4SI "ssse3") (V8SI "avx2")
226    (V2DI "ssse3") (V4DI "avx2")
227    (TI "ssse3") (V2TI "avx2")])
228
;; "sse4_1" for 128-bit integer modes, "avx2" for 256-bit ones.
229(define_mode_attr sse4_1_avx2
230   [(V16QI "sse4_1") (V32QI "avx2")
231    (V8HI "sse4_1") (V16HI "avx2")
232    (V4SI "sse4_1") (V8SI "avx2")
233    (V2DI "sse4_1") (V4DI "avx2")])
234
;; "avx" for float modes, "avx2" for integer modes, regardless of width.
235(define_mode_attr avx_avx2
236  [(V4SF "avx") (V2DF "avx")
237   (V8SF "avx") (V4DF "avx")
238   (V4SI "avx2") (V2DI "avx2")
239   (V8SI "avx2") (V4DI "avx2")])
240
;; "vec" for 128-bit integer modes, "avx2" for 256-bit ones.
241(define_mode_attr vec_avx2
242  [(V16QI "vec") (V32QI "avx2")
243   (V8HI "vec") (V16HI "avx2")
244   (V4SI "vec") (V8SI "avx2")
245   (V2DI "vec") (V4DI "avx2")])
246
;; Mode with the same element count but elements twice as wide
;; (HI -> SI, QI -> HI).
247(define_mode_attr ssedoublemode
248  [(V16HI "V16SI") (V8HI "V8SI") (V4HI "V4SI")
249   (V32QI "V32HI") (V16QI "V16HI")])
250
;; QImode vector of the same total size as the given DImode vector.
251(define_mode_attr ssebytemode
252  [(V4DI "V32QI") (V2DI "V16QI")])
253
;; None of the iterators in this group carries a per-mode condition.
254;; All 128bit vector integer modes
255(define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
256
257;; All 256bit vector integer modes
258(define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
259
260;; Random 128bit vector integer mode combinations
;; The digits list the element sizes in bytes, except that VI128_128
;; as written contains the 1-, 2- and 8-byte element modes.
261(define_mode_iterator VI12_128 [V16QI V8HI])
262(define_mode_iterator VI14_128 [V16QI V4SI])
263(define_mode_iterator VI124_128 [V16QI V8HI V4SI])
264(define_mode_iterator VI128_128 [V16QI V8HI V2DI])
265(define_mode_iterator VI24_128 [V8HI V4SI])
266(define_mode_iterator VI248_128 [V8HI V4SI V2DI])
267(define_mode_iterator VI48_128 [V4SI V2DI])
268
269;; Random 256bit vector integer mode combinations
270(define_mode_iterator VI124_256 [V32QI V16HI V8SI])
271(define_mode_iterator VI48_256 [V8SI V4DI])
272
273;; Int-float size matches
;; Each pairs the integer and float vector modes that share an element
;; size (4 or 8 bytes) and a total width (128 or 256 bits).
274(define_mode_iterator VI4F_128 [V4SI V4SF])
275(define_mode_iterator VI8F_128 [V2DI V2DF])
276(define_mode_iterator VI4F_256 [V8SI V8SF])
277(define_mode_iterator VI8F_256 [V4DI V4DF])
278
279;; Mapping from float mode to required SSE level
;; Used as the leading part of pattern names below, e.g.
;; "<sse>_loadu<ssemodesuffix><avxsizesuffix>".
280(define_mode_attr sse
281  [(SF "sse") (DF "sse2")
282   (V4SF "sse") (V2DF "sse2")
283   (V8SF "avx") (V4DF "avx")])
284
;; Name prefix for QI/DI integer vector patterns: "sse2" for 128-bit,
;; "avx" for 256-bit (see "<sse2>_loaddqu<avxsizesuffix>").
285(define_mode_attr sse2
286  [(V16QI "sse2") (V32QI "avx")
287   (V2DI "sse2") (V4DI "avx")])
288
;; Name prefix used by "<sse3>_lddqu<avxsizesuffix>".
289(define_mode_attr sse3
290  [(V16QI "sse3") (V32QI "avx")])
291
;; "sse4_1" for 128-bit float modes, "avx" for 256-bit ones.
292(define_mode_attr sse4_1
293  [(V4SF "sse4_1") (V2DF "sse4_1")
294   (V8SF "avx") (V4DF "avx")])
295
;; Pattern-name suffix: "256" for 256-bit modes, empty for 128-bit ones.
296(define_mode_attr avxsizesuffix
297  [(V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
298   (V16QI "") (V8HI "") (V4SI "") (V2DI "")
299   (V8SF "256") (V4DF "256")
300   (V4SF "") (V2DF "")])
301
302;; SSE instruction mode
;; Value for the insn "mode" attribute: integer vectors map to OI
;; (256-bit) or TI (128-bit); float vectors map to themselves.
303(define_mode_attr sseinsnmode
304  [(V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
305   (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
306   (V8SF "V8SF") (V4DF "V4DF")
307   (V4SF "V4SF") (V2DF "V2DF")
308   (TI "TI")])
309
310;; Mapping of vector float modes to an integer mode of the same size
;; Integer vector modes are also listed and map to themselves.
311(define_mode_attr sseintvecmode
312  [(V8SF "V8SI") (V4DF "V4DI")
313   (V4SF "V4SI") (V2DF "V2DI")
314   (V8SI "V8SI") (V4DI "V4DI")
315   (V4SI "V4SI") (V2DI "V2DI")
316   (V16HI "V16HI") (V8HI "V8HI")
317   (V32QI "V32QI") (V16QI "V16QI")])
318
;; Same mapping as sseintvecmode, spelled in lower case.
319(define_mode_attr sseintvecmodelower
320  [(V8SF "v8si") (V4DF "v4di")
321   (V4SF "v4si") (V2DF "v2di")
322   (V8SI "v8si") (V4DI "v4di")
323   (V4SI "v4si") (V2DI "v2di")
324   (V16HI "v16hi") (V8HI "v8hi")
325   (V32QI "v32qi") (V16QI "v16qi")])
326
327;; Mapping of vector modes to a vector mode of double size
;; Same element type, twice the number of elements.
328(define_mode_attr ssedoublevecmode
329  [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
330   (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
331   (V8SF "V16SF") (V4DF "V8DF")
332   (V4SF "V8SF") (V2DF "V4DF")])
333
334;; Mapping of vector modes to a vector mode of half size
;; Same element type, half the number of elements.  V2DI and V2DF have
;; no entry.
335(define_mode_attr ssehalfvecmode
336  [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
337   (V16QI  "V8QI") (V8HI  "V4HI") (V4SI "V2SI")
338   (V8SF "V4SF") (V4DF "V2DF")
339   (V4SF "V2SF")])
340
341;; Mapping of vector modes to packed single mode of the same size
;; Every 256-bit mode maps to V8SF and every 128-bit mode to V4SF.
;; The move patterns below use this for their "mode" attribute.
342(define_mode_attr ssePSmode
343  [(V32QI "V8SF") (V16QI "V4SF")
344   (V16HI "V8SF") (V8HI "V4SF")
345   (V8SI "V8SF") (V4SI "V4SF")
346   (V4DI "V8SF") (V2DI "V4SF")
347   (V2TI "V8SF") (V1TI "V4SF")
348   (V8SF "V8SF") (V4SF "V4SF")
349   (V4DF "V8SF") (V2DF "V4SF")])
350
351;; Mapping of vector modes back to the scalar modes
352(define_mode_attr ssescalarmode
353  [(V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI")
354   (V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI")
355   (V8SF "SF") (V4DF "DF")
356   (V4SF "SF") (V2DF "DF")])
357
358;; Number of scalar elements in each vector type
;; The value is a decimal string, not a number.
359(define_mode_attr ssescalarnum
360  [(V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
361   (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
362   (V8SF "8") (V4DF "4")
363   (V4SF "4") (V2DF "2")])
364
365;; SSE prefix for integer vector modes
;; "p" for integer modes, empty string for float modes.
366(define_mode_attr sseintprefix
367  [(V2DI "p") (V2DF "")
368   (V4DI "p") (V4DF "")
369   (V4SI "p") (V4SF "")
370   (V8SI "p") (V8SF "")])
371
372;; SSE scalar suffix for vector modes
;; Substituted into mnemonics by the <sse>_vm* patterns below, e.g.
;; "mul<ssescalarmodesuffix>".  V8SI/V4DI map to "ss"/"sd", while
;; V4SI maps to "d".
373(define_mode_attr ssescalarmodesuffix
374  [(SF "ss") (DF "sd")
375   (V8SF "ss") (V4DF "sd")
376   (V4SF "ss") (V2DF "sd")
377   (V8SI "ss") (V4DI "sd")
378   (V4SI "d")])
379
380;; Pack/unpack vector modes
;; sseunpackmode: same total size, elements twice as wide (half as many).
381(define_mode_attr sseunpackmode
382  [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
383   (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")])
384
;; ssepackmode: the inverse mapping -- same total size, elements half
;; as wide (twice as many).
385(define_mode_attr ssepackmode
386  [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
387   (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")])
388
389;; Mapping of the max integer size for xop rotate immediate constraint
;; Each value is the element width in bits minus one, as a string.
390(define_mode_attr sserotatemax
391  [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
392
393;; Mapping of mode to cast intrinsic name
;; Covers exactly the modes of the AVX256MODE2P iterator defined below.
394(define_mode_attr castmode [(V8SI "si") (V8SF "ps") (V4DF "pd")])
395
396;; Instruction suffix for sign and zero extensions.
;; This is a code attribute (keyed by RTL code), not a mode attribute.
397(define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
398
399;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
;; NOTE(review): the "%~" in the integer entries is presumably an output
;; escape that selects "i" or "f" when the template is printed -- confirm
;; against the i386 operand-printing code.
400(define_mode_attr i128
401  [(V8SF "f128") (V4DF "f128") (V32QI "%~128") (V16HI "%~128")
402   (V8SI "%~128") (V4DI "%~128")])
403
404;; Mix-n-match
405(define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
406
407;; Mapping of immediate bits for blend instructions
;; Each value is 2^N - 1 for an N-element vector, i.e. one mask bit per
;; element, all set.
408(define_mode_attr blendbits
409  [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
410
411;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
412
413;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
414;;
415;; Move patterns
416;;
417;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
418
419;; All of these patterns are enabled for SSE1 as well as SSE2.
420;; This is essential for maintaining stable calling conventions.
421
;; Vector move expander, instantiated for every mode of V16.
;; The preparation code always passes the operands to
;; ix86_expand_vector_move and ends with DONE, so the RTL template
;; itself is never emitted.
422(define_expand "mov<mode>"
423  [(set (match_operand:V16 0 "nonimmediate_operand")
424	(match_operand:V16 1 "nonimmediate_operand"))]
425  "TARGET_SSE"
426{
427  ix86_expand_vector_move (<MODE>mode, operands);
428  DONE;
429})
430
;; Vector move insn for every mode of V16.
;;
;; Alternatives:
;;   0  constant matching constraint "C" -> register
;;   1  register or memory -> register
;;   2  register -> memory
;; The insn condition requires at least one operand to be a register.
;;
;; Output: alternative 0 defers to standard_sse_constant_opcode.
;; Alternatives 1 and 2 pick a ps / pd / dq move from the insn's "mode"
;; attribute.  When TARGET_AVX is set and either operand satisfies
;; misaligned_operand, the unaligned mnemonic (vmovups / vmovupd /
;; vmovdqu) is returned instead of the aligned one.
;;
;; The "mode" attribute is the first matching arm of the cond below.
;; The packed-single mode <ssePSmode> is chosen when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL holds, or for the store
;; alternative under TARGET_SSE_TYPELESS_STORES.  V4SF is chosen without
;; SSE2 or when optimizing the function for size.
431(define_insn "*mov<mode>_internal"
432  [(set (match_operand:V16 0 "nonimmediate_operand"               "=x,x ,m")
433	(match_operand:V16 1 "nonimmediate_or_sse_const_operand"  "C ,xm,x"))]
434  "TARGET_SSE
435   && (register_operand (operands[0], <MODE>mode)
436       || register_operand (operands[1], <MODE>mode))"
437{
438  switch (which_alternative)
439    {
440    case 0:
441      return standard_sse_constant_opcode (insn, operands[1]);
442    case 1:
443    case 2:
444      switch (get_attr_mode (insn))
445	{
446	case MODE_V8SF:
447	case MODE_V4SF:
448	  if (TARGET_AVX
449	      && (misaligned_operand (operands[0], <MODE>mode)
450		  || misaligned_operand (operands[1], <MODE>mode)))
451	    return "vmovups\t{%1, %0|%0, %1}";
452	  else
453	    return "%vmovaps\t{%1, %0|%0, %1}";
454
455	case MODE_V4DF:
456	case MODE_V2DF:
457	  if (TARGET_AVX
458	      && (misaligned_operand (operands[0], <MODE>mode)
459		  || misaligned_operand (operands[1], <MODE>mode)))
460	    return "vmovupd\t{%1, %0|%0, %1}";
461	  else
462	    return "%vmovapd\t{%1, %0|%0, %1}";
463
464	case MODE_OI:
465	case MODE_TI:
466	  if (TARGET_AVX
467	      && (misaligned_operand (operands[0], <MODE>mode)
468		  || misaligned_operand (operands[1], <MODE>mode)))
469	    return "vmovdqu\t{%1, %0|%0, %1}";
470	  else
471	    return "%vmovdqa\t{%1, %0|%0, %1}";
472
473	default:
474	  gcc_unreachable ();
475	}
476    default:
477      gcc_unreachable ();
478    }
479}
480  [(set_attr "type" "sselog1,ssemov,ssemov")
481   (set_attr "prefix" "maybe_vex")
482   (set (attr "mode")
483	(cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
484		 (const_string "<ssePSmode>")
485	       (and (eq_attr "alternative" "2")
486		    (match_test "TARGET_SSE_TYPELESS_STORES"))
487		 (const_string "<ssePSmode>")
488	       (match_test "TARGET_AVX")
489		 (const_string "<sseinsnmode>")
490	       (ior (not (match_test "TARGET_SSE2"))
491		    (match_test "optimize_function_for_size_p (cfun)"))
492		 (const_string "V4SF")
493	       (and (eq_attr "alternative" "0")
494		    (match_test "TARGET_SSE_LOAD0_BY_PXOR"))
495		 (const_string "TI")
496	      ]
497	      (const_string "<sseinsnmode>")))])
498
;; Builds a V2DI whose first element is element 0 of operand 1 and whose
;; second element is constant zero; emitted as a single movq.
499(define_insn "sse2_movq128"
500  [(set (match_operand:V2DI 0 "register_operand" "=x")
501	(vec_concat:V2DI
502	  (vec_select:DI
503	    (match_operand:V2DI 1 "nonimmediate_operand" "xm")
504	    (parallel [(const_int 0)]))
505	  (const_int 0)))]
506  "TARGET_SSE2"
507  "%vmovq\t{%1, %0|%0, %1}"
508  [(set_attr "type" "ssemov")
509   (set_attr "prefix" "maybe_vex")
510   (set_attr "mode" "TI")])
511
512;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
513;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
514;; from memory, we'd prefer to load the memory directly into the %xmm
515;; register.  To facilitate this happy circumstance, this pattern won't
516;; split until after register allocation.  If the 64-bit value didn't
517;; come from memory, this is the best we can do.  This is much better
518;; than storing %edx:%eax into a stack temporary and loading an %xmm
519;; from there.
520
;; Enabled only for 32-bit targets with SSE2 and TARGET_INTER_UNIT_MOVES.
;; The output template is "#", so the insn must be split; the split
;; runs once reload_completed is true.
;;
;; Register source (alternative 0): sse2_loadld loads the SImode subreg
;; at byte offset 0 into operand 0 and the one at byte offset 4 into the
;; scratch operand 2, then vec_interleave_lowv4si combines the two into
;; operand 0.  The scratch is earlyclobber ("=&x").
;; Memory source (alternative 1): a single vec_concatv2di of the memory
;; operand with zero; the scratch is unused ("X").
521(define_insn_and_split "movdi_to_sse"
522  [(parallel
523    [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
524	  (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
525     (clobber (match_scratch:V4SI 2 "=&x,X"))])]
526  "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
527  "#"
528  "&& reload_completed"
529  [(const_int 0)]
530{
531 if (register_operand (operands[1], DImode))
532   {
533      /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
534	 Assemble the 64-bit DImode value in an xmm register.  */
535      emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
536				  gen_rtx_SUBREG (SImode, operands[1], 0)));
537      emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
538				  gen_rtx_SUBREG (SImode, operands[1], 4)));
539      emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
540					     operands[2]));
541    }
542 else if (memory_operand (operands[1], DImode))
543   emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
544				  operands[1], const0_rtx));
545 else
546   gcc_unreachable ();
547})
548
;; After reload, rewrite a V4SF register load whose source satisfies
;; zero_extended_scalar_load_operand.  The replacement is a vec_merge
;; (mask 1) of the duplicated SFmode scalar with an all-zero V4SF.
;; Operand 1 is replaced by its SFmode subreg at offset 0; operand 2 is
;; created here as the zero vector.
549(define_split
550  [(set (match_operand:V4SF 0 "register_operand")
551	(match_operand:V4SF 1 "zero_extended_scalar_load_operand"))]
552  "TARGET_SSE && reload_completed"
553  [(set (match_dup 0)
554	(vec_merge:V4SF
555	  (vec_duplicate:V4SF (match_dup 1))
556	  (match_dup 2)
557	  (const_int 1)))]
558{
559  operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
560  operands[2] = CONST0_RTX (V4SFmode);
561})
562
;; V2DF counterpart of the V4SF split: after reload, a load matching
;; zero_extended_scalar_load_operand becomes a vec_concat of the DFmode
;; subreg of operand 1 (offset 0) with a DFmode zero (operand 2).
563(define_split
564  [(set (match_operand:V2DF 0 "register_operand")
565	(match_operand:V2DF 1 "zero_extended_scalar_load_operand"))]
566  "TARGET_SSE2 && reload_completed"
567  [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
568{
569  operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
570  operands[2] = CONST0_RTX (DFmode);
571})
572
;; Push expander for every V16 mode.  All work is delegated to
;; ix86_expand_push; DONE means the template is not emitted.
573(define_expand "push<mode>1"
574  [(match_operand:V16 0 "register_operand")]
575  "TARGET_SSE"
576{
577  ix86_expand_push (<MODE>mode, operands[0]);
578  DONE;
579})
580
;; Misaligned vector move expander for every V16 mode.  All work is
;; delegated to ix86_expand_vector_move_misalign; DONE means the
;; template is not emitted.
581(define_expand "movmisalign<mode>"
582  [(set (match_operand:V16 0 "nonimmediate_operand")
583	(match_operand:V16 1 "nonimmediate_operand"))]
584  "TARGET_SSE"
585{
586  ix86_expand_vector_move_misalign (<MODE>mode, operands);
587  DONE;
588})
589
;; Unaligned load (UNSPEC_LOADU) of a float vector from memory into a
;; register, for every mode of VF.
;; Emits movups when the insn's "mode" attribute is V8SF or V4SF, and
;; movu<ssemodesuffix> otherwise.  The "mode" attribute is forced to the
;; packed-single mode under TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL, and
;; to V4SF when optimizing for size without AVX.
590(define_insn "<sse>_loadu<ssemodesuffix><avxsizesuffix>"
591  [(set (match_operand:VF 0 "register_operand" "=x")
592	(unspec:VF
593	  [(match_operand:VF 1 "memory_operand" "m")]
594	  UNSPEC_LOADU))]
595  "TARGET_SSE"
596{
597  switch (get_attr_mode (insn))
598    {
599    case MODE_V8SF:
600    case MODE_V4SF:
601      return "%vmovups\t{%1, %0|%0, %1}";
602    default:
603      return "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}";
604    }
605}
606  [(set_attr "type" "ssemov")
607   (set_attr "movu" "1")
608   (set_attr "ssememalign" "8")
609   (set_attr "prefix" "maybe_vex")
610   (set (attr "mode")
611	(cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
612		 (const_string "<ssePSmode>")
613	       (match_test "TARGET_AVX")
614		 (const_string "<MODE>")
615	       (match_test "optimize_function_for_size_p (cfun)")
616		 (const_string "V4SF")
617	      ]
618	      (const_string "<MODE>")))])
619
;; Unaligned store (UNSPEC_STOREU) of a float vector register to memory,
;; for every mode of VF.
;; Mnemonic selection mirrors the unaligned load pattern.  The "mode"
;; attribute additionally becomes the packed-single mode under
;; TARGET_SSE_TYPELESS_STORES.
620(define_insn "<sse>_storeu<ssemodesuffix><avxsizesuffix>"
621  [(set (match_operand:VF 0 "memory_operand" "=m")
622	(unspec:VF
623	  [(match_operand:VF 1 "register_operand" "x")]
624	  UNSPEC_STOREU))]
625  "TARGET_SSE"
626{
627  switch (get_attr_mode (insn))
628    {
629    case MODE_V8SF:
630    case MODE_V4SF:
631      return "%vmovups\t{%1, %0|%0, %1}";
632    default:
633      return "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}";
634    }
635}
636  [(set_attr "type" "ssemov")
637   (set_attr "movu" "1")
638   (set_attr "ssememalign" "8")
639   (set_attr "prefix" "maybe_vex")
640   (set (attr "mode")
641	(cond [(ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
642		    (match_test "TARGET_SSE_TYPELESS_STORES"))
643		 (const_string "<ssePSmode>")
644	       (match_test "TARGET_AVX")
645		 (const_string "<MODE>")
646	       (match_test "optimize_function_for_size_p (cfun)")
647		 (const_string "V4SF")
648	      ]
649	      (const_string "<MODE>")))])
650
;; Unaligned load (UNSPEC_LOADU) of a QImode-element integer vector
;; (VI1) from memory into a register.
;; Emits movups when the "mode" attribute is V8SF/V4SF, else movdqu.
;; prefix_data16 is "*" under AVX and "1" otherwise.
;; NOTE(review): prefix_data16 stays "1" even when the non-AVX arm
;; selects the V4SF form -- confirm this is what the length computation
;; expects.
651(define_insn "<sse2>_loaddqu<avxsizesuffix>"
652  [(set (match_operand:VI1 0 "register_operand" "=x")
653	(unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
654		    UNSPEC_LOADU))]
655  "TARGET_SSE2"
656{
657  switch (get_attr_mode (insn))
658    {
659    case MODE_V8SF:
660    case MODE_V4SF:
661      return "%vmovups\t{%1, %0|%0, %1}";
662    default:
663      return "%vmovdqu\t{%1, %0|%0, %1}";
664    }
665}
666  [(set_attr "type" "ssemov")
667   (set_attr "movu" "1")
668   (set_attr "ssememalign" "8")
669   (set (attr "prefix_data16")
670     (if_then_else
671       (match_test "TARGET_AVX")
672     (const_string "*")
673     (const_string "1")))
674   (set_attr "prefix" "maybe_vex")
675   (set (attr "mode")
676	(cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
677		 (const_string "<ssePSmode>")
678	       (match_test "TARGET_AVX")
679		 (const_string "<sseinsnmode>")
680	       (match_test "optimize_function_for_size_p (cfun)")
681	         (const_string "V4SF")
682	      ]
683	      (const_string "<sseinsnmode>")))])
684
;; Unaligned store (UNSPEC_STOREU) of a QImode-element integer vector
;; (VI1) register to memory.
;; Emits movups when the "mode" attribute is V8SF/V4SF, else movdqu.
;; The "mode" attribute becomes the packed-single mode under either
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL or TARGET_SSE_TYPELESS_STORES.
685(define_insn "<sse2>_storedqu<avxsizesuffix>"
686  [(set (match_operand:VI1 0 "memory_operand" "=m")
687	(unspec:VI1 [(match_operand:VI1 1 "register_operand" "x")]
688		    UNSPEC_STOREU))]
689  "TARGET_SSE2"
690{
691  switch (get_attr_mode (insn))
692    {
693    case MODE_V8SF:
694    case MODE_V4SF:
695      return "%vmovups\t{%1, %0|%0, %1}";
696    default:
697      return "%vmovdqu\t{%1, %0|%0, %1}";
698    }
699}
700  [(set_attr "type" "ssemov")
701   (set_attr "movu" "1")
702   (set_attr "ssememalign" "8")
703   (set (attr "prefix_data16")
704     (if_then_else
705       (match_test "TARGET_AVX")
706     (const_string "*")
707     (const_string "1")))
708   (set_attr "prefix" "maybe_vex")
709   (set (attr "mode")
710	(cond [(ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
711		    (match_test "TARGET_SSE_TYPELESS_STORES"))
712		 (const_string "<ssePSmode>")
713	       (match_test "TARGET_AVX")
714		 (const_string "<sseinsnmode>")
715	       (match_test "optimize_function_for_size_p (cfun)")
716	         (const_string "V4SF")
717	      ]
718	      (const_string "<sseinsnmode>")))])
719
;; lddqu load (UNSPEC_LDDQU) of a VI1 vector from memory into a
;; register; requires TARGET_SSE3.
;; Without AVX, prefix_data16 is "0" and prefix_rep is "1"; under AVX
;; both are "*".
720(define_insn "<sse3>_lddqu<avxsizesuffix>"
721  [(set (match_operand:VI1 0 "register_operand" "=x")
722	(unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
723		    UNSPEC_LDDQU))]
724  "TARGET_SSE3"
725  "%vlddqu\t{%1, %0|%0, %1}"
726  [(set_attr "type" "ssemov")
727   (set_attr "movu" "1")
728   (set_attr "ssememalign" "8")
729   (set (attr "prefix_data16")
730     (if_then_else
731       (match_test "TARGET_AVX")
732     (const_string "*")
733     (const_string "0")))
734   (set (attr "prefix_rep")
735     (if_then_else
736       (match_test "TARGET_AVX")
737     (const_string "*")
738     (const_string "1")))
739   (set_attr "prefix" "maybe_vex")
740   (set_attr "mode" "<sseinsnmode>")])
741
;; Non-temporal store (UNSPEC_MOVNT) of a general register ("r") to
;; memory via movnti.
;; NOTE(review): the SWI48 iterator is defined outside this file chunk;
;; presumably it covers SImode and DImode -- confirm.
742(define_insn "sse2_movnti<mode>"
743  [(set (match_operand:SWI48 0 "memory_operand" "=m")
744	(unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
745		      UNSPEC_MOVNT))]
746  "TARGET_SSE2"
747  "movnti\t{%1, %0|%0, %1}"
748  [(set_attr "type" "ssemov")
749   (set_attr "prefix_data16" "0")
750   (set_attr "mode" "<MODE>")])
751
;; Non-temporal store (UNSPEC_MOVNT) of a float vector register to
;; memory, for every mode of VF; the mnemonic is movnt<ssemodesuffix>.
752(define_insn "<sse>_movnt<mode>"
753  [(set (match_operand:VF 0 "memory_operand" "=m")
754	(unspec:VF [(match_operand:VF 1 "register_operand" "x")]
755		   UNSPEC_MOVNT))]
756  "TARGET_SSE"
757  "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
758  [(set_attr "type" "ssemov")
759   (set_attr "prefix" "maybe_vex")
760   (set_attr "mode" "<MODE>")])
761
;; Non-temporal store (UNSPEC_MOVNT) of a DImode-element integer vector
;; (VI8: V2DI, or V4DI under AVX) to memory via movntdq.
;; NOTE(review): "type" is "ssecvt" here, whereas sse2_movnti<mode> and
;; <sse>_movnt<mode> use "ssemov" -- confirm this is intentional before
;; relying on it in scheduling descriptions.
762(define_insn "<sse2>_movnt<mode>"
763  [(set (match_operand:VI8 0 "memory_operand" "=m")
764	(unspec:VI8 [(match_operand:VI8 1 "register_operand" "x")]
765		    UNSPEC_MOVNT))]
766  "TARGET_SSE2"
767  "%vmovntdq\t{%1, %0|%0, %1}"
768  [(set_attr "type" "ssecvt")
769   (set (attr "prefix_data16")
770     (if_then_else
771       (match_test "TARGET_AVX")
772     (const_string "*")
773     (const_string "1")))
774   (set_attr "prefix" "maybe_vex")
775   (set_attr "mode" "<sseinsnmode>")])
776
777; Expand patterns for non-temporal stores.  At the moment, only those
778; that directly map to insns are defined; it would be possible to
779; define patterns for other modes that would expand to several insns.
780
781;; Modes handled by storent patterns.
;; Every mode except V4SF carries its own ISA condition: the scalar
;; float modes require TARGET_SSE4A, and DI additionally requires
;; TARGET_64BIT.
782(define_mode_iterator STORENT_MODE
783  [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2")
784   (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
785   (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
786   (V8SF "TARGET_AVX") V4SF
787   (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
788
;; Non-temporal store expander for every mode of STORENT_MODE.
;; There is no preparation code, so the UNSPEC_MOVNT template is emitted
;; as written and must be matched by a movnt insn pattern.  The
;; per-mode conditions of STORENT_MODE apply on top of TARGET_SSE.
789(define_expand "storent<mode>"
790  [(set (match_operand:STORENT_MODE 0 "memory_operand")
791	(unspec:STORENT_MODE
792	  [(match_operand:STORENT_MODE 1 "register_operand")]
793	  UNSPEC_MOVNT))]
794  "TARGET_SSE")
795
796;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
797;;
798;; Parallel floating point arithmetic
799;;
800;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
801
;; abs / neg expander for float vectors (code iterator absneg, mode
;; iterator VF).  Expansion is handled entirely by
;; ix86_expand_fp_absneg_operator, followed by DONE.
802(define_expand "<code><mode>2"
803  [(set (match_operand:VF 0 "register_operand")
804	(absneg:VF
805	  (match_operand:VF 1 "register_operand")))]
806  "TARGET_SSE"
807  "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
808
;; abs / neg on float vectors as an insn that is always split ("#")
;; once reload has completed.
;;
;; The split emits a single SET of operand 0 to (XOR op1 op2) for NEG
;; and (AND op1 op2) otherwise.
;; NOTE(review): operand 2 sits in a `use`; presumably it is the bit
;; mask prepared by the expander -- confirm in
;; ix86_expand_fp_absneg_operator.
;;
;; Operand selection in the split:
;;   AVX:     if operand 1 is a MEM the two inputs are swapped, so the
;;            first input of the logical op is operand 2.
;;   non-AVX: the first input is operand 0 (the destination); the second
;;            is whichever of operands 1 and 2 is not equal to operand 0.
;; Alternatives 0-1 are the non-AVX forms and 2-3 the AVX forms (see
;; the "isa" attribute).
809(define_insn_and_split "*absneg<mode>2"
810  [(set (match_operand:VF 0 "register_operand" "=x,x,x,x")
811	(match_operator:VF 3 "absneg_operator"
812	  [(match_operand:VF 1 "nonimmediate_operand" "0, xm,x, m")]))
813   (use (match_operand:VF 2 "nonimmediate_operand"    "xm,0, xm,x"))]
814  "TARGET_SSE"
815  "#"
816  "&& reload_completed"
817  [(const_int 0)]
818{
819  enum rtx_code absneg_op;
820  rtx op1, op2;
821  rtx t;
822
823  if (TARGET_AVX)
824    {
825      if (MEM_P (operands[1]))
826	op1 = operands[2], op2 = operands[1];
827      else
828	op1 = operands[1], op2 = operands[2];
829    }
830  else
831    {
832      op1 = operands[0];
833      if (rtx_equal_p (operands[0], operands[1]))
834	op2 = operands[2];
835      else
836	op2 = operands[1];
837    }
838
839  absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
840  t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
841  t = gen_rtx_SET (VOIDmode, operands[0], t);
842  emit_insn (t);
843  DONE;
844}
845  [(set_attr "isa" "noavx,noavx,avx,avx")])
846
;; Packed float add / sub expander for every mode of VF.  The operands
;; are first passed through ix86_fixup_binary_operands_no_copy; there is
;; no DONE, so the template is then emitted.
847(define_expand "<plusminus_insn><mode>3"
848  [(set (match_operand:VF 0 "register_operand")
849	(plusminus:VF
850	  (match_operand:VF 1 "nonimmediate_operand")
851	  (match_operand:VF 2 "nonimmediate_operand")))]
852  "TARGET_SSE"
853  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
854
;; Packed float add / sub insn.
;; Alternative 0 (noavx): two-operand form, operand 1 tied to the
;; destination.  Alternative 1 (avx): three-operand v-prefixed form.
;; The operands must also satisfy ix86_binary_operator_ok.
855(define_insn "*<plusminus_insn><mode>3"
856  [(set (match_operand:VF 0 "register_operand" "=x,x")
857	(plusminus:VF
858	  (match_operand:VF 1 "nonimmediate_operand" "<comm>0,x")
859	  (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
860  "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
861  "@
862   <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
863   v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
864  [(set_attr "isa" "noavx,avx")
865   (set_attr "type" "sseadd")
866   (set_attr "prefix" "orig,vex")
867   (set_attr "mode" "<MODE>")])
868
;; Scalar add / sub on element 0 of a 128-bit float vector.
;; The vec_merge mask of 1 takes element 0 from the arithmetic result
;; and every other element from operand 1.
869(define_insn "<sse>_vm<plusminus_insn><mode>3"
870  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
871	(vec_merge:VF_128
872	  (plusminus:VF_128
873	    (match_operand:VF_128 1 "register_operand" "0,x")
874	    (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
875	  (match_dup 1)
876	  (const_int 1)))]
877  "TARGET_SSE"
878  "@
879   <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}
880   v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
881  [(set_attr "isa" "noavx,avx")
882   (set_attr "type" "sseadd")
883   (set_attr "prefix" "orig,vex")
884   (set_attr "mode" "<ssescalarmode>")])
885
;; Packed float multiply expander for every mode of VF.  The operands
;; are first passed through ix86_fixup_binary_operands_no_copy; the
;; template is then emitted.
886(define_expand "mul<mode>3"
887  [(set (match_operand:VF 0 "register_operand")
888	(mult:VF
889	  (match_operand:VF 1 "nonimmediate_operand")
890	  (match_operand:VF 2 "nonimmediate_operand")))]
891  "TARGET_SSE"
892  "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
893
;; Packed float multiply insn.  The "%" in operand 1's constraint marks
;; operands 1 and 2 as commutative.
;; Alternative 0 (noavx): two-operand form.  Alternative 1 (avx):
;; three-operand form.
894(define_insn "*mul<mode>3"
895  [(set (match_operand:VF 0 "register_operand" "=x,x")
896	(mult:VF
897	  (match_operand:VF 1 "nonimmediate_operand" "%0,x")
898	  (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
899  "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
900  "@
901   mul<ssemodesuffix>\t{%2, %0|%0, %2}
902   vmul<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
903  [(set_attr "isa" "noavx,avx")
904   (set_attr "type" "ssemul")
905   (set_attr "prefix" "orig,vex")
906   (set_attr "btver2_decode" "direct,double")
907   (set_attr "mode" "<MODE>")])
908
;; Scalar multiply on element 0 of a 128-bit float vector.  Element 0
;; comes from the product; the remaining elements are copied from
;; operand 1 (vec_merge mask 1).
909(define_insn "<sse>_vmmul<mode>3"
910  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
911	(vec_merge:VF_128
912	  (mult:VF_128
913	    (match_operand:VF_128 1 "register_operand" "0,x")
914	    (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
915	  (match_dup 1)
916	  (const_int 1)))]
917  "TARGET_SSE"
918  "@
919   mul<ssescalarmodesuffix>\t{%2, %0|%0, %2}
920   vmul<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
921  [(set_attr "isa" "noavx,avx")
922   (set_attr "type" "ssemul")
923   (set_attr "prefix" "orig,vex")
924   (set_attr "mode" "<ssescalarmode>")])
925
;; Packed double-precision divide expander (VF2 modes, TARGET_SSE2).
;; It only fixes up the operands; the div template is then emitted.
926(define_expand "div<mode>3"
927  [(set (match_operand:VF2 0 "register_operand")
928	(div:VF2 (match_operand:VF2 1 "register_operand")
929		 (match_operand:VF2 2 "nonimmediate_operand")))]
930  "TARGET_SSE2"
931  "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
932
;; Packed single-precision divide expander (VF1 modes).
;; After fixing up the operands it calls ix86_emit_swdivsf and finishes
;; with DONE, but only when all of these hold: TARGET_SSE_MATH,
;; TARGET_RECIP_VEC_DIV, not optimizing the insn for size,
;; -ffinite-math-only, no trapping math, and unsafe math optimizations.
;; Otherwise the div template is emitted.
;; NOTE(review): ix86_emit_swdivsf is presumably the reciprocal-based
;; software division sequence -- confirm in i386.c.
933(define_expand "div<mode>3"
934  [(set (match_operand:VF1 0 "register_operand")
935	(div:VF1 (match_operand:VF1 1 "register_operand")
936		 (match_operand:VF1 2 "nonimmediate_operand")))]
937  "TARGET_SSE"
938{
939  ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
940
941  if (TARGET_SSE_MATH
942      && TARGET_RECIP_VEC_DIV
943      && !optimize_insn_for_size_p ()
944      && flag_finite_math_only && !flag_trapping_math
945      && flag_unsafe_math_optimizations)
946    {
947      ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
948      DONE;
949    }
950})
951
;; Packed float divide insn for every mode of VF.  Operand 1 must be a
;; register.  Alternative 0 (noavx) is the two-operand form and
;; alternative 1 (avx) the three-operand form.
952(define_insn "<sse>_div<mode>3"
953  [(set (match_operand:VF 0 "register_operand" "=x,x")
954	(div:VF
955	  (match_operand:VF 1 "register_operand" "0,x")
956	  (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
957  "TARGET_SSE"
958  "@
959   div<ssemodesuffix>\t{%2, %0|%0, %2}
960   vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
961  [(set_attr "isa" "noavx,avx")
962   (set_attr "type" "ssediv")
963   (set_attr "prefix" "orig,vex")
964   (set_attr "mode" "<MODE>")])
965
;; Scalar divide on element 0 of a 128-bit float vector.  Element 0
;; comes from the quotient; the remaining elements are copied from
;; operand 1 (vec_merge mask 1).
966(define_insn "<sse>_vmdiv<mode>3"
967  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
968	(vec_merge:VF_128
969	  (div:VF_128
970	    (match_operand:VF_128 1 "register_operand" "0,x")
971	    (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
972	  (match_dup 1)
973	  (const_int 1)))]
974  "TARGET_SSE"
975  "@
976   div<ssescalarmodesuffix>\t{%2, %0|%0, %2}
977   vdiv<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
978  [(set_attr "isa" "noavx,avx")
979   (set_attr "type" "ssediv")
980   (set_attr "prefix" "orig,vex")
981   (set_attr "btver2_decode" "direct,double")
982   (set_attr "mode" "<ssescalarmode>")])
983
;; Packed single-precision reciprocal (UNSPEC_RCP) for the VF1 modes,
;; emitted as rcpps.
;; NOTE(review): UNSPEC_RCP is not in this file's unspec enum; it is
;; presumably declared in i386.md -- confirm.
984(define_insn "<sse>_rcp<mode>2"
985  [(set (match_operand:VF1 0 "register_operand" "=x")
986	(unspec:VF1
987	  [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
988  "TARGET_SSE"
989  "%vrcpps\t{%1, %0|%0, %1}"
990  [(set_attr "type" "sse")
991   (set_attr "atom_sse_attr" "rcp")
992   (set_attr "btver2_sse_attr" "rcp")
993   (set_attr "prefix" "maybe_vex")
994   (set_attr "mode" "<MODE>")])
995
;; Merge form of UNSPEC_RCP on V4SF.  Element 0 of the result comes from
;; UNSPEC_RCP of operand 1; the remaining elements are copied from operand 2
;; (vec_merge mask 1).  Alternative 0 ties operand 2 to the destination and
;; prints rcpss; alternative 1 prints the three-operand vrcpss.
996(define_insn "sse_vmrcpv4sf2"
997  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
998	(vec_merge:V4SF
999	  (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1000		       UNSPEC_RCP)
1001	  (match_operand:V4SF 2 "register_operand" "0,x")
1002	  (const_int 1)))]
1003  "TARGET_SSE"
1004  "@
1005   rcpss\t{%1, %0|%0, %1}
1006   vrcpss\t{%1, %2, %0|%0, %2, %1}"
1007  [(set_attr "isa" "noavx,avx")
1008   (set_attr "type" "sse")
1009   (set_attr "ssememalign" "32")
1010   (set_attr "atom_sse_attr" "rcp")
1011   (set_attr "btver2_sse_attr" "rcp")
1012   (set_attr "prefix" "orig,vex")
1013   (set_attr "mode" "SF")])
1014
;; Named square-root expander for the VF2 modes, enabled under TARGET_SSE2.
;; There are no preparation statements, so the RTL template is emitted
;; exactly as written.
1015(define_expand "sqrt<mode>2"
1016  [(set (match_operand:VF2 0 "register_operand")
1017	(sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand")))]
1018  "TARGET_SSE2")
1019
;; Named square-root expander for the VF1 modes.
;; If TARGET_SSE_MATH and TARGET_RECIP_VEC_SQRT hold, the insn is not being
;; optimized for size, and flag_finite_math_only, !flag_trapping_math and
;; flag_unsafe_math_optimizations are all in effect, expansion is handed to
;; ix86_emit_swsqrtsf (last argument false) and the template is skipped via
;; DONE.  Otherwise the plain (sqrt ...) RTL from the template is emitted.
1020(define_expand "sqrt<mode>2"
1021  [(set (match_operand:VF1 0 "register_operand")
1022	(sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand")))]
1023  "TARGET_SSE"
1024{
1025  if (TARGET_SSE_MATH
1026      && TARGET_RECIP_VEC_SQRT
1027      && !optimize_insn_for_size_p ()
1028      && flag_finite_math_only && !flag_trapping_math
1029      && flag_unsafe_math_optimizations)
1030    {
1031      ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
1032      DONE;
1033    }
1034})
1035
;; Full-vector square root for every mode in VF.  Single alternative:
;; register destination, register-or-memory source, printed as
;; sqrt<ssemodesuffix> with the %v modifier and prefix "maybe_vex".
1036(define_insn "<sse>_sqrt<mode>2"
1037  [(set (match_operand:VF 0 "register_operand" "=x")
1038	(sqrt:VF (match_operand:VF 1 "nonimmediate_operand" "xm")))]
1039  "TARGET_SSE"
1040  "%vsqrt<ssemodesuffix>\t{%1, %0|%0, %1}"
1041  [(set_attr "type" "sse")
1042   (set_attr "atom_sse_attr" "sqrt")
1043   (set_attr "btver2_sse_attr" "sqrt")
1044   (set_attr "prefix" "maybe_vex")
1045   (set_attr "mode" "<MODE>")])
1046
;; Merge form of square root for the VF_128 modes.  Element 0 of the result
;; is taken from sqrt of operand 1; the other elements come from operand 2
;; (vec_merge mask 1).  Alternative 0 ties operand 2 to the destination;
;; alternative 1 is the three-operand AVX spelling.
1047(define_insn "<sse>_vmsqrt<mode>2"
1048  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1049	(vec_merge:VF_128
1050	  (sqrt:VF_128
1051	    (match_operand:VF_128 1 "nonimmediate_operand" "xm,xm"))
1052	  (match_operand:VF_128 2 "register_operand" "0,x")
1053	  (const_int 1)))]
1054  "TARGET_SSE"
1055  "@
1056   sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %1}
1057   vsqrt<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
1058  [(set_attr "isa" "noavx,avx")
1059   (set_attr "type" "sse")
1060   (set_attr "atom_sse_attr" "sqrt")
1061   (set_attr "btver2_sse_attr" "sqrt")
1062   (set_attr "prefix" "orig,vex")
1063   (set_attr "mode" "<ssescalarmode>")])
1064
;; Named expander for UNSPEC_RSQRT on the VF1 modes, enabled under
;; TARGET_SSE_MATH.  The RTL template is never emitted: the preparation code
;; unconditionally calls ix86_emit_swsqrtsf with a final argument of true
;; and then executes DONE.
1065(define_expand "rsqrt<mode>2"
1066  [(set (match_operand:VF1 0 "register_operand")
1067	(unspec:VF1
1068	  [(match_operand:VF1 1 "nonimmediate_operand")] UNSPEC_RSQRT))]
1069  "TARGET_SSE_MATH"
1070{
1071  ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
1072  DONE;
1073})
1074
;; UNSPEC_RSQRT applied to a whole VF1 vector (operand 1, register or
;; memory).  The mnemonic is always rsqrtps regardless of the iterator mode,
;; printed with the %v modifier and prefix "maybe_vex".
1075(define_insn "<sse>_rsqrt<mode>2"
1076  [(set (match_operand:VF1 0 "register_operand" "=x")
1077	(unspec:VF1
1078	  [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
1079  "TARGET_SSE"
1080  "%vrsqrtps\t{%1, %0|%0, %1}"
1081  [(set_attr "type" "sse")
1082   (set_attr "prefix" "maybe_vex")
1083   (set_attr "mode" "<MODE>")])
1084
;; Merge form of UNSPEC_RSQRT on V4SF.  Element 0 of the result comes from
;; UNSPEC_RSQRT of operand 1; the remaining elements are copied from
;; operand 2 (vec_merge mask 1).  Prints rsqrtss, or the three-operand
;; vrsqrtss in the "avx" alternative.
1085(define_insn "sse_vmrsqrtv4sf2"
1086  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1087	(vec_merge:V4SF
1088	  (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1089		       UNSPEC_RSQRT)
1090	  (match_operand:V4SF 2 "register_operand" "0,x")
1091	  (const_int 1)))]
1092  "TARGET_SSE"
1093  "@
1094   rsqrtss\t{%1, %0|%0, %1}
1095   vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
1096  [(set_attr "isa" "noavx,avx")
1097   (set_attr "type" "sse")
1098   (set_attr "ssememalign" "32")
1099   (set_attr "prefix" "orig,vex")
1100   (set_attr "mode" "SF")])
1101
1102;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1103;; isn't really correct, as those rtl operators aren't defined when
1104;; applied to NaNs.  Hopefully the optimizers won't get too smart on us.
1105
;; Named max/min expander, instantiated for each code in smaxmin and each
;; mode in VF.  When flag_finite_math_only is clear, operand 1 is first
;; forced into a register; in all cases the operands are then passed through
;; ix86_fixup_binary_operands_no_copy before the template RTL is emitted.
1106(define_expand "<code><mode>3"
1107  [(set (match_operand:VF 0 "register_operand")
1108	(smaxmin:VF
1109	  (match_operand:VF 1 "nonimmediate_operand")
1110	  (match_operand:VF 2 "nonimmediate_operand")))]
1111  "TARGET_SSE"
1112{
1113  if (!flag_finite_math_only)
1114    operands[1] = force_reg (<MODE>mode, operands[1]);
1115  ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
1116})
1117
;; Max/min insn that is only available when flag_finite_math_only is set.
;; The "%" in operand 1's constraint marks operands 1 and 2 as commutative,
;; and operand 1 may be memory at match time; ix86_binary_operator_ok in the
;; condition gates which operand combinations are accepted.
1118(define_insn "*<code><mode>3_finite"
1119  [(set (match_operand:VF 0 "register_operand" "=x,x")
1120	(smaxmin:VF
1121	  (match_operand:VF 1 "nonimmediate_operand" "%0,x")
1122	  (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1123  "TARGET_SSE && flag_finite_math_only
1124   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1125  "@
1126   <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1127   v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1128  [(set_attr "isa" "noavx,avx")
1129   (set_attr "type" "sseadd")
1130   (set_attr "btver2_sse_attr" "maxmin")
1131   (set_attr "prefix" "orig,vex")
1132   (set_attr "mode" "<MODE>")])
1133
;; Max/min insn used when flag_finite_math_only is clear.  Here operand 1
;; must be a register and carries no commutative "%" marker, so the operand
;; order given in the RTL is the order printed in the instruction.
1134(define_insn "*<code><mode>3"
1135  [(set (match_operand:VF 0 "register_operand" "=x,x")
1136	(smaxmin:VF
1137	  (match_operand:VF 1 "register_operand" "0,x")
1138	  (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1139  "TARGET_SSE && !flag_finite_math_only"
1140  "@
1141   <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1142   v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1143  [(set_attr "isa" "noavx,avx")
1144   (set_attr "type" "sseadd")
1145   (set_attr "btver2_sse_attr" "maxmin")
1146   (set_attr "prefix" "orig,vex")
1147   (set_attr "mode" "<MODE>")])
1148
;; Merge form of max/min for the VF_128 modes.  Element 0 of the result is
;; taken from the smaxmin of operands 1 and 2; the other elements are copied
;; from operand 1 (vec_merge mask 1).  Printed with the scalar suffix.
1149(define_insn "<sse>_vm<code><mode>3"
1150  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1151	(vec_merge:VF_128
1152	  (smaxmin:VF_128
1153	    (match_operand:VF_128 1 "register_operand" "0,x")
1154	    (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
1155	 (match_dup 1)
1156	 (const_int 1)))]
1157  "TARGET_SSE"
1158  "@
1159   <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %2}
1160   v<maxmin_float><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1161  [(set_attr "isa" "noavx,avx")
1162   (set_attr "type" "sse")
1163   (set_attr "btver2_sse_attr" "maxmin")
1164   (set_attr "prefix" "orig,vex")
1165   (set_attr "mode" "<ssescalarmode>")])
1166
1167;; These versions of the min/max patterns implement exactly the operations
1168;;   min = (op1 < op2 ? op1 : op2)
1169;;   max = (!(op1 < op2) ? op1 : op2)
1170;; Their operands are not commutative, and thus they may be used in the
1171;; presence of -0.0 and NaN.
1172
;; Non-commutative minimum expressed as UNSPEC_IEEE_MIN over VF.  Operand 1
;; is a register, operand 2 a register or memory, and they are printed in
;; that fixed order as min<ssemodesuffix> / vmin<ssemodesuffix>.
1173(define_insn "*ieee_smin<mode>3"
1174  [(set (match_operand:VF 0 "register_operand" "=x,x")
1175	(unspec:VF
1176	  [(match_operand:VF 1 "register_operand" "0,x")
1177	   (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
1178	 UNSPEC_IEEE_MIN))]
1179  "TARGET_SSE"
1180  "@
1181   min<ssemodesuffix>\t{%2, %0|%0, %2}
1182   vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1183  [(set_attr "isa" "noavx,avx")
1184   (set_attr "type" "sseadd")
1185   (set_attr "prefix" "orig,vex")
1186   (set_attr "mode" "<MODE>")])
1187
;; Non-commutative maximum expressed as UNSPEC_IEEE_MAX over VF.  Operand 1
;; is a register, operand 2 a register or memory, and they are printed in
;; that fixed order as max<ssemodesuffix> / vmax<ssemodesuffix>.
1188(define_insn "*ieee_smax<mode>3"
1189  [(set (match_operand:VF 0 "register_operand" "=x,x")
1190	(unspec:VF
1191	  [(match_operand:VF 1 "register_operand" "0,x")
1192	   (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
1193	 UNSPEC_IEEE_MAX))]
1194  "TARGET_SSE"
1195  "@
1196   max<ssemodesuffix>\t{%2, %0|%0, %2}
1197   vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1198  [(set_attr "isa" "noavx,avx")
1199   (set_attr "type" "sseadd")
1200   (set_attr "prefix" "orig,vex")
1201   (set_attr "mode" "<MODE>")])
1202
;; Alternating add/subtract on V4DF (AVX only, single three-operand form).
;; The vec_merge mask 10 (binary 1010) selects the sum op1+op2 for elements
;; 1 and 3 and the difference op1-op2 for elements 0 and 2.
1203(define_insn "avx_addsubv4df3"
1204  [(set (match_operand:V4DF 0 "register_operand" "=x")
1205	(vec_merge:V4DF
1206	  (plus:V4DF
1207	    (match_operand:V4DF 1 "register_operand" "x")
1208	    (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1209	  (minus:V4DF (match_dup 1) (match_dup 2))
1210	  (const_int 10)))]
1211  "TARGET_AVX"
1212  "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1213  [(set_attr "type" "sseadd")
1214   (set_attr "prefix" "vex")
1215   (set_attr "mode" "V4DF")])
1216
;; Alternating add/subtract on V2DF.  The vec_merge mask 2 (binary 10)
;; selects the sum op1+op2 for element 1 and the difference op1-op2 for
;; element 0.  Alternative 0 is the two-operand addsubpd, alternative 1 the
;; three-operand vaddsubpd.
1217(define_insn "sse3_addsubv2df3"
1218  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1219	(vec_merge:V2DF
1220	  (plus:V2DF
1221	    (match_operand:V2DF 1 "register_operand" "0,x")
1222	    (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
1223	  (minus:V2DF (match_dup 1) (match_dup 2))
1224	  (const_int 2)))]
1225  "TARGET_SSE3"
1226  "@
1227   addsubpd\t{%2, %0|%0, %2}
1228   vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1229  [(set_attr "isa" "noavx,avx")
1230   (set_attr "type" "sseadd")
1231   (set_attr "atom_unit" "complex")
1232   (set_attr "prefix" "orig,vex")
1233   (set_attr "mode" "V2DF")])
1234
;; Alternating add/subtract on V8SF (AVX only, single three-operand form).
;; The vec_merge mask 170 (binary 10101010) selects the sum op1+op2 for the
;; odd-numbered elements and the difference op1-op2 for the even-numbered
;; elements.
1235(define_insn "avx_addsubv8sf3"
1236  [(set (match_operand:V8SF 0 "register_operand" "=x")
1237	(vec_merge:V8SF
1238	  (plus:V8SF
1239	    (match_operand:V8SF 1 "register_operand" "x")
1240	    (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1241	  (minus:V8SF (match_dup 1) (match_dup 2))
1242	  (const_int 170)))]
1243  "TARGET_AVX"
1244  "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1245  [(set_attr "type" "sseadd")
1246   (set_attr "prefix" "vex")
1247   (set_attr "mode" "V8SF")])
1248
;; Alternating add/subtract on V4SF.  The vec_merge mask 10 (binary 1010)
;; selects the sum op1+op2 for elements 1 and 3 and the difference op1-op2
;; for elements 0 and 2.  The "prefix_rep" attribute is 1 for the legacy
;; alternative and left at its default ("*") for the AVX alternative.
1249(define_insn "sse3_addsubv4sf3"
1250  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1251	(vec_merge:V4SF
1252	  (plus:V4SF
1253	    (match_operand:V4SF 1 "register_operand" "0,x")
1254	    (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
1255	  (minus:V4SF (match_dup 1) (match_dup 2))
1256	  (const_int 10)))]
1257  "TARGET_SSE3"
1258  "@
1259   addsubps\t{%2, %0|%0, %2}
1260   vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1261  [(set_attr "isa" "noavx,avx")
1262   (set_attr "type" "sseadd")
1263   (set_attr "prefix" "orig,vex")
1264   (set_attr "prefix_rep" "1,*")
1265   (set_attr "mode" "V4SF")])
1266
;; Horizontal add/subtract on V4DF, one pattern per code in plusminus.
;; Result layout, writing (a OP b) for the plusminus operation:
;;   element 0 = op1[0] OP op1[1]     element 1 = op2[0] OP op2[1]
;;   element 2 = op1[2] OP op1[3]     element 3 = op2[2] OP op2[3]
1267(define_insn "avx_h<plusminus_insn>v4df3"
1268  [(set (match_operand:V4DF 0 "register_operand" "=x")
1269	(vec_concat:V4DF
1270	  (vec_concat:V2DF
1271	    (plusminus:DF
1272	      (vec_select:DF
1273		(match_operand:V4DF 1 "register_operand" "x")
1274		(parallel [(const_int 0)]))
1275	      (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1276	    (plusminus:DF
1277	      (vec_select:DF
1278		(match_operand:V4DF 2 "nonimmediate_operand" "xm")
1279		(parallel [(const_int 0)]))
1280	      (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
1281	  (vec_concat:V2DF
1282	    (plusminus:DF
1283	      (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1284	      (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
1285	    (plusminus:DF
1286	      (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1287	      (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1288  "TARGET_AVX"
1289  "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1290  [(set_attr "type" "sseadd")
1291   (set_attr "prefix" "vex")
1292   (set_attr "mode" "V4DF")])
1293
;; Named expander for horizontal add on V2DF.  It emits the canonical RTL
;; with fixed selectors: element 0 = op1[0] + op1[1], element 1 =
;; op2[0] + op2[1].  There are no preparation statements.
1294(define_expand "sse3_haddv2df3"
1295  [(set (match_operand:V2DF 0 "register_operand")
1296	(vec_concat:V2DF
1297	  (plus:DF
1298	    (vec_select:DF
1299	      (match_operand:V2DF 1 "register_operand")
1300	      (parallel [(const_int 0)]))
1301	    (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1302	  (plus:DF
1303	    (vec_select:DF
1304	      (match_operand:V2DF 2 "nonimmediate_operand")
1305	      (parallel [(const_int 0)]))
1306	    (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1307  "TARGET_SSE3")
1308
;; Matching insn for horizontal add on V2DF.  The element selectors are
;; operands 3..6 (each 0 or 1) rather than fixed constants; the condition
;; only requires that the two selectors within each sum differ, so either
;; ordering of the two addends in each half is accepted.
1309(define_insn "*sse3_haddv2df3"
1310  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1311	(vec_concat:V2DF
1312	  (plus:DF
1313	    (vec_select:DF
1314	      (match_operand:V2DF 1 "register_operand" "0,x")
1315	      (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
1316	    (vec_select:DF
1317	      (match_dup 1)
1318	      (parallel [(match_operand:SI 4 "const_0_to_1_operand")])))
1319	  (plus:DF
1320	    (vec_select:DF
1321	      (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
1322	      (parallel [(match_operand:SI 5 "const_0_to_1_operand")]))
1323	    (vec_select:DF
1324	      (match_dup 2)
1325	      (parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))]
1326  "TARGET_SSE3
1327   && INTVAL (operands[3]) != INTVAL (operands[4])
1328   && INTVAL (operands[5]) != INTVAL (operands[6])"
1329  "@
1330   haddpd\t{%2, %0|%0, %2}
1331   vhaddpd\t{%2, %1, %0|%0, %1, %2}"
1332  [(set_attr "isa" "noavx,avx")
1333   (set_attr "type" "sseadd")
1334   (set_attr "prefix" "orig,vex")
1335   (set_attr "mode" "V2DF")])
1336
;; Horizontal subtract on V2DF with fixed element selectors:
;; element 0 = op1[0] - op1[1], element 1 = op2[0] - op2[1].
;; Alternative 0 prints hsubpd, alternative 1 the three-operand vhsubpd.
1337(define_insn "sse3_hsubv2df3"
1338  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1339	(vec_concat:V2DF
1340	  (minus:DF
1341	    (vec_select:DF
1342	      (match_operand:V2DF 1 "register_operand" "0,x")
1343	      (parallel [(const_int 0)]))
1344	    (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1345	  (minus:DF
1346	    (vec_select:DF
1347	      (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
1348	      (parallel [(const_int 0)]))
1349	    (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1350  "TARGET_SSE3"
1351  "@
1352   hsubpd\t{%2, %0|%0, %2}
1353   vhsubpd\t{%2, %1, %0|%0, %1, %2}"
1354  [(set_attr "isa" "noavx,avx")
1355   (set_attr "type" "sseadd")
1356   (set_attr "prefix" "orig,vex")
1357   (set_attr "mode" "V2DF")])
1358
;; Scalar DF result formed by adding the two elements of one V2DF register.
;; Operands 2 and 3 are the element selectors (each 0 or 1) and must differ.
;; The same register is printed as both sources of haddpd / vhaddpd.
1359(define_insn "*sse3_haddv2df3_low"
1360  [(set (match_operand:DF 0 "register_operand" "=x,x")
1361	(plus:DF
1362	  (vec_select:DF
1363	    (match_operand:V2DF 1 "register_operand" "0,x")
1364	    (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))
1365	  (vec_select:DF
1366	    (match_dup 1)
1367	    (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))]
1368  "TARGET_SSE3
1369   && INTVAL (operands[2]) != INTVAL (operands[3])"
1370  "@
1371   haddpd\t{%0, %0|%0, %0}
1372   vhaddpd\t{%1, %1, %0|%0, %1, %1}"
1373  [(set_attr "isa" "noavx,avx")
1374   (set_attr "type" "sseadd1")
1375   (set_attr "prefix" "orig,vex")
1376   (set_attr "mode" "V2DF")])
1377
;; Scalar DF result op1[0] - op1[1] taken from a single V2DF register.
;; The selectors are fixed constants here because subtraction is not
;; commutative.  The same register is printed as both sources of
;; hsubpd / vhsubpd.
1378(define_insn "*sse3_hsubv2df3_low"
1379  [(set (match_operand:DF 0 "register_operand" "=x,x")
1380	(minus:DF
1381	  (vec_select:DF
1382	    (match_operand:V2DF 1 "register_operand" "0,x")
1383	    (parallel [(const_int 0)]))
1384	  (vec_select:DF
1385	    (match_dup 1)
1386	    (parallel [(const_int 1)]))))]
1387  "TARGET_SSE3"
1388  "@
1389   hsubpd\t{%0, %0|%0, %0}
1390   vhsubpd\t{%1, %1, %0|%0, %1, %1}"
1391  [(set_attr "isa" "noavx,avx")
1392   (set_attr "type" "sseadd1")
1393   (set_attr "prefix" "orig,vex")
1394   (set_attr "mode" "V2DF")])
1395
;; Horizontal add/subtract on V8SF, one pattern per code in plusminus.
;; Result layout, writing (a OP b) for the plusminus operation:
;;   elements 0,1 = op1[0] OP op1[1], op1[2] OP op1[3]
;;   elements 2,3 = op2[0] OP op2[1], op2[2] OP op2[3]
;;   elements 4,5 = op1[4] OP op1[5], op1[6] OP op1[7]
;;   elements 6,7 = op2[4] OP op2[5], op2[6] OP op2[7]
1396(define_insn "avx_h<plusminus_insn>v8sf3"
1397  [(set (match_operand:V8SF 0 "register_operand" "=x")
1398	(vec_concat:V8SF
1399	  (vec_concat:V4SF
1400	    (vec_concat:V2SF
1401	      (plusminus:SF
1402		(vec_select:SF
1403		  (match_operand:V8SF 1 "register_operand" "x")
1404		  (parallel [(const_int 0)]))
1405		(vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1406	      (plusminus:SF
1407		(vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1408		(vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1409	    (vec_concat:V2SF
1410	      (plusminus:SF
1411		(vec_select:SF
1412		  (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1413		  (parallel [(const_int 0)]))
1414		(vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1415	      (plusminus:SF
1416		(vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1417		(vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1418	  (vec_concat:V4SF
1419	    (vec_concat:V2SF
1420	      (plusminus:SF
1421		(vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1422		(vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1423	      (plusminus:SF
1424		(vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1425		(vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1426	    (vec_concat:V2SF
1427	      (plusminus:SF
1428		(vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1429		(vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1430	      (plusminus:SF
1431		(vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1432		(vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1433  "TARGET_AVX"
1434  "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1435  [(set_attr "type" "sseadd")
1436   (set_attr "prefix" "vex")
1437   (set_attr "mode" "V8SF")])
1438
;; Horizontal add/subtract on V4SF, one pattern per code in plusminus.
;; Result layout, writing (a OP b) for the plusminus operation:
;;   elements 0,1 = op1[0] OP op1[1], op1[2] OP op1[3]
;;   elements 2,3 = op2[0] OP op2[1], op2[2] OP op2[3]
1439(define_insn "sse3_h<plusminus_insn>v4sf3"
1440  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1441	(vec_concat:V4SF
1442	  (vec_concat:V2SF
1443	    (plusminus:SF
1444	      (vec_select:SF
1445		(match_operand:V4SF 1 "register_operand" "0,x")
1446		(parallel [(const_int 0)]))
1447	      (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1448	    (plusminus:SF
1449	      (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1450	      (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1451	  (vec_concat:V2SF
1452	    (plusminus:SF
1453	      (vec_select:SF
1454		(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
1455		(parallel [(const_int 0)]))
1456	      (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1457	    (plusminus:SF
1458	      (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1459	      (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1460  "TARGET_SSE3"
1461  "@
1462   h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
1463   vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1464  [(set_attr "isa" "noavx,avx")
1465   (set_attr "type" "sseadd")
1466   (set_attr "atom_unit" "complex")
1467   (set_attr "prefix" "orig,vex")
1468   (set_attr "prefix_rep" "1,*")
1469   (set_attr "mode" "V4SF")])
1470
;; Sum-reduction expander for V4DF (AVX).  Emits three insns:
;;   1. tmp  = horizontal add of operand 1 with itself;
;;   2. tmp2 = vperm2f128 of tmp with itself, immediate 1
;;      (NOTE(review): presumably swaps the two 128-bit halves -- confirm
;;      against the avx_vperm2f128 pattern);
;;   3. operand 0 = tmp + tmp2.
;; The template itself is never emitted (DONE).
1471(define_expand "reduc_splus_v4df"
1472  [(match_operand:V4DF 0 "register_operand")
1473   (match_operand:V4DF 1 "register_operand")]
1474  "TARGET_AVX"
1475{
1476  rtx tmp = gen_reg_rtx (V4DFmode);
1477  rtx tmp2 = gen_reg_rtx (V4DFmode);
1478  emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
1479  emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
1480  emit_insn (gen_addv4df3 (operands[0], tmp, tmp2));
1481  DONE;
1482})
1483
;; Sum-reduction expander for V2DF (SSE3).  A single horizontal add of
;; operand 1 with itself is emitted directly into operand 0, then DONE.
1484(define_expand "reduc_splus_v2df"
1485  [(match_operand:V2DF 0 "register_operand")
1486   (match_operand:V2DF 1 "register_operand")]
1487  "TARGET_SSE3"
1488{
1489  emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
1490  DONE;
1491})
1492
;; Sum-reduction expander for V8SF (AVX).  Emits four insns:
;;   1. tmp  = horizontal add of operand 1 with itself;
;;   2. tmp2 = horizontal add of tmp with itself;
;;   3. tmp  = vperm2f128 of tmp2 with itself, immediate 1 (tmp is reused;
;;      NOTE(review): presumably swaps the 128-bit halves -- confirm);
;;   4. operand 0 = tmp + tmp2.
;; The template itself is never emitted (DONE).
1493(define_expand "reduc_splus_v8sf"
1494  [(match_operand:V8SF 0 "register_operand")
1495   (match_operand:V8SF 1 "register_operand")]
1496  "TARGET_AVX"
1497{
1498  rtx tmp = gen_reg_rtx (V8SFmode);
1499  rtx tmp2 = gen_reg_rtx (V8SFmode);
1500  emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
1501  emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
1502  emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
1503  emit_insn (gen_addv8sf3 (operands[0], tmp, tmp2));
1504  DONE;
1505})
1506
;; Sum-reduction expander for V4SF, available from plain SSE.
;; With TARGET_SSE3 it emits two horizontal adds (operand 1 with itself into
;; a temporary, then the temporary with itself into operand 0).  Without
;; SSE3 it falls back to ix86_expand_reduc using gen_addv4sf3 as the
;; combining operation.  The template itself is never emitted (DONE).
1507(define_expand "reduc_splus_v4sf"
1508  [(match_operand:V4SF 0 "register_operand")
1509   (match_operand:V4SF 1 "register_operand")]
1510  "TARGET_SSE"
1511{
1512  if (TARGET_SSE3)
1513    {
1514      rtx tmp = gen_reg_rtx (V4SFmode);
1515      emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1516      emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1517    }
1518  else
1519    ix86_expand_reduc (gen_addv4sf3, operands[0], operands[1]);
1520  DONE;
1521})
1522
1523;; Modes handled by reduc_sm{in,ax}* patterns.
;; Each entry pairs a vector mode with the target condition under which it
;; is enabled: the 256-bit integer modes need AVX2, the 256-bit float modes
;; need AVX, and V4SF needs only SSE.
1524(define_mode_iterator REDUC_SMINMAX_MODE
1525  [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
1526   (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
1527   (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
1528   (V4SF "TARGET_SSE")])
1529
;; Signed max/min reduction expander for every mode in REDUC_SMINMAX_MODE.
;; The insn condition is empty; enabling is governed by the per-mode
;; conditions of the iterator.  Expansion is delegated to ix86_expand_reduc
;; with the matching gen_<code><mode>3 as the combining operation.
1530(define_expand "reduc_<code>_<mode>"
1531  [(smaxmin:REDUC_SMINMAX_MODE
1532     (match_operand:REDUC_SMINMAX_MODE 0 "register_operand")
1533     (match_operand:REDUC_SMINMAX_MODE 1 "register_operand"))]
1534  ""
1535{
1536  ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
1537  DONE;
1538})
1539
;; Unsigned max/min reduction expander for the VI_256 modes, enabled under
;; TARGET_AVX2.  Expansion is delegated to ix86_expand_reduc with the
;; matching gen_<code><mode>3 as the combining operation.
1540(define_expand "reduc_<code>_<mode>"
1541  [(umaxmin:VI_256
1542     (match_operand:VI_256 0 "register_operand")
1543     (match_operand:VI_256 1 "register_operand"))]
1544  "TARGET_AVX2"
1545{
1546  ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
1547  DONE;
1548})
1549
;; Unsigned minimum reduction expander for V8HI, enabled under
;; TARGET_SSE4_1.  Expansion is delegated to ix86_expand_reduc with
;; gen_uminv8hi3 as the combining operation.
1550(define_expand "reduc_umin_v8hi"
1551  [(umin:V8HI
1552     (match_operand:V8HI 0 "register_operand")
1553     (match_operand:V8HI 1 "register_operand"))]
1554  "TARGET_SSE4_1"
1555{
1556  ix86_expand_reduc (gen_uminv8hi3, operands[0], operands[1]);
1557  DONE;
1558})
1559
1560;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1561;;
1562;; Parallel floating point comparisons
1563;;
1564;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1565
;; AVX vector compare with an explicit predicate immediate, for every mode
;; in VF.  Operand 3 is a constant in the range 0..31 that is printed as the
;; immediate of vcmp<ssemodesuffix>; the whole operation is wrapped in
;; UNSPEC_PCMP.
1566(define_insn "avx_cmp<mode>3"
1567  [(set (match_operand:VF 0 "register_operand" "=x")
1568	(unspec:VF
1569	  [(match_operand:VF 1 "register_operand" "x")
1570	   (match_operand:VF 2 "nonimmediate_operand" "xm")
1571	   (match_operand:SI 3 "const_0_to_31_operand" "n")]
1572	  UNSPEC_PCMP))]
1573  "TARGET_AVX"
1574  "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1575  [(set_attr "type" "ssecmp")
1576   (set_attr "length_immediate" "1")
1577   (set_attr "prefix" "vex")
1578   (set_attr "mode" "<MODE>")])
1579
;; Merge form of the AVX compare-with-immediate for the VF_128 modes.
;; Element 0 of the result comes from the UNSPEC_PCMP of operands 1, 2 and
;; the 0..31 immediate (operand 3); the other elements are copied from
;; operand 1 (vec_merge mask 1).  Printed with the scalar suffix.
1580(define_insn "avx_vmcmp<mode>3"
1581  [(set (match_operand:VF_128 0 "register_operand" "=x")
1582	(vec_merge:VF_128
1583	  (unspec:VF_128
1584	    [(match_operand:VF_128 1 "register_operand" "x")
1585	     (match_operand:VF_128 2 "nonimmediate_operand" "xm")
1586	     (match_operand:SI 3 "const_0_to_31_operand" "n")]
1587	    UNSPEC_PCMP)
1588	 (match_dup 1)
1589	 (const_int 1)))]
1590  "TARGET_AVX"
1591  "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1592  [(set_attr "type" "ssecmp")
1593   (set_attr "length_immediate" "1")
1594   (set_attr "prefix" "vex")
1595   (set_attr "mode" "<ssescalarmode>")])
1596
;; Mask-producing vector compare restricted to commutative comparison codes:
;; the condition requires operator 3 to be of class RTX_COMM_COMPARE, and
;; operand 1 carries the commutative "%" marker.  The comparison code is
;; printed into the mnemonic through the %D3 output modifier.
1597(define_insn "*<sse>_maskcmp<mode>3_comm"
1598  [(set (match_operand:VF 0 "register_operand" "=x,x")
1599	(match_operator:VF 3 "sse_comparison_operator"
1600	  [(match_operand:VF 1 "register_operand" "%0,x")
1601	   (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
1602  "TARGET_SSE
1603   && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
1604  "@
1605   cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1606   vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1607  [(set_attr "isa" "noavx,avx")
1608   (set_attr "type" "ssecmp")
1609   (set_attr "length_immediate" "1")
1610   (set_attr "prefix" "orig,vex")
1611   (set_attr "mode" "<MODE>")])
1612
;; General mask-producing vector compare for every mode in VF.  Any code
;; accepted by sse_comparison_operator is allowed as operator 3 and is
;; printed into the mnemonic through %D3.  Operands are not commutative
;; here: operand 1 has no "%" marker.
1613(define_insn "<sse>_maskcmp<mode>3"
1614  [(set (match_operand:VF 0 "register_operand" "=x,x")
1615	(match_operator:VF 3 "sse_comparison_operator"
1616	  [(match_operand:VF 1 "register_operand" "0,x")
1617	   (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
1618  "TARGET_SSE"
1619  "@
1620   cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1621   vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1622  [(set_attr "isa" "noavx,avx")
1623   (set_attr "type" "ssecmp")
1624   (set_attr "length_immediate" "1")
1625   (set_attr "prefix" "orig,vex")
1626   (set_attr "mode" "<MODE>")])
1627
;; Merge form of the mask compare for the VF_128 modes.  Element 0 of the
;; result comes from the comparison (operator 3) of operands 1 and 2; the
;; other elements are copied from operand 1 (vec_merge mask 1).  Note that
;; "length_immediate" is given per alternative here ("1,*"), unlike the
;; full-vector compare patterns in this section.
1628(define_insn "<sse>_vmmaskcmp<mode>3"
1629  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1630	(vec_merge:VF_128
1631	 (match_operator:VF_128 3 "sse_comparison_operator"
1632	   [(match_operand:VF_128 1 "register_operand" "0,x")
1633	    (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
1634	 (match_dup 1)
1635	 (const_int 1)))]
1636  "TARGET_SSE"
1637  "@
1638   cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %2}
1639   vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1640  [(set_attr "isa" "noavx,avx")
1641   (set_attr "type" "ssecmp")
1642   (set_attr "length_immediate" "1,*")
1643   (set_attr "prefix" "orig,vex")
1644   (set_attr "mode" "<ssescalarmode>")])
1645
;; Scalar compare that sets FLAGS_REG in CCFP mode.  Element 0 of operand 0
;; is compared with element 0 of operand 1 (both held in the vector mode
;; <ssevecmode> corresponding to the MODEF scalar mode).  The
;; "prefix_data16" attribute is 1 only when the insn mode is DF.
1646(define_insn "<sse>_comi"
1647  [(set (reg:CCFP FLAGS_REG)
1648	(compare:CCFP
1649	  (vec_select:MODEF
1650	    (match_operand:<ssevecmode> 0 "register_operand" "x")
1651	    (parallel [(const_int 0)]))
1652	  (vec_select:MODEF
1653	    (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1654	    (parallel [(const_int 0)]))))]
1655  "SSE_FLOAT_MODE_P (<MODE>mode)"
1656  "%vcomi<ssemodesuffix>\t{%1, %0|%0, %1}"
1657  [(set_attr "type" "ssecomi")
1658   (set_attr "prefix" "maybe_vex")
1659   (set_attr "prefix_rep" "0")
1660   (set (attr "prefix_data16")
1661	(if_then_else (eq_attr "mode" "DF")
1662		      (const_string "1")
1663		      (const_string "0")))
1664   (set_attr "mode" "<MODE>")])
1665
;; Scalar compare that sets FLAGS_REG in CCFPU mode (the comi pattern uses
;; CCFP instead).  Element 0 of operand 0 is compared with element 0 of
;; operand 1.  The "prefix_data16" attribute is 1 only when the insn mode
;; is DF.
1666(define_insn "<sse>_ucomi"
1667  [(set (reg:CCFPU FLAGS_REG)
1668	(compare:CCFPU
1669	  (vec_select:MODEF
1670	    (match_operand:<ssevecmode> 0 "register_operand" "x")
1671	    (parallel [(const_int 0)]))
1672	  (vec_select:MODEF
1673	    (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1674	    (parallel [(const_int 0)]))))]
1675  "SSE_FLOAT_MODE_P (<MODE>mode)"
1676  "%vucomi<ssemodesuffix>\t{%1, %0|%0, %1}"
1677  [(set_attr "type" "ssecomi")
1678   (set_attr "prefix" "maybe_vex")
1679   (set_attr "prefix_rep" "0")
1680   (set (attr "prefix_data16")
1681	(if_then_else (eq_attr "mode" "DF")
1682		      (const_string "1")
1683		      (const_string "0")))
1684   (set_attr "mode" "<MODE>")])
1685
;; 256-bit vector conditional select driven by a floating-point comparison:
;; operand 0 = (operands 4 and 5 compared by operator 3) ? operand 1
;; : operand 2.  Enabled under AVX and only when the data mode (V_256) and
;; the comparison mode (VF_256) have the same number of elements.  All work
;; is done by ix86_expand_fp_vcond, which is asserted to succeed.
1686(define_expand "vcond<V_256:mode><VF_256:mode>"
1687  [(set (match_operand:V_256 0 "register_operand")
1688	(if_then_else:V_256
1689	  (match_operator 3 ""
1690	    [(match_operand:VF_256 4 "nonimmediate_operand")
1691	     (match_operand:VF_256 5 "nonimmediate_operand")])
1692	  (match_operand:V_256 1 "general_operand")
1693	  (match_operand:V_256 2 "general_operand")))]
1694  "TARGET_AVX
1695   && (GET_MODE_NUNITS (<V_256:MODE>mode)
1696       == GET_MODE_NUNITS (<VF_256:MODE>mode))"
1697{
1698  bool ok = ix86_expand_fp_vcond (operands);
1699  gcc_assert (ok);
1700  DONE;
1701})
1702
;; 128-bit vector conditional select driven by a floating-point comparison:
;; operand 0 = (operands 4 and 5 compared by operator 3) ? operand 1
;; : operand 2.  Enabled under SSE and only when the data mode (V_128) and
;; the comparison mode (VF_128) have the same number of elements.  All work
;; is done by ix86_expand_fp_vcond, which is asserted to succeed.
1703(define_expand "vcond<V_128:mode><VF_128:mode>"
1704  [(set (match_operand:V_128 0 "register_operand")
1705	(if_then_else:V_128
1706	  (match_operator 3 ""
1707	    [(match_operand:VF_128 4 "nonimmediate_operand")
1708	     (match_operand:VF_128 5 "nonimmediate_operand")])
1709	  (match_operand:V_128 1 "general_operand")
1710	  (match_operand:V_128 2 "general_operand")))]
1711  "TARGET_SSE
1712   && (GET_MODE_NUNITS (<V_128:MODE>mode)
1713       == GET_MODE_NUNITS (<VF_128:MODE>mode))"
1714{
1715  bool ok = ix86_expand_fp_vcond (operands);
1716  gcc_assert (ok);
1717  DONE;
1718})
1719
1720;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1721;;
1722;; Parallel floating point logical operations
1723;;
1724;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1725
;; Bitwise and-not on floating-point vectors: operand 0 = ~operand 1 &
;; operand 2, for every mode in VF.
;; The output code builds the template at run time in a static 32-byte
;; buffer: the suffix is "ps" when the insn's "mode" attribute is V8SF or
;; V4SF and the mode's own <ssemodesuffix> otherwise; the mnemonic is
;; andn<suffix> for alternative 0 and vandn<suffix> for alternative 1.
;; The "mode" attribute is <ssePSmode> under
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL, else <MODE> under AVX, else V4SF
;; when the function is optimized for size, else <MODE>.
1726(define_insn "<sse>_andnot<mode>3"
1727  [(set (match_operand:VF 0 "register_operand" "=x,x")
1728	(and:VF
1729	  (not:VF
1730	    (match_operand:VF 1 "register_operand" "0,x"))
1731	  (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1732  "TARGET_SSE"
1733{
1734  static char buf[32];
1735  const char *ops;
1736  const char *suffix;
1737
1738  switch (get_attr_mode (insn))
1739    {
1740    case MODE_V8SF:
1741    case MODE_V4SF:
1742      suffix = "ps";
1743      break;
1744    default:
1745      suffix = "<ssemodesuffix>";
1746    }
1747
1748  switch (which_alternative)
1749    {
1750    case 0:
1751      ops = "andn%s\t{%%2, %%0|%%0, %%2}";
1752      break;
1753    case 1:
1754      ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1755      break;
1756    default:
1757      gcc_unreachable ();
1758    }
1759
1760  snprintf (buf, sizeof (buf), ops, suffix);
1761  return buf;
1762}
1763  [(set_attr "isa" "noavx,avx")
1764   (set_attr "type" "sselog")
1765   (set_attr "prefix" "orig,vex")
1766   (set (attr "mode")
1767	(cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1768		 (const_string "<ssePSmode>")
1769	       (match_test "TARGET_AVX")
1770		 (const_string "<MODE>")
1771	       (match_test "optimize_function_for_size_p (cfun)")
1772		 (const_string "V4SF")
1773	       ]
1774	       (const_string "<MODE>")))])
1775
;; Named expander for the bitwise logic codes in any_logic on the VF modes.
;; The only preparation step is ix86_fixup_binary_operands_no_copy; the
;; template RTL is then emitted.
1776(define_expand "<code><mode>3"
1777  [(set (match_operand:VF 0 "register_operand")
1778	(any_logic:VF
1779	  (match_operand:VF 1 "nonimmediate_operand")
1780	  (match_operand:VF 2 "nonimmediate_operand")))]
1781  "TARGET_SSE"
1782  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
1783
;; Bitwise logic insn (one per code in any_logic) on the VF modes, with
;; commutative operands ("%" on operand 1) gated by ix86_binary_operator_ok.
;; The output code builds the template at run time in a static 32-byte
;; buffer: the suffix is "ps" when the insn's "mode" attribute is V8SF or
;; V4SF and <ssemodesuffix> otherwise; the mnemonic is <logic><suffix> for
;; alternative 0 and v<logic><suffix> for alternative 1.
;; The "mode" attribute is <ssePSmode> under
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL, else <MODE> under AVX, else V4SF
;; when the function is optimized for size, else <MODE>.
1784(define_insn "*<code><mode>3"
1785  [(set (match_operand:VF 0 "register_operand" "=x,x")
1786	(any_logic:VF
1787	  (match_operand:VF 1 "nonimmediate_operand" "%0,x")
1788	  (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1789  "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1790{
1791  static char buf[32];
1792  const char *ops;
1793  const char *suffix;
1794
1795  switch (get_attr_mode (insn))
1796    {
1797    case MODE_V8SF:
1798    case MODE_V4SF:
1799      suffix = "ps";
1800      break;
1801    default:
1802      suffix = "<ssemodesuffix>";
1803    }
1804
1805  switch (which_alternative)
1806    {
1807    case 0:
1808      ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1809      break;
1810    case 1:
1811      ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1812      break;
1813    default:
1814      gcc_unreachable ();
1815    }
1816
1817  snprintf (buf, sizeof (buf), ops, suffix);
1818  return buf;
1819}
1820  [(set_attr "isa" "noavx,avx")
1821   (set_attr "type" "sselog")
1822   (set_attr "prefix" "orig,vex")
1823   (set (attr "mode")
1824	(cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1825		 (const_string "<ssePSmode>")
1826	       (match_test "TARGET_AVX")
1827		 (const_string "<MODE>")
1828	       (match_test "optimize_function_for_size_p (cfun)")
1829		 (const_string "V4SF")
1830	       ]
1831	       (const_string "<MODE>")))])
1832
;; Vector copysign expander for the VF modes, emitted as three sets:
;;   operand 4 = ~mask & operand 1
;;   operand 5 =  mask & operand 2
;;   operand 0 = operand 4 | operand 5
;; where mask (operand 3) is built by ix86_build_signbit_mask
;; (<MODE>mode, 1, 0) and operands 4 and 5 are fresh pseudo registers
;; created in the preparation code.
1833(define_expand "copysign<mode>3"
1834  [(set (match_dup 4)
1835	(and:VF
1836	  (not:VF (match_dup 3))
1837	  (match_operand:VF 1 "nonimmediate_operand")))
1838   (set (match_dup 5)
1839	(and:VF (match_dup 3)
1840		(match_operand:VF 2 "nonimmediate_operand")))
1841   (set (match_operand:VF 0 "register_operand")
1842	(ior:VF (match_dup 4) (match_dup 5)))]
1843  "TARGET_SSE"
1844{
1845  operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
1846
1847  operands[4] = gen_reg_rtx (<MODE>mode);
1848  operands[5] = gen_reg_rtx (<MODE>mode);
1849})
1850
1851;; Also define scalar versions.  These are used for abs, neg, and
1852;; conditional move.  Using subregs into vector modes causes register
1853;; allocation lossage.  These patterns do not allow memory operands
1854;; because the native instructions read the full 128-bits.
1855
;; Scalar-mode (MODEF) bitwise and-not: operand 0 = ~operand 1 & operand 2.
;; All operands are registers; no memory alternative is offered.
;; The output code builds the template in a static 32-byte buffer, using the
;; suffix "ps" when the insn's "mode" attribute is V4SF and
;; <ssevecmodesuffix> otherwise; alternative 0 prints andn<suffix>,
;; alternative 1 prints vandn<suffix>.
;; The "mode" attribute is V4SF under TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL,
;; else <ssevecmode> under AVX, else V4SF when the function is optimized for
;; size, else <ssevecmode>.
1856(define_insn "*andnot<mode>3"
1857  [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1858	(and:MODEF
1859	  (not:MODEF
1860	    (match_operand:MODEF 1 "register_operand" "0,x"))
1861	    (match_operand:MODEF 2 "register_operand" "x,x")))]
1862  "SSE_FLOAT_MODE_P (<MODE>mode)"
1863{
1864  static char buf[32];
1865  const char *ops;
1866  const char *suffix
1867    = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
1868
1869  switch (which_alternative)
1870    {
1871    case 0:
1872      ops = "andn%s\t{%%2, %%0|%%0, %%2}";
1873      break;
1874    case 1:
1875      ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1876      break;
1877    default:
1878      gcc_unreachable ();
1879    }
1880
1881  snprintf (buf, sizeof (buf), ops, suffix);
1882  return buf;
1883}
1884  [(set_attr "isa" "noavx,avx")
1885   (set_attr "type" "sselog")
1886   (set_attr "prefix" "orig,vex")
1887   (set (attr "mode")
1888	(cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1889		 (const_string "V4SF")
1890	       (match_test "TARGET_AVX")
1891		 (const_string "<ssevecmode>")
1892	       (match_test "optimize_function_for_size_p (cfun)")
1893		 (const_string "V4SF")
1894	       ]
1895	       (const_string "<ssevecmode>")))])
1896
;; TFmode bitwise and-not: operand 0 = ~operand 1 & operand 2, where
;; operand 2 may be a register or memory.
;; The output code picks the base mnemonic "andnps" when the insn's "mode"
;; attribute is V4SF and "pandn" otherwise, prepends "v" for alternative 1,
;; and formats the result into a static 32-byte buffer.
;; The "mode" attribute is V4SF under TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL,
;; else TI under AVX, else V4SF when SSE2 is unavailable or the function is
;; optimized for size, else TI.  "prefix_data16" is 1 only for alternative 0
;; in TI mode.
1897(define_insn "*andnottf3"
1898  [(set (match_operand:TF 0 "register_operand" "=x,x")
1899	(and:TF
1900	  (not:TF (match_operand:TF 1 "register_operand" "0,x"))
1901	  (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
1902  "TARGET_SSE"
1903{
1904  static char buf[32];
1905  const char *ops;
1906  const char *tmp
1907    = (get_attr_mode (insn) == MODE_V4SF) ? "andnps" : "pandn";
1908
1909  switch (which_alternative)
1910    {
1911    case 0:
1912      ops = "%s\t{%%2, %%0|%%0, %%2}";
1913      break;
1914    case 1:
1915      ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1916      break;
1917    default:
1918      gcc_unreachable ();
1919    }
1920
1921  snprintf (buf, sizeof (buf), ops, tmp);
1922  return buf;
1923}
1924  [(set_attr "isa" "noavx,avx")
1925   (set_attr "type" "sselog")
1926   (set (attr "prefix_data16")
1927     (if_then_else
1928       (and (eq_attr "alternative" "0")
1929	    (eq_attr "mode" "TI"))
1930       (const_string "1")
1931       (const_string "*")))
1932   (set_attr "prefix" "orig,vex")
1933   (set (attr "mode")
1934	(cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1935		 (const_string "V4SF")
1936	       (match_test "TARGET_AVX")
1937		 (const_string "TI")
1938	       (ior (not (match_test "TARGET_SSE2"))
1939		    (match_test "optimize_function_for_size_p (cfun)"))
1940		 (const_string "V4SF")
1941	       ]
1942	       (const_string "TI")))])
1943
;; Scalar logical operation (any_logic) on MODEF values, all operands in
;; registers; op1 is marked commutative ("%").  Alternative 0 (isa "noavx")
;; ties op1 to the destination and emits "<logic><suffix>"; alternative 1
;; (isa "avx") emits the three-operand "v<logic><suffix>".  The suffix is
;; "ps" when the "mode" attribute is V4SF, otherwise <ssevecmodesuffix>.
1944(define_insn "*<code><mode>3"
1945  [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1946	(any_logic:MODEF
1947	  (match_operand:MODEF 1 "register_operand" "%0,x")
1948	  (match_operand:MODEF 2 "register_operand" "x,x")))]
1949  "SSE_FLOAT_MODE_P (<MODE>mode)"
1950{
  /* Static storage, because the pointer is returned to the caller.  */
1951  static char buf[32];
1952  const char *ops;
1953  const char *suffix
1954    = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
1955
1956  switch (which_alternative)
1957    {
1958    case 0:
1959      ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1960      break;
1961    case 1:
1962      ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1963      break;
1964    default:
1965      gcc_unreachable ();
1966    }
1967
  /* "%s" receives the suffix; each "%%" becomes a literal '%'.  */
1968  snprintf (buf, sizeof (buf), ops, suffix);
1969  return buf;
1970}
1971  [(set_attr "isa" "noavx,avx")
1972   (set_attr "type" "sselog")
1973   (set_attr "prefix" "orig,vex")
   ;; Same mode selection as the scalar and-not pattern: V4SF when
   ;; packed-single is preferred or (without AVX) when optimizing for size,
   ;; otherwise the natural vector mode.
1974   (set (attr "mode")
1975	(cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1976		 (const_string "V4SF")
1977	       (match_test "TARGET_AVX")
1978		 (const_string "<ssevecmode>")
1979	       (match_test "optimize_function_for_size_p (cfun)")
1980		 (const_string "V4SF")
1981	       ]
1982	       (const_string "<ssevecmode>")))])
1983
;; Named expander for TF logical operations (any_logic), enabled under
;; TARGET_SSE.  The preparation statement hands the operands to
;; ix86_fixup_binary_operands_no_copy (defined elsewhere; presumably it
;; legitimizes the operand combination in place -- confirm there).
1984(define_expand "<code>tf3"
1985  [(set (match_operand:TF 0 "register_operand")
1986	(any_logic:TF
1987	  (match_operand:TF 1 "nonimmediate_operand")
1988	  (match_operand:TF 2 "nonimmediate_operand")))]
1989  "TARGET_SSE"
1990  "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
1991
;; TF logical operation (any_logic).  Matches only when
;; ix86_binary_operator_ok accepts the operands.  The mnemonic is
;; "<logic>ps" when the "mode" attribute is V4SF and "p<logic>" otherwise;
;; alternative 1 (isa "avx") prepends "v" and uses the three-operand form.
1992(define_insn "*<code>tf3"
1993  [(set (match_operand:TF 0 "register_operand" "=x,x")
1994	(any_logic:TF
1995	  (match_operand:TF 1 "nonimmediate_operand" "%0,x")
1996	  (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
1997  "TARGET_SSE
1998   && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
1999{
  /* Static storage, because the pointer is returned to the caller.  */
2000  static char buf[32];
2001  const char *ops;
2002  const char *tmp
2003    = (get_attr_mode (insn) == MODE_V4SF) ? "<logic>ps" : "p<logic>";
2004
2005  switch (which_alternative)
2006    {
2007    case 0:
2008      ops = "%s\t{%%2, %%0|%%0, %%2}";
2009      break;
2010    case 1:
2011      ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2012      break;
2013    default:
2014      gcc_unreachable ();
2015    }
2016
  /* "%s" receives the mnemonic; each "%%" becomes a literal '%'.  */
2017  snprintf (buf, sizeof (buf), ops, tmp);
2018  return buf;
2019}
2020  [(set_attr "isa" "noavx,avx")
2021   (set_attr "type" "sselog")
   ;; prefix_data16 is 1 only for alternative 0 when the mode attribute is
   ;; TI; in every other case it is left at "*".
2022   (set (attr "prefix_data16")
2023     (if_then_else
2024       (and (eq_attr "alternative" "0")
2025	    (eq_attr "mode" "TI"))
2026       (const_string "1")
2027       (const_string "*")))
2028   (set_attr "prefix" "orig,vex")
   ;; Mode selection, first matching test wins: V4SF when packed-single is
   ;; preferred, TI under AVX, V4SF without SSE2 or when optimizing for
   ;; size, and TI otherwise.
2029   (set (attr "mode")
2030	(cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
2031		 (const_string "V4SF")
2032	       (match_test "TARGET_AVX")
2033		 (const_string "TI")
2034	       (ior (not (match_test "TARGET_SSE2"))
2035		    (match_test "optimize_function_for_size_p (cfun)"))
2036		 (const_string "V4SF")
2037	       ]
2038	       (const_string "TI")))])
2039
2040;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2041;;
2042;; FMA floating point multiply/accumulate instructions.  These include
2043;; scalar versions of the instructions as well as vector versions.
2044;;
2045;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2046
2047;; The standard names for scalar FMA are only available with SSE math enabled.
;; Modes for the standard-name FMA expanders: SF and DF take part only when
;; TARGET_SSE_MATH holds; the four vector modes are unconditional.
2048(define_mode_iterator FMAMODEM [(SF "TARGET_SSE_MATH")
2049				(DF "TARGET_SSE_MATH")
2050				V4SF V2DF V8SF V4DF])
2051
;; Standard-name fused multiply-add: op0 = op1 * op2 + op3, for every mode
;; in FMAMODEM, enabled when either FMA or FMA4 is available.
2052(define_expand "fma<mode>4"
2053  [(set (match_operand:FMAMODEM 0 "register_operand")
2054	(fma:FMAMODEM
2055	  (match_operand:FMAMODEM 1 "nonimmediate_operand")
2056	  (match_operand:FMAMODEM 2 "nonimmediate_operand")
2057	  (match_operand:FMAMODEM 3 "nonimmediate_operand")))]
2058  "TARGET_FMA || TARGET_FMA4")
2059
;; Standard-name fused multiply-subtract: op0 = op1 * op2 + (-op3), i.e. the
;; addend is negated.  Same modes and enabling condition as fma<mode>4.
2060(define_expand "fms<mode>4"
2061  [(set (match_operand:FMAMODEM 0 "register_operand")
2062	(fma:FMAMODEM
2063	  (match_operand:FMAMODEM 1 "nonimmediate_operand")
2064	  (match_operand:FMAMODEM 2 "nonimmediate_operand")
2065	  (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))]
2066  "TARGET_FMA || TARGET_FMA4")
2067
;; Standard-name negated fused multiply-add: op0 = (-op1) * op2 + op3, i.e.
;; the first multiplicand is negated.
2068(define_expand "fnma<mode>4"
2069  [(set (match_operand:FMAMODEM 0 "register_operand")
2070	(fma:FMAMODEM
2071	  (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
2072	  (match_operand:FMAMODEM 2 "nonimmediate_operand")
2073	  (match_operand:FMAMODEM 3 "nonimmediate_operand")))]
2074  "TARGET_FMA || TARGET_FMA4")
2075
;; Standard-name negated fused multiply-subtract:
;; op0 = (-op1) * op2 + (-op3); both the first multiplicand and the addend
;; are negated.
2076(define_expand "fnms<mode>4"
2077  [(set (match_operand:FMAMODEM 0 "register_operand")
2078	(fma:FMAMODEM
2079	  (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
2080	  (match_operand:FMAMODEM 2 "nonimmediate_operand")
2081	  (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))]
2082  "TARGET_FMA || TARGET_FMA4")
2083
2084;; The builtins for intrinsics are not constrained by SSE math enabled.
;; Unlike FMAMODEM, SF and DF carry no TARGET_SSE_MATH condition here.
2085(define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
2086
;; Named expander for fused multiply-add over FMAMODE:
;; op0 = op1 * op2 + op3.  It generates the plain fma RTL with no
;; preparation code.
2087(define_expand "fma4i_fmadd_<mode>"
2088  [(set (match_operand:FMAMODE 0 "register_operand")
2089	(fma:FMAMODE
2090	  (match_operand:FMAMODE 1 "nonimmediate_operand")
2091	  (match_operand:FMAMODE 2 "nonimmediate_operand")
2092	  (match_operand:FMAMODE 3 "nonimmediate_operand")))]
2093  "TARGET_FMA || TARGET_FMA4")
2094
;; Fused multiply-add, op0 = op1 * op2 + op3, with five alternatives.
;; Alternatives 0-2 (isa "fma") tie one input to the destination and pick
;; the 132, 213 or 231 form accordingly; alternatives 3-4 (isa "fma4") use
;; the four-operand form with a separate destination.  op1 is marked
;; commutative ("%").
2095(define_insn "*fma_fmadd_<mode>"
2096  [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
2097	(fma:FMAMODE
2098	  (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x")
2099	  (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m")
2100	  (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x")))]
2101  "TARGET_FMA || TARGET_FMA4"
2102  "@
2103   vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2104   vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2105   vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
2106   vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2107   vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2108  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
2109   (set_attr "type" "ssemuladd")
2110   (set_attr "mode" "<MODE>")])
2111
;; Fused multiply-subtract, op0 = op1 * op2 + (-op3).  Same alternative
;; layout as the fmadd pattern: three FMA forms (132/213/231) followed by
;; two FMA4 four-operand forms.
2112(define_insn "*fma_fmsub_<mode>"
2113  [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
2114	(fma:FMAMODE
2115	  (match_operand:FMAMODE   1 "nonimmediate_operand" "%0, 0,x, x,x")
2116	  (match_operand:FMAMODE   2 "nonimmediate_operand" "xm, x,xm,x,m")
2117	  (neg:FMAMODE
2118	    (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x"))))]
2119  "TARGET_FMA || TARGET_FMA4"
2120  "@
2121   vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2122   vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2123   vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
2124   vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2125   vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2126  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
2127   (set_attr "type" "ssemuladd")
2128   (set_attr "mode" "<MODE>")])
2129
;; Negated fused multiply-add, op0 = (-op1) * op2 + op3.  Three FMA
;; alternatives (132/213/231) followed by two FMA4 four-operand
;; alternatives.
2130(define_insn "*fma_fnmadd_<mode>"
2131  [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
2132	(fma:FMAMODE
2133	  (neg:FMAMODE
2134	    (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x"))
2135	  (match_operand:FMAMODE   2 "nonimmediate_operand" "xm, x,xm,x,m")
2136	  (match_operand:FMAMODE   3 "nonimmediate_operand" " x,xm,0,xm,x")))]
2137  "TARGET_FMA || TARGET_FMA4"
2138  "@
2139   vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2140   vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2141   vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
2142   vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2143   vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2144  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
2145   (set_attr "type" "ssemuladd")
2146   (set_attr "mode" "<MODE>")])
2147
;; Negated fused multiply-subtract, op0 = (-op1) * op2 + (-op3).  Three FMA
;; alternatives (132/213/231) followed by two FMA4 four-operand
;; alternatives.
2148(define_insn "*fma_fnmsub_<mode>"
2149  [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
2150	(fma:FMAMODE
2151	  (neg:FMAMODE
2152	    (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x"))
2153	  (match_operand:FMAMODE   2 "nonimmediate_operand" "xm, x,xm,x,m")
2154	  (neg:FMAMODE
2155	    (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x"))))]
2156  "TARGET_FMA || TARGET_FMA4"
2157  "@
2158   vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2159   vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2160   vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
2161   vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2162   vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2163  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
2164   (set_attr "type" "ssemuladd")
2165   (set_attr "mode" "<MODE>")])
2166
2167;; FMA parallel floating point multiply addsub and subadd operations.
2168
2169;; It would be possible to represent these without the UNSPEC as
2170;;
2171;; (vec_merge
2172;;   (fma op1 op2 op3)
2173;;   (fma op1 op2 (neg op3))
2174;;   (merge-const))
2175;;
2176;; But this doesn't seem useful in practice.
2177
;; Named expander for the packed multiply add/sub operation over VF modes.
;; The operation is kept opaque as UNSPEC_FMADDSUB of the three inputs.
2178(define_expand "fmaddsub_<mode>"
2179  [(set (match_operand:VF 0 "register_operand")
2180	(unspec:VF
2181	  [(match_operand:VF 1 "nonimmediate_operand")
2182	   (match_operand:VF 2 "nonimmediate_operand")
2183	   (match_operand:VF 3 "nonimmediate_operand")]
2184	  UNSPEC_FMADDSUB))]
2185  "TARGET_FMA || TARGET_FMA4")
2186
;; Matches UNSPEC_FMADDSUB of (op1, op2, op3) and emits vfmaddsub.
;; Alternatives 0-2 (isa "fma") are the 132/213/231 forms with one input
;; tied to the destination; alternatives 3-4 (isa "fma4") are the
;; four-operand form.
2187(define_insn "*fma_fmaddsub_<mode>"
2188  [(set (match_operand:VF 0 "register_operand" "=x,x,x,x,x")
2189	(unspec:VF
2190	  [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x, x,x")
2191	   (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm,x,m")
2192	   (match_operand:VF 3 "nonimmediate_operand" " x,xm,0,xm,x")]
2193	  UNSPEC_FMADDSUB))]
2194  "TARGET_FMA || TARGET_FMA4"
2195  "@
2196   vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2197   vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2198   vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
2199   vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2200   vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2201  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
2202   (set_attr "type" "ssemuladd")
2203   (set_attr "mode" "<MODE>")])
2204
;; Matches UNSPEC_FMADDSUB whose third input is negated, i.e.
;; (op1, op2, -op3), and emits vfmsubadd.  Alternative layout is the same
;; as for the fmaddsub pattern.
2205(define_insn "*fma_fmsubadd_<mode>"
2206  [(set (match_operand:VF 0 "register_operand" "=x,x,x,x,x")
2207	(unspec:VF
2208	  [(match_operand:VF   1 "nonimmediate_operand" "%0, 0,x, x,x")
2209	   (match_operand:VF   2 "nonimmediate_operand" "xm, x,xm,x,m")
2210	   (neg:VF
2211	     (match_operand:VF 3 "nonimmediate_operand" " x,xm,0,xm,x"))]
2212	  UNSPEC_FMADDSUB))]
2213  "TARGET_FMA || TARGET_FMA4"
2214  "@
2215   vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2216   vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2217   vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
2218   vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2219   vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2220  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
2221   (set_attr "type" "ssemuladd")
2222   (set_attr "mode" "<MODE>")])
2223
2224;; FMA3 floating point scalar intrinsics.  These merge the result with the
2225;; high-order elements of operand 1, which is tied to the destination register.
2226
;; Named expander for the FMA scalar multiply-add on 128-bit vector modes.
;; The merge mask (const_int 1) takes element 0 from the fma result and the
;; remaining elements from operand 1.
2227(define_expand "fmai_vmfmadd_<mode>"
2228  [(set (match_operand:VF_128 0 "register_operand")
2229	(vec_merge:VF_128
2230	  (fma:VF_128
2231	    (match_operand:VF_128 1 "nonimmediate_operand")
2232	    (match_operand:VF_128 2 "nonimmediate_operand")
2233	    (match_operand:VF_128 3 "nonimmediate_operand"))
2234	  (match_dup 1)
2235	  (const_int 1)))]
2236  "TARGET_FMA")
2237
;; Scalar FMA multiply-add: element 0 = op1 * op2 + op3, other elements
;; copied from op1.  op1 is tied to the destination in both alternatives;
;; the 132 form is used when op2 may be memory and the 213 form when op3
;; may be memory.
2238(define_insn "*fmai_fmadd_<mode>"
2239  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2240        (vec_merge:VF_128
2241	  (fma:VF_128
2242	    (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
2243	    (match_operand:VF_128 2 "nonimmediate_operand" "xm, x")
2244	    (match_operand:VF_128 3 "nonimmediate_operand" " x,xm"))
2245	  (match_dup 1)
2246	  (const_int 1)))]
2247  "TARGET_FMA"
2248  "@
2249   vfmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
2250   vfmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"
2251  [(set_attr "type" "ssemuladd")
2252   (set_attr "mode" "<MODE>")])
2253
;; Scalar FMA multiply-subtract: element 0 = op1 * op2 + (-op3), other
;; elements copied from op1, which is tied to the destination.
2254(define_insn "*fmai_fmsub_<mode>"
2255  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2256        (vec_merge:VF_128
2257	  (fma:VF_128
2258	    (match_operand:VF_128   1 "nonimmediate_operand" " 0, 0")
2259	    (match_operand:VF_128   2 "nonimmediate_operand" "xm, x")
2260	    (neg:VF_128
2261	      (match_operand:VF_128 3 "nonimmediate_operand" " x,xm")))
2262	  (match_dup 1)
2263	  (const_int 1)))]
2264  "TARGET_FMA"
2265  "@
2266   vfmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
2267   vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"
2268  [(set_attr "type" "ssemuladd")
2269   (set_attr "mode" "<MODE>")])
2270
;; Scalar FMA negated multiply-add: element 0 = (-op2) * op1 + op3, other
;; elements copied from op1.  Note the operand numbering: the negated
;; multiplicand is operand 2, while operand 1 (tied to the destination) is
;; the second multiplicand.
2271(define_insn "*fmai_fnmadd_<mode>"
2272  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2273        (vec_merge:VF_128
2274	  (fma:VF_128
2275	    (neg:VF_128
2276	      (match_operand:VF_128 2 "nonimmediate_operand" "xm, x"))
2277	    (match_operand:VF_128   1 "nonimmediate_operand" " 0, 0")
2278	    (match_operand:VF_128   3 "nonimmediate_operand" " x,xm"))
2279	  (match_dup 1)
2280	  (const_int 1)))]
2281  "TARGET_FMA"
2282  "@
2283   vfnmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
2284   vfnmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"
2285  [(set_attr "type" "ssemuladd")
2286   (set_attr "mode" "<MODE>")])
2287
;; Scalar FMA negated multiply-subtract: element 0 = (-op2) * op1 + (-op3),
;; other elements copied from op1.  As in the fnmadd pattern, the negated
;; multiplicand is operand 2 and operand 1 is tied to the destination.
2288(define_insn "*fmai_fnmsub_<mode>"
2289  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2290        (vec_merge:VF_128
2291	  (fma:VF_128
2292	    (neg:VF_128
2293	      (match_operand:VF_128 2 "nonimmediate_operand" "xm, x"))
2294	    (match_operand:VF_128   1 "nonimmediate_operand" " 0, 0")
2295	    (neg:VF_128
2296	      (match_operand:VF_128 3 "nonimmediate_operand" " x,xm")))
2297	  (match_dup 1)
2298	  (const_int 1)))]
2299  "TARGET_FMA"
2300  "@
2301   vfnmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
2302   vfnmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"
2303  [(set_attr "type" "ssemuladd")
2304   (set_attr "mode" "<MODE>")])
2305
2306;; FMA4 floating point scalar intrinsics.  These write the
2307;; entire destination register, with the high-order elements zeroed.
2308
;; Named expander for the FMA4 scalar multiply-add.  Element 0 comes from
;; the fma result; the remaining elements come from operand 4, which the
;; preparation statement sets to the zero constant of the vector mode.
2309(define_expand "fma4i_vmfmadd_<mode>"
2310  [(set (match_operand:VF_128 0 "register_operand")
2311	(vec_merge:VF_128
2312	  (fma:VF_128
2313	    (match_operand:VF_128 1 "nonimmediate_operand")
2314	    (match_operand:VF_128 2 "nonimmediate_operand")
2315	    (match_operand:VF_128 3 "nonimmediate_operand"))
2316	  (match_dup 4)
2317	  (const_int 1)))]
2318  "TARGET_FMA4"
2319  "operands[4] = CONST0_RTX (<MODE>mode);")
2320
;; FMA4 scalar multiply-add: element 0 = op1 * op2 + op3, other elements
;; taken from the zero constant op4.  The destination is not tied to any
;; input; memory is allowed for op3 (alternative 0) or op2 (alternative 1).
2321(define_insn "*fma4i_vmfmadd_<mode>"
2322  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2323	(vec_merge:VF_128
2324	  (fma:VF_128
2325	    (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
2326	    (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
2327	    (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
2328	  (match_operand:VF_128 4 "const0_operand")
2329	  (const_int 1)))]
2330  "TARGET_FMA4"
2331  "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2332  [(set_attr "type" "ssemuladd")
2333   (set_attr "mode" "<MODE>")])
2334
;; FMA4 scalar multiply-subtract: element 0 = op1 * op2 + (-op3), other
;; elements taken from the zero constant op4.
2335(define_insn "*fma4i_vmfmsub_<mode>"
2336  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2337	(vec_merge:VF_128
2338	  (fma:VF_128
2339	    (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
2340	    (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
2341	    (neg:VF_128
2342	      (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
2343	  (match_operand:VF_128 4 "const0_operand")
2344	  (const_int 1)))]
2345  "TARGET_FMA4"
2346  "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2347  [(set_attr "type" "ssemuladd")
2348   (set_attr "mode" "<MODE>")])
2349
;; FMA4 scalar negated multiply-add: element 0 = (-op1) * op2 + op3, other
;; elements taken from the zero constant op4.
2350(define_insn "*fma4i_vmfnmadd_<mode>"
2351  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2352	(vec_merge:VF_128
2353	  (fma:VF_128
2354	    (neg:VF_128
2355	      (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
2356	    (match_operand:VF_128   2 "nonimmediate_operand" " x,m")
2357	    (match_operand:VF_128   3 "nonimmediate_operand" "xm,x"))
2358	  (match_operand:VF_128 4 "const0_operand")
2359	  (const_int 1)))]
2360  "TARGET_FMA4"
2361  "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2362  [(set_attr "type" "ssemuladd")
2363   (set_attr "mode" "<MODE>")])
2364
;; FMA4 scalar negated multiply-subtract:
;; element 0 = (-op1) * op2 + (-op3), other elements taken from the zero
;; constant op4.
2365(define_insn "*fma4i_vmfnmsub_<mode>"
2366  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2367	(vec_merge:VF_128
2368	  (fma:VF_128
2369	    (neg:VF_128
2370	      (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
2371	    (match_operand:VF_128   2 "nonimmediate_operand" " x,m")
2372	    (neg:VF_128
2373	      (match_operand:VF_128   3 "nonimmediate_operand" "xm,x")))
2374	  (match_operand:VF_128 4 "const0_operand")
2375	  (const_int 1)))]
2376  "TARGET_FMA4"
2377  "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2378  [(set_attr "type" "ssemuladd")
2379   (set_attr "mode" "<MODE>")])
2380
2381;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2382;;
2383;; Parallel single-precision floating point conversion operations
2384;;
2385;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2386
;; Converts the V2SI operand 2 (constraint "ym") to V2SF and merges it into
;; operand 1, which is tied to the destination.  Merge mask 3 takes
;; elements 0 and 1 from the converted value and the rest from operand 1.
2387(define_insn "sse_cvtpi2ps"
2388  [(set (match_operand:V4SF 0 "register_operand" "=x")
2389	(vec_merge:V4SF
2390	  (vec_duplicate:V4SF
2391	    (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
2392	  (match_operand:V4SF 1 "register_operand" "0")
2393	  (const_int 3)))]
2394  "TARGET_SSE"
2395  "cvtpi2ps\t{%2, %0|%0, %2}"
2396  [(set_attr "type" "ssecvt")
2397   (set_attr "mode" "V4SF")])
2398
;; Non-truncating (UNSPEC_FIX_NOTRUNC) conversion of a V4SF operand to
;; integers, keeping elements 0 and 1 as a V2SI result in a "y"-constraint
;; register.
2399(define_insn "sse_cvtps2pi"
2400  [(set (match_operand:V2SI 0 "register_operand" "=y")
2401	(vec_select:V2SI
2402	  (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2403		       UNSPEC_FIX_NOTRUNC)
2404	  (parallel [(const_int 0) (const_int 1)])))]
2405  "TARGET_SSE"
2406  "cvtps2pi\t{%1, %0|%0, %1}"
2407  [(set_attr "type" "ssecvt")
2408   (set_attr "unit" "mmx")
2409   (set_attr "mode" "DI")])
2410
;; Truncating (fix) conversion of a V4SF operand to integers, keeping
;; elements 0 and 1 as a V2SI result in a "y"-constraint register.
2411(define_insn "sse_cvttps2pi"
2412  [(set (match_operand:V2SI 0 "register_operand" "=y")
2413	(vec_select:V2SI
2414	  (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
2415	  (parallel [(const_int 0) (const_int 1)])))]
2416  "TARGET_SSE"
2417  "cvttps2pi\t{%1, %0|%0, %1}"
2418  [(set_attr "type" "ssecvt")
2419   (set_attr "unit" "mmx")
2420   (set_attr "prefix_rep" "0")
2421   (set_attr "mode" "SF")])
2422
;; Converts the SI operand 2 to SF and places it in element 0 of the V4SF
;; result; the other elements come from operand 1.  Alternatives 0 and 1
;; (isa "noavx") tie operand 1 to the destination and differ only in
;; whether operand 2 is a register or memory, which lets the per-CPU
;; decode attributes differ.  Alternative 2 (isa "avx") is the
;; three-operand form.
2423(define_insn "sse_cvtsi2ss"
2424  [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2425	(vec_merge:V4SF
2426	  (vec_duplicate:V4SF
2427	    (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2428	  (match_operand:V4SF 1 "register_operand" "0,0,x")
2429	  (const_int 1)))]
2430  "TARGET_SSE"
2431  "@
2432   cvtsi2ss\t{%2, %0|%0, %2}
2433   cvtsi2ss\t{%2, %0|%0, %2}
2434   vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
2435  [(set_attr "isa" "noavx,noavx,avx")
2436   (set_attr "type" "sseicvt")
2437   (set_attr "athlon_decode" "vector,double,*")
2438   (set_attr "amdfam10_decode" "vector,double,*")
2439   (set_attr "bdver1_decode" "double,direct,*")
2440   (set_attr "btver2_decode" "double,double,double")
2441   (set_attr "prefix" "orig,orig,vex")
2442   (set_attr "mode" "SF")])
2443
;; 64-bit variant of sse_cvtsi2ss: converts the DI operand 2 to SF and
;; places it in element 0 of the V4SF result, the other elements coming
;; from operand 1.  Requires TARGET_64BIT.  The non-AVX alternatives set
;; prefix_rex to 1; the AVX alternative sets length_vex to 4.
2444(define_insn "sse_cvtsi2ssq"
2445  [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2446	(vec_merge:V4SF
2447	  (vec_duplicate:V4SF
2448	    (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2449	  (match_operand:V4SF 1 "register_operand" "0,0,x")
2450	  (const_int 1)))]
2451  "TARGET_SSE && TARGET_64BIT"
2452  "@
2453   cvtsi2ssq\t{%2, %0|%0, %2}
2454   cvtsi2ssq\t{%2, %0|%0, %2}
2455   vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
2456  [(set_attr "isa" "noavx,noavx,avx")
2457   (set_attr "type" "sseicvt")
2458   (set_attr "athlon_decode" "vector,double,*")
2459   (set_attr "amdfam10_decode" "vector,double,*")
2460   (set_attr "bdver1_decode" "double,direct,*")
2461   (set_attr "btver2_decode" "double,double,double")
2462   (set_attr "length_vex" "*,*,4")
2463   (set_attr "prefix_rex" "1,1,*")
2464   (set_attr "prefix" "orig,orig,vex")
2465   (set_attr "mode" "SF")])
2466
;; Non-truncating (UNSPEC_FIX_NOTRUNC) conversion of element 0 of a V4SF
;; operand to SI in a general register.  The two alternatives differ in
;; whether the source is a register or memory.
;; NOTE(review): unlike sse_cvtss2si_2, this pattern sets no
;; amdfam10_decode attribute -- confirm that is intended.
2467(define_insn "sse_cvtss2si"
2468  [(set (match_operand:SI 0 "register_operand" "=r,r")
2469	(unspec:SI
2470	  [(vec_select:SF
2471	     (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2472	     (parallel [(const_int 0)]))]
2473	  UNSPEC_FIX_NOTRUNC))]
2474  "TARGET_SSE"
2475  "%vcvtss2si\t{%1, %0|%0, %1}"
2476  [(set_attr "type" "sseicvt")
2477   (set_attr "athlon_decode" "double,vector")
2478   (set_attr "bdver1_decode" "double,double")
2479   (set_attr "prefix_rep" "1")
2480   (set_attr "prefix" "maybe_vex")
2481   (set_attr "mode" "SI")])
2482
;; Same conversion as sse_cvtss2si, but the source is a plain SF operand
;; rather than element 0 selected from a V4SF vector.
2483(define_insn "sse_cvtss2si_2"
2484  [(set (match_operand:SI 0 "register_operand" "=r,r")
2485	(unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2486		   UNSPEC_FIX_NOTRUNC))]
2487  "TARGET_SSE"
2488  "%vcvtss2si\t{%1, %0|%0, %1}"
2489  [(set_attr "type" "sseicvt")
2490   (set_attr "athlon_decode" "double,vector")
2491   (set_attr "amdfam10_decode" "double,double")
2492   (set_attr "bdver1_decode" "double,double")
2493   (set_attr "prefix_rep" "1")
2494   (set_attr "prefix" "maybe_vex")
2495   (set_attr "mode" "SI")])
2496
;; Non-truncating (UNSPEC_FIX_NOTRUNC) conversion of element 0 of a V4SF
;; operand to DI in a general register.  Requires TARGET_64BIT.  The "{q}"
;; in the template is a brace-delimited mnemonic suffix.
2497(define_insn "sse_cvtss2siq"
2498  [(set (match_operand:DI 0 "register_operand" "=r,r")
2499	(unspec:DI
2500	  [(vec_select:SF
2501	     (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2502	     (parallel [(const_int 0)]))]
2503	  UNSPEC_FIX_NOTRUNC))]
2504  "TARGET_SSE && TARGET_64BIT"
2505  "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2506  [(set_attr "type" "sseicvt")
2507   (set_attr "athlon_decode" "double,vector")
2508   (set_attr "bdver1_decode" "double,double")
2509   (set_attr "prefix_rep" "1")
2510   (set_attr "prefix" "maybe_vex")
2511   (set_attr "mode" "DI")])
2512
;; Same conversion as sse_cvtss2siq, but the source is a plain SF operand
;; rather than element 0 selected from a V4SF vector.
2513(define_insn "sse_cvtss2siq_2"
2514  [(set (match_operand:DI 0 "register_operand" "=r,r")
2515	(unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2516		   UNSPEC_FIX_NOTRUNC))]
2517  "TARGET_SSE && TARGET_64BIT"
2518  "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2519  [(set_attr "type" "sseicvt")
2520   (set_attr "athlon_decode" "double,vector")
2521   (set_attr "amdfam10_decode" "double,double")
2522   (set_attr "bdver1_decode" "double,double")
2523   (set_attr "prefix_rep" "1")
2524   (set_attr "prefix" "maybe_vex")
2525   (set_attr "mode" "DI")])
2526
;; Truncating (fix) conversion of element 0 of a V4SF operand to SI in a
;; general register.
2527(define_insn "sse_cvttss2si"
2528  [(set (match_operand:SI 0 "register_operand" "=r,r")
2529	(fix:SI
2530	  (vec_select:SF
2531	    (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2532	    (parallel [(const_int 0)]))))]
2533  "TARGET_SSE"
2534  "%vcvttss2si\t{%1, %0|%0, %1}"
2535  [(set_attr "type" "sseicvt")
2536   (set_attr "athlon_decode" "double,vector")
2537   (set_attr "amdfam10_decode" "double,double")
2538   (set_attr "bdver1_decode" "double,double")
2539   (set_attr "prefix_rep" "1")
2540   (set_attr "prefix" "maybe_vex")
2541   (set_attr "mode" "SI")])
2542
;; Truncating (fix) conversion of element 0 of a V4SF operand to DI in a
;; general register.  Requires TARGET_64BIT.
2543(define_insn "sse_cvttss2siq"
2544  [(set (match_operand:DI 0 "register_operand" "=r,r")
2545	(fix:DI
2546	  (vec_select:SF
2547	    (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2548	    (parallel [(const_int 0)]))))]
2549  "TARGET_SSE && TARGET_64BIT"
2550  "%vcvttss2si{q}\t{%1, %0|%0, %1}"
2551  [(set_attr "type" "sseicvt")
2552   (set_attr "athlon_decode" "double,vector")
2553   (set_attr "amdfam10_decode" "double,double")
2554   (set_attr "bdver1_decode" "double,double")
2555   (set_attr "prefix_rep" "1")
2556   (set_attr "prefix" "maybe_vex")
2557   (set_attr "mode" "DI")])
2558
;; Converts a vector of the matching integer mode (<sseintvecmode>) to the
;; single-precision vector mode VF1, emitted as %vcvtdq2ps.  Requires
;; TARGET_SSE2.
2559(define_insn "float<sseintvecmodelower><mode>2"
2560  [(set (match_operand:VF1 0 "register_operand" "=x")
2561	(float:VF1
2562	  (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "xm")))]
2563  "TARGET_SSE2"
2564  "%vcvtdq2ps\t{%1, %0|%0, %1}"
2565  [(set_attr "type" "ssecvt")
2566   (set_attr "prefix" "maybe_vex")
2567   (set_attr "mode" "<sseinsnmode>")])
2568
;; Unsigned integer-vector to float-vector conversion.  Available for
;; V4SFmode with SSE2, and for the other VF1 modes only with AVX2.  All the
;; work is delegated to ix86_expand_vector_convert_uns_vsivsf (defined
;; elsewhere); DONE suppresses the default pattern emission.
2569(define_expand "floatuns<sseintvecmodelower><mode>2"
2570  [(match_operand:VF1 0 "register_operand")
2571   (match_operand:<sseintvecmode> 1 "register_operand")]
2572  "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
2573{
2574  ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
2575  DONE;
2576})
2577
;; Non-truncating (UNSPEC_FIX_NOTRUNC) conversion of V8SF to V8SI.
;; AVX only, so the mnemonic always carries the "v" prefix.
2578(define_insn "avx_cvtps2dq256"
2579  [(set (match_operand:V8SI 0 "register_operand" "=x")
2580	(unspec:V8SI [(match_operand:V8SF 1 "nonimmediate_operand" "xm")]
2581		     UNSPEC_FIX_NOTRUNC))]
2582  "TARGET_AVX"
2583  "vcvtps2dq\t{%1, %0|%0, %1}"
2584  [(set_attr "type" "ssecvt")
2585   (set_attr "prefix" "vex")
2586   (set_attr "mode" "OI")])
2587
;; Non-truncating (UNSPEC_FIX_NOTRUNC) conversion of V4SF to V4SI.
;; prefix_data16 is 1 without AVX and left at "*" with AVX.
2588(define_insn "sse2_cvtps2dq"
2589  [(set (match_operand:V4SI 0 "register_operand" "=x")
2590	(unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2591		     UNSPEC_FIX_NOTRUNC))]
2592  "TARGET_SSE2"
2593  "%vcvtps2dq\t{%1, %0|%0, %1}"
2594  [(set_attr "type" "ssecvt")
2595   (set (attr "prefix_data16")
2596     (if_then_else
2597       (match_test "TARGET_AVX")
2598     (const_string "*")
2599     (const_string "1")))
2600   (set_attr "prefix" "maybe_vex")
2601   (set_attr "mode" "TI")])
2602
;; Truncating (fix) conversion of V8SF to V8SI.  AVX only.
2603(define_insn "fix_truncv8sfv8si2"
2604  [(set (match_operand:V8SI 0 "register_operand" "=x")
2605	(fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
2606  "TARGET_AVX"
2607  "vcvttps2dq\t{%1, %0|%0, %1}"
2608  [(set_attr "type" "ssecvt")
2609   (set_attr "prefix" "vex")
2610   (set_attr "mode" "OI")])
2611
;; Truncating (fix) conversion of V4SF to V4SI, emitted as %vcvttps2dq.
;; Without AVX the encoding attributes are prefix_rep 1 and prefix_data16
;; 0; with AVX both are left at "*".  Each attribute is specified exactly
;; once, in the same conditional form.
2612(define_insn "fix_truncv4sfv4si2"
2613  [(set (match_operand:V4SI 0 "register_operand" "=x")
2614	(fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2615  "TARGET_SSE2"
2616  "%vcvttps2dq\t{%1, %0|%0, %1}"
2617  [(set_attr "type" "ssecvt")
2618   (set (attr "prefix_rep")
2619     (if_then_else
2620       (match_test "TARGET_AVX")
2621     (const_string "*")
2622     (const_string "1")))
2623   (set (attr "prefix_data16")
2624     (if_then_else
2625       (match_test "TARGET_AVX")
2626     (const_string "*")
2627     (const_string "0")))
2629   (set_attr "prefix" "maybe_vex")
2630   (set_attr "mode" "TI")])
2631
;; Float-vector to unsigned-integer-vector truncating conversion, built
;; from the signed conversion in three steps:
;;   1. ix86_expand_adjust_ufix_to_sfix_si returns an adjusted input and
;;      stores a second value through its pointer argument (helper defined
;;      elsewhere; presumably the second value is an XOR correction mask --
;;      confirm there);
;;   2. the adjusted input is converted with the signed fix_trunc pattern;
;;   3. the converted value is XORed with the second value into operand 0.
2632(define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
2633  [(match_operand:<sseintvecmode> 0 "register_operand")
2634   (match_operand:VF1 1 "register_operand")]
2635  "TARGET_SSE2"
2636{
  /* tmp[0]: adjusted input, tmp[1]: signed result, tmp[2]: XOR operand.  */
2637  rtx tmp[3];
2638  tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
2639  tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
2640  emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
2641  emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
2642  DONE;
2643})
2644
2645;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2646;;
2647;; Parallel double-precision floating point conversion operations
2648;;
2649;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2650
;; Converts a V2SI operand to V2DF.  Alternative 0 takes the source from a
;; "y"-constraint register (unit "mmx", prefix_data16 1); alternative 1
;; takes it from memory and leaves both attributes at "*".
2651(define_insn "sse2_cvtpi2pd"
2652  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2653	(float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2654  "TARGET_SSE2"
2655  "cvtpi2pd\t{%1, %0|%0, %1}"
2656  [(set_attr "type" "ssecvt")
2657   (set_attr "unit" "mmx,*")
2658   (set_attr "prefix_data16" "1,*")
2659   (set_attr "mode" "V2DF")])
2660
;; Non-truncating (UNSPEC_FIX_NOTRUNC) conversion of V2DF to V2SI, with the
;; result in a "y"-constraint register.
2661(define_insn "sse2_cvtpd2pi"
2662  [(set (match_operand:V2SI 0 "register_operand" "=y")
2663	(unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2664		     UNSPEC_FIX_NOTRUNC))]
2665  "TARGET_SSE2"
2666  "cvtpd2pi\t{%1, %0|%0, %1}"
2667  [(set_attr "type" "ssecvt")
2668   (set_attr "unit" "mmx")
2669   (set_attr "bdver1_decode" "double")
2670   (set_attr "btver2_decode" "direct")
2671   (set_attr "prefix_data16" "1")
2672   (set_attr "mode" "DI")])
2673
;; Truncating (fix) conversion of V2DF to V2SI, with the result in a
;; "y"-constraint register.
2674(define_insn "sse2_cvttpd2pi"
2675  [(set (match_operand:V2SI 0 "register_operand" "=y")
2676	(fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2677  "TARGET_SSE2"
2678  "cvttpd2pi\t{%1, %0|%0, %1}"
2679  [(set_attr "type" "ssecvt")
2680   (set_attr "unit" "mmx")
2681   (set_attr "bdver1_decode" "double")
2682   (set_attr "prefix_data16" "1")
2683   (set_attr "mode" "TI")])
2684
;; Converts the SI operand 2 to DF and places it in element 0 of the V2DF
;; result; the other element comes from operand 1.  Alternatives 0 and 1
;; (isa "noavx") tie operand 1 to the destination and separate the register
;; and memory source cases for the decode attributes; alternative 2 (isa
;; "avx") is the three-operand form.
2685(define_insn "sse2_cvtsi2sd"
2686  [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2687	(vec_merge:V2DF
2688	  (vec_duplicate:V2DF
2689	    (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2690	  (match_operand:V2DF 1 "register_operand" "0,0,x")
2691	  (const_int 1)))]
2692  "TARGET_SSE2"
2693  "@
2694   cvtsi2sd\t{%2, %0|%0, %2}
2695   cvtsi2sd\t{%2, %0|%0, %2}
2696   vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2697  [(set_attr "isa" "noavx,noavx,avx")
2698   (set_attr "type" "sseicvt")
2699   (set_attr "athlon_decode" "double,direct,*")
2700   (set_attr "amdfam10_decode" "vector,double,*")
2701   (set_attr "bdver1_decode" "double,direct,*")
2702   (set_attr "btver2_decode" "double,double,double")
2703   (set_attr "prefix" "orig,orig,vex")
2704   (set_attr "mode" "DF")])
2705
;; 64-bit variant of sse2_cvtsi2sd: converts the DI operand 2 to DF and
;; places it in element 0 of the V2DF result.  Requires TARGET_64BIT.  The
;; non-AVX alternatives set prefix_rex to 1; the AVX alternative sets
;; length_vex to 4.
;; NOTE(review): unlike sse2_cvtsi2sd, no btver2_decode attribute is set
;; here -- confirm that is intended.
2706(define_insn "sse2_cvtsi2sdq"
2707  [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2708	(vec_merge:V2DF
2709	  (vec_duplicate:V2DF
2710	    (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2711	  (match_operand:V2DF 1 "register_operand" "0,0,x")
2712	  (const_int 1)))]
2713  "TARGET_SSE2 && TARGET_64BIT"
2714  "@
2715   cvtsi2sdq\t{%2, %0|%0, %2}
2716   cvtsi2sdq\t{%2, %0|%0, %2}
2717   vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2718  [(set_attr "isa" "noavx,noavx,avx")
2719   (set_attr "type" "sseicvt")
2720   (set_attr "athlon_decode" "double,direct,*")
2721   (set_attr "amdfam10_decode" "vector,double,*")
2722   (set_attr "bdver1_decode" "double,direct,*")
2723   (set_attr "length_vex" "*,*,4")
2724   (set_attr "prefix_rex" "1,1,*")
2725   (set_attr "prefix" "orig,orig,vex")
2726   (set_attr "mode" "DF")])
2727
;; Non-truncating (UNSPEC_FIX_NOTRUNC) conversion of element 0 of a V2DF
;; operand to SI in a general register.
2728(define_insn "sse2_cvtsd2si"
2729  [(set (match_operand:SI 0 "register_operand" "=r,r")
2730	(unspec:SI
2731	  [(vec_select:DF
2732	     (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2733	     (parallel [(const_int 0)]))]
2734	  UNSPEC_FIX_NOTRUNC))]
2735  "TARGET_SSE2"
2736  "%vcvtsd2si\t{%1, %0|%0, %1}"
2737  [(set_attr "type" "sseicvt")
2738   (set_attr "athlon_decode" "double,vector")
2739   (set_attr "bdver1_decode" "double,double")
2740   (set_attr "btver2_decode" "double,double")
2741   (set_attr "prefix_rep" "1")
2742   (set_attr "prefix" "maybe_vex")
2743   (set_attr "mode" "SI")])
2744
;; Same conversion as sse2_cvtsd2si, but the source is a plain DF operand
;; rather than element 0 selected from a V2DF vector.
2745(define_insn "sse2_cvtsd2si_2"
2746  [(set (match_operand:SI 0 "register_operand" "=r,r")
2747	(unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2748		   UNSPEC_FIX_NOTRUNC))]
2749  "TARGET_SSE2"
2750  "%vcvtsd2si\t{%1, %0|%0, %1}"
2751  [(set_attr "type" "sseicvt")
2752   (set_attr "athlon_decode" "double,vector")
2753   (set_attr "amdfam10_decode" "double,double")
2754   (set_attr "bdver1_decode" "double,double")
2755   (set_attr "prefix_rep" "1")
2756   (set_attr "prefix" "maybe_vex")
2757   (set_attr "mode" "SI")])
2758
;; DImode-result variant of the UNSPEC_FIX_NOTRUNC conversion of
;; element 0 of V2DF operand 1.  64-bit targets only.  The {q} suffix
;; in the template has no Intel-dialect counterpart.
2759(define_insn "sse2_cvtsd2siq"
2760  [(set (match_operand:DI 0 "register_operand" "=r,r")
2761	(unspec:DI
2762	  [(vec_select:DF
2763	     (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2764	     (parallel [(const_int 0)]))]
2765	  UNSPEC_FIX_NOTRUNC))]
2766  "TARGET_SSE2 && TARGET_64BIT"
2767  "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2768  [(set_attr "type" "sseicvt")
2769   (set_attr "athlon_decode" "double,vector")
2770   (set_attr "bdver1_decode" "double,double")
2771   (set_attr "prefix_rep" "1")
2772   (set_attr "prefix" "maybe_vex")
2773   (set_attr "mode" "DI")])
2774
;; DImode-result UNSPEC_FIX_NOTRUNC conversion of a scalar DFmode
;; operand 1 (register or memory).  64-bit targets only.
2775(define_insn "sse2_cvtsd2siq_2"
2776  [(set (match_operand:DI 0 "register_operand" "=r,r")
2777	(unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2778		   UNSPEC_FIX_NOTRUNC))]
2779  "TARGET_SSE2 && TARGET_64BIT"
2780  "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2781  [(set_attr "type" "sseicvt")
2782   (set_attr "athlon_decode" "double,vector")
2783   (set_attr "amdfam10_decode" "double,double")
2784   (set_attr "bdver1_decode" "double,double")
2785   (set_attr "prefix_rep" "1")
2786   (set_attr "prefix" "maybe_vex")
2787   (set_attr "mode" "DI")])
2788
;; Truncating conversion (`fix') of element 0 of V2DF operand 1 to an
;; SImode integer, emitted as cvttsd2si.
2789(define_insn "sse2_cvttsd2si"
2790  [(set (match_operand:SI 0 "register_operand" "=r,r")
2791	(fix:SI
2792	  (vec_select:DF
2793	    (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2794	    (parallel [(const_int 0)]))))]
2795  "TARGET_SSE2"
2796  "%vcvttsd2si\t{%1, %0|%0, %1}"
2797  [(set_attr "type" "sseicvt")
2798   (set_attr "athlon_decode" "double,vector")
2799   (set_attr "amdfam10_decode" "double,double")
2800   (set_attr "bdver1_decode" "double,double")
2801   (set_attr "btver2_decode" "double,double")
2802   (set_attr "prefix_rep" "1")
2803   (set_attr "prefix" "maybe_vex")
2804   (set_attr "mode" "SI")])
2805
;; Truncating conversion (`fix') of element 0 of V2DF operand 1 to a
;; DImode integer.  64-bit targets only.
2806(define_insn "sse2_cvttsd2siq"
2807  [(set (match_operand:DI 0 "register_operand" "=r,r")
2808	(fix:DI
2809	  (vec_select:DF
2810	    (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2811	    (parallel [(const_int 0)]))))]
2812  "TARGET_SSE2 && TARGET_64BIT"
2813  "%vcvttsd2si{q}\t{%1, %0|%0, %1}"
2814  [(set_attr "type" "sseicvt")
2815   (set_attr "athlon_decode" "double,vector")
2816   (set_attr "amdfam10_decode" "double,double")
2817   (set_attr "bdver1_decode" "double,double")
2818   (set_attr "prefix_rep" "1")
2819   (set_attr "prefix" "maybe_vex")
2820   (set_attr "mode" "DI")])
2821
;; Signed conversion of all four SImode elements of operand 1 to a
;; V4DF result (vcvtdq2pd).  AVX only.
2822(define_insn "floatv4siv4df2"
2823  [(set (match_operand:V4DF 0 "register_operand" "=x")
2824	(float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2825  "TARGET_AVX"
2826  "vcvtdq2pd\t{%1, %0|%0, %1}"
2827  [(set_attr "type" "ssecvt")
2828   (set_attr "prefix" "vex")
2829   (set_attr "mode" "V4DF")])
2830
;; Convert the low four SImode elements (indices 0-3) of the V8SI
;; operand 1 to V4DF.  The template prints operand 1 with the %x
;; modifier (presumably its 128-bit xmm name, since only the low half
;; is read).
2831(define_insn "avx_cvtdq2pd256_2"
2832  [(set (match_operand:V4DF 0 "register_operand" "=x")
2833	(float:V4DF
2834	  (vec_select:V4SI
2835	    (match_operand:V8SI 1 "nonimmediate_operand" "xm")
2836	    (parallel [(const_int 0) (const_int 1)
2837		       (const_int 2) (const_int 3)]))))]
2838  "TARGET_AVX"
2839  "vcvtdq2pd\t{%x1, %0|%0, %x1}"
2840  [(set_attr "type" "ssecvt")
2841   (set_attr "prefix" "vex")
2842   (set_attr "mode" "V4DF")])
2843
;; Convert the low two SImode elements (indices 0 and 1) of the V4SI
;; operand 1 to V2DF.  Only the Intel-dialect half of the template
;; applies the %q modifier to operand 1.
2844(define_insn "sse2_cvtdq2pd"
2845  [(set (match_operand:V2DF 0 "register_operand" "=x")
2846	(float:V2DF
2847	  (vec_select:V2SI
2848	    (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2849	    (parallel [(const_int 0) (const_int 1)]))))]
2850  "TARGET_SSE2"
2851  "%vcvtdq2pd\t{%1, %0|%0, %q1}"
2852  [(set_attr "type" "ssecvt")
2853   (set_attr "prefix" "maybe_vex")
2854   (set_attr "ssememalign" "64")
2855   (set_attr "mode" "V2DF")])
2856
;; UNSPEC_FIX_NOTRUNC conversion of the four DFmode elements of
;; operand 1 to a V4SI result (vcvtpd2dq with the {y} suffix).
2857(define_insn "avx_cvtpd2dq256"
2858  [(set (match_operand:V4SI 0 "register_operand" "=x")
2859	(unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2860		     UNSPEC_FIX_NOTRUNC))]
2861  "TARGET_AVX"
2862  "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
2863  [(set_attr "type" "ssecvt")
2864   (set_attr "prefix" "vex")
2865   (set_attr "mode" "OI")])
2866
;; Expander producing a V8SI whose low half is the UNSPEC_FIX_NOTRUNC
;; conversion of V4DF operand 1 and whose high half is operand 2, which
;; the preparation statement sets to the V4SI zero constant.
2867(define_expand "avx_cvtpd2dq256_2"
2868  [(set (match_operand:V8SI 0 "register_operand")
2869	(vec_concat:V8SI
2870	  (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand")]
2871		       UNSPEC_FIX_NOTRUNC)
2872	  (match_dup 2)))]
2873  "TARGET_AVX"
2874  "operands[2] = CONST0_RTX (V4SImode);")
2875
;; Insn matching a V4DF -> V4SI UNSPEC_FIX_NOTRUNC conversion
;; concatenated with a zero V4SI (operand 2 must satisfy
;; const0_operand).  The destination is printed with the %x modifier.
2876(define_insn "*avx_cvtpd2dq256_2"
2877  [(set (match_operand:V8SI 0 "register_operand" "=x")
2878	(vec_concat:V8SI
2879	  (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2880		       UNSPEC_FIX_NOTRUNC)
2881	  (match_operand:V4SI 2 "const0_operand")))]
2882  "TARGET_AVX"
2883  "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
2884  [(set_attr "type" "ssecvt")
2885   (set_attr "prefix" "vex")
2886   (set_attr "btver2_decode" "vector")
2887   (set_attr "mode" "OI")])
2888
;; Expander producing a V4SI whose low half is the UNSPEC_FIX_NOTRUNC
;; conversion of V2DF operand 1 and whose high half is operand 2, which
;; the preparation statement sets to the V2SI zero constant.
2889(define_expand "sse2_cvtpd2dq"
2890  [(set (match_operand:V4SI 0 "register_operand")
2891	(vec_concat:V4SI
2892	  (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand")]
2893		       UNSPEC_FIX_NOTRUNC)
2894	  (match_dup 2)))]
2895  "TARGET_SSE2"
2896  "operands[2] = CONST0_RTX (V2SImode);")
2897
;; Insn matching a V2DF -> V2SI UNSPEC_FIX_NOTRUNC conversion
;; concatenated with a zero V2SI.  The C output block picks the
;; VEX mnemonic (with an {x} suffix) when TARGET_AVX, otherwise the
;; legacy SSE2 mnemonic.
2898(define_insn "*sse2_cvtpd2dq"
2899  [(set (match_operand:V4SI 0 "register_operand" "=x")
2900	(vec_concat:V4SI
2901	  (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2902		       UNSPEC_FIX_NOTRUNC)
2903	  (match_operand:V2SI 2 "const0_operand")))]
2904  "TARGET_SSE2"
2905{
2906  if (TARGET_AVX)
2907    return "vcvtpd2dq{x}\t{%1, %0|%0, %1}";
2908  else
2909    return "cvtpd2dq\t{%1, %0|%0, %1}";
2910}
2911  [(set_attr "type" "ssecvt")
2912   (set_attr "prefix_rep" "1")
2913   (set_attr "prefix_data16" "0")
2914   (set_attr "prefix" "maybe_vex")
2915   (set_attr "mode" "TI")
2916   (set_attr "amdfam10_decode" "double")
2917   (set_attr "athlon_decode" "vector")
2918   (set_attr "bdver1_decode" "double")])
2919
;; Truncating conversion (`fix') of the four DFmode elements of
;; operand 1 to a V4SI result (vcvttpd2dq with the {y} suffix).
2920(define_insn "fix_truncv4dfv4si2"
2921  [(set (match_operand:V4SI 0 "register_operand" "=x")
2922	(fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2923  "TARGET_AVX"
2924  "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
2925  [(set_attr "type" "ssecvt")
2926   (set_attr "prefix" "vex")
2927   (set_attr "mode" "OI")])
2928
;; Expander producing a V8SI whose low half is the truncating
;; conversion of V4DF operand 1 and whose high half is operand 2, which
;; the preparation statement sets to the V4SI zero constant.
2929(define_expand "avx_cvttpd2dq256_2"
2930  [(set (match_operand:V8SI 0 "register_operand")
2931	(vec_concat:V8SI
2932	  (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand"))
2933	  (match_dup 2)))]
2934  "TARGET_AVX"
2935  "operands[2] = CONST0_RTX (V4SImode);")
2936
;; Insn matching a truncating V4DF -> V4SI conversion concatenated
;; with a zero V4SI (operand 2 must satisfy const0_operand).  The
;; destination is printed with the %x modifier.
2937(define_insn "*avx_cvttpd2dq256_2"
2938  [(set (match_operand:V8SI 0 "register_operand" "=x")
2939	(vec_concat:V8SI
2940	  (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm"))
2941	  (match_operand:V4SI 2 "const0_operand")))]
2942  "TARGET_AVX"
2943  "vcvttpd2dq{y}\t{%1, %x0|%x0, %1}"
2944  [(set_attr "type" "ssecvt")
2945   (set_attr "prefix" "vex")
2946   (set_attr "btver2_decode" "vector")
2947   (set_attr "mode" "OI")])
2948
;; Expander producing a V4SI whose low half is the truncating
;; conversion of V2DF operand 1 and whose high half is operand 2, which
;; the preparation statement sets to the V2SI zero constant.
2949(define_expand "sse2_cvttpd2dq"
2950  [(set (match_operand:V4SI 0 "register_operand")
2951	(vec_concat:V4SI
2952	  (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand"))
2953	  (match_dup 2)))]
2954  "TARGET_SSE2"
2955  "operands[2] = CONST0_RTX (V2SImode);")
2956
;; Insn matching a truncating V2DF -> V2SI conversion concatenated
;; with a zero V2SI.  The C output block picks the VEX mnemonic (with
;; an {x} suffix) when TARGET_AVX, otherwise the legacy SSE2 mnemonic.
2957(define_insn "*sse2_cvttpd2dq"
2958  [(set (match_operand:V4SI 0 "register_operand" "=x")
2959	(vec_concat:V4SI
2960	  (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2961	  (match_operand:V2SI 2 "const0_operand")))]
2962  "TARGET_SSE2"
2963{
2964  if (TARGET_AVX)
2965    return "vcvttpd2dq{x}\t{%1, %0|%0, %1}";
2966  else
2967    return "cvttpd2dq\t{%1, %0|%0, %1}";
2968}
2969  [(set_attr "type" "ssecvt")
2970   (set_attr "amdfam10_decode" "double")
2971   (set_attr "athlon_decode" "vector")
2972   (set_attr "bdver1_decode" "double")
2973   (set_attr "prefix" "maybe_vex")
2974   (set_attr "mode" "TI")])
2975
;; cvtsd2ss: narrow V2DF operand 2 with float_truncate and merge
;; element 0 of the (duplicated) result into V4SF operand 1; the
;; remaining elements come from operand 1 (vec_merge with mask 1).
;; Alternatives 0 and 1 (non-AVX) tie operand 1 to the destination;
;; alternative 2 is the three-operand AVX form.
2976(define_insn "sse2_cvtsd2ss"
2977  [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2978	(vec_merge:V4SF
2979	  (vec_duplicate:V4SF
2980	    (float_truncate:V2SF
2981	      (match_operand:V2DF 2 "nonimmediate_operand" "x,m,xm")))
2982	  (match_operand:V4SF 1 "register_operand" "0,0,x")
2983	  (const_int 1)))]
2984  "TARGET_SSE2"
2985  "@
2986   cvtsd2ss\t{%2, %0|%0, %2}
2987   cvtsd2ss\t{%2, %0|%0, %2}
2988   vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
2989  [(set_attr "isa" "noavx,noavx,avx")
2990   (set_attr "type" "ssecvt")
2991   (set_attr "athlon_decode" "vector,double,*")
2992   (set_attr "amdfam10_decode" "vector,double,*")
2993   (set_attr "bdver1_decode" "direct,direct,*")
2994   (set_attr "btver2_decode" "double,double,double")
2995   (set_attr "prefix" "orig,orig,vex")
2996   (set_attr "mode" "SF")])
2997
;; cvtss2sd: widen the low two SFmode elements of V4SF operand 2 with
;; float_extend and merge element 0 of that result into V2DF operand 1;
;; the other element comes from operand 1 (vec_merge with mask 1).
;; Alternatives 0 and 1 (non-AVX) tie operand 1 to the destination;
;; alternative 2 is the three-operand AVX form.
2998(define_insn "sse2_cvtss2sd"
2999  [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
3000	(vec_merge:V2DF
3001	  (float_extend:V2DF
3002	    (vec_select:V2SF
3003	      (match_operand:V4SF 2 "nonimmediate_operand" "x,m,xm")
3004	      (parallel [(const_int 0) (const_int 1)])))
3005	  (match_operand:V2DF 1 "register_operand" "0,0,x")
3006	  (const_int 1)))]
3007  "TARGET_SSE2"
3008  "@
3009   cvtss2sd\t{%2, %0|%0, %2}
3010   cvtss2sd\t{%2, %0|%0, %2}
3011   vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
3012  [(set_attr "isa" "noavx,noavx,avx")
3013   (set_attr "type" "ssecvt")
3014   (set_attr "amdfam10_decode" "vector,double,*")
3015   (set_attr "athlon_decode" "direct,direct,*")
3016   (set_attr "bdver1_decode" "direct,direct,*")
3017   (set_attr "btver2_decode" "double,double,double")
3018   (set_attr "prefix" "orig,orig,vex")
3019   (set_attr "mode" "DF")])
3020
;; Narrow the four DFmode elements of operand 1 to a V4SF result with
;; float_truncate (vcvtpd2ps with the {y} suffix).  AVX only.
3021(define_insn "avx_cvtpd2ps256"
3022  [(set (match_operand:V4SF 0 "register_operand" "=x")
3023	(float_truncate:V4SF
3024	  (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
3025  "TARGET_AVX"
3026  "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
3027  [(set_attr "type" "ssecvt")
3028   (set_attr "prefix" "vex")
3029   (set_attr "btver2_decode" "vector")
3030   (set_attr "mode" "V4SF")])
3031
;; Expander producing a V4SF whose low half is V2DF operand 1 narrowed
;; with float_truncate and whose high half is operand 2, which the
;; preparation statement sets to the V2SF zero constant.
3032(define_expand "sse2_cvtpd2ps"
3033  [(set (match_operand:V4SF 0 "register_operand")
3034	(vec_concat:V4SF
3035	  (float_truncate:V2SF
3036	    (match_operand:V2DF 1 "nonimmediate_operand"))
3037	  (match_dup 2)))]
3038  "TARGET_SSE2"
3039  "operands[2] = CONST0_RTX (V2SFmode);")
3040
;; Insn matching a V2DF -> V2SF float_truncate concatenated with a
;; zero V2SF.  The C output block picks the VEX mnemonic (with an {x}
;; suffix) when TARGET_AVX, otherwise the legacy SSE2 mnemonic.
3041(define_insn "*sse2_cvtpd2ps"
3042  [(set (match_operand:V4SF 0 "register_operand" "=x")
3043	(vec_concat:V4SF
3044	  (float_truncate:V2SF
3045	    (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
3046	  (match_operand:V2SF 2 "const0_operand")))]
3047  "TARGET_SSE2"
3048{
3049  if (TARGET_AVX)
3050    return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
3051  else
3052    return "cvtpd2ps\t{%1, %0|%0, %1}";
3053}
3054  [(set_attr "type" "ssecvt")
3055   (set_attr "amdfam10_decode" "double")
3056   (set_attr "athlon_decode" "vector")
3057   (set_attr "bdver1_decode" "double")
3058   (set_attr "prefix_data16" "1")
3059   (set_attr "prefix" "maybe_vex")
3060   (set_attr "mode" "V4SF")])
3061
;; Widen the four SFmode elements of operand 1 to a V4DF result with
;; float_extend (vcvtps2pd).  AVX only.
3062(define_insn "avx_cvtps2pd256"
3063  [(set (match_operand:V4DF 0 "register_operand" "=x")
3064	(float_extend:V4DF
3065	  (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
3066  "TARGET_AVX"
3067  "vcvtps2pd\t{%1, %0|%0, %1}"
3068  [(set_attr "type" "ssecvt")
3069   (set_attr "prefix" "vex")
3070   (set_attr "mode" "V4DF")])
3071
;; Widen the low four SFmode elements (indices 0-3) of the V8SF
;; operand 1 to V4DF.  Operand 1 is printed with the %x modifier
;; (presumably its 128-bit xmm name, since only the low half is read).
3072(define_insn "*avx_cvtps2pd256_2"
3073  [(set (match_operand:V4DF 0 "register_operand" "=x")
3074	(float_extend:V4DF
3075	  (vec_select:V4SF
3076	    (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3077	    (parallel [(const_int 0) (const_int 1)
3078		       (const_int 2) (const_int 3)]))))]
3079  "TARGET_AVX"
3080  "vcvtps2pd\t{%x1, %0|%0, %x1}"
3081  [(set_attr "type" "ssecvt")
3082   (set_attr "prefix" "vex")
3083   (set_attr "mode" "V4DF")])
3084
;; Widen the low two SFmode elements (indices 0 and 1) of V4SF
;; operand 1 to V2DF.  Only the Intel-dialect half of the template
;; applies the %q modifier to operand 1.
3085(define_insn "sse2_cvtps2pd"
3086  [(set (match_operand:V2DF 0 "register_operand" "=x")
3087	(float_extend:V2DF
3088	  (vec_select:V2SF
3089	    (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3090	    (parallel [(const_int 0) (const_int 1)]))))]
3091  "TARGET_SSE2"
3092  "%vcvtps2pd\t{%1, %0|%0, %q1}"
3093  [(set_attr "type" "ssecvt")
3094   (set_attr "amdfam10_decode" "direct")
3095   (set_attr "athlon_decode" "double")
3096   (set_attr "bdver1_decode" "double")
3097   (set_attr "prefix_data16" "0")
3098   (set_attr "prefix" "maybe_vex")
3099   (set_attr "mode" "V2DF")])
3100
;; Widen the high two SFmode elements of V4SF operand 1 to V2DF in two
;; steps.  First, a fresh V4SF temporary (operand 2) receives elements
;; 6, 7, 2, 3 of (temp ++ operand 1), i.e. operand 1's elements 2 and 3
;; land in the temporary's low half.  Second, elements 0 and 1 of the
;; temporary are float_extended to V2DF.
3101(define_expand "vec_unpacks_hi_v4sf"
3102  [(set (match_dup 2)
3103   (vec_select:V4SF
3104     (vec_concat:V8SF
3105       (match_dup 2)
3106       (match_operand:V4SF 1 "nonimmediate_operand"))
3107     (parallel [(const_int 6) (const_int 7)
3108		(const_int 2) (const_int 3)])))
3109  (set (match_operand:V2DF 0 "register_operand")
3110   (float_extend:V2DF
3111     (vec_select:V2SF
3112       (match_dup 2)
3113       (parallel [(const_int 0) (const_int 1)]))))]
3114  "TARGET_SSE2"
3115  "operands[2] = gen_reg_rtx (V4SFmode);")
3116
;; Widen the high four SFmode elements of V8SF operand 1 to V4DF:
;; extract elements 4-7 into a fresh V4SF temporary (operand 2), then
;; float_extend the temporary.
3117(define_expand "vec_unpacks_hi_v8sf"
3118  [(set (match_dup 2)
3119	(vec_select:V4SF
3120	  (match_operand:V8SF 1 "register_operand")
3121	  (parallel [(const_int 4) (const_int 5)
3122		     (const_int 6) (const_int 7)])))
3123   (set (match_operand:V4DF 0 "register_operand")
3124	(float_extend:V4DF
3125	  (match_dup 2)))]
3126  "TARGET_AVX"
3127  "operands[2] = gen_reg_rtx (V4SFmode);")
3128
;; Widen the low two SFmode elements (indices 0 and 1) of V4SF
;; operand 1 to V2DF; the RTL has the same shape as the sse2_cvtps2pd
;; insn pattern.
3129(define_expand "vec_unpacks_lo_v4sf"
3130  [(set (match_operand:V2DF 0 "register_operand")
3131	(float_extend:V2DF
3132	  (vec_select:V2SF
3133	    (match_operand:V4SF 1 "nonimmediate_operand")
3134	    (parallel [(const_int 0) (const_int 1)]))))]
3135  "TARGET_SSE2")
3136
;; Widen the low four SFmode elements (indices 0-3) of V8SF operand 1
;; to V4DF; the RTL has the same shape as the *avx_cvtps2pd256_2 insn
;; pattern.
3137(define_expand "vec_unpacks_lo_v8sf"
3138  [(set (match_operand:V4DF 0 "register_operand")
3139	(float_extend:V4DF
3140	  (vec_select:V4SF
3141	    (match_operand:V8SF 1 "nonimmediate_operand")
3142	    (parallel [(const_int 0) (const_int 1)
3143		       (const_int 2) (const_int 3)]))))]
3144  "TARGET_AVX")
3145
;; Maps an integer vector mode to a floating-point vector mode with
;; half as many elements, each twice as wide.  It is used as the
;; destination mode of the vec_unpack{s,u}_float_{hi,lo}_<mode>
;; expanders.
3146(define_mode_attr sseunpackfltmode
3147  [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF") (V8SI "V4DF")])
3148
;; Unpack the high half of integer vector operand 1 into a temporary
;; of mode <sseunpackmode> via gen_vec_unpacks_hi_<mode>, then convert
;; that temporary to <sseunpackfltmode> with a signed FLOAT.
;; NOTE(review): the VI2_AVX2 iterator and <sseunpackmode> are defined
;; outside this chunk; presumably they cover V8HI/V16HI -> V4SI/V8SI.
3149(define_expand "vec_unpacks_float_hi_<mode>"
3150  [(match_operand:<sseunpackfltmode> 0 "register_operand")
3151   (match_operand:VI2_AVX2 1 "register_operand")]
3152  "TARGET_SSE2"
3153{
3154  rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
3155
3156  emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1]));
3157  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
3158			  gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
3159  DONE;
3160})
3161
;; Unpack the low half of integer vector operand 1 into a temporary of
;; mode <sseunpackmode> via gen_vec_unpacks_lo_<mode>, then convert
;; that temporary to <sseunpackfltmode> with a signed FLOAT.
3162(define_expand "vec_unpacks_float_lo_<mode>"
3163  [(match_operand:<sseunpackfltmode> 0 "register_operand")
3164   (match_operand:VI2_AVX2 1 "register_operand")]
3165  "TARGET_SSE2"
3166{
3167  rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
3168
3169  emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1]));
3170  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
3171			  gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
3172  DONE;
3173})
3174
;; Unpack the high half of integer vector operand 1 into a temporary
;; of mode <sseunpackmode> via gen_vec_unpacku_hi_<mode>, then convert
;; that temporary to <sseunpackfltmode> with a signed FLOAT.
;; NOTE(review): presumably the unsigned unpack leaves non-negative
;; values in the wider mode, which is why a signed FLOAT suffices.
3175(define_expand "vec_unpacku_float_hi_<mode>"
3176  [(match_operand:<sseunpackfltmode> 0 "register_operand")
3177   (match_operand:VI2_AVX2 1 "register_operand")]
3178  "TARGET_SSE2"
3179{
3180  rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
3181
3182  emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1]));
3183  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
3184			  gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
3185  DONE;
3186})
3187
;; Unpack the low half of integer vector operand 1 into a temporary of
;; mode <sseunpackmode> via gen_vec_unpacku_lo_<mode>, then convert
;; that temporary to <sseunpackfltmode> with a signed FLOAT.
3188(define_expand "vec_unpacku_float_lo_<mode>"
3189  [(match_operand:<sseunpackfltmode> 0 "register_operand")
3190   (match_operand:VI2_AVX2 1 "register_operand")]
3191  "TARGET_SSE2"
3192{
3193  rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
3194
3195  emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1]));
3196  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
3197			  gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
3198  DONE;
3199})
3200
;; Signed conversion of the high two SImode elements of V4SI operand 1
;; to V2DF.  A fresh V4SI temporary (operand 2) is filled with
;; elements 2, 3, 2, 3 of operand 1, then its elements 0 and 1 are
;; converted with `float'.
3201(define_expand "vec_unpacks_float_hi_v4si"
3202  [(set (match_dup 2)
3203	(vec_select:V4SI
3204	  (match_operand:V4SI 1 "nonimmediate_operand")
3205	  (parallel [(const_int 2) (const_int 3)
3206		     (const_int 2) (const_int 3)])))
3207   (set (match_operand:V2DF 0 "register_operand")
3208	(float:V2DF
3209	  (vec_select:V2SI
3210	  (match_dup 2)
3211	    (parallel [(const_int 0) (const_int 1)]))))]
3212  "TARGET_SSE2"
3213  "operands[2] = gen_reg_rtx (V4SImode);")
3214
;; Signed conversion of the low two SImode elements (indices 0 and 1)
;; of V4SI operand 1 to V2DF; the RTL has the same shape as the
;; sse2_cvtdq2pd insn pattern.
3215(define_expand "vec_unpacks_float_lo_v4si"
3216  [(set (match_operand:V2DF 0 "register_operand")
3217	(float:V2DF
3218	  (vec_select:V2SI
3219	    (match_operand:V4SI 1 "nonimmediate_operand")
3220	    (parallel [(const_int 0) (const_int 1)]))))]
3221  "TARGET_SSE2")
3222
;; Signed conversion of the high four SImode elements of V8SI
;; operand 1 to V4DF: extract elements 4-7 into a fresh V4SI temporary
;; (operand 2), then convert the temporary with `float'.
3223(define_expand "vec_unpacks_float_hi_v8si"
3224  [(set (match_dup 2)
3225	(vec_select:V4SI
3226	  (match_operand:V8SI 1 "nonimmediate_operand")
3227	  (parallel [(const_int 4) (const_int 5)
3228		     (const_int 6) (const_int 7)])))
3229   (set (match_operand:V4DF 0 "register_operand")
3230	(float:V4DF
3231	  (match_dup 2)))]
3232  "TARGET_AVX"
3233  "operands[2] = gen_reg_rtx (V4SImode);")
3234
;; Signed conversion of the low four SImode elements (indices 0-3) of
;; V8SI operand 1 to V4DF; the RTL has the same shape as the
;; avx_cvtdq2pd256_2 insn pattern.
3235(define_expand "vec_unpacks_float_lo_v8si"
3236  [(set (match_operand:V4DF 0 "register_operand")
3237	(float:V4DF
3238	  (vec_select:V4SI
3239	    (match_operand:V8SI 1 "nonimmediate_operand")
3240	    (parallel [(const_int 0) (const_int 1)
3241		       (const_int 2) (const_int 3)]))))]
3242  "TARGET_AVX")
3243
;; Unsigned conversion of the high two SImode elements of V4SI
;; operand 1 to V2DF, built from a signed conversion plus a fix-up:
;;   op5 = elements 2,3,2,3 of operand 1
;;   op6 = signed float of op5[0..1]
;;   op7 = (op6 < 0)             -- operand 3 is a zero vector
;;   op8 = op7 & 2^32            -- operand 4 holds 2^32
;;   op0 = op6 + op8
;; so elements whose signed conversion came out negative get 2^32
;; added back.
;; NOTE(review): this relies on the V2DF `lt' yielding a per-element
;; all-ones/all-zeros mask -- confirm against the compare patterns.
3244(define_expand "vec_unpacku_float_hi_v4si"
3245  [(set (match_dup 5)
3246	(vec_select:V4SI
3247	  (match_operand:V4SI 1 "nonimmediate_operand")
3248	  (parallel [(const_int 2) (const_int 3)
3249		     (const_int 2) (const_int 3)])))
3250   (set (match_dup 6)
3251	(float:V2DF
3252	  (vec_select:V2SI
3253	  (match_dup 5)
3254	    (parallel [(const_int 0) (const_int 1)]))))
3255   (set (match_dup 7)
3256	(lt:V2DF (match_dup 6) (match_dup 3)))
3257   (set (match_dup 8)
3258	(and:V2DF (match_dup 7) (match_dup 4)))
3259   (set (match_operand:V2DF 0 "register_operand")
3260	(plus:V2DF (match_dup 6) (match_dup 8)))]
3261  "TARGET_SSE2"
3262{
3263  REAL_VALUE_TYPE TWO32r;
3264  rtx x;
3265  int i;
3266
  /* TWO32r = 1.0 * 2^32, then wrapped as a DFmode constant.  */
3267  real_ldexp (&TWO32r, &dconst1, 32);
3268  x = const_double_from_real_value (TWO32r, DFmode);
3269
  /* Operand 3: zero vector; operand 4: vector built from 2^32
     (presumably replicated into every element -- TODO confirm the
     meaning of the second argument of ix86_build_const_vector).  */
3270  operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
3271  operands[4] = force_reg (V2DFmode,
3272			   ix86_build_const_vector (V2DFmode, 1, x));
3273
3274  operands[5] = gen_reg_rtx (V4SImode);
3275
  /* Operands 6..8 are the V2DF temporaries of the fix-up sequence.  */
3276  for (i = 6; i < 9; i++)
3277    operands[i] = gen_reg_rtx (V2DFmode);
3278})
3279
;; Unsigned conversion of the low two SImode elements of V4SI
;; operand 1 to V2DF, built from a signed conversion plus a fix-up:
;;   op5 = signed float of operand 1 elements 0,1
;;   op6 = (op5 < 0)             -- operand 3 is a zero vector
;;   op7 = op6 & 2^32            -- operand 4 holds 2^32
;;   op0 = op5 + op7
;; NOTE(review): this relies on the V2DF `lt' yielding a per-element
;; all-ones/all-zeros mask -- confirm against the compare patterns.
3280(define_expand "vec_unpacku_float_lo_v4si"
3281  [(set (match_dup 5)
3282	(float:V2DF
3283	  (vec_select:V2SI
3284	    (match_operand:V4SI 1 "nonimmediate_operand")
3285	    (parallel [(const_int 0) (const_int 1)]))))
3286   (set (match_dup 6)
3287	(lt:V2DF (match_dup 5) (match_dup 3)))
3288   (set (match_dup 7)
3289	(and:V2DF (match_dup 6) (match_dup 4)))
3290   (set (match_operand:V2DF 0 "register_operand")
3291	(plus:V2DF (match_dup 5) (match_dup 7)))]
3292  "TARGET_SSE2"
3293{
3294  REAL_VALUE_TYPE TWO32r;
3295  rtx x;
3296  int i;
3297
  /* TWO32r = 1.0 * 2^32, then wrapped as a DFmode constant.  */
3298  real_ldexp (&TWO32r, &dconst1, 32);
3299  x = const_double_from_real_value (TWO32r, DFmode);
3300
  /* Operand 3: zero vector; operand 4: vector built from 2^32.  */
3301  operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
3302  operands[4] = force_reg (V2DFmode,
3303			   ix86_build_const_vector (V2DFmode, 1, x));
3304
  /* Operands 5..7 are the V2DF temporaries of the fix-up sequence.  */
3305  for (i = 5; i < 8; i++)
3306    operands[i] = gen_reg_rtx (V2DFmode);
3307})
3308
;; Unsigned conversion of the high half of V8SI operand 1 to V4DF.
;; The sequence is emitted explicitly: extract the high V4SI half,
;; convert it as signed (floatv4siv4df2), build a (value < 0) mask,
;; AND the mask with a 2^32 vector and add that correction to the
;; signed result.
3309(define_expand "vec_unpacku_float_hi_v8si"
3310  [(match_operand:V4DF 0 "register_operand")
3311   (match_operand:V8SI 1 "register_operand")]
3312  "TARGET_AVX"
3313{
3314  REAL_VALUE_TYPE TWO32r;
3315  rtx x, tmp[6];
3316  int i;
3317
  /* TWO32r = 1.0 * 2^32, then wrapped as a DFmode constant.  */
3318  real_ldexp (&TWO32r, &dconst1, 32);
3319  x = const_double_from_real_value (TWO32r, DFmode);
3320
  /* tmp[0]: zero vector; tmp[1]: vector built from 2^32;
     tmp[5]: the extracted high V4SI half; tmp[2..4]: V4DF scratch.  */
3321  tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
3322  tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
3323  tmp[5] = gen_reg_rtx (V4SImode);
3324
3325  for (i = 2; i < 5; i++)
3326    tmp[i] = gen_reg_rtx (V4DFmode);
3327  emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
3328  emit_insn (gen_floatv4siv4df2 (tmp[2], tmp[5]));
3329  emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
3330			  gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
3331  emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
3332  emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
3333  DONE;
3334})
3335
;; Unsigned conversion of the low half of V8SI operand 1 to V4DF.
;; The sequence is emitted explicitly: convert the low four elements
;; as signed (avx_cvtdq2pd256_2), build a (value < 0) mask, AND the
;; mask with a 2^32 vector and add that correction to the signed
;; result.
3336(define_expand "vec_unpacku_float_lo_v8si"
3337  [(match_operand:V4DF 0 "register_operand")
3338   (match_operand:V8SI 1 "nonimmediate_operand")]
3339  "TARGET_AVX"
3340{
3341  REAL_VALUE_TYPE TWO32r;
3342  rtx x, tmp[5];
3343  int i;
3344
  /* TWO32r = 1.0 * 2^32, then wrapped as a DFmode constant.  */
3345  real_ldexp (&TWO32r, &dconst1, 32);
3346  x = const_double_from_real_value (TWO32r, DFmode);
3347
  /* tmp[0]: zero vector; tmp[1]: vector built from 2^32;
     tmp[2..4]: V4DF scratch registers.  */
3348  tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
3349  tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
3350
3351  for (i = 2; i < 5; i++)
3352    tmp[i] = gen_reg_rtx (V4DFmode);
3353  emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1]));
3354  emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
3355			  gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
3356  emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
3357  emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
3358  DONE;
3359})
3360
;; Pack two V4DF operands into one V8SF: each input is narrowed with
;; float_truncate into its own fresh V4SF temporary (operands 3 and 4),
;; and the two temporaries are concatenated, operand 1's result in the
;; low half.
3361(define_expand "vec_pack_trunc_v4df"
3362  [(set (match_dup 3)
3363	(float_truncate:V4SF
3364	  (match_operand:V4DF 1 "nonimmediate_operand")))
3365   (set (match_dup 4)
3366	(float_truncate:V4SF
3367	  (match_operand:V4DF 2 "nonimmediate_operand")))
3368   (set (match_operand:V8SF 0 "register_operand")
3369	(vec_concat:V8SF
3370	  (match_dup 3)
3371	  (match_dup 4)))]
3372  "TARGET_AVX"
3373{
3374  operands[3] = gen_reg_rtx (V4SFmode);
3375  operands[4] = gen_reg_rtx (V4SFmode);
3376})
3377
;; Pack two V2DF operands into one V4SF.  Two strategies:
;;  - AVX, unless TARGET_PREFER_AVX128: concatenate both inputs into a
;;    V4DF temporary and narrow it with a single avx_cvtpd2ps256.
;;  - otherwise: narrow each input with sse2_cvtpd2ps (result in the
;;    low half, high half zero) and combine the two low halves with
;;    sse_movlhps.
3378(define_expand "vec_pack_trunc_v2df"
3379  [(match_operand:V4SF 0 "register_operand")
3380   (match_operand:V2DF 1 "nonimmediate_operand")
3381   (match_operand:V2DF 2 "nonimmediate_operand")]
3382  "TARGET_SSE2"
3383{
3384  rtx tmp0, tmp1;
3385
3386  if (TARGET_AVX && !TARGET_PREFER_AVX128)
3387    {
3388      tmp0 = gen_reg_rtx (V4DFmode);
      /* Operand 1 is forced into a register before the concat.  */
3389      tmp1 = force_reg (V2DFmode, operands[1]);
3390
3391      emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
3392      emit_insn (gen_avx_cvtpd2ps256 (operands[0], tmp0));
3393    }
3394  else
3395    {
3396      tmp0 = gen_reg_rtx (V4SFmode);
3397      tmp1 = gen_reg_rtx (V4SFmode);
3398
3399      emit_insn (gen_sse2_cvtpd2ps (tmp0, operands[1]));
3400      emit_insn (gen_sse2_cvtpd2ps (tmp1, operands[2]));
3401      emit_insn (gen_sse_movlhps (operands[0], tmp0, tmp1));
3402    }
3403  DONE;
3404})
3405
;; Pack two V4DF operands into one V8SI using truncating signed
;; conversion: each input goes through fix_truncv4dfv4si2 into its own
;; V4SI temporary, and the two temporaries are concatenated with
;; avx_vec_concatv8si (operand 1's result first).
3406(define_expand "vec_pack_sfix_trunc_v4df"
3407  [(match_operand:V8SI 0 "register_operand")
3408   (match_operand:V4DF 1 "nonimmediate_operand")
3409   (match_operand:V4DF 2 "nonimmediate_operand")]
3410  "TARGET_AVX"
3411{
3412  rtx r1, r2;
3413
3414  r1 = gen_reg_rtx (V4SImode);
3415  r2 = gen_reg_rtx (V4SImode);
3416
3417  emit_insn (gen_fix_truncv4dfv4si2 (r1, operands[1]));
3418  emit_insn (gen_fix_truncv4dfv4si2 (r2, operands[2]));
3419  emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
3420  DONE;
3421})
3422
;; Pack two V2DF operands into one V4SI using truncating signed
;; conversion.  Two strategies:
;;  - AVX, unless TARGET_PREFER_AVX128: concatenate both inputs into a
;;    V4DF temporary and convert it with a single fix_truncv4dfv4si2.
;;  - otherwise: convert each input with sse2_cvttpd2dq (result in the
;;    low half, high half zero) and merge the results, viewed as V2DI,
;;    with vec_interleave_lowv2di (presumably combining the two low
;;    64-bit halves -- that pattern is outside this chunk).
3423(define_expand "vec_pack_sfix_trunc_v2df"
3424  [(match_operand:V4SI 0 "register_operand")
3425   (match_operand:V2DF 1 "nonimmediate_operand")
3426   (match_operand:V2DF 2 "nonimmediate_operand")]
3427  "TARGET_SSE2"
3428{
3429  rtx tmp0, tmp1;
3430
3431  if (TARGET_AVX && !TARGET_PREFER_AVX128)
3432    {
3433      tmp0 = gen_reg_rtx (V4DFmode);
3434      tmp1 = force_reg (V2DFmode, operands[1]);
3435
3436      emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
3437      emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp0));
3438    }
3439  else
3440    {
3441      tmp0 = gen_reg_rtx (V4SImode);
3442      tmp1 = gen_reg_rtx (V4SImode);
3443
3444      emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1]));
3445      emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2]));
3446      emit_insn
3447       (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3448				    gen_lowpart (V2DImode, tmp0),
3449				    gen_lowpart (V2DImode, tmp1)));
3450    }
3451  DONE;
3452})
3453
;; Maps a double-precision vector mode to the SImode vector mode with
;; twice as many elements; used as the result mode of
;; vec_pack_ufix_trunc_<mode>.
3454(define_mode_attr ssepackfltmode
3455  [(V4DF "V8SI") (V2DF "V4SI")])
3456
;; Pack two double-precision vectors into one vector of unsigned
;; 32-bit integers, using the signed pack plus a correction:
;;  1. ix86_expand_adjust_ufix_to_sfix_si rewrites each input into
;;     tmp[0]/tmp[1] and returns a second value through tmp[2]/tmp[3].
;;  2. The adjusted inputs are packed with vec_pack_sfix_trunc_<mode>.
;;  3. Elements of tmp[2]/tmp[3] are gathered with
;;     ix86_expand_vec_extract_even_odd (last argument 0).
;;  4. The packed result is XORed with that gathered vector.
;; NOTE(review): presumably the helper biases values >= 2^31 into
;; signed range and the XOR restores the top bit -- confirm against
;; ix86_expand_adjust_ufix_to_sfix_si in i386.c.
3457(define_expand "vec_pack_ufix_trunc_<mode>"
3458  [(match_operand:<ssepackfltmode> 0 "register_operand")
3459   (match_operand:VF2 1 "register_operand")
3460   (match_operand:VF2 2 "register_operand")]
3461  "TARGET_SSE2"
3462{
3463  rtx tmp[7];
3464  tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
3465  tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
3466  tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
3467  emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
  /* The gather is done directly in the integer mode for V4SI results
     or when AVX2 is available; otherwise it goes through V8SF views
     of the operands and the result is viewed back as V8SI.  */
3468  if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
3469    {
3470      tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
3471      ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
3472    }
3473  else
3474    {
3475      tmp[5] = gen_reg_rtx (V8SFmode);
3476      ix86_expand_vec_extract_even_odd (tmp[5], gen_lowpart (V8SFmode, tmp[2]),
3477					gen_lowpart (V8SFmode, tmp[3]), 0);
3478      tmp[5] = gen_lowpart (V8SImode, tmp[5]);
3479    }
3480  tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
3481				operands[0], 0, OPTAB_DIRECT);
  /* expand_simple_binop may return a register other than the
     requested target; copy it over in that case.  */
3482  if (tmp[6] != operands[0])
3483    emit_move_insn (operands[0], tmp[6]);
3484  DONE;
3485})
3486
;; Pack two V4DF operands into one V8SI using the non-truncating
;; conversion: each input goes through avx_cvtpd2dq256 into its own
;; V4SI temporary, and the two temporaries are concatenated with
;; avx_vec_concatv8si (operand 1's result first).
3487(define_expand "vec_pack_sfix_v4df"
3488  [(match_operand:V8SI 0 "register_operand")
3489   (match_operand:V4DF 1 "nonimmediate_operand")
3490   (match_operand:V4DF 2 "nonimmediate_operand")]
3491  "TARGET_AVX"
3492{
3493  rtx r1, r2;
3494
3495  r1 = gen_reg_rtx (V4SImode);
3496  r2 = gen_reg_rtx (V4SImode);
3497
3498  emit_insn (gen_avx_cvtpd2dq256 (r1, operands[1]));
3499  emit_insn (gen_avx_cvtpd2dq256 (r2, operands[2]));
3500  emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
3501  DONE;
3502})
3503
;; Pack two V2DF operands into one V4SI using the non-truncating
;; conversion.  Two strategies:
;;  - AVX, unless TARGET_PREFER_AVX128: concatenate both inputs into a
;;    V4DF temporary and convert it with a single avx_cvtpd2dq256.
;;  - otherwise: convert each input with sse2_cvtpd2dq (result in the
;;    low half, high half zero) and merge the results, viewed as V2DI,
;;    with vec_interleave_lowv2di (presumably combining the two low
;;    64-bit halves -- that pattern is outside this chunk).
3504(define_expand "vec_pack_sfix_v2df"
3505  [(match_operand:V4SI 0 "register_operand")
3506   (match_operand:V2DF 1 "nonimmediate_operand")
3507   (match_operand:V2DF 2 "nonimmediate_operand")]
3508  "TARGET_SSE2"
3509{
3510  rtx tmp0, tmp1;
3511
3512  if (TARGET_AVX && !TARGET_PREFER_AVX128)
3513    {
3514      tmp0 = gen_reg_rtx (V4DFmode);
3515      tmp1 = force_reg (V2DFmode, operands[1]);
3516
3517      emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
3518      emit_insn (gen_avx_cvtpd2dq256 (operands[0], tmp0));
3519    }
3520  else
3521    {
3522      tmp0 = gen_reg_rtx (V4SImode);
3523      tmp1 = gen_reg_rtx (V4SImode);
3524
3525      emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1]));
3526      emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2]));
3527      emit_insn
3528       (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3529				    gen_lowpart (V2DImode, tmp0),
3530				    gen_lowpart (V2DImode, tmp1)));
3531    }
3532  DONE;
3533})
3534
3535;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3536;;
3537;; Parallel single-precision floating point element swizzling
3538;;
3539;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3540
;; Expander front end for sse_movhlps that accepts any
;; nonimmediate_operand combination.  ix86_fixup_binary_operands is
;; called on the operands and returns the destination to use; if that
;; differs from operands[0], the result is copied over afterwards.
3541(define_expand "sse_movhlps_exp"
3542  [(set (match_operand:V4SF 0 "nonimmediate_operand")
3543	(vec_select:V4SF
3544	  (vec_concat:V8SF
3545	    (match_operand:V4SF 1 "nonimmediate_operand")
3546	    (match_operand:V4SF 2 "nonimmediate_operand"))
3547	  (parallel [(const_int 6)
3548		     (const_int 7)
3549		     (const_int 2)
3550		     (const_int 3)])))]
3551  "TARGET_SSE"
3552{
3553  rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3554
3555  emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
3556
3557  /* Fix up the destination if needed.  */
3558  if (dst != operands[0])
3559    emit_move_insn (operands[0], dst);
3560
3561  DONE;
3562})
3563
;; Result = { op2[2], op2[3], op1[2], op1[3] } (indices 6,7,2,3 of the
;; op1 ++ op2 concatenation).  Operands 1 and 2 may not both be memory.
;; Alternatives:
;;   0/1: register-register movhlps / vmovhlps.
;;   2/3: operand 2 in offsettable memory -- (v)movlps with the %H
;;        modifier on operand 2 (presumably addressing its upper half).
;;   4:   memory destination tied to operand 1 -- (v)movhps from
;;        register operand 2.
3564(define_insn "sse_movhlps"
3565  [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,x,x,x,m")
3566	(vec_select:V4SF
3567	  (vec_concat:V8SF
3568	    (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3569	    (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x"))
3570	  (parallel [(const_int 6)
3571		     (const_int 7)
3572		     (const_int 2)
3573		     (const_int 3)])))]
3574  "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3575  "@
3576   movhlps\t{%2, %0|%0, %2}
3577   vmovhlps\t{%2, %1, %0|%0, %1, %2}
3578   movlps\t{%H2, %0|%0, %H2}
3579   vmovlps\t{%H2, %1, %0|%0, %1, %H2}
3580   %vmovhps\t{%2, %0|%0, %2}"
3581  [(set_attr "isa" "noavx,avx,noavx,avx,*")
3582   (set_attr "type" "ssemov")
3583   (set_attr "ssememalign" "64")
3584   (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3585   (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
3586
;; Expander front end for sse_movlhps that accepts any
;; nonimmediate_operand combination.  ix86_fixup_binary_operands is
;; called on the operands and returns the destination to use; if that
;; differs from operands[0], the result is copied over afterwards.
3587(define_expand "sse_movlhps_exp"
3588  [(set (match_operand:V4SF 0 "nonimmediate_operand")
3589	(vec_select:V4SF
3590	  (vec_concat:V8SF
3591	    (match_operand:V4SF 1 "nonimmediate_operand")
3592	    (match_operand:V4SF 2 "nonimmediate_operand"))
3593	  (parallel [(const_int 0)
3594		     (const_int 1)
3595		     (const_int 4)
3596		     (const_int 5)])))]
3597  "TARGET_SSE"
3598{
3599  rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3600
3601  emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
3602
3603  /* Fix up the destination if needed.  */
3604  if (dst != operands[0])
3605    emit_move_insn (operands[0], dst);
3606
3607  DONE;
3608})
3609
;; Result = { op1[0], op1[1], op2[0], op2[1] } (indices 0,1,4,5 of the
;; op1 ++ op2 concatenation).  Operand legality is checked with
;; ix86_binary_operator_ok.  Alternatives:
;;   0/1: register-register movlhps / vmovlhps.
;;   2/3: operand 2 in memory -- (v)movhps loading it into the high
;;        half of the destination.
;;   4:   offsettable memory destination tied to operand 1 -- (v)movlps
;;        from register operand 2 to %H0 (presumably the upper half of
;;        the destination).
3610(define_insn "sse_movlhps"
3611  [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,x,x,x,o")
3612	(vec_select:V4SF
3613	  (vec_concat:V8SF
3614	    (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3615	    (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,m,x"))
3616	  (parallel [(const_int 0)
3617		     (const_int 1)
3618		     (const_int 4)
3619		     (const_int 5)])))]
3620  "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3621  "@
3622   movlhps\t{%2, %0|%0, %2}
3623   vmovlhps\t{%2, %1, %0|%0, %1, %2}
3624   movhps\t{%2, %0|%0, %2}
3625   vmovhps\t{%2, %1, %0|%0, %1, %2}
3626   %vmovlps\t{%2, %H0|%H0, %2}"
3627  [(set_attr "isa" "noavx,avx,noavx,avx,*")
3628   (set_attr "type" "ssemov")
3629   (set_attr "ssememalign" "64")
3630   (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3631   (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
3632
3633;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; vunpckhps on 256-bit vectors.  With a = operand 1 and b = operand 2
;; the result is { a2,b2,a3,b3, a6,b6,a7,b7 }: the upper two elements
;; of each 128-bit lane are interleaved, lane by lane.
3634(define_insn "avx_unpckhps256"
3635  [(set (match_operand:V8SF 0 "register_operand" "=x")
3636	(vec_select:V8SF
3637	  (vec_concat:V16SF
3638	    (match_operand:V8SF 1 "register_operand" "x")
3639	    (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3640	  (parallel [(const_int 2) (const_int 10)
3641		     (const_int 3) (const_int 11)
3642		     (const_int 6) (const_int 14)
3643		     (const_int 7) (const_int 15)])))]
3644  "TARGET_AVX"
3645  "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3646  [(set_attr "type" "sselog")
3647   (set_attr "prefix" "vex")
3648   (set_attr "mode" "V8SF")])
3649
;; Full (cross-lane) interleave of the high halves of two V8SF
;; vectors.  With a = operand 1 and b = operand 2:
;;   op3 = { a0,b0,a1,b1, a4,b4,a5,b5 }   (in-lane low unpack)
;;   op4 = { a2,b2,a3,b3, a6,b6,a7,b7 }   (in-lane high unpack)
;;   op0 = high lane of op3 ++ high lane of op4
;;       = { a4,b4,a5,b5, a6,b6,a7,b7 }
;; NOTE(review): the "x"/"xm" constraint strings on operands 1 and 2
;; are presumably unused in a define_expand -- confirm before relying
;; on them.
3650(define_expand "vec_interleave_highv8sf"
3651  [(set (match_dup 3)
3652	(vec_select:V8SF
3653	  (vec_concat:V16SF
3654	    (match_operand:V8SF 1 "register_operand" "x")
3655	    (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3656	  (parallel [(const_int 0) (const_int 8)
3657		     (const_int 1) (const_int 9)
3658		     (const_int 4) (const_int 12)
3659		     (const_int 5) (const_int 13)])))
3660   (set (match_dup 4)
3661	(vec_select:V8SF
3662	  (vec_concat:V16SF
3663	    (match_dup 1)
3664	    (match_dup 2))
3665	  (parallel [(const_int 2) (const_int 10)
3666		     (const_int 3) (const_int 11)
3667		     (const_int 6) (const_int 14)
3668		     (const_int 7) (const_int 15)])))
3669   (set (match_operand:V8SF 0 "register_operand")
3670	(vec_select:V8SF
3671	  (vec_concat:V16SF
3672	    (match_dup 3)
3673	    (match_dup 4))
3674	  (parallel [(const_int 4) (const_int 5)
3675		     (const_int 6) (const_int 7)
3676		     (const_int 12) (const_int 13)
3677		     (const_int 14) (const_int 15)])))]
3678 "TARGET_AVX"
3679{
3680  operands[3] = gen_reg_rtx (V8SFmode);
3681  operands[4] = gen_reg_rtx (V8SFmode);
3682})
3683
;; Interleave the high halves of two V4SF vectors: selects {2,6,3,7}
;; from the concatenation of operands 1 and 2.
;; Alternative 0 (noavx): two-operand unpckhps, operand 1 tied to operand 0.
;; Alternative 1 (avx):   three-operand vunpckhps.
3684(define_insn "vec_interleave_highv4sf"
3685  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3686	(vec_select:V4SF
3687	  (vec_concat:V8SF
3688	    (match_operand:V4SF 1 "register_operand" "0,x")
3689	    (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
3690	  (parallel [(const_int 2) (const_int 6)
3691		     (const_int 3) (const_int 7)])))]
3692  "TARGET_SSE"
3693  "@
3694   unpckhps\t{%2, %0|%0, %2}
3695   vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3696  [(set_attr "isa" "noavx,avx")
3697   (set_attr "type" "sselog")
3698   (set_attr "prefix" "orig,vex")
3699   (set_attr "mode" "V4SF")])
3700
3701;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpcklps.  From the concatenation of operands 1 and 2 the
;; result picks {0,8,1,9} and {4,12,5,13}: the lower two floats of each
;; 4-element half of both inputs, interleaved.  Requires TARGET_AVX.
3702(define_insn "avx_unpcklps256"
3703  [(set (match_operand:V8SF 0 "register_operand" "=x")
3704	(vec_select:V8SF
3705	  (vec_concat:V16SF
3706	    (match_operand:V8SF 1 "register_operand" "x")
3707	    (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3708	  (parallel [(const_int 0) (const_int 8)
3709		     (const_int 1) (const_int 9)
3710		     (const_int 4) (const_int 12)
3711		     (const_int 5) (const_int 13)])))]
3712  "TARGET_AVX"
3713  "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3714  [(set_attr "type" "sselog")
3715   (set_attr "prefix" "vex")
3716   (set_attr "mode" "V8SF")])
3717
;; Full-width "interleave low" for V8SF, expanded to three sets:
;;   operand 3 = {a0,b0,a1,b1,a4,b4,a5,b5}  (indices 0,8,1,9,4,12,5,13)
;;   operand 4 = {a2,b2,a3,b3,a6,b6,a7,b7}  (indices 2,10,3,11,6,14,7,15)
;;   operand 0 = elements 0-3 of operand 3 followed by elements 0-3 of
;;               operand 4, i.e. {a0,b0,a1,b1,a2,b2,a3,b3}
;; where a = operand 1 and b = operand 2.  Operands 3 and 4 are fresh
;; V8SF pseudos created in the preparation statements.
3718(define_expand "vec_interleave_lowv8sf"
3719  [(set (match_dup 3)
3720	(vec_select:V8SF
3721	  (vec_concat:V16SF
3722	    (match_operand:V8SF 1 "register_operand" "x")
3723	    (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3724	  (parallel [(const_int 0) (const_int 8)
3725		     (const_int 1) (const_int 9)
3726		     (const_int 4) (const_int 12)
3727		     (const_int 5) (const_int 13)])))
3728   (set (match_dup 4)
3729	(vec_select:V8SF
3730	  (vec_concat:V16SF
3731	    (match_dup 1)
3732	    (match_dup 2))
3733	  (parallel [(const_int 2) (const_int 10)
3734		     (const_int 3) (const_int 11)
3735		     (const_int 6) (const_int 14)
3736		     (const_int 7) (const_int 15)])))
3737   (set (match_operand:V8SF 0 "register_operand")
3738	(vec_select:V8SF
3739	  (vec_concat:V16SF
3740	    (match_dup 3)
3741	    (match_dup 4))
3742	  (parallel [(const_int 0) (const_int 1)
3743		     (const_int 2) (const_int 3)
3744		     (const_int 8) (const_int 9)
3745		     (const_int 10) (const_int 11)])))]
3746 "TARGET_AVX"
3747{
  /* Temporaries holding the two in-lane unpack results.  */
3748  operands[3] = gen_reg_rtx (V8SFmode);
3749  operands[4] = gen_reg_rtx (V8SFmode);
3750})
3751
;; Interleave the low halves of two V4SF vectors: selects {0,4,1,5}
;; from the concatenation of operands 1 and 2.
;; Alternative 0 (noavx): two-operand unpcklps, operand 1 tied to operand 0.
;; Alternative 1 (avx):   three-operand vunpcklps.
3752(define_insn "vec_interleave_lowv4sf"
3753  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3754	(vec_select:V4SF
3755	  (vec_concat:V8SF
3756	    (match_operand:V4SF 1 "register_operand" "0,x")
3757	    (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
3758	  (parallel [(const_int 0) (const_int 4)
3759		     (const_int 1) (const_int 5)])))]
3760  "TARGET_SSE"
3761  "@
3762   unpcklps\t{%2, %0|%0, %2}
3763   vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3764  [(set_attr "isa" "noavx,avx")
3765   (set_attr "type" "sselog")
3766   (set_attr "prefix" "orig,vex")
3767   (set_attr "mode" "V4SF")])
3768
3769;; These are modeled with the same vec_concat as the others so that we
3770;; capture users of shufps that can use the new instructions
;; 256-bit vmovshdup: duplicates the odd-indexed elements of operand 1,
;; giving {1,1,3,3,5,5,7,7}.  Both halves of the vec_concat are operand 1
;; (match_dup), and every selected index lies in the first half.
3771(define_insn "avx_movshdup256"
3772  [(set (match_operand:V8SF 0 "register_operand" "=x")
3773	(vec_select:V8SF
3774	  (vec_concat:V16SF
3775	    (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3776	    (match_dup 1))
3777	  (parallel [(const_int 1) (const_int 1)
3778		     (const_int 3) (const_int 3)
3779		     (const_int 5) (const_int 5)
3780		     (const_int 7) (const_int 7)])))]
3781  "TARGET_AVX"
3782  "vmovshdup\t{%1, %0|%0, %1}"
3783  [(set_attr "type" "sse")
3784   (set_attr "prefix" "vex")
3785   (set_attr "mode" "V8SF")])
3786
;; 128-bit movshdup: duplicates the odd-indexed elements of operand 1.
;; The selection is {1,1,7,7} over the concatenation of operand 1 with
;; itself; index 7 is element 3 of the second copy, so the result is
;; elements {1,1,3,3} of operand 1.  Requires TARGET_SSE3.
3787(define_insn "sse3_movshdup"
3788  [(set (match_operand:V4SF 0 "register_operand" "=x")
3789	(vec_select:V4SF
3790	  (vec_concat:V8SF
3791	    (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3792	    (match_dup 1))
3793	  (parallel [(const_int 1)
3794		     (const_int 1)
3795		     (const_int 7)
3796		     (const_int 7)])))]
3797  "TARGET_SSE3"
3798  "%vmovshdup\t{%1, %0|%0, %1}"
3799  [(set_attr "type" "sse")
3800   (set_attr "prefix_rep" "1")
3801   (set_attr "prefix" "maybe_vex")
3802   (set_attr "mode" "V4SF")])
3803
;; 256-bit vmovsldup: duplicates the even-indexed elements of operand 1,
;; giving {0,0,2,2,4,4,6,6}.  Both halves of the vec_concat are operand 1
;; (match_dup), and every selected index lies in the first half.
3804(define_insn "avx_movsldup256"
3805  [(set (match_operand:V8SF 0 "register_operand" "=x")
3806	(vec_select:V8SF
3807	  (vec_concat:V16SF
3808	    (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3809	    (match_dup 1))
3810	  (parallel [(const_int 0) (const_int 0)
3811		     (const_int 2) (const_int 2)
3812		     (const_int 4) (const_int 4)
3813		     (const_int 6) (const_int 6)])))]
3814  "TARGET_AVX"
3815  "vmovsldup\t{%1, %0|%0, %1}"
3816  [(set_attr "type" "sse")
3817   (set_attr "prefix" "vex")
3818   (set_attr "mode" "V8SF")])
3819
;; 128-bit movsldup: duplicates the even-indexed elements of operand 1.
;; The selection is {0,0,6,6} over the concatenation of operand 1 with
;; itself; index 6 is element 2 of the second copy, so the result is
;; elements {0,0,2,2} of operand 1.  Requires TARGET_SSE3.
3820(define_insn "sse3_movsldup"
3821  [(set (match_operand:V4SF 0 "register_operand" "=x")
3822	(vec_select:V4SF
3823	  (vec_concat:V8SF
3824	    (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3825	    (match_dup 1))
3826	  (parallel [(const_int 0)
3827		     (const_int 0)
3828		     (const_int 6)
3829		     (const_int 6)])))]
3830  "TARGET_SSE3"
3831  "%vmovsldup\t{%1, %0|%0, %1}"
3832  [(set_attr "type" "sse")
3833   (set_attr "prefix_rep" "1")
3834   (set_attr "prefix" "maybe_vex")
3835   (set_attr "mode" "V4SF")])
3836
;; Expander taking the shuffle immediate as a single integer (operand 3)
;; and turning it into the eight explicit element selectors expected by
;; avx_shufps256_1.  Each 2-bit field of the immediate is used twice:
;; once for the low four result elements and once, offset by 4, for the
;; high four.  Fields 0 and 1 index operand 1 (0-3 / 4-7); fields 2 and 3
;; index operand 2 (8-11 / 12-15 in the concatenation).
3837(define_expand "avx_shufps256"
3838  [(match_operand:V8SF 0 "register_operand")
3839   (match_operand:V8SF 1 "register_operand")
3840   (match_operand:V8SF 2 "nonimmediate_operand")
3841   (match_operand:SI 3 "const_int_operand")]
3842  "TARGET_AVX"
3843{
3844  int mask = INTVAL (operands[3]);
3845  emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
3846				  GEN_INT ((mask >> 0) & 3),
3847				  GEN_INT ((mask >> 2) & 3),
3848				  GEN_INT (((mask >> 4) & 3) + 8),
3849				  GEN_INT (((mask >> 6) & 3) + 8),
3850				  GEN_INT (((mask >> 0) & 3) + 4),
3851				  GEN_INT (((mask >> 2) & 3) + 4),
3852				  GEN_INT (((mask >> 4) & 3) + 12),
3853				  GEN_INT (((mask >> 6) & 3) + 12)));
3854  DONE;
3855})
3856
3857;; Each 2-bit field in the mask selects 2 elements, one in each 128-bit lane.
;; 256-bit vshufps with the shuffle written as eight explicit selectors
;; (operands 3-10) into the concatenation of operands 1 and 2.
;; The insn condition only accepts selector sets where the four selectors
;; for the high result elements (operands 7-10) equal the corresponding
;; low ones (operands 3-6) plus 4, so a single 8-bit immediate can encode
;; the shuffle.
3858(define_insn "avx_shufps256_1"
3859  [(set (match_operand:V8SF 0 "register_operand" "=x")
3860	(vec_select:V8SF
3861	  (vec_concat:V16SF
3862	    (match_operand:V8SF 1 "register_operand" "x")
3863	    (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3864	  (parallel [(match_operand 3  "const_0_to_3_operand"  )
3865		     (match_operand 4  "const_0_to_3_operand"  )
3866		     (match_operand 5  "const_8_to_11_operand" )
3867		     (match_operand 6  "const_8_to_11_operand" )
3868		     (match_operand 7  "const_4_to_7_operand"  )
3869		     (match_operand 8  "const_4_to_7_operand"  )
3870		     (match_operand 9  "const_12_to_15_operand")
3871		     (match_operand 10 "const_12_to_15_operand")])))]
3872  "TARGET_AVX
3873   && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
3874       && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
3875       && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
3876       && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
3877{
  /* Rebuild the 8-bit immediate from the four low selectors: two bits
     each, with the operand-2 selectors rebased from 8..11 to 0..3.  */
3878  int mask;
3879  mask = INTVAL (operands[3]);
3880  mask |= INTVAL (operands[4]) << 2;
3881  mask |= (INTVAL (operands[5]) - 8) << 4;
3882  mask |= (INTVAL (operands[6]) - 8) << 6;
  /* operands[3] is reused to carry the immediate into the template.  */
3883  operands[3] = GEN_INT (mask);
3884
3885  return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3886}
3887  [(set_attr "type" "sseshuf")
3888   (set_attr "length_immediate" "1")
3889   (set_attr "prefix" "vex")
3890   (set_attr "mode" "V8SF")])
3891
;; Expander taking the shufps immediate as a single integer (operand 3)
;; and turning it into the four explicit element selectors expected by
;; sse_shufps_v4sf.  Fields 0 and 1 index operand 1 (0-3); fields 2 and 3
;; index operand 2 and are offset by 4 into the concatenation.
3892(define_expand "sse_shufps"
3893  [(match_operand:V4SF 0 "register_operand")
3894   (match_operand:V4SF 1 "register_operand")
3895   (match_operand:V4SF 2 "nonimmediate_operand")
3896   (match_operand:SI 3 "const_int_operand")]
3897  "TARGET_SSE"
3898{
3899  int mask = INTVAL (operands[3]);
3900  emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
3901			       GEN_INT ((mask >> 0) & 3),
3902			       GEN_INT ((mask >> 2) & 3),
3903			       GEN_INT (((mask >> 4) & 3) + 4),
3904			       GEN_INT (((mask >> 6) & 3) + 4)));
3905  DONE;
3906})
3907
;; 128-bit shufps, instantiated for every mode in the VI4F_128 iterator.
;; The shuffle is written as four explicit selectors (operands 3-6) into
;; the concatenation of operands 1 and 2: two from operand 1 (0-3) and
;; two from operand 2 (4-7).
;; Alternative 0 (noavx): two-operand shufps, operand 1 tied to operand 0.
;; Alternative 1 (avx):   three-operand vshufps.
3908(define_insn "sse_shufps_<mode>"
3909  [(set (match_operand:VI4F_128 0 "register_operand" "=x,x")
3910	(vec_select:VI4F_128
3911	  (vec_concat:<ssedoublevecmode>
3912	    (match_operand:VI4F_128 1 "register_operand" "0,x")
3913	    (match_operand:VI4F_128 2 "nonimmediate_operand" "xm,xm"))
3914	  (parallel [(match_operand 3 "const_0_to_3_operand")
3915		     (match_operand 4 "const_0_to_3_operand")
3916		     (match_operand 5 "const_4_to_7_operand")
3917		     (match_operand 6 "const_4_to_7_operand")])))]
3918  "TARGET_SSE"
3919{
  /* Rebuild the 8-bit immediate: two bits per selector, with the
     operand-2 selectors rebased from 4..7 to 0..3.  */
3920  int mask = 0;
3921  mask |= INTVAL (operands[3]) << 0;
3922  mask |= INTVAL (operands[4]) << 2;
3923  mask |= (INTVAL (operands[5]) - 4) << 4;
3924  mask |= (INTVAL (operands[6]) - 4) << 6;
  /* operands[3] is reused to carry the immediate into the template.  */
3925  operands[3] = GEN_INT (mask);
3926
3927  switch (which_alternative)
3928    {
3929    case 0:
3930      return "shufps\t{%3, %2, %0|%0, %2, %3}";
3931    case 1:
3932      return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3933    default:
3934      gcc_unreachable ();
3935    }
3936}
3937  [(set_attr "isa" "noavx,avx")
3938   (set_attr "type" "sseshuf")
3939   (set_attr "length_immediate" "1")
3940   (set_attr "prefix" "orig,vex")
3941   (set_attr "mode" "V4SF")])
3942
;; Extract the high two floats (elements 2 and 3) of V4SF operand 1 into
;; V2SF operand 0.
;; Alternative 0: register -> memory, movhps.
;; Alternative 1: register -> register, movhlps.
;; Alternative 2: offsettable memory -> register, movlps on %H1
;;   (NOTE(review): %H presumably prints the address of the high 8 bytes
;;   of the memory operand -- confirm against ix86_print_operand).
3943(define_insn "sse_storehps"
3944  [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3945	(vec_select:V2SF
3946	  (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
3947	  (parallel [(const_int 2) (const_int 3)])))]
3948  "TARGET_SSE"
3949  "@
3950   %vmovhps\t{%1, %0|%0, %1}
3951   %vmovhlps\t{%1, %d0|%d0, %1}
3952   %vmovlps\t{%H1, %d0|%d0, %H1}"
3953  [(set_attr "type" "ssemov")
3954   (set_attr "ssememalign" "64")
3955   (set_attr "prefix" "maybe_vex")
3956   (set_attr "mode" "V2SF,V4SF,V2SF")])
3957
;; Expander wrapper around sse_loadhps.  It passes the operands through
;; ix86_fixup_binary_operands, emits sse_loadhps into the destination
;; that helper returns, and copies the result to operand 0 if the helper
;; chose a different destination.
3958(define_expand "sse_loadhps_exp"
3959  [(set (match_operand:V4SF 0 "nonimmediate_operand")
3960	(vec_concat:V4SF
3961	  (vec_select:V2SF
3962	    (match_operand:V4SF 1 "nonimmediate_operand")
3963	    (parallel [(const_int 0) (const_int 1)]))
3964	  (match_operand:V2SF 2 "nonimmediate_operand")))]
3965  "TARGET_SSE"
3966{
3967  rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3968
3969  emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
3970
3971  /* Fix up the destination if needed.  */
3972  if (dst != operands[0])
3973    emit_move_insn (operands[0], dst);
3974
3975  DONE;
3976})
3977
;; Replace the high two floats of a V4SF: the result is the low two
;; elements (0 and 1) of operand 1 followed by V2SF operand 2.
;; Alternatives 0/1: operand 2 in memory, movhps / vmovhps.
;; Alternatives 2/3: operand 2 in a register, movlhps / vmovlhps.
;; Alternative 4:    destination is offsettable memory tied to operand 1;
;;   operand 2 is stored with movlps to %H0 (NOTE(review): presumably the
;;   high 8 bytes of the destination -- confirm the %H modifier).
3978(define_insn "sse_loadhps"
3979  [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,x,x,x,o")
3980	(vec_concat:V4SF
3981	  (vec_select:V2SF
3982	    (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3983	    (parallel [(const_int 0) (const_int 1)]))
3984	  (match_operand:V2SF 2 "nonimmediate_operand"   " m,m,x,x,x")))]
3985  "TARGET_SSE"
3986  "@
3987   movhps\t{%2, %0|%0, %2}
3988   vmovhps\t{%2, %1, %0|%0, %1, %2}
3989   movlhps\t{%2, %0|%0, %2}
3990   vmovlhps\t{%2, %1, %0|%0, %1, %2}
3991   %vmovlps\t{%2, %H0|%H0, %2}"
3992  [(set_attr "isa" "noavx,avx,noavx,avx,*")
3993   (set_attr "type" "ssemov")
3994   (set_attr "ssememalign" "64")
3995   (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3996   (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
3997
;; Extract the low two floats (elements 0 and 1) of V4SF operand 1 into
;; V2SF operand 0.
;; Alternative 0: register -> memory, movlps.
;; Alternative 1: register -> register, movaps of the whole register.
;; Alternative 2: memory -> register, movlps.
3998(define_insn "sse_storelps"
3999  [(set (match_operand:V2SF 0 "nonimmediate_operand"   "=m,x,x")
4000	(vec_select:V2SF
4001	  (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m")
4002	  (parallel [(const_int 0) (const_int 1)])))]
4003  "TARGET_SSE"
4004  "@
4005   %vmovlps\t{%1, %0|%0, %1}
4006   %vmovaps\t{%1, %0|%0, %1}
4007   %vmovlps\t{%1, %d0|%d0, %1}"
4008  [(set_attr "type" "ssemov")
4009   (set_attr "prefix" "maybe_vex")
4010   (set_attr "mode" "V2SF,V4SF,V2SF")])
4011
;; Expander wrapper around sse_loadlps.  It passes the operands through
;; ix86_fixup_binary_operands, emits sse_loadlps into the destination
;; that helper returns, and copies the result to operand 0 if the helper
;; chose a different destination.
4012(define_expand "sse_loadlps_exp"
4013  [(set (match_operand:V4SF 0 "nonimmediate_operand")
4014	(vec_concat:V4SF
4015	  (match_operand:V2SF 2 "nonimmediate_operand")
4016	  (vec_select:V2SF
4017	    (match_operand:V4SF 1 "nonimmediate_operand")
4018	    (parallel [(const_int 2) (const_int 3)]))))]
4019  "TARGET_SSE"
4020{
4021  rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
4022
4023  emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
4024
4025  /* Fix up the destination if needed.  */
4026  if (dst != operands[0])
4027    emit_move_insn (operands[0], dst);
4028
4029  DONE;
4030})
4031
;; Replace the low two floats of a V4SF: the result is V2SF operand 2
;; followed by the high two elements (2 and 3) of operand 1.
;; Alternatives 0/1: both inputs in registers, shufps / vshufps with
;;   immediate 0xe4 (operand 2 is tied to the destination in the
;;   non-AVX form).
;; Alternatives 2/3: operand 2 in memory, movlps / vmovlps.
;; Alternative 4:    destination in memory tied to operand 1; operand 2
;;   is stored with movlps.
4032(define_insn "sse_loadlps"
4033  [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,x,x,x,m")
4034	(vec_concat:V4SF
4035	  (match_operand:V2SF 2 "nonimmediate_operand"   " 0,x,m,m,x")
4036	  (vec_select:V2SF
4037	    (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0")
4038	    (parallel [(const_int 2) (const_int 3)]))))]
4039  "TARGET_SSE"
4040  "@
4041   shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
4042   vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
4043   movlps\t{%2, %0|%0, %2}
4044   vmovlps\t{%2, %1, %0|%0, %1, %2}
4045   %vmovlps\t{%2, %0|%0, %2}"
4046  [(set_attr "isa" "noavx,avx,noavx,avx,*")
4047   (set_attr "type" "sseshuf,sseshuf,ssemov,ssemov,ssemov")
4048   (set_attr "ssememalign" "64")
4049   (set_attr "length_immediate" "1,1,*,*,*")
4050   (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
4051   (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
4052
;; Register-to-register movss: a vec_merge with mask 1, so element 0 of
;; the result comes from operand 2 and the remaining elements from
;; operand 1.
;; Alternative 0 (noavx): movss, operand 1 tied to operand 0.
;; Alternative 1 (avx):   three-operand vmovss.
4053(define_insn "sse_movss"
4054  [(set (match_operand:V4SF 0 "register_operand"   "=x,x")
4055	(vec_merge:V4SF
4056	  (match_operand:V4SF 2 "register_operand" " x,x")
4057	  (match_operand:V4SF 1 "register_operand" " 0,x")
4058	  (const_int 1)))]
4059  "TARGET_SSE"
4060  "@
4061   movss\t{%2, %0|%0, %2}
4062   vmovss\t{%2, %1, %0|%0, %1, %2}"
4063  [(set_attr "isa" "noavx,avx")
4064   (set_attr "type" "ssemov")
4065   (set_attr "prefix" "orig,vex")
4066   (set_attr "mode" "SF")])
4067
;; Broadcast element 0 of V4SF register operand 1 into every element of
;; the destination, for each mode in the VF1 iterator.  Emits
;; vbroadcastss with a register source; requires TARGET_AVX2.
4068(define_insn "avx2_vec_dup<mode>"
4069  [(set (match_operand:VF1 0 "register_operand" "=x")
4070	(vec_duplicate:VF1
4071	  (vec_select:SF
4072	    (match_operand:V4SF 1 "register_operand" "x")
4073	    (parallel [(const_int 0)]))))]
4074  "TARGET_AVX2"
4075  "vbroadcastss\t{%1, %0|%0, %1}"
4076  [(set_attr "type" "sselog1")
4077    (set_attr "prefix" "vex")
4078    (set_attr "mode" "<MODE>")])
4079
;; Broadcast element 0 of V8SF register operand 1 into every element of
;; a V8SF destination.  The source is printed with the %x modifier
;; (NOTE(review): presumably the 128-bit xmm name of the register --
;; confirm against ix86_print_operand).  Requires TARGET_AVX2.
4080(define_insn "avx2_vec_dupv8sf_1"
4081  [(set (match_operand:V8SF 0 "register_operand" "=x")
4082	(vec_duplicate:V8SF
4083	  (vec_select:SF
4084	    (match_operand:V8SF 1 "register_operand" "x")
4085	    (parallel [(const_int 0)]))))]
4086  "TARGET_AVX2"
4087  "vbroadcastss\t{%x1, %0|%0, %x1}"
4088  [(set_attr "type" "sselog1")
4089    (set_attr "prefix" "vex")
4090    (set_attr "mode" "V8SF")])
4091
;; Duplicate SF operand 1 into all four elements of V4SF operand 0.
;; Alternative 0 (avx):   register source, vshufps with immediate 0.
;; Alternative 1 (avx):   memory source, vbroadcastss.
;; Alternative 2 (noavx): source tied to the destination register,
;;                        shufps with immediate 0 in place.
4092(define_insn "vec_dupv4sf"
4093  [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
4094	(vec_duplicate:V4SF
4095	  (match_operand:SF 1 "nonimmediate_operand" "x,m,0")))]
4096  "TARGET_SSE"
4097  "@
4098   vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
4099   vbroadcastss\t{%1, %0|%0, %1}
4100   shufps\t{$0, %0, %0|%0, %0, 0}"
4101  [(set_attr "isa" "avx,avx,noavx")
4102   (set_attr "type" "sseshuf1,ssemov,sseshuf1")
4103   (set_attr "length_immediate" "1,0,1")
4104   (set_attr "prefix_extra" "0,1,*")
4105   (set_attr "prefix" "vex,vex,orig")
4106   (set_attr "mode" "V4SF")])
4107
4108;; Although insertps takes register source, we prefer
4109;; unpcklps with register source since it is shorter.
;; Build a V2SF from two SF values (operand 1 low, operand 2 high) when
;; SSE4.1 is available.
;; Alternatives 0/1: both in SSE registers, unpcklps / vunpcklps.
;; Alternatives 2/3: operand 2 in memory, insertps / vinsertps with
;;                   immediate 0x10.
;; Alternative 4:    operand 1 in memory, operand 2 matching "C"; a
;;                   single movss load of operand 1.
;; Alternatives 5/6: MMX register destination, punpckldq / movd.
;; NOTE(review): "C" is assumed to be the constant-zero constraint --
;; confirm in constraints.md.
4110(define_insn "*vec_concatv2sf_sse4_1"
4111  [(set (match_operand:V2SF 0 "register_operand"     "=x,x,x,x,x,*y ,*y")
4112	(vec_concat:V2SF
4113	  (match_operand:SF 1 "nonimmediate_operand" " 0,x,0,x,m, 0 , m")
4114	  (match_operand:SF 2 "vector_move_operand"  " x,x,m,m,C,*ym, C")))]
4115  "TARGET_SSE4_1"
4116  "@
4117   unpcklps\t{%2, %0|%0, %2}
4118   vunpcklps\t{%2, %1, %0|%0, %1, %2}
4119   insertps\t{$0x10, %2, %0|%0, %2, 0x10}
4120   vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
4121   %vmovss\t{%1, %0|%0, %1}
4122   punpckldq\t{%2, %0|%0, %2}
4123   movd\t{%1, %0|%0, %1}"
4124  [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
4125   (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
4126   (set_attr "prefix_data16" "*,*,1,*,*,*,*")
4127   (set_attr "prefix_extra" "*,*,1,1,*,*,*")
4128   (set_attr "length_immediate" "*,*,1,1,*,*,*")
4129   (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
4130   (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
4131
4132;; ??? In theory we can match memory for the MMX alternative, but allowing
4133;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
4134;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SF from two SF values (operand 1 low, operand 2 high) with
;; plain SSE.  Operand 2 must be a register or match "C".
;; Alternative 0: both in SSE registers, unpcklps.
;; Alternative 1: operand 1 in memory, operand 2 matching "C"; movss load.
;; Alternatives 2/3: MMX register destination, punpckldq / movd.
;; NOTE(review): "C" is assumed to be the constant-zero constraint --
;; confirm in constraints.md.
4135(define_insn "*vec_concatv2sf_sse"
4136  [(set (match_operand:V2SF 0 "register_operand"     "=x,x,*y,*y")
4137	(vec_concat:V2SF
4138	  (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
4139	  (match_operand:SF 2 "reg_or_0_operand"     " x,C,*y, C")))]
4140  "TARGET_SSE"
4141  "@
4142   unpcklps\t{%2, %0|%0, %2}
4143   movss\t{%1, %0|%0, %1}
4144   punpckldq\t{%2, %0|%0, %2}
4145   movd\t{%1, %0|%0, %1}"
4146  [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
4147   (set_attr "mode" "V4SF,SF,DI,DI")])
4148
;; Build a V4SF from two V2SF halves (operand 1 low, operand 2 high).
;; Alternatives 0/1: operand 2 in a register, movlhps / vmovlhps.
;; Alternatives 2/3: operand 2 in memory, movhps / vmovhps.
;; The non-AVX alternatives tie operand 1 to the destination.
4149(define_insn "*vec_concatv4sf"
4150  [(set (match_operand:V4SF 0 "register_operand"       "=x,x,x,x")
4151	(vec_concat:V4SF
4152	  (match_operand:V2SF 1 "register_operand"     " 0,x,0,x")
4153	  (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))]
4154  "TARGET_SSE"
4155  "@
4156   movlhps\t{%2, %0|%0, %2}
4157   vmovlhps\t{%2, %1, %0|%0, %1, %2}
4158   movhps\t{%2, %0|%0, %2}
4159   vmovhps\t{%2, %1, %0|%0, %1, %2}"
4160  [(set_attr "isa" "noavx,avx,noavx,avx")
4161   (set_attr "type" "ssemov")
4162   (set_attr "prefix" "orig,vex,orig,vex")
4163   (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
4164
;; Vector initialisation for every mode in the V_128 iterator.  All the
;; work is delegated to ix86_expand_vector_init, called with false as
;; its first argument, the destination (operand 0) and the initialiser
;; (operand 1).
4165(define_expand "vec_init<mode>"
4166  [(match_operand:V_128 0 "register_operand")
4167   (match_operand 1)]
4168  "TARGET_SSE"
4169{
4170  ix86_expand_vector_init (false, operands[0], operands[1]);
4171  DONE;
4172})
4173
4174;; Avoid combining registers from different units in a single alternative,
4175;; see comment above inline_secondary_memory_needed function in i386.c
;; Set element 0 of a VI4F_128 vector: a vec_merge with mask 1 takes
;; element 0 from the duplicated scalar operand 2 and the remaining
;; elements from operand 1.
;; Alternatives 0-3: operand 1 matches "C" (NOTE(review): assumed to be
;;   the zero vector -- confirm in constraints.md); scalar from an SSE
;;   register, memory or a general register.
;; Alternatives 4-7: operand 1 is a register (tied to the destination in
;;   the non-AVX forms); movss / vmovss / pinsrd / vpinsrd.
;; Alternatives 8-10: destination in memory tied to operand 1; the
;;   template is "#", so these insns must be split before output.
4176(define_insn "vec_set<mode>_0"
4177  [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
4178	  "=x,x,x ,x,x,x,x  ,x  ,m ,m   ,m")
4179	(vec_merge:VI4F_128
4180	  (vec_duplicate:VI4F_128
4181	    (match_operand:<ssescalarmode> 2 "general_operand"
4182	  " x,m,*r,m,x,x,*rm,*rm,!x,!*re,!*fF"))
4183	  (match_operand:VI4F_128 1 "vector_move_operand"
4184	  " C,C,C ,C,0,x,0  ,x  ,0 ,0   ,0")
4185	  (const_int 1)))]
4186  "TARGET_SSE"
4187  "@
4188   %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
4189   %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
4190   %vmovd\t{%2, %0|%0, %2}
4191   movss\t{%2, %0|%0, %2}
4192   movss\t{%2, %0|%0, %2}
4193   vmovss\t{%2, %1, %0|%0, %1, %2}
4194   pinsrd\t{$0, %2, %0|%0, %2, 0}
4195   vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
4196   #
4197   #
4198   #"
4199  [(set_attr "isa" "sse4,sse2,sse2,noavx,noavx,avx,sse4_noavx,avx,*,*,*")
4200   (set (attr "type")
4201     (cond [(eq_attr "alternative" "0,6,7")
4202	      (const_string "sselog")
4203	    (eq_attr "alternative" "9")
4204	      (const_string "imov")
4205	    (eq_attr "alternative" "10")
4206	      (const_string "fmov")
4207	   ]
4208	   (const_string "ssemov")))
4209   (set_attr "prefix_extra" "*,*,*,*,*,*,1,1,*,*,*")
4210   (set_attr "length_immediate" "*,*,*,*,*,*,1,1,*,*,*")
4211   (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex,*,*,*")
4212   (set_attr "mode" "SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,*,*,*")])
4213
4214;; A subset is vec_setv4sf.
;; Insert SF operand 2 into one element of V4SF operand 1 using
;; insertps / vinsertps.  Operand 3 is the vec_merge mask; the insn
;; condition requires it to be a single set bit whose position is below
;; the number of V4SF elements.
4215(define_insn "*vec_setv4sf_sse4_1"
4216  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
4217	(vec_merge:V4SF
4218	  (vec_duplicate:V4SF
4219	    (match_operand:SF 2 "nonimmediate_operand" "xm,xm"))
4220	  (match_operand:V4SF 1 "register_operand" "0,x")
4221	  (match_operand:SI 3 "const_int_operand")))]
4222  "TARGET_SSE4_1
4223   && ((unsigned) exact_log2 (INTVAL (operands[3]))
4224       < GET_MODE_NUNITS (V4SFmode))"
4225{
  /* Turn the one-bit merge mask into the instruction immediate: the
     element index (log2 of the mask) shifted left by 4.  */
4226  operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
4227  switch (which_alternative)
4228    {
4229    case 0:
4230      return "insertps\t{%3, %2, %0|%0, %2, %3}";
4231    case 1:
4232      return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4233    default:
4234      gcc_unreachable ();
4235    }
4236}
4237  [(set_attr "isa" "noavx,avx")
4238   (set_attr "type" "sselog")
4239   (set_attr "prefix_data16" "1,*")
4240   (set_attr "prefix_extra" "1")
4241   (set_attr "length_immediate" "1")
4242   (set_attr "prefix" "orig,vex")
4243   (set_attr "mode" "V4SF")])
4244
;; insertps / vinsertps with an explicit 8-bit immediate (operand 3),
;; modelled as UNSPEC_INSERTPS over operand 2 (source), operand 1
;; (vector being inserted into) and the immediate.
;; When the source is in memory, the top two immediate bits are folded
;; into the address instead of being passed to the instruction.
4245(define_insn "sse4_1_insertps"
4246  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
4247	(unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
4248		      (match_operand:V4SF 1 "register_operand" "0,x")
4249		      (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
4250		     UNSPEC_INSERTPS))]
4251  "TARGET_SSE4_1"
4252{
4253  if (MEM_P (operands[2]))
4254    {
      /* count_s is bits 7:6 of the immediate.  It is cleared from the
	 immediate and applied as an SFmode address offset of
	 count_s * 4 bytes.  */
4255      unsigned count_s = INTVAL (operands[3]) >> 6;
4256      if (count_s)
4257	operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
4258      operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
4259    }
4260  switch (which_alternative)
4261    {
4262    case 0:
4263      return "insertps\t{%3, %2, %0|%0, %2, %3}";
4264    case 1:
4265      return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4266    default:
4267      gcc_unreachable ();
4268    }
4269}
4270  [(set_attr "isa" "noavx,avx")
4271   (set_attr "type" "sselog")
4272   (set_attr "prefix_data16" "1,*")
4273   (set_attr "prefix_extra" "1")
4274   (set_attr "length_immediate" "1")
4275   (set_attr "prefix" "orig,vex")
4276   (set_attr "mode" "V4SF")])
4277
;; After reload, setting element 0 of a vector that lives in memory
;; (destination and merged-in vector are the same memory operand) is
;; split into a plain scalar store of operand 1 at offset 0 of that
;; memory.
4278(define_split
4279  [(set (match_operand:VI4F_128 0 "memory_operand")
4280	(vec_merge:VI4F_128
4281	  (vec_duplicate:VI4F_128
4282	    (match_operand:<ssescalarmode> 1 "nonmemory_operand"))
4283	  (match_dup 0)
4284	  (const_int 1)))]
4285  "TARGET_SSE && reload_completed"
4286  [(const_int 0)]
4287{
4288  emit_move_insn (adjust_address (operands[0], <ssescalarmode>mode, 0),
4289		  operands[1]);
4290  DONE;
4291})
4292
;; Element insert for every mode in the V iterator.  All the work is
;; delegated to ix86_expand_vector_set, called with false as its first
;; argument, the vector (operand 0), the scalar (operand 1) and the
;; constant element index (operand 2).
4293(define_expand "vec_set<mode>"
4294  [(match_operand:V 0 "register_operand")
4295   (match_operand:<ssescalarmode> 1 "register_operand")
4296   (match_operand 2 "const_int_operand")]
4297  "TARGET_SSE"
4298{
4299  ix86_expand_vector_set (false, operands[0], operands[1],
4300			  INTVAL (operands[2]));
4301  DONE;
4302})
4303
;; Extract element 0 of a V4SF.  Memory-to-memory is rejected by the
;; insn condition.  The template is "#": after reload the insn is split
;; into a plain SFmode move from the low part of operand 1.
4304(define_insn_and_split "*vec_extractv4sf_0"
4305  [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
4306	(vec_select:SF
4307	  (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
4308	  (parallel [(const_int 0)])))]
4309  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4310  "#"
4311  "&& reload_completed"
4312  [(const_int 0)]
4313{
4314  rtx op1 = operands[1];
  /* A register source is re-expressed as the same register number in
     SFmode; any other source goes through gen_lowpart.  */
4315  if (REG_P (op1))
4316    op1 = gen_rtx_REG (SFmode, REGNO (op1));
4317  else
4318    op1 = gen_lowpart (SFmode, op1);
4319  emit_move_insn (operands[0], op1);
4320  DONE;
4321})
4322
;; Extract element N (operand 2, 0-3) of V4SF register operand 1.
;; Alternative 0: destination is a general register or memory; a single
;;   extractps is emitted.
;; Alternatives 1/2: destination is an SSE register; the template is "#"
;;   and the insn is split after reload into a shuffle that places the
;;   wanted element in element 0 of the destination register.
4323(define_insn_and_split "*sse4_1_extractps"
4324  [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,x,x")
4325	(vec_select:SF
4326	  (match_operand:V4SF 1 "register_operand" "x,0,x")
4327	  (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n")])))]
4328  "TARGET_SSE4_1"
4329  "@
4330   %vextractps\t{%2, %1, %0|%0, %1, %2}
4331   #
4332   #"
4333  "&& reload_completed && SSE_REG_P (operands[0])"
4334  [(const_int 0)]
4335{
  /* View the SF destination register as a full V4SF register.  */
4336  rtx dest = gen_rtx_REG (V4SFmode, REGNO (operands[0]));
4337  switch (INTVAL (operands[2]))
4338    {
4339    case 1:
4340    case 3:
      /* shufps of operand 1 with itself, all four selectors pointing at
	 element N (N, N, N + 4, N + 4 in the concatenation).  */
4341      emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
4342				      operands[2], operands[2],
4343				      GEN_INT (INTVAL (operands[2]) + 4),
4344				      GEN_INT (INTVAL (operands[2]) + 4)));
4345      break;
4346    case 2:
      /* Interleaving the high half of operand 1 with itself puts
	 element 2 into element 0.  */
4347      emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
4348      break;
4349    default:
4350      /* 0 should be handled by the *vec_extractv4sf_0 pattern above.  */
4351      gcc_unreachable ();
4352    }
4353  DONE;
4354}
4355  [(set_attr "isa" "*,noavx,avx")
4356   (set_attr "type" "sselog,*,*")
4357   (set_attr "prefix_data16" "1,*,*")
4358   (set_attr "prefix_extra" "1,*,*")
4359   (set_attr "length_immediate" "1,*,*")
4360   (set_attr "prefix" "maybe_vex,*,*")
4361   (set_attr "mode" "V4SF,*,*")])
4362
;; Extract element N (operand 2, 0-3) of a V4SF that lives in
;; offsettable memory.  The template is "#": after reload the insn is
;; split into a scalar SFmode load from byte offset N * 4 of operand 1.
4363(define_insn_and_split "*vec_extract_v4sf_mem"
4364  [(set (match_operand:SF 0 "register_operand" "=x,*r,f")
4365       (vec_select:SF
4366	 (match_operand:V4SF 1 "memory_operand" "o,o,o")
4367	 (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
4368  "TARGET_SSE"
4369  "#"
4370  "&& reload_completed"
4371  [(const_int 0)]
4372{
4373  int i = INTVAL (operands[2]);
4374
4375  emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
4376  DONE;
4377})
4378
;; Extract one half of a 256-bit vector (any mode in the V_256 iterator).
;; Operand 2 selects the half: 0 dispatches to vec_extract_lo_<mode>,
;; 1 to vec_extract_hi_<mode>.
4379(define_expand "avx_vextractf128<mode>"
4380  [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
4381   (match_operand:V_256 1 "register_operand")
4382   (match_operand:SI 2 "const_0_to_1_operand")]
4383  "TARGET_AVX"
4384{
  /* Generator function for the chosen half.  */
4385  rtx (*insn)(rtx, rtx);
4386
4387  switch (INTVAL (operands[2]))
4388    {
4389    case 0:
4390      insn = gen_vec_extract_lo_<mode>;
4391      break;
4392    case 1:
4393      insn = gen_vec_extract_hi_<mode>;
4394      break;
4395    default:
4396      gcc_unreachable ();
4397    }
4398
4399  emit_insn (insn (operands[0], operands[1]));
4400  DONE;
4401})
4402
;; Extract the low half (elements 0 and 1) of a 4-element 256-bit vector
;; (modes in VI8F_256).  Memory-to-memory is rejected by the insn
;; condition.  The template is "#": after reload the insn is split into
;; a plain half-width move from the low part of operand 1.
4403(define_insn_and_split "vec_extract_lo_<mode>"
4404  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
4405	(vec_select:<ssehalfvecmode>
4406	  (match_operand:VI8F_256 1 "nonimmediate_operand" "xm,x")
4407	  (parallel [(const_int 0) (const_int 1)])))]
4408  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4409  "#"
4410  "&& reload_completed"
4411  [(const_int 0)]
4412{
4413  rtx op1 = operands[1];
  /* A register source is re-expressed as the same register number in
     the half-width mode; any other source goes through gen_lowpart.  */
4414  if (REG_P (op1))
4415    op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
4416  else
4417    op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
4418  emit_move_insn (operands[0], op1);
4419  DONE;
4420})
4421
;; Extract the high half (elements 2 and 3) of a 4-element 256-bit vector
;; (modes in VI8F_256) into a register or memory, using
;; vextract<i128> with immediate 1.
4422(define_insn "vec_extract_hi_<mode>"
4423  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
4424	(vec_select:<ssehalfvecmode>
4425	  (match_operand:VI8F_256 1 "register_operand" "x,x")
4426	  (parallel [(const_int 2) (const_int 3)])))]
4427  "TARGET_AVX"
4428  "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
4429  [(set_attr "type" "sselog")
4430   (set_attr "prefix_extra" "1")
4431   (set_attr "length_immediate" "1")
4432   (set_attr "memory" "none,store")
4433   (set_attr "prefix" "vex")
4434   (set_attr "mode" "<sseinsnmode>")])
4435
;; Extract the low half (elements 0-3) of an 8-element 256-bit vector
;; (modes in VI4F_256).  Memory-to-memory is rejected by the insn
;; condition.  The template is "#": after reload the insn is split into
;; a plain half-width move from the low part of operand 1.
4436(define_insn_and_split "vec_extract_lo_<mode>"
4437  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
4438	(vec_select:<ssehalfvecmode>
4439	  (match_operand:VI4F_256 1 "nonimmediate_operand" "xm,x")
4440	  (parallel [(const_int 0) (const_int 1)
4441		     (const_int 2) (const_int 3)])))]
4442  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4443  "#"
4444  "&& reload_completed"
4445  [(const_int 0)]
4446{
4447  rtx op1 = operands[1];
  /* A register source is re-expressed as the same register number in
     the half-width mode; any other source goes through gen_lowpart.  */
4448  if (REG_P (op1))
4449    op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
4450  else
4451    op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
4452  emit_move_insn (operands[0], op1);
4453  DONE;
4454})
4455
;; Extract the high half (elements 4-7) of an 8-element 256-bit vector
;; (modes in VI4F_256) into a register or memory, using
;; vextract<i128> with immediate 1.
4456(define_insn "vec_extract_hi_<mode>"
4457  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
4458	(vec_select:<ssehalfvecmode>
4459	  (match_operand:VI4F_256 1 "register_operand" "x,x")
4460	  (parallel [(const_int 4) (const_int 5)
4461		     (const_int 6) (const_int 7)])))]
4462  "TARGET_AVX"
4463  "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
4464  [(set_attr "type" "sselog")
4465   (set_attr "prefix_extra" "1")
4466   (set_attr "length_immediate" "1")
4467   (set_attr "memory" "none,store")
4468   (set_attr "prefix" "vex")
4469   (set_attr "mode" "<sseinsnmode>")])
4470
;; Extract the low half (elements 0-7) of a V16HI as a V8HI.
;; Memory-to-memory is rejected by the insn condition.  The template is
;; "#": after reload the insn is split into a plain V8HI move from the
;; low part of operand 1.
4471(define_insn_and_split "vec_extract_lo_v16hi"
4472  [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4473	(vec_select:V8HI
4474	  (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
4475	  (parallel [(const_int 0) (const_int 1)
4476		     (const_int 2) (const_int 3)
4477		     (const_int 4) (const_int 5)
4478		     (const_int 6) (const_int 7)])))]
4479  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4480  "#"
4481  "&& reload_completed"
4482  [(const_int 0)]
4483{
4484  rtx op1 = operands[1];
  /* A register source is re-expressed as the same register number in
     V8HImode; any other source goes through gen_lowpart.  */
4485  if (REG_P (op1))
4486    op1 = gen_rtx_REG (V8HImode, REGNO (op1));
4487  else
4488    op1 = gen_lowpart (V8HImode, op1);
4489  emit_move_insn (operands[0], op1);
4490  DONE;
4491})
4492
;; Extract the high half (elements 8-15) of a V16HI register as a V8HI,
;; into a register or memory, with a vextract...128 instruction and
;; immediate 1.  NOTE(review): the "%~" in the mnemonic is presumably
;; expanded to "i" or "f" depending on AVX2 availability -- confirm
;; against ix86_print_operand.
4493(define_insn "vec_extract_hi_v16hi"
4494  [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4495	(vec_select:V8HI
4496	  (match_operand:V16HI 1 "register_operand" "x,x")
4497	  (parallel [(const_int 8) (const_int 9)
4498		     (const_int 10) (const_int 11)
4499		     (const_int 12) (const_int 13)
4500		     (const_int 14) (const_int 15)])))]
4501  "TARGET_AVX"
4502  "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
4503  [(set_attr "type" "sselog")
4504   (set_attr "prefix_extra" "1")
4505   (set_attr "length_immediate" "1")
4506   (set_attr "memory" "none,store")
4507   (set_attr "prefix" "vex")
4508   (set_attr "mode" "OI")])
4509
;; Extract the low half (elements 0-15) of a V32QI as a V16QI.
;; Memory-to-memory is rejected by the insn condition.  The template is
;; "#": after reload the insn is split into a plain V16QI move from the
;; low part of operand 1.
4510(define_insn_and_split "vec_extract_lo_v32qi"
4511  [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4512	(vec_select:V16QI
4513	  (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
4514	  (parallel [(const_int 0) (const_int 1)
4515		     (const_int 2) (const_int 3)
4516		     (const_int 4) (const_int 5)
4517		     (const_int 6) (const_int 7)
4518		     (const_int 8) (const_int 9)
4519		     (const_int 10) (const_int 11)
4520		     (const_int 12) (const_int 13)
4521		     (const_int 14) (const_int 15)])))]
4522  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4523  "#"
4524  "&& reload_completed"
4525  [(const_int 0)]
4526{
4527  rtx op1 = operands[1];
  /* A register source is re-expressed as the same register number in
     V16QImode; any other source goes through gen_lowpart.  */
4528  if (REG_P (op1))
4529    op1 = gen_rtx_REG (V16QImode, REGNO (op1));
4530  else
4531    op1 = gen_lowpart (V16QImode, op1);
4532  emit_move_insn (operands[0], op1);
4533  DONE;
4534})
4535
;; Extract the high half (elements 16-31) of a V32QI register as a V16QI,
;; into a register or memory, with a vextract...128 instruction and
;; immediate 1.  NOTE(review): the "%~" in the mnemonic is presumably
;; expanded to "i" or "f" depending on AVX2 availability -- confirm
;; against ix86_print_operand.
4536(define_insn "vec_extract_hi_v32qi"
4537  [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4538	(vec_select:V16QI
4539	  (match_operand:V32QI 1 "register_operand" "x,x")
4540	  (parallel [(const_int 16) (const_int 17)
4541		     (const_int 18) (const_int 19)
4542		     (const_int 20) (const_int 21)
4543		     (const_int 22) (const_int 23)
4544		     (const_int 24) (const_int 25)
4545		     (const_int 26) (const_int 27)
4546		     (const_int 28) (const_int 29)
4547		     (const_int 30) (const_int 31)])))]
4548  "TARGET_AVX"
4549  "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
4550  [(set_attr "type" "sselog")
4551   (set_attr "prefix_extra" "1")
4552   (set_attr "length_immediate" "1")
4553   (set_attr "memory" "none,store")
4554   (set_attr "prefix" "vex")
4555   (set_attr "mode" "OI")])
4556
4557;; Modes handled by vec_extract patterns.
;; Each row pairs a 256-bit mode, enabled only under TARGET_AVX, with
;; its unconditional 128-bit counterpart.
4558(define_mode_iterator VEC_EXTRACT_MODE
4559  [(V32QI "TARGET_AVX") V16QI
4560   (V16HI "TARGET_AVX") V8HI
4561   (V8SI "TARGET_AVX") V4SI
4562   (V4DI "TARGET_AVX") V2DI
4563   (V8SF "TARGET_AVX") V4SF
4564   (V4DF "TARGET_AVX") V2DF])
4565
;; Element extract for every mode in VEC_EXTRACT_MODE.  All the work is
;; delegated to ix86_expand_vector_extract, called with false as its
;; first argument, the scalar destination (operand 0), the vector
;; (operand 1) and the constant element index (operand 2).
4566(define_expand "vec_extract<mode>"
4567  [(match_operand:<ssescalarmode> 0 "register_operand")
4568   (match_operand:VEC_EXTRACT_MODE 1 "register_operand")
4569   (match_operand 2 "const_int_operand")]
4570  "TARGET_SSE"
4571{
4572  ix86_expand_vector_extract (false, operands[0], operands[1],
4573			      INTVAL (operands[2]));
4574  DONE;
4575})
4576
4577;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4578;;
4579;; Parallel double-precision floating point element swizzling
4580;;
4581;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4582
4583;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Selects elements {1, 5, 3, 7} of the concatenation op1:op2, i.e.
;; { op1[1], op2[1], op1[3], op2[3] }: the odd element of each pair of
;; doubles, taken alternately from the two inputs.
4584(define_insn "avx_unpckhpd256"
4585  [(set (match_operand:V4DF 0 "register_operand" "=x")
4586	(vec_select:V4DF
4587	  (vec_concat:V8DF
4588	    (match_operand:V4DF 1 "register_operand" "x")
4589	    (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4590	  (parallel [(const_int 1) (const_int 5)
4591		     (const_int 3) (const_int 7)])))]
4592  "TARGET_AVX"
4593  "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
4594  [(set_attr "type" "sselog")
4595   (set_attr "prefix" "vex")
4596   (set_attr "mode" "V4DF")])
4597
;; High interleave of two V4DF vectors, built as a three-step sequence:
;;   tmp3 = select {0,4,2,6} of op1:op2  = { a0, b0, a2, b2 }
;;   tmp4 = select {1,5,3,7} of op1:op2  = { a1, b1, a3, b3 }
;;   op0  = select {2,3,6,7} of tmp3:tmp4 = { a2, b2, a3, b3 }
;; (a = operand 1, b = operand 2).  The two temporaries are needed because
;; the first two steps only shuffle within each pair of elements.
4598(define_expand "vec_interleave_highv4df"
4599  [(set (match_dup 3)
4600	(vec_select:V4DF
4601	  (vec_concat:V8DF
4602	    (match_operand:V4DF 1 "register_operand" "x")
4603	    (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4604	  (parallel [(const_int 0) (const_int 4)
4605		     (const_int 2) (const_int 6)])))
4606   (set (match_dup 4)
4607	(vec_select:V4DF
4608	  (vec_concat:V8DF
4609	    (match_dup 1)
4610	    (match_dup 2))
4611	  (parallel [(const_int 1) (const_int 5)
4612		     (const_int 3) (const_int 7)])))
4613   (set (match_operand:V4DF 0 "register_operand")
4614	(vec_select:V4DF
4615	  (vec_concat:V8DF
4616	    (match_dup 3)
4617	    (match_dup 4))
4618	  (parallel [(const_int 2) (const_int 3)
4619		     (const_int 6) (const_int 7)])))]
4620 "TARGET_AVX"
4621{
  /* Fresh pseudo registers for the two intermediate unpack results.  */
4622  operands[3] = gen_reg_rtx (V4DFmode);
4623  operands[4] = gen_reg_rtx (V4DFmode);
4624})
4625
4626
;; High interleave of two V2DF vectors: selects {1, 3} of op1:op2, i.e.
;; { op1[1], op2[1] }.  Both inputs may be register or memory; when
;; ix86_vec_interleave_v2df_operator_ok rejects the combination, operand 2
;; is copied into a register so the matching insn's condition holds.
4627(define_expand "vec_interleave_highv2df"
4628  [(set (match_operand:V2DF 0 "register_operand")
4629	(vec_select:V2DF
4630	  (vec_concat:V4DF
4631	    (match_operand:V2DF 1 "nonimmediate_operand")
4632	    (match_operand:V2DF 2 "nonimmediate_operand"))
4633	  (parallel [(const_int 1)
4634		     (const_int 3)])))]
4635  "TARGET_SSE2"
4636{
4637  if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
4638    operands[2] = force_reg (V2DFmode, operands[2]);
4639})
4640
;; Insn matching the V2DF high interleave { op1[1], op2[1] }.
;; Alternatives, in order (the "isa" attribute gates each one):
;;   0  unpckhpd   two-operand form, op1 tied to the destination (noavx)
;;   1  vunpckhpd  three-operand form (avx)
;;   2  movddup    op1 in offsettable memory, op2 tied to op1 (sse3)
;;   3  movlpd     op1 in memory, op2 tied to the destination (noavx)
;;   4  vmovlpd    op1 in memory, op2 in a register (avx)
;;   5  movhpd     memory destination, op2 tied to it, op1 in a register
;; NOTE(review): "%H1" is expanded outside this file; presumably it
;; addresses the upper 8 bytes of memory operand 1 -- confirm.
4641(define_insn "*vec_interleave_highv2df"
4642  [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=x,x,x,x,x,m")
4643	(vec_select:V2DF
4644	  (vec_concat:V4DF
4645	    (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,o,o,o,x")
4646	    (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,0,x,0"))
4647	  (parallel [(const_int 1)
4648		     (const_int 3)])))]
4649  "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4650  "@
4651   unpckhpd\t{%2, %0|%0, %2}
4652   vunpckhpd\t{%2, %1, %0|%0, %1, %2}
4653   %vmovddup\t{%H1, %0|%0, %H1}
4654   movlpd\t{%H1, %0|%0, %H1}
4655   vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
4656   %vmovhpd\t{%1, %0|%0, %1}"
4657  [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
4658   (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
4659   (set_attr "ssememalign" "64")
4660   (set_attr "prefix_data16" "*,*,*,1,*,1")
4661   (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
4662   (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
4663
4664;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Expander that duplicates the even elements of operand 1: both halves of
;; the concatenation are operand 1, so selecting {0, 4, 2, 6} yields
;; { op1[0], op1[0], op1[2], op1[2] }.  There are no preparation
;; statements; the RTL has the same shape as *avx_unpcklpd256, whose
;; second alternative (op2 tied to op1) emits vmovddup.
4665(define_expand "avx_movddup256"
4666  [(set (match_operand:V4DF 0 "register_operand")
4667	(vec_select:V4DF
4668	  (vec_concat:V8DF
4669	    (match_operand:V4DF 1 "nonimmediate_operand")
4670	    (match_dup 1))
4671	  (parallel [(const_int 0) (const_int 4)
4672		     (const_int 2) (const_int 6)])))]
4673  "TARGET_AVX")
4674
;; Named expander for the V4DF low unpack: selects {0, 4, 2, 6} of
;; op1:op2, i.e. { op1[0], op2[0], op1[2], op2[2] }.  It has no
;; preparation statements; the RTL it emits is matched by the
;; *avx_unpcklpd256 insn.
4675(define_expand "avx_unpcklpd256"
4676  [(set (match_operand:V4DF 0 "register_operand")
4677	(vec_select:V4DF
4678	  (vec_concat:V8DF
4679	    (match_operand:V4DF 1 "register_operand")
4680	    (match_operand:V4DF 2 "nonimmediate_operand"))
4681	  (parallel [(const_int 0) (const_int 4)
4682		     (const_int 2) (const_int 6)])))]
4683  "TARGET_AVX")
4684
;; Insn for the V4DF select {0, 4, 2, 6} of op1:op2.
;;   alternative 0: op1 in a register, op2 register or memory -> vunpcklpd
;;   alternative 1: op1 in memory and op2 tied to op1 (constraint "1"),
;;                  i.e. both inputs are the same value -> vmovddup
4685(define_insn "*avx_unpcklpd256"
4686  [(set (match_operand:V4DF 0 "register_operand"         "=x,x")
4687	(vec_select:V4DF
4688	  (vec_concat:V8DF
4689	    (match_operand:V4DF 1 "nonimmediate_operand" " x,m")
4690	    (match_operand:V4DF 2 "nonimmediate_operand" "xm,1"))
4691	  (parallel [(const_int 0) (const_int 4)
4692		     (const_int 2) (const_int 6)])))]
4693  "TARGET_AVX"
4694  "@
4695   vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4696   vmovddup\t{%1, %0|%0, %1}"
4697  [(set_attr "type" "sselog")
4698   (set_attr "prefix" "vex")
4699   (set_attr "mode" "V4DF")])
4700
;; Low interleave of two V4DF vectors, built as a three-step sequence:
;;   tmp3 = select {0,4,2,6} of op1:op2  = { a0, b0, a2, b2 }
;;   tmp4 = select {1,5,3,7} of op1:op2  = { a1, b1, a3, b3 }
;;   op0  = select {0,1,4,5} of tmp3:tmp4 = { a0, b0, a1, b1 }
;; (a = operand 1, b = operand 2).
4701(define_expand "vec_interleave_lowv4df"
4702  [(set (match_dup 3)
4703	(vec_select:V4DF
4704	  (vec_concat:V8DF
4705	    (match_operand:V4DF 1 "register_operand" "x")
4706	    (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4707	  (parallel [(const_int 0) (const_int 4)
4708		     (const_int 2) (const_int 6)])))
4709   (set (match_dup 4)
4710	(vec_select:V4DF
4711	  (vec_concat:V8DF
4712	    (match_dup 1)
4713	    (match_dup 2))
4714	  (parallel [(const_int 1) (const_int 5)
4715		     (const_int 3) (const_int 7)])))
4716   (set (match_operand:V4DF 0 "register_operand")
4717	(vec_select:V4DF
4718	  (vec_concat:V8DF
4719	    (match_dup 3)
4720	    (match_dup 4))
4721	  (parallel [(const_int 0) (const_int 1)
4722		     (const_int 4) (const_int 5)])))]
4723 "TARGET_AVX"
4724{
  /* Fresh pseudo registers for the two intermediate unpack results.  */
4725  operands[3] = gen_reg_rtx (V4DFmode);
4726  operands[4] = gen_reg_rtx (V4DFmode);
4727})
4728
;; Low interleave of two V2DF vectors: selects {0, 2} of op1:op2, i.e.
;; { op1[0], op2[0] }.  When ix86_vec_interleave_v2df_operator_ok rejects
;; the operand combination, operand 1 (not operand 2, unlike the "high"
;; expander) is copied into a register.
4729(define_expand "vec_interleave_lowv2df"
4730  [(set (match_operand:V2DF 0 "register_operand")
4731	(vec_select:V2DF
4732	  (vec_concat:V4DF
4733	    (match_operand:V2DF 1 "nonimmediate_operand")
4734	    (match_operand:V2DF 2 "nonimmediate_operand"))
4735	  (parallel [(const_int 0)
4736		     (const_int 2)])))]
4737  "TARGET_SSE2"
4738{
4739  if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
4740    operands[1] = force_reg (V2DFmode, operands[1]);
4741})
4742
;; Insn matching the V2DF low interleave { op1[0], op2[0] }.
;; Alternatives, in order (the "isa" attribute gates each one):
;;   0  unpcklpd   two-operand form, op1 tied to the destination (noavx)
;;   1  vunpcklpd  three-operand form (avx)
;;   2  movddup    op1 in memory, op2 tied to op1 (sse3)
;;   3  movhpd     op1 tied to the destination, op2 in memory (noavx)
;;   4  vmovhpd    op1 in a register, op2 in memory (avx)
;;   5  movlpd     offsettable memory destination tied to op1, op2 in a
;;                 register; the template stores through "%H0"
;; NOTE(review): "%H0" is expanded outside this file; presumably it
;; addresses the upper 8 bytes of the destination -- confirm.
4743(define_insn "*vec_interleave_lowv2df"
4744  [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=x,x,x,x,x,o")
4745	(vec_select:V2DF
4746	  (vec_concat:V4DF
4747	    (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,m,0,x,0")
4748	    (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,m,m,x"))
4749	  (parallel [(const_int 0)
4750		     (const_int 2)])))]
4751  "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4752  "@
4753   unpcklpd\t{%2, %0|%0, %2}
4754   vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4755   %vmovddup\t{%1, %0|%0, %1}
4756   movhpd\t{%2, %0|%0, %2}
4757   vmovhpd\t{%2, %1, %0|%0, %1, %2}
4758   %vmovlpd\t{%2, %H0|%H0, %2}"
4759  [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
4760   (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
4761   (set_attr "ssememalign" "64")
4762   (set_attr "prefix_data16" "*,*,*,1,*,1")
4763   (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
4764   (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
4765
;; Split: storing { r[0], r[0] } (element 0 of register operand 1,
;; duplicated) to a V2DF memory destination.  After reload this becomes
;; two DFmode stores of the same scalar, at byte offsets 0 and 8.
4766(define_split
4767  [(set (match_operand:V2DF 0 "memory_operand")
4768	(vec_select:V2DF
4769	  (vec_concat:V4DF
4770	    (match_operand:V2DF 1 "register_operand")
4771	    (match_dup 1))
4772	  (parallel [(const_int 0)
4773		     (const_int 2)])))]
4774  "TARGET_SSE3 && reload_completed"
4775  [(const_int 0)]
4776{
  /* View the source hard register in DFmode to get its low element.  */
4777  rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
4778  emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
4779  emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
4780  DONE;
4781})
4782
;; Split: loading { m[n], m[n] } from a V2DF memory operand, where n is
;; operand 2 (0 or 1) and the condition requires operand 3 == n + 2, so
;; both selected elements are the same element of the same memory.  The
;; result is rewritten as a vec_duplicate of the DFmode scalar located at
;; byte offset n * 8.
4783(define_split
4784  [(set (match_operand:V2DF 0 "register_operand")
4785	(vec_select:V2DF
4786	  (vec_concat:V4DF
4787	    (match_operand:V2DF 1 "memory_operand")
4788	    (match_dup 1))
4789	  (parallel [(match_operand:SI 2 "const_0_to_1_operand")
4790		     (match_operand:SI 3 "const_int_operand")])))]
4791  "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
4792  [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
4793{
4794  operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
4795})
4796
;; Expander taking the shuffle immediate as operand 3.  The low four bits
;; of the immediate are decoded into the explicit element selectors that
;; avx_shufpd256_1 expects, indexing the concatenation op1:op2:
;;   bit 0 -> element 0 or 1      bit 1 -> element 4 or 5
;;   bit 2 -> element 2 or 3      bit 3 -> element 6 or 7
4797(define_expand "avx_shufpd256"
4798  [(match_operand:V4DF 0 "register_operand")
4799   (match_operand:V4DF 1 "register_operand")
4800   (match_operand:V4DF 2 "nonimmediate_operand")
4801   (match_operand:SI 3 "const_int_operand")]
4802  "TARGET_AVX"
4803{
4804  int mask = INTVAL (operands[3]);
4805  emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
4806				   GEN_INT (mask & 1),
4807				   GEN_INT (mask & 2 ? 5 : 4),
4808				   GEN_INT (mask & 4 ? 3 : 2),
4809				   GEN_INT (mask & 8 ? 7 : 6)));
4810  DONE;
4811})
4812
;; Insn form of the 256-bit shufpd with explicit element selectors
;; (operands 3..6, each restricted by its predicate to a two-value range).
;; The output code folds the four selectors back into the 4-bit immediate
;; -- the inverse of the decoding done by the avx_shufpd256 expander.
4813(define_insn "avx_shufpd256_1"
4814  [(set (match_operand:V4DF 0 "register_operand" "=x")
4815	(vec_select:V4DF
4816	  (vec_concat:V8DF
4817	    (match_operand:V4DF 1 "register_operand" "x")
4818	    (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4819	  (parallel [(match_operand 3 "const_0_to_1_operand")
4820		     (match_operand 4 "const_4_to_5_operand")
4821		     (match_operand 5 "const_2_to_3_operand")
4822		     (match_operand 6 "const_6_to_7_operand")])))]
4823  "TARGET_AVX"
4824{
4825  int mask;
  /* Each selector contributes one bit: its offset from the range base.  */
4826  mask = INTVAL (operands[3]);
4827  mask |= (INTVAL (operands[4]) - 4) << 1;
4828  mask |= (INTVAL (operands[5]) - 2) << 2;
4829  mask |= (INTVAL (operands[6]) - 6) << 3;
  /* Reuse operand 3 to carry the immediate into the template below.  */
4830  operands[3] = GEN_INT (mask);
4831
4832  return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4833}
4834  [(set_attr "type" "sseshuf")
4835   (set_attr "length_immediate" "1")
4836   (set_attr "prefix" "vex")
4837   (set_attr "mode" "V4DF")])
4838
;; Expander taking the shuffle immediate as operand 3.  The low two bits
;; are decoded into element selectors for the V2DF instance of the
;; sse2_shufpd_<mode> insn: bit 0 picks element 0 or 1 (from operand 1),
;; bit 1 picks element 2 or 3 (from operand 2).
4839(define_expand "sse2_shufpd"
4840  [(match_operand:V2DF 0 "register_operand")
4841   (match_operand:V2DF 1 "register_operand")
4842   (match_operand:V2DF 2 "nonimmediate_operand")
4843   (match_operand:SI 3 "const_int_operand")]
4844  "TARGET_SSE2"
4845{
4846  int mask = INTVAL (operands[3]);
4847  emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
4848				GEN_INT (mask & 1),
4849				GEN_INT (mask & 2 ? 3 : 2)));
4850  DONE;
4851})
4852
4853;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; 256-bit integer high unpack: selects {1, 5, 3, 7} of op1:op2, i.e.
;; { op1[1], op2[1], op1[3], op2[3] }.
4854(define_insn "avx2_interleave_highv4di"
4855  [(set (match_operand:V4DI 0 "register_operand" "=x")
4856	(vec_select:V4DI
4857	  (vec_concat:V8DI
4858	    (match_operand:V4DI 1 "register_operand" "x")
4859	    (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
4860	  (parallel [(const_int 1)
4861		     (const_int 5)
4862		     (const_int 3)
4863		     (const_int 7)])))]
4864  "TARGET_AVX2"
4865  "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4866  [(set_attr "type" "sselog")
4867   (set_attr "prefix" "vex")
4868   (set_attr "mode" "OI")])
4869
;; 128-bit integer high interleave: selects {1, 3} of op1:op2, i.e.
;; { op1[1], op2[1] }.  Alternative 0 is the two-operand form with op1
;; tied to the destination (noavx); alternative 1 is the three-operand
;; VEX form (avx).
4870(define_insn "vec_interleave_highv2di"
4871  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4872	(vec_select:V2DI
4873	  (vec_concat:V4DI
4874	    (match_operand:V2DI 1 "register_operand" "0,x")
4875	    (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
4876	  (parallel [(const_int 1)
4877		     (const_int 3)])))]
4878  "TARGET_SSE2"
4879  "@
4880   punpckhqdq\t{%2, %0|%0, %2}
4881   vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4882  [(set_attr "isa" "noavx,avx")
4883   (set_attr "type" "sselog")
4884   (set_attr "prefix_data16" "1,*")
4885   (set_attr "prefix" "orig,vex")
4886   (set_attr "mode" "TI")])
4887
;; 256-bit integer low unpack: selects {0, 4, 2, 6} of op1:op2, i.e.
;; { op1[0], op2[0], op1[2], op2[2] }.
4888(define_insn "avx2_interleave_lowv4di"
4889  [(set (match_operand:V4DI 0 "register_operand" "=x")
4890	(vec_select:V4DI
4891	  (vec_concat:V8DI
4892	    (match_operand:V4DI 1 "register_operand" "x")
4893	    (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
4894	  (parallel [(const_int 0)
4895		     (const_int 4)
4896		     (const_int 2)
4897		     (const_int 6)])))]
4898  "TARGET_AVX2"
4899  "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4900  [(set_attr "type" "sselog")
4901   (set_attr "prefix" "vex")
4902   (set_attr "mode" "OI")])
4903
;; 128-bit integer low interleave: selects {0, 2} of op1:op2, i.e.
;; { op1[0], op2[0] }.  Alternative 0 is the two-operand form with op1
;; tied to the destination (noavx); alternative 1 is the three-operand
;; VEX form (avx).
4904(define_insn "vec_interleave_lowv2di"
4905  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4906	(vec_select:V2DI
4907	  (vec_concat:V4DI
4908	    (match_operand:V2DI 1 "register_operand" "0,x")
4909	    (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
4910	  (parallel [(const_int 0)
4911		     (const_int 2)])))]
4912  "TARGET_SSE2"
4913  "@
4914   punpcklqdq\t{%2, %0|%0, %2}
4915   vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4916  [(set_attr "isa" "noavx,avx")
4917   (set_attr "type" "sselog")
4918   (set_attr "prefix_data16" "1,*")
4919   (set_attr "prefix" "orig,vex")
4920   (set_attr "mode" "TI")])
4921
;; Insn form of the 128-bit shufpd with explicit element selectors, for
;; every mode in VI8F_128.  Operand 3 selects element 0 or 1 (from
;; operand 1) and operand 4 selects element 2 or 3 (from operand 2); the
;; output code folds them back into the 2-bit immediate.
4922(define_insn "sse2_shufpd_<mode>"
4923  [(set (match_operand:VI8F_128 0 "register_operand" "=x,x")
4924	(vec_select:VI8F_128
4925	  (vec_concat:<ssedoublevecmode>
4926	    (match_operand:VI8F_128 1 "register_operand" "0,x")
4927	    (match_operand:VI8F_128 2 "nonimmediate_operand" "xm,xm"))
4928	  (parallel [(match_operand 3 "const_0_to_1_operand")
4929		     (match_operand 4 "const_2_to_3_operand")])))]
4930  "TARGET_SSE2"
4931{
4932  int mask;
4933  mask = INTVAL (operands[3]);
4934  mask |= (INTVAL (operands[4]) - 2) << 1;
  /* Reuse operand 3 to carry the immediate into the templates below.  */
4935  operands[3] = GEN_INT (mask);
4936
4937  switch (which_alternative)
4938    {
4939    case 0:
      /* Two-operand form: operand 1 is tied to the destination.  */
4940      return "shufpd\t{%3, %2, %0|%0, %2, %3}";
4941    case 1:
4942      return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4943    default:
4944      gcc_unreachable ();
4945    }
4946}
4947  [(set_attr "isa" "noavx,avx")
4948   (set_attr "type" "sseshuf")
4949   (set_attr "length_immediate" "1")
4950   (set_attr "prefix" "orig,vex")
4951   (set_attr "mode" "V2DF")])
4952
4953;; Avoid combining registers from different units in a single alternative,
4954;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract element 1 (the high double) of a V2DF into a DF destination.
;;   0  register source -> memory, movhpd store
;;   1  source tied to the destination register, unpckhpd (noavx)
;;   2  register -> register, vunpckhpd (avx)
;;   3-5  offsettable memory source to an SSE, x87 ("*f") or integer ("r")
;;        register; template "#", i.e. no assembly of their own -- the
;;        define_split that follows turns them into a DFmode load.
;; The combined condition forbids memory-to-memory.
4955(define_insn "sse2_storehpd"
4956  [(set (match_operand:DF 0 "nonimmediate_operand"     "=m,x,x,x,*f,r")
4957	(vec_select:DF
4958	  (match_operand:V2DF 1 "nonimmediate_operand" " x,0,x,o,o,o")
4959	  (parallel [(const_int 1)])))]
4960  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4961  "@
4962   %vmovhpd\t{%1, %0|%0, %1}
4963   unpckhpd\t%0, %0
4964   vunpckhpd\t{%d1, %0|%0, %d1}
4965   #
4966   #
4967   #"
4968  [(set_attr "isa" "*,noavx,avx,*,*,*")
4969   (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
   ;; prefix_data16 is 1 only for alternative 0 when not using AVX.
4970   (set (attr "prefix_data16")
4971     (if_then_else
4972       (and (eq_attr "alternative" "0")
4973	    (not (match_test "TARGET_AVX")))
4974       (const_string "1")
4975       (const_string "*")))
4976   (set_attr "prefix" "maybe_vex,orig,vex,*,*,*")
4977   (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
4978
;; Split: extracting element 1 of a V2DF memory operand into a register.
;; After reload this is rewritten as a plain DFmode move from the same
;; memory at byte offset 8.
4979(define_split
4980  [(set (match_operand:DF 0 "register_operand")
4981	(vec_select:DF
4982	  (match_operand:V2DF 1 "memory_operand")
4983	  (parallel [(const_int 1)])))]
4984  "TARGET_SSE2 && reload_completed"
4985  [(set (match_dup 0) (match_dup 1))]
4986  "operands[1] = adjust_address (operands[1], DFmode, 8);")
4987
;; Extract element 1 of a V2DF when only SSE (not SSE2) is available, so
;; single-precision move instructions are used to move the 64 bits:
;;   0  register -> memory   movhps
;;   1  register -> register movhlps
;;   2  offsettable memory -> register, movlps from "%H1"
;; NOTE(review): "%H1" is expanded outside this file; presumably it
;; addresses the upper 8 bytes of memory operand 1 -- confirm.
4988(define_insn "*vec_extractv2df_1_sse"
4989  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4990	(vec_select:DF
4991	  (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
4992	  (parallel [(const_int 1)])))]
4993  "!TARGET_SSE2 && TARGET_SSE
4994   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4995  "@
4996   movhps\t{%1, %0|%0, %1}
4997   movhlps\t{%1, %0|%0, %1}
4998   movlps\t{%H1, %0|%0, %H1}"
4999  [(set_attr "type" "ssemov")
5000   (set_attr "ssememalign" "64")
5001   (set_attr "mode" "V2SF,V4SF,V2SF")])
5002
5003;; Avoid combining registers from different units in a single alternative,
5004;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract element 0 (the low double) of a V2DF into a DF destination.
;; Only alternative 0 (register source, memory destination) has an
;; assembly template, a movlpd store.  Alternatives 1-4 have a register
;; destination (SSE, x87 "*f" or integer "r") and template "#"; the
;; define_split that follows turns them into a plain DFmode move.
5005(define_insn "sse2_storelpd"
5006  [(set (match_operand:DF 0 "nonimmediate_operand"     "=m,x,x,*f,r")
5007	(vec_select:DF
5008	  (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
5009	  (parallel [(const_int 0)])))]
5010  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5011  "@
5012   %vmovlpd\t{%1, %0|%0, %1}
5013   #
5014   #
5015   #
5016   #"
5017  [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
5018   (set_attr "prefix_data16" "1,*,*,*,*")
5019   (set_attr "prefix" "maybe_vex")
5020   (set_attr "mode" "V1DF,DF,DF,DF,DF")])
5021
;; Split: extracting element 0 of a V2DF into a register.  After reload
;; the extraction is just a DFmode move from the low part of the source.
5022(define_split
5023  [(set (match_operand:DF 0 "register_operand")
5024	(vec_select:DF
5025	  (match_operand:V2DF 1 "nonimmediate_operand")
5026	  (parallel [(const_int 0)])))]
5027  "TARGET_SSE2 && reload_completed"
5028  [(const_int 0)]
5029{
5030  rtx op1 = operands[1];
  /* Register source: refer to the same hard register number in DFmode.
     Otherwise let gen_lowpart build the DFmode view of the operand.  */
5031  if (REG_P (op1))
5032    op1 = gen_rtx_REG (DFmode, REGNO (op1));
5033  else
5034    op1 = gen_lowpart (DFmode, op1);
5035  emit_move_insn (operands[0], op1);
5036  DONE;
5037})
5038
;; Extract element 0 of a V2DF when only SSE (not SSE2) is available:
;;   0  register -> memory   movlps
;;   1  register -> register movaps (whole-register copy)
;;   2  memory -> register   movlps
;; Memory-to-memory is excluded by the condition.
5039(define_insn "*vec_extractv2df_0_sse"
5040  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
5041	(vec_select:DF
5042	  (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
5043	  (parallel [(const_int 0)])))]
5044  "!TARGET_SSE2 && TARGET_SSE
5045   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5046  "@
5047   movlps\t{%1, %0|%0, %1}
5048   movaps\t{%1, %0|%0, %1}
5049   movlps\t{%1, %0|%0, %1}"
5050  [(set_attr "type" "ssemov")
5051   (set_attr "mode" "V2SF,V4SF,V2SF")])
5052
;; Expander that replaces the high element of a V2DF: the result is
;; { op1[0], op2 }.  Operands are first legitimized by
;; ix86_fixup_binary_operands, which returns the destination to use for
;; the sse2_loadhpd insn; if that is not operands[0] itself, the result is
;; copied to operands[0] afterwards.
;; NOTE(review): UNKNOWN is passed as the rtx code; presumably this makes
;; the helper treat the operation as non-commutative -- confirm.
5053(define_expand "sse2_loadhpd_exp"
5054  [(set (match_operand:V2DF 0 "nonimmediate_operand")
5055	(vec_concat:V2DF
5056	  (vec_select:DF
5057	    (match_operand:V2DF 1 "nonimmediate_operand")
5058	    (parallel [(const_int 0)]))
5059	  (match_operand:DF 2 "nonimmediate_operand")))]
5060  "TARGET_SSE2"
5061{
5062  rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
5063
5064  emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
5065
5066  /* Fix up the destination if needed.  */
5067  if (dst != operands[0])
5068    emit_move_insn (operands[0], dst);
5069
5070  DONE;
5071})
5072
5073;; Avoid combining registers from different units in a single alternative,
5074;; see comment above inline_secondary_memory_needed function in i386.c
;; Build { op1[0], op2 }: keep the low element of operand 1 and insert the
;; DF operand 2 as the high element.
;;   0/1  op2 from memory: movhpd (noavx, op1 tied) / vmovhpd (avx)
;;   2/3  op2 from a register: unpcklpd (noavx, op1 tied) / vunpcklpd (avx)
;;   4-6  offsettable memory destination tied to op1, op2 in an SSE, x87
;;        ("*f") or integer ("r") register; template "#", handled by the
;;        define_split that follows (a DFmode store at offset 8).
5075(define_insn "sse2_loadhpd"
5076  [(set (match_operand:V2DF 0 "nonimmediate_operand"
5077	  "=x,x,x,x,o,o ,o")
5078	(vec_concat:V2DF
5079	  (vec_select:DF
5080	    (match_operand:V2DF 1 "nonimmediate_operand"
5081	  " 0,x,0,x,0,0 ,0")
5082	    (parallel [(const_int 0)]))
5083	  (match_operand:DF 2 "nonimmediate_operand"
5084	  " m,m,x,x,x,*f,r")))]
5085  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
5086  "@
5087   movhpd\t{%2, %0|%0, %2}
5088   vmovhpd\t{%2, %1, %0|%0, %1, %2}
5089   unpcklpd\t{%2, %0|%0, %2}
5090   vunpcklpd\t{%2, %1, %0|%0, %1, %2}
5091   #
5092   #
5093   #"
5094  [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
5095   (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
5096   (set_attr "ssememalign" "64")
5097   (set_attr "prefix_data16" "1,*,*,*,*,*,*")
5098   (set_attr "prefix" "orig,vex,orig,vex,*,*,*")
5099   (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
5100
;; Split: a V2DF memory destination keeps its own element 0 and receives
;; register operand 1 as its new element 1.  Only the high half changes,
;; so after reload this becomes a single DFmode store at byte offset 8.
5101(define_split
5102  [(set (match_operand:V2DF 0 "memory_operand")
5103	(vec_concat:V2DF
5104	  (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
5105	  (match_operand:DF 1 "register_operand")))]
5106  "TARGET_SSE2 && reload_completed"
5107  [(set (match_dup 0) (match_dup 1))]
5108  "operands[0] = adjust_address (operands[0], DFmode, 8);")
5109
;; Expander that replaces the low element of a V2DF: the result is
;; { op2, op1[1] }.  Operands are first legitimized by
;; ix86_fixup_binary_operands, which returns the destination to use for
;; the sse2_loadlpd insn; if that is not operands[0] itself, the result is
;; copied to operands[0] afterwards.
5110(define_expand "sse2_loadlpd_exp"
5111  [(set (match_operand:V2DF 0 "nonimmediate_operand")
5112	(vec_concat:V2DF
5113	  (match_operand:DF 2 "nonimmediate_operand")
5114	  (vec_select:DF
5115	    (match_operand:V2DF 1 "nonimmediate_operand")
5116	    (parallel [(const_int 1)]))))]
5117  "TARGET_SSE2"
5118{
5119  rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
5120
5121  emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
5122
5123  /* Fix up the destination if needed.  */
5124  if (dst != operands[0])
5125    emit_move_insn (operands[0], dst);
5126
5127  DONE;
5128})
5129
5130;; Avoid combining registers from different units in a single alternative,
5131;; see comment above inline_secondary_memory_needed function in i386.c
;; Build { op2, op1[1] }: insert the DF operand 2 as the low element and
;; keep the high element of operand 1.  Note operand 2 appears before
;; operand 1 in the RTL.  Alternatives, in order:
;;   0     op1 is constraint "C", op2 from memory: (v)movsd load
;;   1/2   op2 from memory: movlpd (noavx) / vmovlpd (avx)
;;   3/4   op2 from a register: movsd (noavx) / vmovsd (avx)
;;   5     op2 tied to the destination, op1 in a register: shufpd $2
;;   6/7   op1 in offsettable memory: movhpd / vmovhpd from "%H1"
;;   8-10  memory destination tied to op1, op2 in an SSE, x87 ("*f") or
;;         integer ("r") register; template "#", handled by the
;;         define_split that follows (a DFmode store at offset 0).
;; NOTE(review): constraint "C" is defined outside this file; presumably
;; it matches an all-zero constant vector -- confirm.
5132(define_insn "sse2_loadlpd"
5133  [(set (match_operand:V2DF 0 "nonimmediate_operand"
5134	  "=x,x,x,x,x,x,x,x,m,m ,m")
5135	(vec_concat:V2DF
5136	  (match_operand:DF 2 "nonimmediate_operand"
5137	  " m,m,m,x,x,0,0,x,x,*f,r")
5138	  (vec_select:DF
5139	    (match_operand:V2DF 1 "vector_move_operand"
5140	  " C,0,x,0,x,x,o,o,0,0 ,0")
5141	    (parallel [(const_int 1)]))))]
5142  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
5143  "@
5144   %vmovsd\t{%2, %0|%0, %2}
5145   movlpd\t{%2, %0|%0, %2}
5146   vmovlpd\t{%2, %1, %0|%0, %1, %2}
5147   movsd\t{%2, %0|%0, %2}
5148   vmovsd\t{%2, %1, %0|%0, %1, %2}
5149   shufpd\t{$2, %1, %0|%0, %1, 2}
5150   movhpd\t{%H1, %0|%0, %H1}
5151   vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
5152   #
5153   #
5154   #"
5155  [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
   ;; Type is sselog for the shufpd alternative, fmov/imov for the x87 and
   ;; integer-register stores, and ssemov for everything else.
5156   (set (attr "type")
5157     (cond [(eq_attr "alternative" "5")
5158	      (const_string "sselog")
5159	    (eq_attr "alternative" "9")
5160	      (const_string "fmov")
5161	    (eq_attr "alternative" "10")
5162	      (const_string "imov")
5163	   ]
5164	   (const_string "ssemov")))
5165   (set_attr "ssememalign" "64")
5166   (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*")
5167   (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*")
5168   (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*")
5169   (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
5170
;; Split: a V2DF memory destination receives register operand 1 as its
;; new element 0 and keeps its own element 1.  Only the low half changes,
;; so after reload this becomes a single DFmode store at byte offset 0.
5171(define_split
5172  [(set (match_operand:V2DF 0 "memory_operand")
5173	(vec_concat:V2DF
5174	  (match_operand:DF 1 "register_operand")
5175	  (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
5176  "TARGET_SSE2 && reload_completed"
5177  [(set (match_dup 0) (match_dup 1))]
5178  "operands[0] = adjust_address (operands[0], DFmode, 0);")
5179
;; vec_merge of operand 2 and operand 1 with mask (const_int 1): element 0
;; of the result comes from operand 2, element 1 from operand 1.  Note
;; that operand 2 is listed before operand 1 in the RTL.
;; Alternatives, in order:
;;   0/1  both inputs in registers: movsd (noavx) / vmovsd (avx)
;;   2/3  op2 in memory: movlpd (noavx) / vmovlpd (avx)
;;   4    memory destination tied to op1, op2 in a register: (v)movlpd
;;   5    op2 tied to the destination, op1 in a register: shufpd $2
;;   6/7  op1 in offsettable memory: movhps / vmovhps from "%H1"
;;   8    offsettable memory destination tied to op2, op1 in a register:
;;        (v)movhps store through "%H0"
;; NOTE(review): "%H" is expanded outside this file; presumably it
;; addresses the upper 8 bytes of the memory operand -- confirm.
5180(define_insn "sse2_movsd"
5181  [(set (match_operand:V2DF 0 "nonimmediate_operand"   "=x,x,x,x,m,x,x,x,o")
5182	(vec_merge:V2DF
5183	  (match_operand:V2DF 2 "nonimmediate_operand" " x,x,m,m,x,0,0,x,0")
5184	  (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,0,x,0,x,o,o,x")
5185	  (const_int 1)))]
5186  "TARGET_SSE2"
5187  "@
5188   movsd\t{%2, %0|%0, %2}
5189   vmovsd\t{%2, %1, %0|%0, %1, %2}
5190   movlpd\t{%2, %0|%0, %2}
5191   vmovlpd\t{%2, %1, %0|%0, %1, %2}
5192   %vmovlpd\t{%2, %0|%0, %2}
5193   shufpd\t{$2, %1, %0|%0, %1, 2}
5194   movhps\t{%H1, %0|%0, %H1}
5195   vmovhps\t{%H1, %2, %0|%0, %2, %H1}
5196   %vmovhps\t{%1, %H0|%H0, %1}"
5197  [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
5198   (set (attr "type")
5199     (if_then_else
5200       (eq_attr "alternative" "5")
5201       (const_string "sselog")
5202       (const_string "ssemov")))
   ;; prefix_data16 is 1 for the movlpd alternatives 2 and 4 when not
   ;; using AVX.
5203   (set (attr "prefix_data16")
5204     (if_then_else
5205       (and (eq_attr "alternative" "2,4")
5206	    (not (match_test "TARGET_AVX")))
5207       (const_string "1")
5208       (const_string "*")))
5209   (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*")
5210   (set_attr "ssememalign" "64")
5211   (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex")
5212   (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
5213
;; Broadcast a DF scalar into both elements of a V2DF register.
;;   0  source tied to the destination: unpcklpd of the register with
;;      itself (noavx)
;;   1  source in a register or memory: (v)movddup (sse3)
5214(define_insn "vec_dupv2df"
5215  [(set (match_operand:V2DF 0 "register_operand"     "=x,x")
5216	(vec_duplicate:V2DF
5217	  (match_operand:DF 1 "nonimmediate_operand" " 0,xm")))]
5218  "TARGET_SSE2"
5219  "@
5220   unpcklpd\t%0, %0
5221   %vmovddup\t{%1, %0|%0, %1}"
5222  [(set_attr "isa" "noavx,sse3")
5223   (set_attr "type" "sselog1")
5224   (set_attr "prefix" "orig,maybe_vex")
5225   (set_attr "mode" "V2DF,DF")])
5226
;; Build a V2DF register as { op1, op2 } from two DF scalars.  The insn
;; is enabled for plain SSE; the "isa" attribute restricts each
;; alternative further:
;;   0/1  both in registers: unpcklpd (sse2_noavx) / vunpcklpd (avx)
;;   2    op1 in memory, op2 tied to op1: (v)movddup (sse3)
;;   3/4  op2 in memory: movhpd (sse2_noavx) / vmovhpd (avx)
;;   5    op1 in memory, op2 is constraint "C": (v)movsd load (sse2)
;;   6/7  SSE1-only forms with op1 tied to the destination:
;;        movlhps (op2 register) / movhps (op2 memory) (noavx)
;; NOTE(review): constraint "C" is defined outside this file; presumably
;; it matches a zero constant -- confirm.
5227(define_insn "*vec_concatv2df"
5228  [(set (match_operand:V2DF 0 "register_operand"     "=x,x,x,x,x,x,x,x")
5229	(vec_concat:V2DF
5230	  (match_operand:DF 1 "nonimmediate_operand" " 0,x,m,0,x,m,0,0")
5231	  (match_operand:DF 2 "vector_move_operand"  " x,x,1,m,m,C,x,m")))]
5232  "TARGET_SSE"
5233  "@
5234   unpcklpd\t{%2, %0|%0, %2}
5235   vunpcklpd\t{%2, %1, %0|%0, %1, %2}
5236   %vmovddup\t{%1, %0|%0, %1}
5237   movhpd\t{%2, %0|%0, %2}
5238   vmovhpd\t{%2, %1, %0|%0, %1, %2}
5239   %vmovsd\t{%1, %0|%0, %1}
5240   movlhps\t{%2, %0|%0, %2}
5241   movhps\t{%2, %0|%0, %2}"
5242  [(set_attr "isa" "sse2_noavx,avx,sse3,sse2_noavx,avx,sse2,noavx,noavx")
5243   (set (attr "type")
5244     (if_then_else
5245       (eq_attr "alternative" "0,1,2")
5246       (const_string "sselog")
5247       (const_string "ssemov")))
5248   (set_attr "prefix_data16" "*,*,*,1,*,*,*,*")
5249   (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex,orig,orig")
5250   (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,DF,V4SF,V2SF")])
5251
5252;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5253;;
5254;; Parallel integral arithmetic
5255;;
5256;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5257
;; Integer vector negation, expressed as a subtraction from zero:
;; op0 = operands[2] - op1, where operands[2] is the all-zero constant of
;; the vector mode, forced into a register by the preparation statement.
5258(define_expand "neg<mode>2"
5259  [(set (match_operand:VI_AVX2 0 "register_operand")
5260	(minus:VI_AVX2
5261	  (match_dup 2)
5262	  (match_operand:VI_AVX2 1 "nonimmediate_operand")))]
5263  "TARGET_SSE2"
5264  "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
5265
;; Expander for integer vector add/subtract (one instance per code in the
;; plusminus iterator and per mode in VI_AVX2).  Both inputs may be
;; register or memory; ix86_fixup_binary_operands_no_copy adjusts the
;; operands before the matching insn is emitted.
5266(define_expand "<plusminus_insn><mode>3"
5267  [(set (match_operand:VI_AVX2 0 "register_operand")
5268	(plusminus:VI_AVX2
5269	  (match_operand:VI_AVX2 1 "nonimmediate_operand")
5270	  (match_operand:VI_AVX2 2 "nonimmediate_operand")))]
5271  "TARGET_SSE2"
5272  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
5273
;; Insn for integer vector add/subtract.  Alternative 0 is the
;; two-operand form with op1 tied to the destination (noavx); alternative
;; 1 is the three-operand VEX form (avx).  The mnemonic is assembled from
;; <plusminus_mnemonic> and the per-mode <ssemodesuffix>.
;; NOTE(review): <comm> is a code attribute defined outside this view;
;; presumably it yields "%" for the commutative code only -- confirm.
5274(define_insn "*<plusminus_insn><mode>3"
5275  [(set (match_operand:VI_AVX2 0 "register_operand" "=x,x")
5276	(plusminus:VI_AVX2
5277	  (match_operand:VI_AVX2 1 "nonimmediate_operand" "<comm>0,x")
5278	  (match_operand:VI_AVX2 2 "nonimmediate_operand" "xm,xm")))]
5279  "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5280  "@
5281   p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
5282   vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5283  [(set_attr "isa" "noavx,avx")
5284   (set_attr "type" "sseiadd")
5285   (set_attr "prefix_data16" "1,*")
5286   (set_attr "prefix" "orig,vex")
5287   (set_attr "mode" "<sseinsnmode>")])
5288
;; Expander for the saturating add/subtract operations (codes from the
;; sat_plusminus iterator) on the modes in VI12_AVX2.  Operands are
;; adjusted by ix86_fixup_binary_operands_no_copy before the matching
;; insn is emitted.
5289(define_expand "<sse2_avx2>_<plusminus_insn><mode>3"
5290  [(set (match_operand:VI12_AVX2 0 "register_operand")
5291	(sat_plusminus:VI12_AVX2
5292	  (match_operand:VI12_AVX2 1 "nonimmediate_operand")
5293	  (match_operand:VI12_AVX2 2 "nonimmediate_operand")))]
5294  "TARGET_SSE2"
5295  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
5296
;; Insn for the saturating add/subtract operations on the modes in
;; VI12_AVX2.  Alternative 0 is the two-operand form with op1 tied to the
;; destination (noavx); alternative 1 is the three-operand VEX form (avx).
;; The "mode" attribute follows the vector mode via <sseinsnmode>, as the
;; non-saturating add/sub and the multiply insns in this file do, so that
;; 256-bit instances are not described as 128-bit (TI) operations.
5297(define_insn "*<sse2_avx2>_<plusminus_insn><mode>3"
5298  [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,x")
5299	(sat_plusminus:VI12_AVX2
5300	  (match_operand:VI12_AVX2 1 "nonimmediate_operand" "<comm>0,x")
5301	  (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,xm")))]
5302  "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5303  "@
5304   p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
5305   vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5306  [(set_attr "isa" "noavx,avx")
5307   (set_attr "type" "sseiadd")
5308   (set_attr "prefix_data16" "1,*")
5309   (set_attr "prefix" "orig,vex")
5310   (set_attr "mode" "<sseinsnmode>")])
5311
;; Expander for multiplication of byte-element vectors (VI1_AVX2).  The
;; RTL template is never emitted: the preparation code hands the whole
;; operation to ix86_expand_vecop_qihi and finishes with DONE.
;; NOTE(review): judging by its name the helper widens QI elements to HI
;; for the operation -- confirm against i386.c.
5312(define_expand "mul<mode>3"
5313  [(set (match_operand:VI1_AVX2 0 "register_operand")
5314	(mult:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand")
5315		       (match_operand:VI1_AVX2 2 "register_operand")))]
5316  "TARGET_SSE2"
5317{
5318  ix86_expand_vecop_qihi (MULT, operands[0], operands[1], operands[2]);
5319  DONE;
5320})
5321
;; Expander for multiplication of the VI2_AVX2 vector modes.  Operands
;; are adjusted by ix86_fixup_binary_operands_no_copy; the emitted RTL is
;; matched by the *mul<mode>3 insn (pmullw / vpmullw).
5322(define_expand "mul<mode>3"
5323  [(set (match_operand:VI2_AVX2 0 "register_operand")
5324	(mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand")
5325		       (match_operand:VI2_AVX2 2 "nonimmediate_operand")))]
5326  "TARGET_SSE2"
5327  "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
5328
;; Insn for multiplication of the VI2_AVX2 vector modes.  Operand 1 is
;; marked commutative ("%").  Alternative 0 is pmullw with op1 tied to
;; the destination (noavx); alternative 1 is the three-operand vpmullw
;; (avx).
5329(define_insn "*mul<mode>3"
5330  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
5331	(mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x")
5332		       (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))]
5333  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
5334  "@
5335   pmullw\t{%2, %0|%0, %2}
5336   vpmullw\t{%2, %1, %0|%0, %1, %2}"
5337  [(set_attr "isa" "noavx,avx")
5338   (set_attr "type" "sseimul")
5339   (set_attr "prefix_data16" "1,*")
5340   (set_attr "prefix" "orig,vex")
5341   (set_attr "mode" "<sseinsnmode>")])
5342
;; Expander for the high-part multiply on the VI2_AVX2 modes: both inputs
;; are extended (sign or zero, per the any_extend iterator) to
;; <ssedoublemode>, multiplied, shifted right logically by 16 and
;; truncated back, which keeps the upper 16 bits of each product.
5343(define_expand "<s>mul<mode>3_highpart"
5344  [(set (match_operand:VI2_AVX2 0 "register_operand")
5345	(truncate:VI2_AVX2
5346	  (lshiftrt:<ssedoublemode>
5347	    (mult:<ssedoublemode>
5348	      (any_extend:<ssedoublemode>
5349		(match_operand:VI2_AVX2 1 "nonimmediate_operand"))
5350	      (any_extend:<ssedoublemode>
5351		(match_operand:VI2_AVX2 2 "nonimmediate_operand")))
5352	    (const_int 16))))]
5353  "TARGET_SSE2"
5354  "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
5355
;; Insn for the high-part multiply on the VI2_AVX2 modes (upper 16 bits
;; of each widened product).  Operand 1 is marked commutative ("%").  The
;; <u> attribute is spliced into the mnemonic (pmulh<u>w); alternative 0
;; is the two-operand form (noavx), alternative 1 the VEX form (avx).
5356(define_insn "*<s>mul<mode>3_highpart"
5357  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
5358	(truncate:VI2_AVX2
5359	  (lshiftrt:<ssedoublemode>
5360	    (mult:<ssedoublemode>
5361	      (any_extend:<ssedoublemode>
5362		(match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x"))
5363	      (any_extend:<ssedoublemode>
5364		(match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))
5365	    (const_int 16))))]
5366  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
5367  "@
5368   pmulh<u>w\t{%2, %0|%0, %2}
5369   vpmulh<u>w\t{%2, %1, %0|%0, %1, %2}"
5370  [(set_attr "isa" "noavx,avx")
5371   (set_attr "type" "sseimul")
5372   (set_attr "prefix_data16" "1,*")
5373   (set_attr "prefix" "orig,vex")
5374   (set_attr "mode" "<sseinsnmode>")])
5375
;; Expander for the unsigned widening multiply of the even elements of
;; two V8SI vectors: elements 0, 2, 4 and 6 of each input are
;; zero-extended to 64 bits and multiplied, giving a V4DI result.
5376(define_expand "vec_widen_umult_even_v8si"
5377  [(set (match_operand:V4DI 0 "register_operand")
5378	(mult:V4DI
5379	  (zero_extend:V4DI
5380	    (vec_select:V4SI
5381	      (match_operand:V8SI 1 "nonimmediate_operand")
5382	      (parallel [(const_int 0) (const_int 2)
5383			 (const_int 4) (const_int 6)])))
5384	  (zero_extend:V4DI
5385	    (vec_select:V4SI
5386	      (match_operand:V8SI 2 "nonimmediate_operand")
5387	      (parallel [(const_int 0) (const_int 2)
5388			 (const_int 4) (const_int 6)])))))]
5389  "TARGET_AVX2"
5390  "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
5391
;; Insn for the unsigned even-element widening multiply V8SI x V8SI ->
;; V4DI, emitted as vpmuludq.  Operand 1 is marked commutative ("%x"), so
;; a memory input can end up as operand 2.
5392(define_insn "*vec_widen_umult_even_v8si"
5393  [(set (match_operand:V4DI 0 "register_operand" "=x")
5394	(mult:V4DI
5395	  (zero_extend:V4DI
5396	    (vec_select:V4SI
5397	      (match_operand:V8SI 1 "nonimmediate_operand" "%x")
5398	      (parallel [(const_int 0) (const_int 2)
5399			 (const_int 4) (const_int 6)])))
5400	  (zero_extend:V4DI
5401	    (vec_select:V4SI
5402	      (match_operand:V8SI 2 "nonimmediate_operand" "xm")
5403	      (parallel [(const_int 0) (const_int 2)
5404			 (const_int 4) (const_int 6)])))))]
5405  "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
5406  "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
5407  [(set_attr "type" "sseimul")
5408   (set_attr "prefix" "vex")
5409   (set_attr "mode" "OI")])
5410
;; Expander for the unsigned widening multiply of the even elements of
;; two V4SI vectors: elements 0 and 2 of each input are zero-extended to
;; 64 bits and multiplied, giving a V2DI result.
5411(define_expand "vec_widen_umult_even_v4si"
5412  [(set (match_operand:V2DI 0 "register_operand")
5413	(mult:V2DI
5414	  (zero_extend:V2DI
5415	    (vec_select:V2SI
5416	      (match_operand:V4SI 1 "nonimmediate_operand")
5417	      (parallel [(const_int 0) (const_int 2)])))
5418	  (zero_extend:V2DI
5419	    (vec_select:V2SI
5420	      (match_operand:V4SI 2 "nonimmediate_operand")
5421	      (parallel [(const_int 0) (const_int 2)])))))]
5422  "TARGET_SSE2"
5423  "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
5424
;; Insn for the unsigned even-element widening multiply V4SI x V4SI ->
;; V2DI.  Operand 1 is marked commutative ("%").  Alternative 0 is
;; pmuludq with op1 tied to the destination (noavx); alternative 1 is the
;; three-operand vpmuludq (avx).
5425(define_insn "*vec_widen_umult_even_v4si"
5426  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
5427	(mult:V2DI
5428	  (zero_extend:V2DI
5429	    (vec_select:V2SI
5430	      (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
5431	      (parallel [(const_int 0) (const_int 2)])))
5432	  (zero_extend:V2DI
5433	    (vec_select:V2SI
5434	      (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
5435	      (parallel [(const_int 0) (const_int 2)])))))]
5436  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5437  "@
5438   pmuludq\t{%2, %0|%0, %2}
5439   vpmuludq\t{%2, %1, %0|%0, %1, %2}"
5440  [(set_attr "isa" "noavx,avx")
5441   (set_attr "type" "sseimul")
5442   (set_attr "prefix_data16" "1,*")
5443   (set_attr "prefix" "orig,vex")
5444   (set_attr "mode" "TI")])
5445
;; Named expander for the signed even-element widening multiply on V8SI:
;; elements 0, 2, 4 and 6 of each source are sign-extended and multiplied
;; into a V4DI.  Requires AVX2.
(define_expand "vec_widen_smult_even_v8si"
  [(set (match_operand:V4DI 0 "register_operand")
        (mult:V4DI
          (sign_extend:V4DI
            (vec_select:V4SI
              (match_operand:V8SI 1 "nonimmediate_operand")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)])))
          (sign_extend:V4DI
            (vec_select:V4SI
              (match_operand:V8SI 2 "nonimmediate_operand")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)])))))]
  "TARGET_AVX2"
{
  ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);
})
5461
;; AVX2 vpmuldq: the even-indexed SImode elements (0, 2, 4, 6) of each
;; V8SI source are sign-extended and multiplied element-wise, giving a
;; V4DI result.
;;
;; Operand 1 carries the "%" commutative modifier, matching the unsigned
;; V8SI insn and the 128-bit signed insn: the predicate accepts a memory
;; operand and the insn condition goes through ix86_binary_operator_ok
;; with a commutative MULT, so the two sources must be exchangeable when
;; constraints are matched.
(define_insn "*vec_widen_smult_even_v8si"
  [(set (match_operand:V4DI 0 "register_operand" "=x")
        (mult:V4DI
          (sign_extend:V4DI
            (vec_select:V4SI
              (match_operand:V8SI 1 "nonimmediate_operand" "%x")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)])))
          (sign_extend:V4DI
            (vec_select:V4SI
              (match_operand:V8SI 2 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)])))))]
  "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
  "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "avx")
   (set_attr "type" "sseimul")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
5482
;; Expander for the SSE4.1 signed even-element widening multiply on
;; V4SI: elements 0 and 2 of each source are sign-extended and
;; multiplied into a V2DI.
(define_expand "sse4_1_mulv2siv2di3"
  [(set (match_operand:V2DI 0 "register_operand")
        (mult:V2DI
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 1 "nonimmediate_operand")
              (parallel [(const_int 0) (const_int 2)])))
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 2 "nonimmediate_operand")
              (parallel [(const_int 0) (const_int 2)])))))]
  "TARGET_SSE4_1"
{
  ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
})
5496
;; pmuldq / vpmuldq on 128-bit vectors: signed counterpart of the
;; pmuludq pattern.  Alternative 0 is the legacy two-operand form,
;; alternative 1 the VEX three-operand form.
(define_insn "*sse4_1_mulv2siv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
        (mult:V2DI
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
              (parallel [(const_int 0) (const_int 2)])))
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
              (parallel [(const_int 0) (const_int 2)])))))]
  "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
  "@
   pmuldq\t{%2, %0|%0, %2}
   vpmuldq\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseimul")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
5518
;; Expander for the 256-bit multiply-add of signed words.  For each
;; result lane i of the V8SI destination the RTL computes
;;   sext (op1[2i]) * sext (op2[2i]) + sext (op1[2i+1]) * sext (op2[2i+1]).
(define_expand "avx2_pmaddwd"
  [(set (match_operand:V8SI 0 "register_operand")
        (plus:V8SI
          ;; Products of the even-indexed HImode elements.
          (mult:V8SI
            (sign_extend:V8SI
              (vec_select:V8HI
                (match_operand:V16HI 1 "nonimmediate_operand")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)
                           (const_int 8) (const_int 10)
                           (const_int 12) (const_int 14)])))
            (sign_extend:V8SI
              (vec_select:V8HI
                (match_operand:V16HI 2 "nonimmediate_operand")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)
                           (const_int 8) (const_int 10)
                           (const_int 12) (const_int 14)]))))
          ;; Products of the odd-indexed HImode elements.
          (mult:V8SI
            (sign_extend:V8SI
              (vec_select:V8HI (match_dup 1)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)
                           (const_int 9) (const_int 11)
                           (const_int 13) (const_int 15)])))
            (sign_extend:V8SI
              (vec_select:V8HI (match_dup 2)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)
                           (const_int 9) (const_int 11)
                           (const_int 13) (const_int 15)]))))))]
  "TARGET_AVX2"
{
  ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);
})
5552
;; AVX2 vpmaddwd: sum of the sign-extended even-element products and the
;; sign-extended odd-element products of two V16HI sources, as V8SI.
(define_insn "*avx2_pmaddwd"
  [(set (match_operand:V8SI 0 "register_operand" "=x")
        (plus:V8SI
          ;; Even-indexed element products.
          (mult:V8SI
            (sign_extend:V8SI
              (vec_select:V8HI
                (match_operand:V16HI 1 "nonimmediate_operand" "%x")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)
                           (const_int 8) (const_int 10)
                           (const_int 12) (const_int 14)])))
            (sign_extend:V8SI
              (vec_select:V8HI
                (match_operand:V16HI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)
                           (const_int 8) (const_int 10)
                           (const_int 12) (const_int 14)]))))
          ;; Odd-indexed element products.
          (mult:V8SI
            (sign_extend:V8SI
              (vec_select:V8HI (match_dup 1)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)
                           (const_int 9) (const_int 11)
                           (const_int 13) (const_int 15)])))
            (sign_extend:V8SI
              (vec_select:V8HI (match_dup 2)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)
                           (const_int 9) (const_int 11)
                           (const_int 13) (const_int 15)]))))))]
  "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
  "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
5589
;; Expander for the 128-bit multiply-add of signed words.  For each
;; result lane i of the V4SI destination the RTL computes
;;   sext (op1[2i]) * sext (op2[2i]) + sext (op1[2i+1]) * sext (op2[2i+1]).
(define_expand "sse2_pmaddwd"
  [(set (match_operand:V4SI 0 "register_operand")
        (plus:V4SI
          ;; Products of the even-indexed HImode elements.
          (mult:V4SI
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 1 "nonimmediate_operand")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)])))
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 2 "nonimmediate_operand")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)]))))
          ;; Products of the odd-indexed HImode elements.
          (mult:V4SI
            (sign_extend:V4SI
              (vec_select:V4HI (match_dup 1)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)])))
            (sign_extend:V4SI
              (vec_select:V4HI (match_dup 2)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)]))))))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);
})
5615
;; pmaddwd / vpmaddwd on 128-bit vectors.  Alternative 0 is the legacy
;; two-operand form, alternative 1 the VEX three-operand form.
(define_insn "*sse2_pmaddwd"
  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
        (plus:V4SI
          ;; Even-indexed element products.
          (mult:V4SI
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)])))
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)]))))
          ;; Odd-indexed element products.
          (mult:V4SI
            (sign_extend:V4SI
              (vec_select:V4HI (match_dup 1)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)])))
            (sign_extend:V4SI
              (vec_select:V4HI (match_dup 2)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)]))))))]
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
  "@
   pmaddwd\t{%2, %0|%0, %2}
   vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "atom_unit" "simul")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
5649
;; Element-wise multiply of vectors with 32-bit integer elements.
;; Without SSE4.1 the whole expansion is delegated to
;; ix86_expand_sse2_mulv4si3; with SSE4.1 the operands are only
;; legitimized here and the RTL template above is emitted.
(define_expand "mul<mode>3"
  [(set (match_operand:VI4_AVX2 0 "register_operand")
        (mult:VI4_AVX2
          (match_operand:VI4_AVX2 1 "nonimmediate_operand")
          (match_operand:VI4_AVX2 2 "nonimmediate_operand")))]
  "TARGET_SSE2"
{
  if (!TARGET_SSE4_1)
    {
      ix86_expand_sse2_mulv4si3 (operands[0], operands[1], operands[2]);
      DONE;
    }

  /* A constant multiplier is spilled to the constant pool first.  */
  if (CONSTANT_P (operands[2]))
    operands[2] = validize_mem (force_const_mem (<MODE>mode, operands[2]));

  ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
})
5669
;; pmulld / vpmulld: element-wise multiply of vectors with 32-bit integer
;; elements.  Alternative 0 is the legacy two-operand form, alternative 1
;; the VEX three-operand form.
(define_insn "*<sse4_1_avx2>_mul<mode>3"
  [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,x")
        (mult:VI4_AVX2
          (match_operand:VI4_AVX2 1 "nonimmediate_operand" "%0,x")
          (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
  "@
   pmulld\t{%2, %0|%0, %2}
   vpmulld\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseimul")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "btver2_decode" "vector,vector")
   (set_attr "mode" "<sseinsnmode>")])
5685
;; Element-wise multiply of vectors with 64-bit integer elements.  No
;; insn pattern is matched for this; the expansion is always handed to
;; ix86_expand_sse2_mulvxdi3.
(define_expand "mul<mode>3"
  [(set (match_operand:VI8_AVX2 0 "register_operand")
        (mult:VI8_AVX2
          (match_operand:VI8_AVX2 1 "register_operand")
          (match_operand:VI8_AVX2 2 "register_operand")))]
  "TARGET_SSE2"
{
  ix86_expand_sse2_mulvxdi3 (operands[0], operands[1], operands[2]);
  DONE;
})
5695
;; Widening multiply, "hi" variant, for signed and unsigned extension
;; (any_extend).  All of the work is done by ix86_expand_mul_widen_hilo,
;; called with <u_bool> as the fourth argument and true as the fifth.
(define_expand "vec_widen_<s>mult_hi_<mode>"
  [(match_operand:<sseunpackmode> 0 "register_operand")
   (any_extend:<sseunpackmode>
     (match_operand:VI124_AVX2 1 "register_operand"))
   (match_operand:VI124_AVX2 2 "register_operand")]
  "TARGET_SSE2"
{
  ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
                              <u_bool>, /* fifth argument */ true);
  DONE;
})
5707
;; Widening multiply, "lo" variant, for signed and unsigned extension
;; (any_extend).  All of the work is done by ix86_expand_mul_widen_hilo,
;; called with <u_bool> as the fourth argument and false as the fifth.
(define_expand "vec_widen_<s>mult_lo_<mode>"
  [(match_operand:<sseunpackmode> 0 "register_operand")
   (any_extend:<sseunpackmode>
     (match_operand:VI124_AVX2 1 "register_operand"))
   (match_operand:VI124_AVX2 2 "register_operand")]
  "TARGET_SSE2"
{
  ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
                              <u_bool>, /* fifth argument */ false);
  DONE;
})
5719
;; Most widen_<s>mult_even_<mode> operations are covered directly by
;; other named patterns.  The signed V4SI case is the exception: on plain
;; SSE2 it needs help, so it is expanded through
;; ix86_expand_mul_widen_evenodd (both trailing arguments false).
(define_expand "vec_widen_smult_even_v4si"
  [(match_operand:V2DI 0 "register_operand")
   (match_operand:V4SI 1 "register_operand")
   (match_operand:V4SI 2 "register_operand")]
  "TARGET_SSE2"
{
  ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
                                 false, false);
  DONE;
})
5732
;; Widening multiply of the odd elements, for signed and unsigned
;; extension (any_extend).  Expanded through
;; ix86_expand_mul_widen_evenodd with <u_bool> as the fourth argument and
;; true as the fifth.
(define_expand "vec_widen_<s>mult_odd_<mode>"
  [(match_operand:<sseunpackmode> 0 "register_operand")
   (any_extend:<sseunpackmode>
     (match_operand:VI4_AVX2 1 "register_operand"))
   (match_operand:VI4_AVX2 2 "register_operand")]
  "TARGET_SSE2"
{
  ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
                                 <u_bool>, /* fifth argument */ true);
  DONE;
})
5744
;; Signed dot product on vectors with 16-bit elements:
;;   operands[0] = operands[3] + pmaddwd (operands[1], operands[2]).
;; The pmaddwd result goes to a fresh pseudo, then a plain vector add in
;; <sseunpackmode> accumulates it.
(define_expand "sdot_prod<mode>"
  [(match_operand:<sseunpackmode> 0 "register_operand")
   (match_operand:VI2_AVX2 1 "register_operand")
   (match_operand:VI2_AVX2 2 "register_operand")
   (match_operand:<sseunpackmode> 3 "register_operand")]
  "TARGET_SSE2"
{
  rtx products = gen_reg_rtx (<sseunpackmode>mode);
  rtx sum;

  emit_insn (gen_<sse2_avx2>_pmaddwd (products, operands[1], operands[2]));

  sum = gen_rtx_PLUS (<sseunpackmode>mode, operands[3], products);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0], sum));
  DONE;
})
5759
;; Normally widen_mul_even/odd would be used here, but combine cannot
;; quite put the pieces back together when a multiply-add is available.
;; With XOP the dot product is therefore emitted directly as two chained
;; insns: xop_pmacsdqh accumulates into a temporary starting from
;; operands[3], and xop_pmacsdql adds onto that temporary.
(define_expand "sdot_prodv4si"
  [(match_operand:V2DI 0 "register_operand")
   (match_operand:V4SI 1 "register_operand")
   (match_operand:V4SI 2 "register_operand")
   (match_operand:V2DI 3 "register_operand")]
  "TARGET_XOP"
{
  rtx partial = gen_reg_rtx (V2DImode);

  emit_insn (gen_xop_pmacsdqh (partial, operands[1], operands[2],
                               operands[3]));
  emit_insn (gen_xop_pmacsdql (operands[0], operands[1], operands[2],
                               partial));
  DONE;
})
5774
;; Arithmetic right shift of every element by one common count
;; (psraw/psrad and their VEX forms).  Operand 2 is an SImode count that
;; is either a register or a constant; length_immediate is 1 only when
;; it is a const_int.
(define_insn "ashr<mode>3"
  [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
        (ashiftrt:VI24_AVX2
          (match_operand:VI24_AVX2 1 "register_operand" "0,x")
          (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
  "TARGET_SSE2"
  "@
   psra<ssemodesuffix>\t{%2, %0|%0, %2}
   vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseishft")
   (set (attr "length_immediate")
        (if_then_else (match_operand 2 "const_int_operand")
                      (const_string "1")
                      (const_string "0")))
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])
5793
;; Logical shifts (any_lshift) of every element by one common count.
;; The mnemonic is built from <vshift> and <ssemodesuffix>.  Operand 2 is
;; an SImode count that is either a register or a constant;
;; length_immediate is 1 only when it is a const_int.
(define_insn "<shift_insn><mode>3"
  [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x")
        (any_lshift:VI248_AVX2
          (match_operand:VI248_AVX2 1 "register_operand" "0,x")
          (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
  "TARGET_SSE2"
  "@
   p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
   vp<vshift><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseishft")
   (set (attr "length_immediate")
        (if_then_else (match_operand 2 "const_int_operand")
                      (const_string "1")
                      (const_string "0")))
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])
5812
;; Whole-vector left shift for any 128-bit integer vector mode.  The
;; destination and source are reinterpreted as V1TI with gen_lowpart so
;; that the emitted RTL is a single V1TI ashift.
(define_expand "vec_shl_<mode>"
  [(set (match_operand:VI_128 0 "register_operand")
        (ashift:V1TI
          (match_operand:VI_128 1 "register_operand")
          (match_operand:SI 2 "const_0_to_255_mul_8_operand")))]
  "TARGET_SSE2"
{
  /* View both vector operands as one 128-bit integer.  */
  operands[0] = gen_lowpart (V1TImode, operands[0]);
  operands[1] = gen_lowpart (V1TImode, operands[1]);
})
5823
;; pslldq / vpslldq: left shift of the full-width integer.  The count in
;; the RTL is accepted by const_0_to_255_mul_8_operand and is divided by
;; 8 before the instruction is printed.
(define_insn "<sse2_avx2>_ashl<mode>3"
  [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
        (ashift:VIMAX_AVX2
          (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
          (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
  "TARGET_SSE2"
{
  /* Rewrite operand 2 in place as count / 8 for the output template.  */
  operands[2] = GEN_INT (INTVAL (operands[2]) / 8);

  if (which_alternative == 0)
    return "pslldq\t{%2, %0|%0, %2}";
  if (which_alternative == 1)
    return "vpslldq\t{%2, %1, %0|%0, %1, %2}";

  gcc_unreachable ();
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseishft")
   (set_attr "length_immediate" "1")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])
5849
;; Whole-vector logical right shift for any 128-bit integer vector mode.
;; The destination and source are reinterpreted as V1TI with gen_lowpart
;; so that the emitted RTL is a single V1TI lshiftrt.
(define_expand "vec_shr_<mode>"
  [(set (match_operand:VI_128 0 "register_operand")
        (lshiftrt:V1TI
          (match_operand:VI_128 1 "register_operand")
          (match_operand:SI 2 "const_0_to_255_mul_8_operand")))]
  "TARGET_SSE2"
{
  /* View both vector operands as one 128-bit integer.  */
  operands[0] = gen_lowpart (V1TImode, operands[0]);
  operands[1] = gen_lowpart (V1TImode, operands[1]);
})
5860
;; psrldq / vpsrldq: logical right shift of the full-width integer.  The
;; count in the RTL is accepted by const_0_to_255_mul_8_operand and is
;; divided by 8 before the instruction is printed.
(define_insn "<sse2_avx2>_lshr<mode>3"
  [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
        (lshiftrt:VIMAX_AVX2
          (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
          (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
  "TARGET_SSE2"
{
  /* Rewrite operand 2 in place as count / 8 for the output template.  */
  operands[2] = GEN_INT (INTVAL (operands[2]) / 8);

  if (which_alternative == 0)
    return "psrldq\t{%2, %0|%0, %2}";
  if (which_alternative == 1)
    return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";

  gcc_unreachable ();
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseishft")
   (set_attr "length_immediate" "1")
   (set_attr "atom_unit" "sishuf")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])
5887
5888
;; Integer min/max (all four maxmin codes) on the VI124_256 modes.
;; AVX2 only; the operands are legitimized with
;; ix86_fixup_binary_operands_no_copy and the template above is emitted.
(define_expand "<code><mode>3"
  [(set (match_operand:VI124_256 0 "register_operand")
        (maxmin:VI124_256
          (match_operand:VI124_256 1 "nonimmediate_operand")
          (match_operand:VI124_256 2 "nonimmediate_operand")))]
  "TARGET_AVX2"
{
  ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
})
5896
;; AVX2 vpmax*/vpmin* on the VI124_256 modes; the mnemonic is assembled
;; from <maxmin_int> and <ssemodesuffix>.
(define_insn "*avx2_<code><mode>3"
  [(set (match_operand:VI124_256 0 "register_operand" "=x")
        (maxmin:VI124_256
          (match_operand:VI124_256 1 "nonimmediate_operand" "%x")
          (match_operand:VI124_256 2 "nonimmediate_operand" "xm")))]
  "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
5908
;; Integer min/max on vectors with 64-bit elements.  No min/max insn is
;; matched for these modes; the operation is built as a vector
;; conditional move through ix86_expand_int_vcond:
;;   max:  (op1 > op2) ? op1 : op2
;;   min:  (op1 > op2) ? op2 : op1
;; using GTU for the unsigned codes and GT for the signed ones.
(define_expand "<code><mode>3"
  [(set (match_operand:VI8_AVX2 0 "register_operand")
        (maxmin:VI8_AVX2
          (match_operand:VI8_AVX2 1 "register_operand")
          (match_operand:VI8_AVX2 2 "register_operand")))]
  "TARGET_SSE4_2"
{
  rtx vcond_ops[6];
  bool expanded;
  const bool is_max = (<CODE> == SMAX || <CODE> == UMAX);
  const enum rtx_code cmp = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;

  /* Layout expected by ix86_expand_int_vcond:
     [0] dest, [1] value if true, [2] value if false,
     [3] comparison, [4]/[5] comparison operands.  */
  vcond_ops[0] = operands[0];
  vcond_ops[1] = is_max ? operands[1] : operands[2];
  vcond_ops[2] = is_max ? operands[2] : operands[1];
  vcond_ops[3] = gen_rtx_fmt_ee (cmp, VOIDmode, operands[1], operands[2]);
  vcond_ops[4] = operands[1];
  vcond_ops[5] = operands[2];

  expanded = ix86_expand_int_vcond (vcond_ops);
  gcc_assert (expanded);
  DONE;
})
5943
;; Signed min/max on 128-bit vectors with 8-, 16- or 32-bit elements.
;; With SSE4.1, or for V8HI on any SSE2 target, the operands are only
;; legitimized and the template above is emitted.  Otherwise the result
;; is built as a vector conditional move on a GT comparison:
;;   smax:  (op1 > op2) ? op1 : op2
;;   smin:  (op1 > op2) ? op2 : op1
(define_expand "<code><mode>3"
  [(set (match_operand:VI124_128 0 "register_operand")
        (smaxmin:VI124_128
          (match_operand:VI124_128 1 "nonimmediate_operand")
          (match_operand:VI124_128 2 "nonimmediate_operand")))]
  "TARGET_SSE2"
{
  if (!TARGET_SSE4_1 && <MODE>mode != V8HImode)
    {
      rtx vcond_ops[6];
      bool expanded;
      const bool is_max = (<CODE> == SMAX);

      /* The vcond path needs both inputs in registers.  */
      operands[1] = force_reg (<MODE>mode, operands[1]);
      operands[2] = force_reg (<MODE>mode, operands[2]);

      vcond_ops[0] = operands[0];
      vcond_ops[1] = is_max ? operands[1] : operands[2];
      vcond_ops[2] = is_max ? operands[2] : operands[1];
      vcond_ops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
      vcond_ops[4] = operands[1];
      vcond_ops[5] = operands[2];

      expanded = ix86_expand_int_vcond (vcond_ops);
      gcc_assert (expanded);
      DONE;
    }

  ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
})
5982
;; SSE4.1 signed min/max on the VI14_128 modes.  Alternative 0 is the
;; legacy two-operand form, alternative 1 the VEX three-operand form;
;; the mnemonic is assembled from <maxmin_int> and <ssemodesuffix>.
(define_insn "*sse4_1_<code><mode>3"
  [(set (match_operand:VI14_128 0 "register_operand" "=x,x")
        (smaxmin:VI14_128
          (match_operand:VI14_128 1 "nonimmediate_operand" "%0,x")
          (match_operand:VI14_128 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "@
   p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
   vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
5997
;; Signed min/max on V8HI, available from SSE2 on.  Alternative 0 is the
;; legacy two-operand form, alternative 1 the VEX three-operand form.
(define_insn "*<code>v8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
        (smaxmin:V8HI
          (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
          (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
  "@
   p<maxmin_int>w\t{%2, %0|%0, %2}
   vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "*,1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
6013
;; Unsigned min/max on 128-bit vectors with 8-, 16- or 32-bit elements.
;; Three strategies:
;;  * SSE4.1, or V16QI on any SSE2 target: legitimize the operands and
;;    emit the template above.
;;  * UMAX on V8HI without SSE4.1: emit gen_sse2_ussubv8hi3 followed by
;;    gen_addv8hi3 -- presumably umax (a, b) = (a -us b) + b with an
;;    unsigned saturating subtract.
;;  * Everything else: a vector conditional move on a GTU comparison.
(define_expand "<code><mode>3"
  [(set (match_operand:VI124_128 0 "register_operand")
        (umaxmin:VI124_128
          (match_operand:VI124_128 1 "nonimmediate_operand")
          (match_operand:VI124_128 2 "nonimmediate_operand")))]
  "TARGET_SSE2"
{
  if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
    ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
  else if (<CODE> == UMAX && <MODE>mode == V8HImode)
    {
      rtx dest = operands[0];
      rtx src2 = operands[2];
      rtx diff;

      operands[1] = force_reg (<MODE>mode, operands[1]);

      /* The difference may live in DEST unless DEST is also the second
	 source, which is still needed by the add below.  */
      diff = rtx_equal_p (dest, src2) ? gen_reg_rtx (V8HImode) : dest;

      emit_insn (gen_sse2_ussubv8hi3 (diff, operands[1], src2));
      emit_insn (gen_addv8hi3 (dest, diff, src2));
      DONE;
    }
  else
    {
      rtx vcond_ops[6];
      bool expanded;
      const bool is_max = (<CODE> == UMAX);

      /* The vcond path needs both inputs in registers.  */
      operands[1] = force_reg (<MODE>mode, operands[1]);
      operands[2] = force_reg (<MODE>mode, operands[2]);

      vcond_ops[0] = operands[0];
      vcond_ops[1] = is_max ? operands[1] : operands[2];
      vcond_ops[2] = is_max ? operands[2] : operands[1];
      vcond_ops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
      vcond_ops[4] = operands[1];
      vcond_ops[5] = operands[2];

      expanded = ix86_expand_int_vcond (vcond_ops);
      gcc_assert (expanded);
      DONE;
    }
})
6063
;; SSE4.1 unsigned min/max on the VI24_128 modes.  Alternative 0 is the
;; legacy two-operand form, alternative 1 the VEX three-operand form;
;; the mnemonic is assembled from <maxmin_int> and <ssemodesuffix>.
(define_insn "*sse4_1_<code><mode>3"
  [(set (match_operand:VI24_128 0 "register_operand" "=x,x")
        (umaxmin:VI24_128
          (match_operand:VI24_128 1 "nonimmediate_operand" "%0,x")
          (match_operand:VI24_128 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "@
   p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
   vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
6078
;; Unsigned min/max on V16QI, available from SSE2 on.  Alternative 0 is
;; the legacy two-operand form, alternative 1 the VEX three-operand form.
(define_insn "*<code>v16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "=x,x")
        (umaxmin:V16QI
          (match_operand:V16QI 1 "nonimmediate_operand" "%0,x")
          (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
  "@
   p<maxmin_int>b\t{%2, %0|%0, %2}
   vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "*,1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
6094
6095;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6096;;
6097;; Parallel integral comparisons
6098;;
6099;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6100
;; Expander for element-wise equality comparison of 256-bit integer
;; vectors (AVX2).  Only legitimizes the operands.
(define_expand "avx2_eq<mode>3"
  [(set (match_operand:VI_256 0 "register_operand")
        (eq:VI_256
          (match_operand:VI_256 1 "nonimmediate_operand")
          (match_operand:VI_256 2 "nonimmediate_operand")))]
  "TARGET_AVX2"
{
  ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);
})
6108
;; AVX2 vpcmpeq{b,w,d,q}: element-wise equality on 256-bit integer
;; vectors.  The sources are commutable ("%").
(define_insn "*avx2_eq<mode>3"
  [(set (match_operand:VI_256 0 "register_operand" "=x")
        (eq:VI_256
          (match_operand:VI_256 1 "nonimmediate_operand" "%x")
          (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
  "TARGET_AVX2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
  "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ssecmp")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
6120
;; SSE4.1 pcmpeqq / vpcmpeqq: element-wise equality on V2DI.
;; Alternative 0 is the legacy two-operand form, alternative 1 the VEX
;; three-operand form.
(define_insn "*sse4_1_eqv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
        (eq:V2DI
          (match_operand:V2DI 1 "nonimmediate_operand" "%0,x")
          (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
  "@
   pcmpeqq\t{%2, %0|%0, %2}
   vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssecmp")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
6135
;; SSE2 pcmpeq{b,w,d} and their VEX forms: element-wise equality on
;; 128-bit vectors with 8-, 16- or 32-bit elements.  Disabled when XOP is
;; enabled (!TARGET_XOP in the condition).
(define_insn "*sse2_eq<mode>3"
  [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
        (eq:VI124_128
          (match_operand:VI124_128 1 "nonimmediate_operand" "%0,x")
          (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE2 && !TARGET_XOP
   && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
  "@
   pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
   vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssecmp")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
6151
;; Expander for the SSE2 element-wise equality comparison on 128-bit
;; vectors with 8-, 16- or 32-bit elements.  Disabled when XOP is
;; enabled; only legitimizes the operands.
(define_expand "sse2_eq<mode>3"
  [(set (match_operand:VI124_128 0 "register_operand")
        (eq:VI124_128
          (match_operand:VI124_128 1 "nonimmediate_operand")
          (match_operand:VI124_128 2 "nonimmediate_operand")))]
  "TARGET_SSE2 && !TARGET_XOP "
{
  ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);
})
6159
;; Expander for the SSE4.1 element-wise equality comparison on V2DI.
;; Only legitimizes the operands.
(define_expand "sse4_1_eqv2di3"
  [(set (match_operand:V2DI 0 "register_operand")
        (eq:V2DI
          (match_operand:V2DI 1 "nonimmediate_operand")
          (match_operand:V2DI 2 "nonimmediate_operand")))]
  "TARGET_SSE4_1"
{
  ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);
})
6167
;; SSE4.2 pcmpgtq / vpcmpgtq: element-wise signed greater-than on V2DI.
;; GT is not commutative, so operand 1 must be a register and carries no
;; "%" modifier.
(define_insn "sse4_2_gtv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
        (gt:V2DI
          (match_operand:V2DI 1 "register_operand" "0,x")
          (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE4_2"
  "@
   pcmpgtq\t{%2, %0|%0, %2}
   vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssecmp")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
6182
;; AVX2 vpcmpgt{b,w,d,q}: element-wise signed greater-than on 256-bit
;; integer vectors.  Operand 1 must be a register.
(define_insn "avx2_gt<mode>3"
  [(set (match_operand:VI_256 0 "register_operand" "=x")
        (gt:VI_256
          (match_operand:VI_256 1 "register_operand" "x")
          (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
  "TARGET_AVX2"
  "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ssecmp")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
6194
;; SSE2 pcmpgt{b,w,d} and their VEX forms: element-wise signed
;; greater-than on 128-bit vectors with 8-, 16- or 32-bit elements.
;; Disabled when XOP is enabled.
(define_insn "sse2_gt<mode>3"
  [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
        (gt:VI124_128
          (match_operand:VI124_128 1 "register_operand" "0,x")
          (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE2 && !TARGET_XOP"
  "@
   pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
   vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssecmp")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
6209
;; Vector conditional move on 256-bit vectors, selected by an integer
;; comparison (operator 3 applied to operands 4 and 5).  The data mode
;; and the comparison mode may differ but must have the same number of
;; elements.  Expansion is done entirely by ix86_expand_int_vcond.
(define_expand "vcond<V_256:mode><VI_256:mode>"
  [(set (match_operand:V_256 0 "register_operand")
        (if_then_else:V_256
          (match_operator 3 ""
            [(match_operand:VI_256 4 "nonimmediate_operand")
             (match_operand:VI_256 5 "general_operand")])
          (match_operand:V_256 1)
          (match_operand:V_256 2)))]
  "TARGET_AVX2
   && (GET_MODE_NUNITS (<V_256:MODE>mode)
       == GET_MODE_NUNITS (<VI_256:MODE>mode))"
{
  bool expanded;

  expanded = ix86_expand_int_vcond (operands);
  gcc_assert (expanded);
  DONE;
})
6226
;; Vector conditional move on 128-bit vectors, selected by an integer
;; comparison on 8-, 16- or 32-bit elements (operator 3 applied to
;; operands 4 and 5).  The data mode and the comparison mode must have
;; the same number of elements.  Expansion is done entirely by
;; ix86_expand_int_vcond.
(define_expand "vcond<V_128:mode><VI124_128:mode>"
  [(set (match_operand:V_128 0 "register_operand")
        (if_then_else:V_128
          (match_operator 3 ""
            [(match_operand:VI124_128 4 "nonimmediate_operand")
             (match_operand:VI124_128 5 "general_operand")])
          (match_operand:V_128 1)
          (match_operand:V_128 2)))]
  "TARGET_SSE2
   && (GET_MODE_NUNITS (<V_128:MODE>mode)
       == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
{
  bool expanded;

  expanded = ix86_expand_int_vcond (operands);
  gcc_assert (expanded);
  DONE;
})
6243
;; Vector conditional move on the VI8F_128 modes, selected by a V2DI
;; comparison (operator 3 applied to operands 4 and 5).  Requires
;; SSE4.2; expansion is done entirely by ix86_expand_int_vcond.
(define_expand "vcond<VI8F_128:mode>v2di"
  [(set (match_operand:VI8F_128 0 "register_operand")
        (if_then_else:VI8F_128
          (match_operator 3 ""
            [(match_operand:V2DI 4 "nonimmediate_operand")
             (match_operand:V2DI 5 "general_operand")])
          (match_operand:VI8F_128 1)
          (match_operand:VI8F_128 2)))]
  "TARGET_SSE4_2"
{
  bool expanded;

  expanded = ix86_expand_int_vcond (operands);
  gcc_assert (expanded);
  DONE;
})
6258
;; Unsigned-comparison variant of the 256-bit vector conditional move.
;; The data mode and the comparison mode must have the same number of
;; elements.  Expansion is done entirely by ix86_expand_int_vcond.
(define_expand "vcondu<V_256:mode><VI_256:mode>"
  [(set (match_operand:V_256 0 "register_operand")
        (if_then_else:V_256
          (match_operator 3 ""
            [(match_operand:VI_256 4 "nonimmediate_operand")
             (match_operand:VI_256 5 "nonimmediate_operand")])
          (match_operand:V_256 1 "general_operand")
          (match_operand:V_256 2 "general_operand")))]
  "TARGET_AVX2
   && (GET_MODE_NUNITS (<V_256:MODE>mode)
       == GET_MODE_NUNITS (<VI_256:MODE>mode))"
{
  bool expanded;

  expanded = ix86_expand_int_vcond (operands);
  gcc_assert (expanded);
  DONE;
})
6275
;; Unsigned-comparison variant of the 128-bit vector conditional move,
;; with the comparison on 8-, 16- or 32-bit elements.  The data mode and
;; the comparison mode must have the same number of elements.  Expansion
;; is done entirely by ix86_expand_int_vcond.
(define_expand "vcondu<V_128:mode><VI124_128:mode>"
  [(set (match_operand:V_128 0 "register_operand")
        (if_then_else:V_128
          (match_operator 3 ""
            [(match_operand:VI124_128 4 "nonimmediate_operand")
             (match_operand:VI124_128 5 "nonimmediate_operand")])
          (match_operand:V_128 1 "general_operand")
          (match_operand:V_128 2 "general_operand")))]
  "TARGET_SSE2
   && (GET_MODE_NUNITS (<V_128:MODE>mode)
       == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
{
  bool expanded;

  expanded = ix86_expand_int_vcond (operands);
  gcc_assert (expanded);
  DONE;
})
6292
;; Unsigned-comparison variant of the vector conditional move on the
;; VI8F_128 modes, selected by a V2DI comparison.  Requires SSE4.2;
;; expansion is done entirely by ix86_expand_int_vcond.
(define_expand "vcondu<VI8F_128:mode>v2di"
  [(set (match_operand:VI8F_128 0 "register_operand")
        (if_then_else:VI8F_128
          (match_operator 3 ""
            [(match_operand:V2DI 4 "nonimmediate_operand")
             (match_operand:V2DI 5 "nonimmediate_operand")])
          (match_operand:VI8F_128 1 "general_operand")
          (match_operand:VI8F_128 2 "general_operand")))]
  "TARGET_SSE4_2"
{
  bool expanded;

  expanded = ix86_expand_int_vcond (operands);
  gcc_assert (expanded);
  DONE;
})
6307
;; Modes handled by the variable vec_perm expander.  The six 128-bit
;; modes are unconditional; each 256-bit mode is enabled only with AVX2.
(define_mode_iterator VEC_PERM_AVX2
  [V16QI V8HI V4SI V2DI V4SF V2DF
   (V32QI "TARGET_AVX2")
   (V16HI "TARGET_AVX2")
   (V8SI "TARGET_AVX2")
   (V4DI "TARGET_AVX2")
   (V8SF "TARGET_AVX2")
   (V4DF "TARGET_AVX2")])
6313
;; Variable permutation: operands 1 and 2 are the data vectors and
;; operand 3 is an integer vector of the matching <sseintvecmode>, held
;; in a register.  Enabled with SSSE3, AVX or XOP; the expansion is done
;; by ix86_expand_vec_perm.
(define_expand "vec_perm<mode>"
  [(match_operand:VEC_PERM_AVX2 0 "register_operand")
   (match_operand:VEC_PERM_AVX2 1 "register_operand")
   (match_operand:VEC_PERM_AVX2 2 "register_operand")
   (match_operand:<sseintvecmode> 3 "register_operand")]
  "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
{
  ix86_expand_vec_perm (operands);
  DONE;
})
6324
;; Modes handled by the constant vec_perm expander, each with the ISA
;; level that enables it.
(define_mode_iterator VEC_PERM_CONST
  [(V4SF "TARGET_SSE")
   (V4SI "TARGET_SSE")
   (V2DF "TARGET_SSE")
   (V2DI "TARGET_SSE")
   (V16QI "TARGET_SSE2")
   (V8HI "TARGET_SSE2")
   (V8SF "TARGET_AVX")
   (V4DF "TARGET_AVX")
   (V8SI "TARGET_AVX")
   (V4DI "TARGET_AVX")
   (V32QI "TARGET_AVX2")
   (V16HI "TARGET_AVX2")])
6332
;; Constant permutation: operand 3 carries no predicate here and is
;; handed as-is to ix86_expand_vec_perm_const.  The expander FAILs when
;; that helper returns false.
(define_expand "vec_perm_const<mode>"
  [(match_operand:VEC_PERM_CONST 0 "register_operand")
   (match_operand:VEC_PERM_CONST 1 "register_operand")
   (match_operand:VEC_PERM_CONST 2 "register_operand")
   (match_operand:<sseintvecmode> 3)]
  ""
{
  if (!ix86_expand_vec_perm_const (operands))
    FAIL;
  DONE;
})
6345
6346;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6347;;
6348;; Parallel bitwise logical operations
6349;;
6350;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6351
;; One's complement of an integer vector, expressed as an XOR of operand 1
;; with a vector constant whose elements are all constm1_rtx.  The
;; preparation code builds that constant with one element per vector unit,
;; forces it into a register and stores it as operand 2 for the
;; (match_dup 2) in the template.  No DONE: the template itself is emitted.
(define_expand "one_cmpl<mode>2"
  [(set (match_operand:VI 0 "register_operand")
	(xor:VI (match_operand:VI 1 "nonimmediate_operand")
		(match_dup 2)))]
  "TARGET_SSE"
{
  const int nunits = GET_MODE_NUNITS (<MODE>mode);
  rtvec all_ones = rtvec_alloc (nunits);
  rtx mask;
  int elt;

  /* Fill every element slot with -1; the fill order is irrelevant.  */
  for (elt = nunits - 1; elt >= 0; elt--)
    RTVEC_ELT (all_ones, elt) = constm1_rtx;

  mask = gen_rtx_CONST_VECTOR (<MODE>mode, all_ones);
  operands[2] = force_reg (<MODE>mode, mask);
})
6366
;; Named expander for the vector and-not operation
;; operand 0 = (~operand 1) & operand 2 over the VI_AVX2 modes, enabled
;; under TARGET_SSE2.  It has no preparation code, so it simply emits the
;; template as written.
6367(define_expand "<sse2_avx2>_andnot<mode>3"
6368  [(set (match_operand:VI_AVX2 0 "register_operand")
6369	(and:VI_AVX2
6370	  (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand"))
6371	  (match_operand:VI_AVX2 2 "nonimmediate_operand")))]
6372  "TARGET_SSE2")
6373
;; Vector and-not insn: operand 0 = (~operand 1) & operand 2 for every VI
;; mode, enabled under TARGET_SSE.
;;
;; Output code: the mnemonic is chosen from the insn's "mode" attribute
;; ("pandn" for TI/OI, "andnps" for V4SF/V8SF; any other value is
;; unreachable).  In that switch MODE_OI falls through into MODE_TI and
;; MODE_V8SF into MODE_V4SF, so the wider case checks both assertions.
;; The operand layout is chosen from the alternative: 0 is the two-operand
;; form with operand 1 tied to the destination, 1 is the "v"-prefixed
;; three-operand form.  The "%%" sequences survive snprintf as single "%"
;; operand markers.  The text is assembled in a function-local static
;; buffer, so the returned string is overwritten by the next call.
;;
;; "mode" attribute, tested in order: <ssePSmode> when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL; <sseinsnmode> under AVX2; under
;; AVX, V8SF for modes wider than 16 bytes and <sseinsnmode> otherwise;
;; V4SF when SSE2 is unavailable or the function is optimized for size;
;; <sseinsnmode> in all remaining cases.
6374(define_insn "*andnot<mode>3"
6375  [(set (match_operand:VI 0 "register_operand" "=x,x")
6376	(and:VI
6377	  (not:VI (match_operand:VI 1 "register_operand" "0,x"))
6378	  (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))]
6379  "TARGET_SSE"
6380{
6381  static char buf[32];
6382  const char *ops;
6383  const char *tmp;
6384
6385  switch (get_attr_mode (insn))
6386    {
6387    case MODE_OI:
6388      gcc_assert (TARGET_AVX2);
6389    case MODE_TI:
6390      gcc_assert (TARGET_SSE2);
6391
6392      tmp = "pandn";
6393      break;
6394
6395   case MODE_V8SF:
6396      gcc_assert (TARGET_AVX);
6397   case MODE_V4SF:
6398      gcc_assert (TARGET_SSE);
6399
6400      tmp = "andnps";
6401      break;
6402
6403   default:
6404      gcc_unreachable ();
6405   }
6406
6407  switch (which_alternative)
6408    {
6409    case 0:
6410      ops = "%s\t{%%2, %%0|%%0, %%2}";
6411      break;
6412    case 1:
6413      ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
6414      break;
6415    default:
6416      gcc_unreachable ();
6417    }
6418
6419  snprintf (buf, sizeof (buf), ops, tmp);
6420  return buf;
6421}
6422  [(set_attr "isa" "noavx,avx")
6423   (set_attr "type" "sselog")
6424   (set (attr "prefix_data16")
6425     (if_then_else
6426       (and (eq_attr "alternative" "0")
6427	    (eq_attr "mode" "TI"))
6428       (const_string "1")
6429       (const_string "*")))
6430   (set_attr "prefix" "orig,vex")
6431   (set (attr "mode")
6432	(cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
6433		 (const_string "<ssePSmode>")
6434	       (match_test "TARGET_AVX2")
6435		 (const_string "<sseinsnmode>")
6436	       (match_test "TARGET_AVX")
6437		 (if_then_else
6438		   (match_test "GET_MODE_SIZE (<MODE>mode) > 16")
6439		   (const_string "V8SF")
6440		   (const_string "<sseinsnmode>"))
6441	       (ior (not (match_test "TARGET_SSE2"))
6442		    (match_test "optimize_function_for_size_p (cfun)"))
6443		 (const_string "V4SF")
6444	      ]
6445	      (const_string "<sseinsnmode>")))])
6446
;; Expander for the any_logic operations on VI modes, enabled under
;; TARGET_SSE.  Both inputs may be registers, memory or constant vectors
;; (nonimmediate_or_const_vector_operand).  The template is never emitted
;; directly: ix86_expand_vector_logical_operator receives the RTL code,
;; the mode and the operand array, and the expander then ends with DONE.
6447(define_expand "<code><mode>3"
6448  [(set (match_operand:VI 0 "register_operand")
6449	(any_logic:VI
6450	  (match_operand:VI 1 "nonimmediate_or_const_vector_operand")
6451	  (match_operand:VI 2 "nonimmediate_or_const_vector_operand")))]
6452  "TARGET_SSE"
6453{
6454  ix86_expand_vector_logical_operator (<CODE>, <MODE>mode, operands);
6455  DONE;
6456})
6457
;; Insn for the any_logic operations on VI modes.  Besides TARGET_SSE the
;; condition requires ix86_binary_operator_ok for the operands; operand 1
;; is marked commutative ("%").
;;
;; Output code: the mnemonic is chosen from the insn's "mode" attribute
;; ("p<logic>" for TI/OI, "<logic>ps" for V4SF/V8SF; any other value is
;; unreachable).  In that switch MODE_OI falls through into MODE_TI and
;; MODE_V8SF into MODE_V4SF, so the wider case checks both assertions.
;; The operand layout is chosen from the alternative: 0 is the two-operand
;; form with operand 1 tied to the destination, 1 is the "v"-prefixed
;; three-operand form.  The "%%" sequences survive snprintf as single "%"
;; operand markers.  The text is assembled in a function-local static
;; buffer, so the returned string is overwritten by the next call.
;;
;; "mode" attribute, tested in order: <ssePSmode> when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL; <sseinsnmode> under AVX2; under
;; AVX, V8SF for modes wider than 16 bytes and <sseinsnmode> otherwise;
;; V4SF when SSE2 is unavailable or the function is optimized for size;
;; <sseinsnmode> in all remaining cases.
6458(define_insn "*<code><mode>3"
6459  [(set (match_operand:VI 0 "register_operand" "=x,x")
6460	(any_logic:VI
6461	  (match_operand:VI 1 "nonimmediate_operand" "%0,x")
6462	  (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))]
6463  "TARGET_SSE
6464   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6465{
6466  static char buf[32];
6467  const char *ops;
6468  const char *tmp;
6469
6470  switch (get_attr_mode (insn))
6471    {
6472    case MODE_OI:
6473      gcc_assert (TARGET_AVX2);
6474    case MODE_TI:
6475      gcc_assert (TARGET_SSE2);
6476
6477      tmp = "p<logic>";
6478      break;
6479
6480   case MODE_V8SF:
6481      gcc_assert (TARGET_AVX);
6482   case MODE_V4SF:
6483      gcc_assert (TARGET_SSE);
6484
6485      tmp = "<logic>ps";
6486      break;
6487
6488   default:
6489      gcc_unreachable ();
6490   }
6491
6492  switch (which_alternative)
6493    {
6494    case 0:
6495      ops = "%s\t{%%2, %%0|%%0, %%2}";
6496      break;
6497    case 1:
6498      ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
6499      break;
6500    default:
6501      gcc_unreachable ();
6502    }
6503
6504  snprintf (buf, sizeof (buf), ops, tmp);
6505  return buf;
6506}
6507  [(set_attr "isa" "noavx,avx")
6508   (set_attr "type" "sselog")
6509   (set (attr "prefix_data16")
6510     (if_then_else
6511       (and (eq_attr "alternative" "0")
6512	    (eq_attr "mode" "TI"))
6513       (const_string "1")
6514       (const_string "*")))
6515   (set_attr "prefix" "orig,vex")
6516   (set (attr "mode")
6517	(cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
6518		 (const_string "<ssePSmode>")
6519	       (match_test "TARGET_AVX2")
6520		 (const_string "<sseinsnmode>")
6521	       (match_test "TARGET_AVX")
6522		 (if_then_else
6523		   (match_test "GET_MODE_SIZE (<MODE>mode) > 16")
6524		   (const_string "V8SF")
6525		   (const_string "<sseinsnmode>"))
6526	       (ior (not (match_test "TARGET_SSE2"))
6527		    (match_test "optimize_function_for_size_p (cfun)"))
6528		 (const_string "V4SF")
6529	      ]
6530	      (const_string "<sseinsnmode>")))])
6531
6532;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6533;;
6534;; Parallel integral element swizzling
6535;;
6536;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6537
;; Expander producing a <ssepackmode> result (operand 0) from two
;; VI248_AVX2 inputs, enabled under TARGET_SSE2.  Both inputs are
;; reinterpreted in the result mode with gen_lowpart and handed to
;; ix86_expand_vec_extract_even_odd with a final argument of 0.
;; NOTE(review): the 0 presumably selects the even-numbered narrow elements,
;; i.e. the truncated low halves of each wide element -- confirm against the
;; helper's definition.
6538(define_expand "vec_pack_trunc_<mode>"
6539  [(match_operand:<ssepackmode> 0 "register_operand")
6540   (match_operand:VI248_AVX2 1 "register_operand")
6541   (match_operand:VI248_AVX2 2 "register_operand")]
6542  "TARGET_SSE2"
6543{
6544  rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
6545  rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
6546  ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
6547  DONE;
6548})
6549
;; Pack with signed saturation into a byte-element vector (VI1_AVX2):
;; operands 1 and 2, in the wider <sseunpackmode>, are each
;; signed-saturating truncated (ss_truncate) to half the result and
;; concatenated, operand 1 first.  Alternative 0 is the two-operand
;; packsswb with operand 1 tied to the destination; alternative 1 is the
;; three-operand vpacksswb.
6550(define_insn "<sse2_avx2>_packsswb"
6551  [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
6552	(vec_concat:VI1_AVX2
6553	  (ss_truncate:<ssehalfvecmode>
6554	    (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6555	  (ss_truncate:<ssehalfvecmode>
6556	    (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6557  "TARGET_SSE2"
6558  "@
6559   packsswb\t{%2, %0|%0, %2}
6560   vpacksswb\t{%2, %1, %0|%0, %1, %2}"
6561  [(set_attr "isa" "noavx,avx")
6562   (set_attr "type" "sselog")
6563   (set_attr "prefix_data16" "1,*")
6564   (set_attr "prefix" "orig,vex")
6565   (set_attr "mode" "<sseinsnmode>")])
6566
;; Pack with signed saturation into a 16-bit-element vector (VI2_AVX2):
;; operands 1 and 2, in the wider <sseunpackmode>, are each
;; signed-saturating truncated (ss_truncate) to half the result and
;; concatenated, operand 1 first.  Alternative 0 is the two-operand
;; packssdw with operand 1 tied to the destination; alternative 1 is the
;; three-operand vpackssdw.
6567(define_insn "<sse2_avx2>_packssdw"
6568  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
6569	(vec_concat:VI2_AVX2
6570	  (ss_truncate:<ssehalfvecmode>
6571	    (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6572	  (ss_truncate:<ssehalfvecmode>
6573	    (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6574  "TARGET_SSE2"
6575  "@
6576   packssdw\t{%2, %0|%0, %2}
6577   vpackssdw\t{%2, %1, %0|%0, %1, %2}"
6578  [(set_attr "isa" "noavx,avx")
6579   (set_attr "type" "sselog")
6580   (set_attr "prefix_data16" "1,*")
6581   (set_attr "prefix" "orig,vex")
6582   (set_attr "mode" "<sseinsnmode>")])
6583
;; Pack with unsigned saturation into a byte-element vector (VI1_AVX2):
;; operands 1 and 2, in the wider <sseunpackmode>, are each
;; unsigned-saturating truncated (us_truncate) to half the result and
;; concatenated, operand 1 first.  Alternative 0 is the two-operand
;; packuswb with operand 1 tied to the destination; alternative 1 is the
;; three-operand vpackuswb.
6584(define_insn "<sse2_avx2>_packuswb"
6585  [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
6586	(vec_concat:VI1_AVX2
6587	  (us_truncate:<ssehalfvecmode>
6588	    (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6589	  (us_truncate:<ssehalfvecmode>
6590	    (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6591  "TARGET_SSE2"
6592  "@
6593   packuswb\t{%2, %0|%0, %2}
6594   vpackuswb\t{%2, %1, %0|%0, %1, %2}"
6595  [(set_attr "isa" "noavx,avx")
6596   (set_attr "type" "sselog")
6597   (set_attr "prefix_data16" "1,*")
6598   (set_attr "prefix" "orig,vex")
6599   (set_attr "mode" "<sseinsnmode>")])
6600
;; 256-bit byte interleave (vpunpckhbw), AVX2 only.  The selection works
;; separately on each 16-byte half: bytes 8..15 of operand 1 alternate with
;; bytes 8..15 of operand 2 (indices 40..47 of the concatenation) for the
;; low half of the result, and bytes 24..31 alternate with operand 2's
;; bytes 24..31 (indices 56..63) for the high half.
6601(define_insn "avx2_interleave_highv32qi"
6602  [(set (match_operand:V32QI 0 "register_operand" "=x")
6603	(vec_select:V32QI
6604	  (vec_concat:V64QI
6605	    (match_operand:V32QI 1 "register_operand" "x")
6606	    (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
6607	  (parallel [(const_int 8)  (const_int 40)
6608		     (const_int 9)  (const_int 41)
6609		     (const_int 10) (const_int 42)
6610		     (const_int 11) (const_int 43)
6611		     (const_int 12) (const_int 44)
6612		     (const_int 13) (const_int 45)
6613		     (const_int 14) (const_int 46)
6614		     (const_int 15) (const_int 47)
6615		     (const_int 24) (const_int 56)
6616		     (const_int 25) (const_int 57)
6617		     (const_int 26) (const_int 58)
6618		     (const_int 27) (const_int 59)
6619		     (const_int 28) (const_int 60)
6620		     (const_int 29) (const_int 61)
6621		     (const_int 30) (const_int 62)
6622		     (const_int 31) (const_int 63)])))]
6623  "TARGET_AVX2"
6624  "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6625  [(set_attr "type" "sselog")
6626   (set_attr "prefix" "vex")
6627   (set_attr "mode" "OI")])
6628
;; 128-bit high byte interleave: bytes 8..15 of operand 1 alternate with
;; bytes 8..15 of operand 2 (indices 24..31 of the concatenation).
;; Alternative 0 is the two-operand punpckhbw with operand 1 tied to the
;; destination; alternative 1 is the three-operand vpunpckhbw.
6629(define_insn "vec_interleave_highv16qi"
6630  [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6631	(vec_select:V16QI
6632	  (vec_concat:V32QI
6633	    (match_operand:V16QI 1 "register_operand" "0,x")
6634	    (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
6635	  (parallel [(const_int 8)  (const_int 24)
6636		     (const_int 9)  (const_int 25)
6637		     (const_int 10) (const_int 26)
6638		     (const_int 11) (const_int 27)
6639		     (const_int 12) (const_int 28)
6640		     (const_int 13) (const_int 29)
6641		     (const_int 14) (const_int 30)
6642		     (const_int 15) (const_int 31)])))]
6643  "TARGET_SSE2"
6644  "@
6645   punpckhbw\t{%2, %0|%0, %2}
6646   vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6647  [(set_attr "isa" "noavx,avx")
6648   (set_attr "type" "sselog")
6649   (set_attr "prefix_data16" "1,*")
6650   (set_attr "prefix" "orig,vex")
6651   (set_attr "mode" "TI")])
6652
;; 256-bit byte interleave (vpunpcklbw), AVX2 only.  The selection works
;; separately on each 16-byte half: bytes 0..7 of operand 1 alternate with
;; bytes 0..7 of operand 2 (indices 32..39 of the concatenation) for the
;; low half of the result, and bytes 16..23 alternate with operand 2's
;; bytes 16..23 (indices 48..55) for the high half.
6653(define_insn "avx2_interleave_lowv32qi"
6654  [(set (match_operand:V32QI 0 "register_operand" "=x")
6655	(vec_select:V32QI
6656	  (vec_concat:V64QI
6657	    (match_operand:V32QI 1 "register_operand" "x")
6658	    (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
6659	  (parallel [(const_int 0) (const_int 32)
6660		     (const_int 1) (const_int 33)
6661		     (const_int 2) (const_int 34)
6662		     (const_int 3) (const_int 35)
6663		     (const_int 4) (const_int 36)
6664		     (const_int 5) (const_int 37)
6665		     (const_int 6) (const_int 38)
6666		     (const_int 7) (const_int 39)
6667		     (const_int 16) (const_int 48)
6668		     (const_int 17) (const_int 49)
6669		     (const_int 18) (const_int 50)
6670		     (const_int 19) (const_int 51)
6671		     (const_int 20) (const_int 52)
6672		     (const_int 21) (const_int 53)
6673		     (const_int 22) (const_int 54)
6674		     (const_int 23) (const_int 55)])))]
6675  "TARGET_AVX2"
6676  "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6677  [(set_attr "type" "sselog")
6678   (set_attr "prefix" "vex")
6679   (set_attr "mode" "OI")])
6680
;; 128-bit low byte interleave: bytes 0..7 of operand 1 alternate with
;; bytes 0..7 of operand 2 (indices 16..23 of the concatenation).
;; Alternative 0 is the two-operand punpcklbw with operand 1 tied to the
;; destination; alternative 1 is the three-operand vpunpcklbw.
6681(define_insn "vec_interleave_lowv16qi"
6682  [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6683	(vec_select:V16QI
6684	  (vec_concat:V32QI
6685	    (match_operand:V16QI 1 "register_operand" "0,x")
6686	    (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
6687	  (parallel [(const_int 0) (const_int 16)
6688		     (const_int 1) (const_int 17)
6689		     (const_int 2) (const_int 18)
6690		     (const_int 3) (const_int 19)
6691		     (const_int 4) (const_int 20)
6692		     (const_int 5) (const_int 21)
6693		     (const_int 6) (const_int 22)
6694		     (const_int 7) (const_int 23)])))]
6695  "TARGET_SSE2"
6696  "@
6697   punpcklbw\t{%2, %0|%0, %2}
6698   vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6699  [(set_attr "isa" "noavx,avx")
6700   (set_attr "type" "sselog")
6701   (set_attr "prefix_data16" "1,*")
6702   (set_attr "prefix" "orig,vex")
6703   (set_attr "mode" "TI")])
6704
;; 256-bit 16-bit-element interleave (vpunpckhwd), AVX2 only.  Per 8-element
;; half: elements 4..7 of operand 1 alternate with elements 4..7 of
;; operand 2 (concatenation indices 20..23), then elements 12..15 with
;; operand 2's elements 12..15 (indices 28..31).
6705(define_insn "avx2_interleave_highv16hi"
6706  [(set (match_operand:V16HI 0 "register_operand" "=x")
6707	(vec_select:V16HI
6708	  (vec_concat:V32HI
6709	    (match_operand:V16HI 1 "register_operand" "x")
6710	    (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
6711	  (parallel [(const_int 4) (const_int 20)
6712		     (const_int 5) (const_int 21)
6713		     (const_int 6) (const_int 22)
6714		     (const_int 7) (const_int 23)
6715		     (const_int 12) (const_int 28)
6716		     (const_int 13) (const_int 29)
6717		     (const_int 14) (const_int 30)
6718		     (const_int 15) (const_int 31)])))]
6719  "TARGET_AVX2"
6720  "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6721  [(set_attr "type" "sselog")
6722   (set_attr "prefix" "vex")
6723   (set_attr "mode" "OI")])
6724
;; 128-bit high 16-bit-element interleave: elements 4..7 of operand 1
;; alternate with elements 4..7 of operand 2 (concatenation indices 12..15).
;; Alternative 0 is the two-operand punpckhwd with operand 1 tied to the
;; destination; alternative 1 is the three-operand vpunpckhwd.
6725(define_insn "vec_interleave_highv8hi"
6726  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6727	(vec_select:V8HI
6728	  (vec_concat:V16HI
6729	    (match_operand:V8HI 1 "register_operand" "0,x")
6730	    (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
6731	  (parallel [(const_int 4) (const_int 12)
6732		     (const_int 5) (const_int 13)
6733		     (const_int 6) (const_int 14)
6734		     (const_int 7) (const_int 15)])))]
6735  "TARGET_SSE2"
6736  "@
6737   punpckhwd\t{%2, %0|%0, %2}
6738   vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6739  [(set_attr "isa" "noavx,avx")
6740   (set_attr "type" "sselog")
6741   (set_attr "prefix_data16" "1,*")
6742   (set_attr "prefix" "orig,vex")
6743   (set_attr "mode" "TI")])
6744
;; 256-bit 16-bit-element interleave (vpunpcklwd), AVX2 only.  Per 8-element
;; half: elements 0..3 of operand 1 alternate with elements 0..3 of
;; operand 2 (concatenation indices 16..19), then elements 8..11 with
;; operand 2's elements 8..11 (indices 24..27).
6745(define_insn "avx2_interleave_lowv16hi"
6746  [(set (match_operand:V16HI 0 "register_operand" "=x")
6747	(vec_select:V16HI
6748	  (vec_concat:V32HI
6749	    (match_operand:V16HI 1 "register_operand" "x")
6750	    (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
6751	  (parallel [(const_int 0) (const_int 16)
6752		     (const_int 1) (const_int 17)
6753		     (const_int 2) (const_int 18)
6754		     (const_int 3) (const_int 19)
6755		     (const_int 8) (const_int 24)
6756		     (const_int 9) (const_int 25)
6757		     (const_int 10) (const_int 26)
6758		     (const_int 11) (const_int 27)])))]
6759  "TARGET_AVX2"
6760  "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6761  [(set_attr "type" "sselog")
6762   (set_attr "prefix" "vex")
6763   (set_attr "mode" "OI")])
6764
;; 128-bit low 16-bit-element interleave: elements 0..3 of operand 1
;; alternate with elements 0..3 of operand 2 (concatenation indices 8..11).
;; Alternative 0 is the two-operand punpcklwd with operand 1 tied to the
;; destination; alternative 1 is the three-operand vpunpcklwd.
6765(define_insn "vec_interleave_lowv8hi"
6766  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6767	(vec_select:V8HI
6768	  (vec_concat:V16HI
6769	    (match_operand:V8HI 1 "register_operand" "0,x")
6770	    (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
6771	  (parallel [(const_int 0) (const_int 8)
6772		     (const_int 1) (const_int 9)
6773		     (const_int 2) (const_int 10)
6774		     (const_int 3) (const_int 11)])))]
6775  "TARGET_SSE2"
6776  "@
6777   punpcklwd\t{%2, %0|%0, %2}
6778   vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6779  [(set_attr "isa" "noavx,avx")
6780   (set_attr "type" "sselog")
6781   (set_attr "prefix_data16" "1,*")
6782   (set_attr "prefix" "orig,vex")
6783   (set_attr "mode" "TI")])
6784
;; 256-bit 32-bit-element interleave (vpunpckhdq), AVX2 only.  Per 4-element
;; half: elements 2,3 of operand 1 alternate with elements 2,3 of operand 2
;; (concatenation indices 10,11), then elements 6,7 with operand 2's
;; elements 6,7 (indices 14,15).
6785(define_insn "avx2_interleave_highv8si"
6786  [(set (match_operand:V8SI 0 "register_operand" "=x")
6787	(vec_select:V8SI
6788	  (vec_concat:V16SI
6789	    (match_operand:V8SI 1 "register_operand" "x")
6790	    (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
6791	  (parallel [(const_int 2) (const_int 10)
6792		     (const_int 3) (const_int 11)
6793		     (const_int 6) (const_int 14)
6794		     (const_int 7) (const_int 15)])))]
6795  "TARGET_AVX2"
6796  "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6797  [(set_attr "type" "sselog")
6798   (set_attr "prefix" "vex")
6799   (set_attr "mode" "OI")])
6800
;; 128-bit high 32-bit-element interleave: elements 2,3 of operand 1
;; alternate with elements 2,3 of operand 2 (concatenation indices 6,7).
;; Alternative 0 is the two-operand punpckhdq with operand 1 tied to the
;; destination; alternative 1 is the three-operand vpunpckhdq.
6801(define_insn "vec_interleave_highv4si"
6802  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6803	(vec_select:V4SI
6804	  (vec_concat:V8SI
6805	    (match_operand:V4SI 1 "register_operand" "0,x")
6806	    (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
6807	  (parallel [(const_int 2) (const_int 6)
6808		     (const_int 3) (const_int 7)])))]
6809  "TARGET_SSE2"
6810  "@
6811   punpckhdq\t{%2, %0|%0, %2}
6812   vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6813  [(set_attr "isa" "noavx,avx")
6814   (set_attr "type" "sselog")
6815   (set_attr "prefix_data16" "1,*")
6816   (set_attr "prefix" "orig,vex")
6817   (set_attr "mode" "TI")])
6818
;; 256-bit 32-bit-element interleave (vpunpckldq), AVX2 only.  Per 4-element
;; half: elements 0,1 of operand 1 alternate with elements 0,1 of operand 2
;; (concatenation indices 8,9), then elements 4,5 with operand 2's
;; elements 4,5 (indices 12,13).
6819(define_insn "avx2_interleave_lowv8si"
6820  [(set (match_operand:V8SI 0 "register_operand" "=x")
6821	(vec_select:V8SI
6822	  (vec_concat:V16SI
6823	    (match_operand:V8SI 1 "register_operand" "x")
6824	    (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
6825	  (parallel [(const_int 0) (const_int 8)
6826		     (const_int 1) (const_int 9)
6827		     (const_int 4) (const_int 12)
6828		     (const_int 5) (const_int 13)])))]
6829  "TARGET_AVX2"
6830  "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6831  [(set_attr "type" "sselog")
6832   (set_attr "prefix" "vex")
6833   (set_attr "mode" "OI")])
6834
;; 128-bit low 32-bit-element interleave: elements 0,1 of operand 1
;; alternate with elements 0,1 of operand 2 (concatenation indices 4,5).
;; Alternative 0 is the two-operand punpckldq with operand 1 tied to the
;; destination; alternative 1 is the three-operand vpunpckldq.
6835(define_insn "vec_interleave_lowv4si"
6836  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6837	(vec_select:V4SI
6838	  (vec_concat:V8SI
6839	    (match_operand:V4SI 1 "register_operand" "0,x")
6840	    (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
6841	  (parallel [(const_int 0) (const_int 4)
6842		     (const_int 1) (const_int 5)])))]
6843  "TARGET_SSE2"
6844  "@
6845   punpckldq\t{%2, %0|%0, %2}
6846   vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6847  [(set_attr "isa" "noavx,avx")
6848   (set_attr "type" "sselog")
6849   (set_attr "prefix_data16" "1,*")
6850   (set_attr "prefix" "orig,vex")
6851   (set_attr "mode" "TI")])
6852
;; Full-width high interleave for the 256-bit integer modes (VI_256),
;; AVX2 only.  The avx2_interleave_{low,high}<mode> insns interleave within
;; each 128-bit half separately, so both are computed into temporaries and
;; the final result is assembled from them with avx2_permv2ti on V4DI views
;; of the registers.
;;
;; Operands carry predicates only: constraints have no meaning in a
;; define_expand, and the other expanders in this file do not use them.
(define_expand "vec_interleave_high<mode>"
  [(match_operand:VI_256 0 "register_operand")
   (match_operand:VI_256 1 "register_operand")
   (match_operand:VI_256 2 "nonimmediate_operand")]
 "TARGET_AVX2"
{
  rtx t1 = gen_reg_rtx (<MODE>mode);
  rtx t2 = gen_reg_rtx (<MODE>mode);

  /* Per-half low and high interleaves of the two inputs.  */
  emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
  emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));

  /* Immediate 0x31.  NOTE(review): with the vperm2i128 encoding this picks
     the upper 128 bits of t1 for the low half of the result and the upper
     128 bits of t2 for the high half -- confirm against avx2_permv2ti.  */
  emit_insn (gen_avx2_permv2ti
	     (gen_lowpart (V4DImode, operands[0]),
	      gen_lowpart (V4DImode, t1),
	      gen_lowpart (V4DImode, t2), GEN_INT (1 + (3 << 4))));
  DONE;
})
6869
;; Full-width low interleave for the 256-bit integer modes (VI_256),
;; AVX2 only.  The avx2_interleave_{low,high}<mode> insns interleave within
;; each 128-bit half separately, so both are computed into temporaries and
;; the final result is assembled from them with avx2_permv2ti on V4DI views
;; of the registers.
;;
;; Operands carry predicates only: constraints have no meaning in a
;; define_expand, and the other expanders in this file do not use them.
(define_expand "vec_interleave_low<mode>"
  [(match_operand:VI_256 0 "register_operand")
   (match_operand:VI_256 1 "register_operand")
   (match_operand:VI_256 2 "nonimmediate_operand")]
 "TARGET_AVX2"
{
  rtx t1 = gen_reg_rtx (<MODE>mode);
  rtx t2 = gen_reg_rtx (<MODE>mode);

  /* Per-half low and high interleaves of the two inputs.  */
  emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
  emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));

  /* Immediate 0x20.  NOTE(review): with the vperm2i128 encoding this picks
     the lower 128 bits of t1 for the low half of the result and the lower
     128 bits of t2 for the high half -- confirm against avx2_permv2ti.  */
  emit_insn (gen_avx2_permv2ti
	     (gen_lowpart (V4DImode, operands[0]),
	      gen_lowpart (V4DImode, t1),
	      gen_lowpart (V4DImode, t2), GEN_INT (0 + (2 << 4))));
  DONE;
})
6886
6887;; Modes handled by pinsr patterns.
;; V8HI is available unconditionally (subject to the insn's own condition);
;; V16QI and V4SI additionally require TARGET_SSE4_1, and V2DI requires
;; TARGET_SSE4_1 together with TARGET_64BIT.
6888(define_mode_iterator PINSR_MODE
6889  [(V16QI "TARGET_SSE4_1") V8HI
6890   (V4SI "TARGET_SSE4_1")
6891   (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
6892
;; Name prefix used for the pinsr pattern of each PINSR_MODE mode: "sse2"
;; for V8HI, "sse4_1" for the other three modes.
6893(define_mode_attr sse2p4_1
6894  [(V16QI "sse4_1") (V8HI "sse2")
6895   (V4SI "sse4_1") (V2DI "sse4_1")])
6896
6897;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
;; Insert scalar operand 2 into one element of vector operand 1, expressed
;; as a vec_merge of the broadcast scalar with operand 1 under the mask in
;; operand 3.  The insn condition requires exact_log2 of operand 3 to be a
;; valid element index for the mode; the comparison is done on the value
;; cast to unsigned.  NOTE(review): presumably this also rejects the
;; negative result exact_log2 gives for values that are not a power of two
;; -- confirm against exact_log2's contract.
;;
;; Output code: operand 3 is first replaced by that element index, which
;; becomes the instruction's immediate.  Alternatives 0 (register source)
;; and 1 (memory source) are the legacy forms with operand 1 tied to the
;; destination; 2 and 3 are the corresponding "v"-prefixed three-operand
;; forms.  For a register source whose scalar mode is narrower than SImode
;; the register is printed with %k2; otherwise alternatives 0 and 2 fall
;; through to the templates of alternatives 1 and 3.
;;
;; Attributes: without AVX, V2DI sets prefix_rex, V8HI sets prefix_data16
;; and is the only mode that leaves prefix_extra at "*"; every other
;; case sets prefix_extra to "1".
6898(define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
6899  [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x")
6900	(vec_merge:PINSR_MODE
6901	  (vec_duplicate:PINSR_MODE
6902	    (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m"))
6903	  (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x")
6904	  (match_operand:SI 3 "const_int_operand")))]
6905  "TARGET_SSE2
6906   && ((unsigned) exact_log2 (INTVAL (operands[3]))
6907       < GET_MODE_NUNITS (<MODE>mode))"
6908{
6909  operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6910
6911  switch (which_alternative)
6912    {
6913    case 0:
6914      if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
6915	return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
6916      /* FALLTHRU */
6917    case 1:
6918      return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
6919    case 2:
6920      if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
6921	return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
6922      /* FALLTHRU */
6923    case 3:
6924      return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6925    default:
6926      gcc_unreachable ();
6927    }
6928}
6929  [(set_attr "isa" "noavx,noavx,avx,avx")
6930   (set_attr "type" "sselog")
6931   (set (attr "prefix_rex")
6932     (if_then_else
6933       (and (not (match_test "TARGET_AVX"))
6934	    (eq (const_string "<MODE>mode") (const_string "V2DImode")))
6935       (const_string "1")
6936       (const_string "*")))
6937   (set (attr "prefix_data16")
6938     (if_then_else
6939       (and (not (match_test "TARGET_AVX"))
6940	    (eq (const_string "<MODE>mode") (const_string "V8HImode")))
6941       (const_string "1")
6942       (const_string "*")))
6943   (set (attr "prefix_extra")
6944     (if_then_else
6945       (and (not (match_test "TARGET_AVX"))
6946	    (eq (const_string "<MODE>mode") (const_string "V8HImode")))
6947       (const_string "*")
6948       (const_string "1")))
6949   (set_attr "length_immediate" "1")
6950   (set_attr "prefix" "orig,orig,vex,vex")
6951   (set_attr "mode" "TI")])
6952
;; Extract byte element operand 2 (0..15) of V16QI operand 1 and
;; zero-extend it into an SWI48 general register.  SSE4.1; emits
;; pextrb/vpextrb with the destination printed via %k0.
6953(define_insn "*sse4_1_pextrb_<mode>"
6954  [(set (match_operand:SWI48 0 "register_operand" "=r")
6955	(zero_extend:SWI48
6956	  (vec_select:QI
6957	    (match_operand:V16QI 1 "register_operand" "x")
6958	    (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
6959  "TARGET_SSE4_1"
6960  "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
6961  [(set_attr "type" "sselog")
6962   (set_attr "prefix_extra" "1")
6963   (set_attr "length_immediate" "1")
6964   (set_attr "prefix" "maybe_vex")
6965   (set_attr "mode" "TI")])
6966
;; Store byte element operand 2 (0..15) of V16QI operand 1 directly to a
;; QImode memory destination, with no extension.  SSE4.1; emits
;; pextrb/vpextrb.
6967(define_insn "*sse4_1_pextrb_memory"
6968  [(set (match_operand:QI 0 "memory_operand" "=m")
6969	(vec_select:QI
6970	  (match_operand:V16QI 1 "register_operand" "x")
6971	  (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
6972  "TARGET_SSE4_1"
6973  "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
6974  [(set_attr "type" "sselog")
6975   (set_attr "prefix_extra" "1")
6976   (set_attr "length_immediate" "1")
6977   (set_attr "prefix" "maybe_vex")
6978   (set_attr "mode" "TI")])
6979
;; Extract 16-bit element operand 2 (0..7) of V8HI operand 1 and
;; zero-extend it into an SWI48 general register.  Needs only SSE2; emits
;; pextrw/vpextrw with the destination printed via %k0.
6980(define_insn "*sse2_pextrw_<mode>"
6981  [(set (match_operand:SWI48 0 "register_operand" "=r")
6982	(zero_extend:SWI48
6983	  (vec_select:HI
6984	    (match_operand:V8HI 1 "register_operand" "x")
6985	    (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
6986  "TARGET_SSE2"
6987  "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
6988  [(set_attr "type" "sselog")
6989   (set_attr "prefix_data16" "1")
6990   (set_attr "length_immediate" "1")
6991   (set_attr "prefix" "maybe_vex")
6992   (set_attr "mode" "TI")])
6993
;; Store 16-bit element operand 2 (0..7) of V8HI operand 1 directly to an
;; HImode memory destination.  Unlike the register form above this one
;; requires SSE4.1; emits pextrw/vpextrw.
6994(define_insn "*sse4_1_pextrw_memory"
6995  [(set (match_operand:HI 0 "memory_operand" "=m")
6996	(vec_select:HI
6997	  (match_operand:V8HI 1 "register_operand" "x")
6998	  (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
6999  "TARGET_SSE4_1"
7000  "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
7001  [(set_attr "type" "sselog")
7002   (set_attr "prefix_extra" "1")
7003   (set_attr "length_immediate" "1")
7004   (set_attr "prefix" "maybe_vex")
7005   (set_attr "mode" "TI")])
7006
;; Extract 32-bit element operand 2 (0..3) of V4SI operand 1 into an SImode
;; general register or memory location ("rm").  SSE4.1; emits
;; pextrd/vpextrd.
7007(define_insn "*sse4_1_pextrd"
7008  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
7009	(vec_select:SI
7010	  (match_operand:V4SI 1 "register_operand" "x")
7011	  (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
7012  "TARGET_SSE4_1"
7013  "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
7014  [(set_attr "type" "sselog")
7015   (set_attr "prefix_extra" "1")
7016   (set_attr "length_immediate" "1")
7017   (set_attr "prefix" "maybe_vex")
7018   (set_attr "mode" "TI")])
7019
;; Extract 32-bit element operand 2 (0..3) of V4SI operand 1 and zero-extend
;; it into a DImode general register.  Requires both TARGET_64BIT and
;; SSE4.1; the same pextrd/vpextrd instruction is emitted, with the
;; destination printed via %k0.
7020(define_insn "*sse4_1_pextrd_zext"
7021  [(set (match_operand:DI 0 "register_operand" "=r")
7022	(zero_extend:DI
7023	  (vec_select:SI
7024	    (match_operand:V4SI 1 "register_operand" "x")
7025	    (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]))))]
7026  "TARGET_64BIT && TARGET_SSE4_1"
7027  "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
7028  [(set_attr "type" "sselog")
7029   (set_attr "prefix_extra" "1")
7030   (set_attr "length_immediate" "1")
7031   (set_attr "prefix" "maybe_vex")
7032   (set_attr "mode" "TI")])
7033
7034;; It must come before *vec_extractv2di_1_rex64 since it is preferred.
;; Extract 64-bit element operand 2 (0 or 1) of V2DI operand 1 into a DImode
;; general register or memory location ("rm").  Requires SSE4.1 and
;; TARGET_64BIT; emits pextrq/vpextrq and sets prefix_rex.
7035(define_insn "*sse4_1_pextrq"
7036  [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
7037	(vec_select:DI
7038	  (match_operand:V2DI 1 "register_operand" "x")
7039	  (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
7040  "TARGET_SSE4_1 && TARGET_64BIT"
7041  "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
7042  [(set_attr "type" "sselog")
7043   (set_attr "prefix_rex" "1")
7044   (set_attr "prefix_extra" "1")
7045   (set_attr "length_immediate" "1")
7046   (set_attr "prefix" "maybe_vex")
7047   (set_attr "mode" "TI")])
7048
;; Expander for the 256-bit vpshufd with an 8-bit immediate (operand 2,
;; 0..255).  The immediate is split into four 2-bit selectors, lowest field
;; first; each selector is passed to avx2_pshufd_1 twice, once as-is for
;; elements 0..3 and once biased by 4 for elements 4..7.
(define_expand "avx2_pshufdv3"
  [(match_operand:V8SI 0 "register_operand")
   (match_operand:V8SI 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_0_to_255_operand")]
  "TARGET_AVX2"
{
  const int imm = INTVAL (operands[2]);
  rtx sel[8];
  int i;

  for (i = 0; i < 4; i++)
    {
      /* 2-bit selector number I of the immediate.  */
      int field = (imm >> (2 * i)) & 3;

      sel[i] = GEN_INT (field);
      sel[i + 4] = GEN_INT (field + 4);
    }

  emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
				sel[0], sel[1], sel[2], sel[3],
				sel[4], sel[5], sel[6], sel[7]));
  DONE;
})
7067
;; 256-bit vpshufd as an explicit vec_select.  Operands 2..5 select (from
;; elements 0..3) the first four result elements and operands 6..9 select
;; (from elements 4..7) the last four; the insn condition requires each of
;; operands 6..9 to equal the corresponding operand 2..5 plus 4, so one
;; immediate describes both halves.  The output code packs operands 2..5
;; into that immediate, two bits each with operand 2 in the lowest bits, and
;; stores it back into operand 2 for printing.
(define_insn "avx2_pshufd_1"
  [(set (match_operand:V8SI 0 "register_operand" "=x")
	(vec_select:V8SI
	  (match_operand:V8SI 1 "nonimmediate_operand" "xm")
	  (parallel [(match_operand 2 "const_0_to_3_operand")
		     (match_operand 3 "const_0_to_3_operand")
		     (match_operand 4 "const_0_to_3_operand")
		     (match_operand 5 "const_0_to_3_operand")
		     (match_operand 6 "const_4_to_7_operand")
		     (match_operand 7 "const_4_to_7_operand")
		     (match_operand 8 "const_4_to_7_operand")
		     (match_operand 9 "const_4_to_7_operand")])))]
  "TARGET_AVX2
   && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
   && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
   && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
   && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
{
  /* Assemble the immediate from the four selectors of the first half.  */
  const int imm8 = (INTVAL (operands[2])
		    | (INTVAL (operands[3]) << 2)
		    | (INTVAL (operands[4]) << 4)
		    | (INTVAL (operands[5]) << 6));

  operands[2] = GEN_INT (imm8);
  return "vpshufd\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog1")
   (set_attr "prefix" "vex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "OI")])
7099
;; Expander for the 128-bit pshufd with an integer immediate (operand 2).
;; Only the low eight bits of the immediate are used: they are split into
;; four 2-bit selectors, lowest field first, and passed to sse2_pshufd_1.
(define_expand "sse2_pshufd"
  [(match_operand:V4SI 0 "register_operand")
   (match_operand:V4SI 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_int_operand")]
  "TARGET_SSE2"
{
  const int imm = INTVAL (operands[2]);
  rtx sel[4];
  int i;

  /* sel[I] is 2-bit selector number I of the immediate.  */
  for (i = 0; i < 4; i++)
    sel[i] = GEN_INT ((imm >> (2 * i)) & 3);

  emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
				sel[0], sel[1], sel[2], sel[3]));
  DONE;
})
7114
;; 128-bit pshufd as an explicit vec_select: operands 2..5 (each 0..3) name
;; the source element for result elements 0..3.  The output code packs them
;; into the instruction's immediate, two bits each with operand 2 in the
;; lowest bits, and stores it back into operand 2 for printing.
(define_insn "sse2_pshufd_1"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
	(vec_select:V4SI
	  (match_operand:V4SI 1 "nonimmediate_operand" "xm")
	  (parallel [(match_operand 2 "const_0_to_3_operand")
		     (match_operand 3 "const_0_to_3_operand")
		     (match_operand 4 "const_0_to_3_operand")
		     (match_operand 5 "const_0_to_3_operand")])))]
  "TARGET_SSE2"
{
  /* Assemble the immediate from the four element selectors.  */
  const int imm8 = (INTVAL (operands[2])
		    | (INTVAL (operands[3]) << 2)
		    | (INTVAL (operands[4]) << 4)
		    | (INTVAL (operands[5]) << 6));

  operands[2] = GEN_INT (imm8);
  return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog1")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
7139
;; Expander for the 256-bit vpshuflw with an 8-bit immediate (operand 2,
;; 0..255).  The immediate is split into four 2-bit selectors, lowest field
;; first; each selector is passed to avx2_pshuflw_1 twice, once as-is for
;; elements 0..3 and once biased by 8 for elements 8..11.
(define_expand "avx2_pshuflwv3"
  [(match_operand:V16HI 0 "register_operand")
   (match_operand:V16HI 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_0_to_255_operand")]
  "TARGET_AVX2"
{
  const int imm = INTVAL (operands[2]);
  rtx sel[8];
  int i;

  for (i = 0; i < 4; i++)
    {
      /* 2-bit selector number I of the immediate.  */
      int field = (imm >> (2 * i)) & 3;

      sel[i] = GEN_INT (field);
      sel[i + 4] = GEN_INT (field + 8);
    }

  emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
				 sel[0], sel[1], sel[2], sel[3],
				 sel[4], sel[5], sel[6], sel[7]));
  DONE;
})
7158
;; 256-bit vpshuflw as an explicit vec_select.  Result elements 0..3 are
;; chosen by operands 2..5 (each 0..3) and result elements 8..11 by
;; operands 6..9 (each 8..11); elements 4..7 and 12..15 are copied
;; unchanged.  The insn condition requires each of operands 6..9 to equal
;; the corresponding operand 2..5 plus 8, so one immediate describes both
;; halves.  The output code packs operands 2..5 into that immediate, two
;; bits each with operand 2 in the lowest bits, and stores it back into
;; operand 2 for printing.
(define_insn "avx2_pshuflw_1"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
	(vec_select:V16HI
	  (match_operand:V16HI 1 "nonimmediate_operand" "xm")
	  (parallel [(match_operand 2 "const_0_to_3_operand")
		     (match_operand 3 "const_0_to_3_operand")
		     (match_operand 4 "const_0_to_3_operand")
		     (match_operand 5 "const_0_to_3_operand")
		     (const_int 4)
		     (const_int 5)
		     (const_int 6)
		     (const_int 7)
		     (match_operand 6 "const_8_to_11_operand")
		     (match_operand 7 "const_8_to_11_operand")
		     (match_operand 8 "const_8_to_11_operand")
		     (match_operand 9 "const_8_to_11_operand")
		     (const_int 12)
		     (const_int 13)
		     (const_int 14)
		     (const_int 15)])))]
  "TARGET_AVX2
   && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
   && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
   && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
   && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
{
  /* Assemble the immediate from the four selectors of the first half.  */
  const int imm8 = (INTVAL (operands[2])
		    | (INTVAL (operands[3]) << 2)
		    | (INTVAL (operands[4]) << 4)
		    | (INTVAL (operands[5]) << 6));

  operands[2] = GEN_INT (imm8);
  return "vpshuflw\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "OI")])
7198
;; Expander for the 128-bit pshuflw with an integer immediate (operand 2).
;; Only the low eight bits of the immediate are used: they are split into
;; four 2-bit selectors, lowest field first, and passed to sse2_pshuflw_1.
(define_expand "sse2_pshuflw"
  [(match_operand:V8HI 0 "register_operand")
   (match_operand:V8HI 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_int_operand")]
  "TARGET_SSE2"
{
  const int imm = INTVAL (operands[2]);
  rtx sel[4];
  int i;

  /* sel[I] is 2-bit selector number I of the immediate.  */
  for (i = 0; i < 4; i++)
    sel[i] = GEN_INT ((imm >> (2 * i)) & 3);

  emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
				 sel[0], sel[1], sel[2], sel[3]));
  DONE;
})
7213
;; 128-bit pshuflw as an explicit vec_select: result elements 0..3 are
;; chosen by operands 2..5 (each 0..3) while elements 4..7 are copied
;; unchanged.  The output code packs operands 2..5 into the instruction's
;; immediate, two bits each with operand 2 in the lowest bits, and stores it
;; back into operand 2 for printing.
(define_insn "sse2_pshuflw_1"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
	(vec_select:V8HI
	  (match_operand:V8HI 1 "nonimmediate_operand" "xm")
	  (parallel [(match_operand 2 "const_0_to_3_operand")
		     (match_operand 3 "const_0_to_3_operand")
		     (match_operand 4 "const_0_to_3_operand")
		     (match_operand 5 "const_0_to_3_operand")
		     (const_int 4)
		     (const_int 5)
		     (const_int 6)
		     (const_int 7)])))]
  "TARGET_SSE2"
{
  /* Assemble the immediate from the four element selectors.  */
  const int imm8 = (INTVAL (operands[2])
		    | (INTVAL (operands[3]) << 2)
		    | (INTVAL (operands[4]) << 4)
		    | (INTVAL (operands[5]) << 6));

  operands[2] = GEN_INT (imm8);
  return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
7243
;; Expander for pshufhw on V16HI.  Each 2-bit field of the immediate
;; (operand 2, accepted by const_0_to_255_operand) is handed to
;; avx2_pshufhw_1 twice: once biased by 4 and once biased by 12, giving
;; eight selector operands in total.
7244(define_expand "avx2_pshufhwv3"
7245  [(match_operand:V16HI 0 "register_operand")
7246   (match_operand:V16HI 1 "nonimmediate_operand")
7247   (match_operand:SI 2 "const_0_to_255_operand")]
7248  "TARGET_AVX2"
7249{
7250  int mask = INTVAL (operands[2]);
7251  emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
7252				 GEN_INT (((mask >> 0) & 3) + 4),
7253				 GEN_INT (((mask >> 2) & 3) + 4),
7254				 GEN_INT (((mask >> 4) & 3) + 4),
7255				 GEN_INT (((mask >> 6) & 3) + 4),
7256				 GEN_INT (((mask >> 0) & 3) + 12),
7257				 GEN_INT (((mask >> 2) & 3) + 12),
7258				 GEN_INT (((mask >> 4) & 3) + 12),
7259				 GEN_INT (((mask >> 6) & 3) + 12)));
7260  DONE;
7261})
7262
;; V16HI vec_select with fixed indices 0-3 and 8-11.  Indices 4-7 come from
;; operands 2-5 and indices 12-15 from operands 6-9.  The insn condition
;; requires every operand 6-9 to equal the matching operand 2-5 plus 8, so
;; a single immediate describes both groups.  The output code rebuilds that
;; immediate from operands 2-5 only, removing the bias of 4.
7263(define_insn "avx2_pshufhw_1"
7264  [(set (match_operand:V16HI 0 "register_operand" "=x")
7265	(vec_select:V16HI
7266	  (match_operand:V16HI 1 "nonimmediate_operand" "xm")
7267	  (parallel [(const_int 0)
7268		     (const_int 1)
7269		     (const_int 2)
7270		     (const_int 3)
7271		     (match_operand 2 "const_4_to_7_operand")
7272		     (match_operand 3 "const_4_to_7_operand")
7273		     (match_operand 4 "const_4_to_7_operand")
7274		     (match_operand 5 "const_4_to_7_operand")
7275		     (const_int 8)
7276		     (const_int 9)
7277		     (const_int 10)
7278		     (const_int 11)
7279		     (match_operand 6 "const_12_to_15_operand")
7280		     (match_operand 7 "const_12_to_15_operand")
7281		     (match_operand 8 "const_12_to_15_operand")
7282		     (match_operand 9 "const_12_to_15_operand")])))]
7283  "TARGET_AVX2
7284   && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
7285   && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
7286   && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
7287   && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
7288{
  /* Selector k (minus its bias of 4) occupies bits 2k..2k+1.  */
7289  int mask = 0;
7290  mask |= (INTVAL (operands[2]) - 4) << 0;
7291  mask |= (INTVAL (operands[3]) - 4) << 2;
7292  mask |= (INTVAL (operands[4]) - 4) << 4;
7293  mask |= (INTVAL (operands[5]) - 4) << 6;
7294  operands[2] = GEN_INT (mask);
7295
7296  return "vpshufhw\t{%2, %1, %0|%0, %1, %2}";
7297}
7298  [(set_attr "type" "sselog")
7299   (set_attr "prefix" "vex")
7300   (set_attr "length_immediate" "1")
7301   (set_attr "mode" "OI")])
7302
;; Expander for pshufhw on V8HI.  The immediate (operand 2) is split into
;; four 2-bit fields; each is biased by 4 and passed as a selector operand
;; to sse2_pshufhw_1.
7303(define_expand "sse2_pshufhw"
7304  [(match_operand:V8HI 0 "register_operand")
7305   (match_operand:V8HI 1 "nonimmediate_operand")
7306   (match_operand:SI 2 "const_int_operand")]
7307  "TARGET_SSE2"
7308{
7309  int mask = INTVAL (operands[2]);
7310  emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
7311				 GEN_INT (((mask >> 0) & 3) + 4),
7312				 GEN_INT (((mask >> 2) & 3) + 4),
7313				 GEN_INT (((mask >> 4) & 3) + 4),
7314				 GEN_INT (((mask >> 6) & 3) + 4)));
7315  DONE;
7316})
7317
;; V8HI vec_select with fixed indices 0-3 and with indices 4-7 taken from
;; operands 2-5 (each accepted by const_4_to_7_operand).  The output code
;; subtracts the bias of 4 from each selector, packs the results two bits
;; apiece into operands[2] and emits pshufhw with that immediate.
7318(define_insn "sse2_pshufhw_1"
7319  [(set (match_operand:V8HI 0 "register_operand" "=x")
7320	(vec_select:V8HI
7321	  (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7322	  (parallel [(const_int 0)
7323		     (const_int 1)
7324		     (const_int 2)
7325		     (const_int 3)
7326		     (match_operand 2 "const_4_to_7_operand")
7327		     (match_operand 3 "const_4_to_7_operand")
7328		     (match_operand 4 "const_4_to_7_operand")
7329		     (match_operand 5 "const_4_to_7_operand")])))]
7330  "TARGET_SSE2"
7331{
7332  int mask = 0;
7333  mask |= (INTVAL (operands[2]) - 4) << 0;
7334  mask |= (INTVAL (operands[3]) - 4) << 2;
7335  mask |= (INTVAL (operands[4]) - 4) << 4;
7336  mask |= (INTVAL (operands[5]) - 4) << 6;
7337  operands[2] = GEN_INT (mask);
7338
7339  return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
7340}
7341  [(set_attr "type" "sselog")
7342   (set_attr "prefix_rep" "1")
7343   (set_attr "prefix_data16" "0")
7344   (set_attr "prefix" "maybe_vex")
7345   (set_attr "length_immediate" "1")
7346   (set_attr "mode" "TI")])
7347
;; Expander that builds a V4SI by vec_merge (mask 1) of the duplicated SI
;; operand 1 with operand 2.  The preparation statement sets operand 2 to
;; the V4SI zero constant, so callers supply only operands 0 and 1.
7348(define_expand "sse2_loadd"
7349  [(set (match_operand:V4SI 0 "register_operand")
7350	(vec_merge:V4SI
7351	  (vec_duplicate:V4SI
7352	    (match_operand:SI 1 "nonimmediate_operand"))
7353	  (match_dup 2)
7354	  (const_int 1)))]
7355  "TARGET_SSE"
7356  "operands[2] = CONST0_RTX (V4SImode);")
7357
;; vec_merge (mask 1) of the duplicated SI operand 2 into V4SI operand 1,
;; which is either a register or zero (reg_or_0_operand).  Five
;; alternatives: 0-1 emit %vmovd (isa sse2) with operand 1 constrained to
;; "C"; 2-3 emit movss (isa noavx); 4 emits the three-operand vmovss
;; (isa avx).
7358(define_insn "sse2_loadld"
7359  [(set (match_operand:V4SI 0 "register_operand"       "=x,Yi,x,x,x")
7360	(vec_merge:V4SI
7361	  (vec_duplicate:V4SI
7362	    (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,x"))
7363	  (match_operand:V4SI 1 "reg_or_0_operand"     "C ,C ,C,0,x")
7364	  (const_int 1)))]
7365  "TARGET_SSE"
7366  "@
7367   %vmovd\t{%2, %0|%0, %2}
7368   %vmovd\t{%2, %0|%0, %2}
7369   movss\t{%2, %0|%0, %2}
7370   movss\t{%2, %0|%0, %2}
7371   vmovss\t{%2, %1, %0|%0, %1, %2}"
7372  [(set_attr "isa" "sse2,sse2,noavx,noavx,avx")
7373   (set_attr "type" "ssemov")
7374   (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,vex")
7375   (set_attr "mode" "TI,TI,V4SF,SF,SF")])
7376
;; Extract element 0 of a V4SI register into an SI destination.  The insn
;; itself only emits "#"; it is split after reload, when inter-unit moves
;; are enabled or the destination is memory or not a general register,
;; into a plain SI move whose source is operand 1's hard register viewed
;; in SImode.
7377(define_insn_and_split "sse2_stored"
7378  [(set (match_operand:SI 0 "nonimmediate_operand" "=xm,r")
7379	(vec_select:SI
7380	  (match_operand:V4SI 1 "register_operand" "x,Yi")
7381	  (parallel [(const_int 0)])))]
7382  "TARGET_SSE"
7383  "#"
7384  "&& reload_completed
7385   && (TARGET_INTER_UNIT_MOVES
7386       || MEM_P (operands [0])
7387       || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7388  [(set (match_dup 0) (match_dup 1))]
7389  "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));")
7390
;; Extract element operand 2 (accepted by const_0_to_3_operand) of a V4SI
;; held in offsettable memory into an SI register.  Emits "#" and is split
;; once reload has completed into an SI move from the memory operand
;; re-addressed at byte offset 4 * index.
7391(define_insn_and_split "*vec_ext_v4si_mem"
7392  [(set (match_operand:SI 0 "register_operand" "=r")
7393	(vec_select:SI
7394	  (match_operand:V4SI 1 "memory_operand" "o")
7395	  (parallel [(match_operand 2 "const_0_to_3_operand")])))]
7396  ""
7397  "#"
7398  "reload_completed"
7399  [(const_int 0)]
7400{
7401  int i = INTVAL (operands[2]);
7402
  /* Each SI element is 4 bytes wide.  */
7403  emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
7404  DONE;
7405})
7406
;; Expander generating the RTL for extracting element 0 of a V2DI register
;; into a DI destination.  It has no preparation code; the pattern is
;; emitted as written.
7407(define_expand "sse_storeq"
7408  [(set (match_operand:DI 0 "nonimmediate_operand")
7409	(vec_select:DI
7410	  (match_operand:V2DI 1 "register_operand")
7411	  (parallel [(const_int 0)])))]
7412  "TARGET_SSE")
7413
;; 64-bit-target insn for extracting element 0 of a V2DI into DI.  The two
;; register-source alternatives emit "#" (to be split); the third, with an
;; offsettable memory source and general-register destination, emits a
;; mov{q}.  The condition rejects memory-to-memory forms.
7414(define_insn "*sse2_storeq_rex64"
7415  [(set (match_operand:DI 0 "nonimmediate_operand" "=xm,*r,r")
7416	(vec_select:DI
7417	  (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
7418	  (parallel [(const_int 0)])))]
7419  "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7420  "@
7421   #
7422   #
7423   mov{q}\t{%1, %0|%0, %1}"
7424  [(set_attr "type" "*,*,imov")
7425   (set_attr "mode" "*,*,DI")])
7426
;; Extract element 0 of a V2DI register into an SSE register or memory DI
;; destination.  Only emits "#", so the insn must be split before output.
7427(define_insn "*sse2_storeq"
7428  [(set (match_operand:DI 0 "nonimmediate_operand" "=xm")
7429	(vec_select:DI
7430	  (match_operand:V2DI 1 "register_operand" "x")
7431	  (parallel [(const_int 0)])))]
7432  "TARGET_SSE"
7433  "#")
7434
;; Splitter for element-0 extraction from a V2DI register.  After reload,
;; when inter-unit moves are enabled or the destination is memory or not a
;; general register, the extraction becomes a plain DI move whose source is
;; operand 1's hard register viewed in DImode.
7435(define_split
7436  [(set (match_operand:DI 0 "nonimmediate_operand")
7437	(vec_select:DI
7438	  (match_operand:V2DI 1 "register_operand")
7439	  (parallel [(const_int 0)])))]
7440  "TARGET_SSE
7441   && reload_completed
7442   && (TARGET_INTER_UNIT_MOVES
7443       || MEM_P (operands [0])
7444       || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7445  [(set (match_dup 0) (match_dup 1))]
7446  "operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));")
7447
;; 64-bit-target insn extracting element 1 of a V2DI into DI.  Five
;; alternatives: store to memory with %vmovhps; in-register psrldq /
;; vpsrldq by 8 (noavx / avx); load of the %H1 part of an offsettable
;; memory source with %vmovq into an SSE register or with mov{q} into a
;; general register.  The condition rejects memory-to-memory forms.
7448(define_insn "*vec_extractv2di_1_rex64"
7449  [(set (match_operand:DI 0 "nonimmediate_operand"     "=m,x,x,x,r")
7450	(vec_select:DI
7451	  (match_operand:V2DI 1 "nonimmediate_operand" " x,0,x,o,o")
7452	  (parallel [(const_int 1)])))]
7453  "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7454  "@
7455   %vmovhps\t{%1, %0|%0, %1}
7456   psrldq\t{$8, %0|%0, 8}
7457   vpsrldq\t{$8, %1, %0|%0, %1, 8}
7458   %vmovq\t{%H1, %0|%0, %H1}
7459   mov{q}\t{%H1, %0|%0, %H1}"
7460  [(set_attr "isa" "*,noavx,avx,*,*")
7461   (set_attr "type" "ssemov,sseishft1,sseishft1,ssemov,imov")
7462   (set_attr "length_immediate" "*,1,1,*,*")
7463   (set_attr "memory" "*,none,none,*,*")
7464   (set_attr "prefix" "maybe_vex,orig,vex,maybe_vex,orig")
7465   (set_attr "mode" "V2SF,TI,TI,TI,DI")])
7466
;; 32-bit-target counterpart of the element-1 V2DI extraction.  Six
;; alternatives: %vmovhps store; psrldq / vpsrldq by 8 (sse2_noavx / avx);
;; %vmovq from the %H1 part of a memory source (sse2); and, for noavx,
;; movhlps from a register or movlps from the %H1 part of memory.  The
;; condition rejects memory-to-memory forms.
7467(define_insn "*vec_extractv2di_1"
7468  [(set (match_operand:DI 0 "nonimmediate_operand"     "=m,x,x,x,x,x")
7469	(vec_select:DI
7470	  (match_operand:V2DI 1 "nonimmediate_operand" " x,0,x,o,x,o")
7471	  (parallel [(const_int 1)])))]
7472  "!TARGET_64BIT && TARGET_SSE
7473   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7474  "@
7475   %vmovhps\t{%1, %0|%0, %1}
7476   psrldq\t{$8, %0|%0, 8}
7477   vpsrldq\t{$8, %1, %0|%0, %1, 8}
7478   %vmovq\t{%H1, %0|%0, %H1}
7479   movhlps\t{%1, %0|%0, %1}
7480   movlps\t{%H1, %0|%0, %H1}"
7481  [(set_attr "isa" "*,sse2_noavx,avx,sse2,noavx,noavx")
7482   (set_attr "type" "ssemov,sseishft1,sseishft1,ssemov,ssemov,ssemov")
7483   (set_attr "length_immediate" "*,1,1,*,*,*")
7484   (set_attr "memory" "*,none,none,*,*,*")
7485   (set_attr "prefix" "maybe_vex,orig,vex,maybe_vex,orig,orig")
7486   (set_attr "mode" "V2SF,TI,TI,TI,V4SF,V2SF")])
7487
;; Broadcast an SI value to all four elements of a V4SI.  Alternatives:
;; %vpshufd with immediate 0 from a register (sse2), vbroadcastss from
;; memory (avx), or shufps with immediate 0 on the tied destination
;; (noavx).
7488(define_insn "*vec_dupv4si"
7489  [(set (match_operand:V4SI 0 "register_operand"     "=x,x,x")
7490	(vec_duplicate:V4SI
7491	  (match_operand:SI 1 "nonimmediate_operand" " x,m,0")))]
7492  "TARGET_SSE"
7493  "@
7494   %vpshufd\t{$0, %1, %0|%0, %1, 0}
7495   vbroadcastss\t{%1, %0|%0, %1}
7496   shufps\t{$0, %0, %0|%0, %0, 0}"
7497  [(set_attr "isa" "sse2,avx,noavx")
7498   (set_attr "type" "sselog1,ssemov,sselog1")
7499   (set_attr "length_immediate" "1,0,1")
7500   (set_attr "prefix_extra" "0,1,*")
7501   (set_attr "prefix" "maybe_vex,vex,orig")
7502   (set_attr "mode" "TI,V4SF,V4SF")])
7503
;; Broadcast a DI value to both elements of a V2DI.  Alternatives:
;; punpcklqdq on the tied destination (sse2_noavx), vpunpcklqdq using the
;; %d1 operand form (avx), %vmovddup from memory (sse3), or movlhps on the
;; tied destination (noavx).
7504(define_insn "*vec_dupv2di"
7505  [(set (match_operand:V2DI 0 "register_operand"     "=x,x,x,x")
7506	(vec_duplicate:V2DI
7507	  (match_operand:DI 1 "nonimmediate_operand" " 0,x,m,0")))]
7508  "TARGET_SSE"
7509  "@
7510   punpcklqdq\t%0, %0
7511   vpunpcklqdq\t{%d1, %0|%0, %d1}
7512   %vmovddup\t{%1, %0|%0, %1}
7513   movlhps\t%0, %0"
7514  [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
7515   (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
7516   (set_attr "prefix" "orig,vex,maybe_vex,orig")
7517   (set_attr "mode" "TI,TI,DF,V4SF")])
7518
;; SSE4.1 form of concatenating two SI values into a V2SI.  Seven
;; alternatives: pinsrd / vpinsrd of operand 2 at index 1; punpckldq /
;; vpunpckldq of two SSE registers; %vmovd of operand 1 when operand 2 is
;; constrained to "C"; and two MMX-register ("*y") forms using punpckldq
;; and movd.
7519(define_insn "*vec_concatv2si_sse4_1"
7520  [(set (match_operand:V2SI 0 "register_operand"     "=x, x,x,x, x, *y,*y")
7521	(vec_concat:V2SI
7522	  (match_operand:SI 1 "nonimmediate_operand" " 0, x,0,x,rm,  0,rm")
7523	  (match_operand:SI 2 "vector_move_operand"  "rm,rm,x,x, C,*ym, C")))]
7524  "TARGET_SSE4_1"
7525  "@
7526   pinsrd\t{$1, %2, %0|%0, %2, 1}
7527   vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
7528   punpckldq\t{%2, %0|%0, %2}
7529   vpunpckldq\t{%2, %1, %0|%0, %1, %2}
7530   %vmovd\t{%1, %0|%0, %1}
7531   punpckldq\t{%2, %0|%0, %2}
7532   movd\t{%1, %0|%0, %1}"
7533  [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
7534   (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
7535   (set_attr "prefix_extra" "1,1,*,*,*,*,*")
7536   (set_attr "length_immediate" "1,1,*,*,*,*,*")
7537   (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
7538   (set_attr "mode" "TI,TI,TI,TI,TI,DI,DI")])
7539
;; SSE2 form of concatenating two SI values into a V2SI.  Operand 2 is a
;; register or zero (reg_or_0_operand).  Alternatives 0-1 use SSE registers
;; (punpckldq, or movd when operand 2 is constrained to "C"); alternatives
;; 2-3 are the same two instructions on MMX ("*y") registers.
7540;; ??? In theory we can match memory for the MMX alternative, but allowing
7541;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
7542;; alternatives pretty much forces the MMX alternative to be chosen.
7543(define_insn "*vec_concatv2si_sse2"
7544  [(set (match_operand:V2SI 0 "register_operand"     "=x,x ,*y,*y")
7545	(vec_concat:V2SI
7546	  (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
7547	  (match_operand:SI 2 "reg_or_0_operand"     " x,C ,*y, C")))]
7548  "TARGET_SSE2"
7549  "@
7550   punpckldq\t{%2, %0|%0, %2}
7551   movd\t{%1, %0|%0, %1}
7552   punpckldq\t{%2, %0|%0, %2}
7553   movd\t{%1, %0|%0, %1}"
7554  [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7555   (set_attr "mode" "TI,TI,DI,DI")])
7556
;; SSE-only form of concatenating two SI values into a V2SI.  The SSE
;; register alternatives use unpcklps, or movss from memory when operand 2
;; is constrained to "C"; the MMX ("*y") alternatives use punpckldq and
;; movd.
7557(define_insn "*vec_concatv2si_sse"
7558  [(set (match_operand:V2SI 0 "register_operand"     "=x,x,*y,*y")
7559	(vec_concat:V2SI
7560	  (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
7561	  (match_operand:SI 2 "reg_or_0_operand"     " x,C,*y,C")))]
7562  "TARGET_SSE"
7563  "@
7564   unpcklps\t{%2, %0|%0, %2}
7565   movss\t{%1, %0|%0, %1}
7566   punpckldq\t{%2, %0|%0, %2}
7567   movd\t{%1, %0|%0, %1}"
7568  [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7569   (set_attr "mode" "V4SF,V4SF,DI,DI")])
7570
;; Concatenate two V2SI values into a V4SI.  Register/register forms use
;; punpcklqdq (sse2_noavx), vpunpcklqdq (avx) or movlhps (noavx); forms
;; with operand 2 in memory use movhps (noavx) or vmovhps (avx).
7571(define_insn "*vec_concatv4si"
7572  [(set (match_operand:V4SI 0 "register_operand"       "=x,x,x,x,x")
7573	(vec_concat:V4SI
7574	  (match_operand:V2SI 1 "register_operand"     " 0,x,0,0,x")
7575	  (match_operand:V2SI 2 "nonimmediate_operand" " x,x,x,m,m")))]
7576  "TARGET_SSE"
7577  "@
7578   punpcklqdq\t{%2, %0|%0, %2}
7579   vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7580   movlhps\t{%2, %0|%0, %2}
7581   movhps\t{%2, %0|%0, %2}
7582   vmovhps\t{%2, %1, %0|%0, %1, %2}"
7583  [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
7584   (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
7585   (set_attr "prefix" "orig,vex,orig,orig,vex")
7586   (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
7587
;; 64-bit-target concatenation of two DI values into a V2DI, with nine
;; alternatives: pinsrq / vpinsrq of operand 2 at index 1; %vmovq, %vmovd
;; or movq2dq of operand 1 when operand 2 is constrained to "C";
;; punpcklqdq / vpunpcklqdq for two registers; movhps / vmovhps when
;; operand 2 is in memory.  The "type" and "prefix_rex" attributes are
;; computed per alternative below.
7588;; movd instead of movq is required to handle broken assemblers.
7589(define_insn "*vec_concatv2di_rex64"
7590  [(set (match_operand:V2DI 0 "register_operand"
7591	  "=x,x ,x ,Yi,!x,x,x,x,x")
7592	(vec_concat:V2DI
7593	  (match_operand:DI 1 "nonimmediate_operand"
7594	  " 0,x ,xm,r ,*y,0,x,0,x")
7595	  (match_operand:DI 2 "vector_move_operand"
7596	  "rm,rm,C ,C ,C ,x,x,m,m")))]
7597  "TARGET_64BIT"
7598  "@
7599   pinsrq\t{$1, %2, %0|%0, %2, 1}
7600   vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
7601   %vmovq\t{%1, %0|%0, %1}
7602   %vmovd\t{%1, %0|%0, %1}
7603   movq2dq\t{%1, %0|%0, %1}
7604   punpcklqdq\t{%2, %0|%0, %2}
7605   vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7606   movhps\t{%2, %0|%0, %2}
7607   vmovhps\t{%2, %1, %0|%0, %1, %2}"
7608  [(set_attr "isa" "sse4_noavx,avx,*,*,*,noavx,avx,noavx,avx")
7609   (set (attr "type")
7610     (if_then_else
7611       (eq_attr "alternative" "0,1,5,6")
7612       (const_string "sselog")
7613       (const_string "ssemov")))
7614   (set (attr "prefix_rex")
7615     (if_then_else
7616       (and (eq_attr "alternative" "0,3")
7617	    (not (match_test "TARGET_AVX")))
7618       (const_string "1")
7619       (const_string "*")))
7620   (set_attr "prefix_extra" "1,1,*,*,*,*,*,*,*")
7621   (set_attr "length_immediate" "1,1,*,*,*,*,*,*,*")
7622   (set_attr "prefix" "orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex")
7623   (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,V2SF,V2SF")])
7624
;; 32-bit-target concatenation of two DI values into a V2DI.  Alternatives:
;; %vmovq or movq2dq of operand 1 when operand 2 is constrained to "C";
;; punpcklqdq / vpunpcklqdq or movlhps for two registers; movhps / vmovhps
;; when operand 2 is in memory.
7625(define_insn "vec_concatv2di"
7626  [(set (match_operand:V2DI 0 "register_operand"     "=x,?x,x,x,x,x,x")
7627	(vec_concat:V2DI
7628	  (match_operand:DI 1 "nonimmediate_operand" "xm,*y,0,x,0,0,x")
7629	  (match_operand:DI 2 "vector_move_operand"  " C, C,x,x,x,m,m")))]
7630  "!TARGET_64BIT && TARGET_SSE"
7631  "@
7632   %vmovq\t{%1, %0|%0, %1}
7633   movq2dq\t{%1, %0|%0, %1}
7634   punpcklqdq\t{%2, %0|%0, %2}
7635   vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7636   movlhps\t{%2, %0|%0, %2}
7637   movhps\t{%2, %0|%0, %2}
7638   vmovhps\t{%2, %1, %0|%0, %1, %2}"
7639  [(set_attr "isa" "sse2,sse2,sse2_noavx,avx,noavx,noavx,avx")
7640   (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,ssemov,ssemov")
7641   (set_attr "prefix" "maybe_vex,orig,orig,vex,orig,orig,vex")
7642   (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF,V2SF")])
7643
;; Expander for the vec_unpacks_lo pattern over the VI124_AVX2 modes.  It
;; delegates entirely to ix86_expand_sse_unpack with flags (false, false).
;; NOTE(review): the flags are presumably (unsigned_p, high_p) -- confirm
;; against the helper's definition.
7644(define_expand "vec_unpacks_lo_<mode>"
7645  [(match_operand:<sseunpackmode> 0 "register_operand")
7646   (match_operand:VI124_AVX2 1 "register_operand")]
7647  "TARGET_SSE2"
7648  "ix86_expand_sse_unpack (operands[0], operands[1], false, false); DONE;")
7649
;; Expander for the vec_unpacks_hi pattern over the VI124_AVX2 modes.  It
;; delegates entirely to ix86_expand_sse_unpack with flags (false, true).
;; NOTE(review): the flags are presumably (unsigned_p, high_p) -- confirm
;; against the helper's definition.
7650(define_expand "vec_unpacks_hi_<mode>"
7651  [(match_operand:<sseunpackmode> 0 "register_operand")
7652   (match_operand:VI124_AVX2 1 "register_operand")]
7653  "TARGET_SSE2"
7654  "ix86_expand_sse_unpack (operands[0], operands[1], false, true); DONE;")
7655
;; Expander for the vec_unpacku_lo pattern over the VI124_AVX2 modes.  It
;; delegates entirely to ix86_expand_sse_unpack with flags (true, false).
;; NOTE(review): the flags are presumably (unsigned_p, high_p) -- confirm
;; against the helper's definition.
7656(define_expand "vec_unpacku_lo_<mode>"
7657  [(match_operand:<sseunpackmode> 0 "register_operand")
7658   (match_operand:VI124_AVX2 1 "register_operand")]
7659  "TARGET_SSE2"
7660  "ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;")
7661
;; Expander for the vec_unpacku_hi pattern over the VI124_AVX2 modes.  It
;; delegates entirely to ix86_expand_sse_unpack with flags (true, true).
;; NOTE(review): the flags are presumably (unsigned_p, high_p) -- confirm
;; against the helper's definition.
7662(define_expand "vec_unpacku_hi_<mode>"
7663  [(match_operand:<sseunpackmode> 0 "register_operand")
7664   (match_operand:VI124_AVX2 1 "register_operand")]
7665  "TARGET_SSE2"
7666  "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;")
7667
7668;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7669;;
7670;; Miscellaneous
7671;;
7672;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7673
;; Expander for unsigned average over the VI12_AVX2 modes, expressed as
;; truncate ((zext (op1) + zext (op2) + op3) >> 1) with the arithmetic
;; done in <ssedoublemode>.  The preparation code sets operand 3 to the
;; all-ones-element constant CONST1_RTX and then lets
;; ix86_fixup_binary_operands_no_copy adjust the operands for PLUS.
;; NOTE(review): operand 3 is created in <MODE>mode although it is added
;; inside a <ssedoublemode> plus -- confirm this mode mismatch is intended.
7674(define_expand "<sse2_avx2>_uavg<mode>3"
7675  [(set (match_operand:VI12_AVX2 0 "register_operand")
7676	(truncate:VI12_AVX2
7677	  (lshiftrt:<ssedoublemode>
7678	    (plus:<ssedoublemode>
7679	      (plus:<ssedoublemode>
7680		(zero_extend:<ssedoublemode>
7681		  (match_operand:VI12_AVX2 1 "nonimmediate_operand"))
7682		(zero_extend:<ssedoublemode>
7683		  (match_operand:VI12_AVX2 2 "nonimmediate_operand")))
7684	      (match_dup 3))
7685	    (const_int 1))))]
7686  "TARGET_SSE2"
7687{
7688  operands[3] = CONST1_RTX(<MODE>mode);
7689  ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
7690})
7691
;; Insn matching the unsigned-average RTL
;; truncate ((zext (op1) + zext (op2) + 1) >> 1) and emitting pavg (noavx,
;; two-operand) or vpavg (avx, three-operand).  Operand 1 is marked
;; commutative ("%"); the condition additionally requires
;; ix86_binary_operator_ok for PLUS.
;; NOTE(review): operand 3 is declared in VI12_AVX2 mode although it is an
;; operand of a <ssedoublemode> plus -- confirm this mode mismatch is
;; intended.
7692(define_insn "*<sse2_avx2>_uavg<mode>3"
7693  [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,x")
7694	(truncate:VI12_AVX2
7695	  (lshiftrt:<ssedoublemode>
7696	    (plus:<ssedoublemode>
7697	      (plus:<ssedoublemode>
7698		(zero_extend:<ssedoublemode>
7699		  (match_operand:VI12_AVX2 1 "nonimmediate_operand" "%0,x"))
7700		(zero_extend:<ssedoublemode>
7701		  (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,xm")))
7702	      (match_operand:VI12_AVX2 3 "const1_operand"))
7703	    (const_int 1))))]
7704  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
7705  "@
7706   pavg<ssemodesuffix>\t{%2, %0|%0, %2}
7707   vpavg<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
7708  [(set_attr "isa" "noavx,avx")
7709   (set_attr "type" "sseiadd")
7710   (set_attr "prefix_data16" "1,*")
7711   (set_attr "prefix" "orig,vex")
7712   (set_attr "mode" "<sseinsnmode>")])
7713
;; psadbw / vpsadbw over the VI8_AVX2 modes, modelled as an opaque
;; UNSPEC_PSADBW of two <ssebytemode> operands rather than as explicit
;; arithmetic RTL (see the remark below).
7714;; The correct representation for this is absolutely enormous, and
7715;; surely not generally useful.
7716(define_insn "<sse2_avx2>_psadbw"
7717  [(set (match_operand:VI8_AVX2 0 "register_operand" "=x,x")
7718	(unspec:VI8_AVX2
7719	  [(match_operand:<ssebytemode> 1 "register_operand" "0,x")
7720	   (match_operand:<ssebytemode> 2 "nonimmediate_operand" "xm,xm")]
7721	  UNSPEC_PSADBW))]
7722  "TARGET_SSE2"
7723  "@
7724   psadbw\t{%2, %0|%0, %2}
7725   vpsadbw\t{%2, %1, %0|%0, %1, %2}"
7726  [(set_attr "isa" "noavx,avx")
7727   (set_attr "type" "sseiadd")
7728   (set_attr "atom_unit" "simul")
7729   (set_attr "prefix_data16" "1,*")
7730   (set_attr "prefix" "orig,vex")
7731   (set_attr "mode" "<sseinsnmode>")])
7732
;; movmskps / movmskpd family over the VF modes: an UNSPEC_MOVMSK of one
;; vector register producing an SI result in a general register.
7733(define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
7734  [(set (match_operand:SI 0 "register_operand" "=r")
7735	(unspec:SI
7736	  [(match_operand:VF 1 "register_operand" "x")]
7737	  UNSPEC_MOVMSK))]
7738  "TARGET_SSE"
7739  "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
7740  [(set_attr "type" "ssemov")
7741   (set_attr "prefix" "maybe_vex")
7742   (set_attr "mode" "<MODE>")])
7743
;; AVX2 vpmovmskb: UNSPEC_MOVMSK of a V32QI register producing an SI result
;; in a general register.
7744(define_insn "avx2_pmovmskb"
7745  [(set (match_operand:SI 0 "register_operand" "=r")
7746	(unspec:SI [(match_operand:V32QI 1 "register_operand" "x")]
7747		   UNSPEC_MOVMSK))]
7748  "TARGET_AVX2"
7749  "vpmovmskb\t{%1, %0|%0, %1}"
7750  [(set_attr "type" "ssemov")
7751   (set_attr "prefix" "vex")
7752   (set_attr "mode" "DI")])
7753
;; SSE2 pmovmskb: UNSPEC_MOVMSK of a V16QI register producing an SI result
;; in a general register.
7754(define_insn "sse2_pmovmskb"
7755  [(set (match_operand:SI 0 "register_operand" "=r")
7756	(unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
7757		   UNSPEC_MOVMSK))]
7758  "TARGET_SSE2"
7759  "%vpmovmskb\t{%1, %0|%0, %1}"
7760  [(set_attr "type" "ssemov")
7761   (set_attr "prefix_data16" "1")
7762   (set_attr "prefix" "maybe_vex")
7763   (set_attr "mode" "SI")])
7764
;; Expander for maskmovdqu: the V16QI memory destination (operand 0) is set
;; to an UNSPEC_MASKMOV of two V16QI registers and the destination's own
;; previous contents (match_dup 0).  No preparation code.
7765(define_expand "sse2_maskmovdqu"
7766  [(set (match_operand:V16QI 0 "memory_operand")
7767	(unspec:V16QI [(match_operand:V16QI 1 "register_operand")
7768		       (match_operand:V16QI 2 "register_operand")
7769		       (match_dup 0)]
7770		      UNSPEC_MASKMOV))]
7771  "TARGET_SSE2")
7772
;; Insn for maskmovdqu.  The destination is memory addressed by operand 0,
;; a pointer-mode register constrained to "D"; only operands 1 and 2 appear
;; in the assembler template, the address being implicit.  When Pmode
;; differs from word_mode the output code writes an "addr32" prefix to the
;; assembly stream before returning the template.
7773(define_insn "*sse2_maskmovdqu"
7774  [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
7775	(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
7776		       (match_operand:V16QI 2 "register_operand" "x")
7777		       (mem:V16QI (match_dup 0))]
7778		      UNSPEC_MASKMOV))]
7779  "TARGET_SSE2"
7780{
7781  /* We can't use %^ here due to ASM_OUTPUT_OPCODE processing
7782     that requires %v to be at the beginning of the opcode name.  */
7783  if (Pmode != word_mode)
7784    fputs ("\taddr32", asm_out_file);
7785  return "%vmaskmovdqu\t{%2, %1|%1, %2}";
7786}
7787  [(set_attr "type" "ssemov")
7788   (set_attr "prefix_data16" "1")
7789   (set (attr "length_address")
7790     (symbol_ref ("Pmode != word_mode")))
7791   ;; The implicit %rdi operand confuses default length_vex computation.
7792   (set (attr "length_vex")
7793     (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
7794   (set_attr "prefix" "maybe_vex")
7795   (set_attr "mode" "TI")])
7796
;; ldmxcsr: volatile unspec (UNSPECV_LDMXCSR) taking one SI memory operand.
7797(define_insn "sse_ldmxcsr"
7798  [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
7799		    UNSPECV_LDMXCSR)]
7800  "TARGET_SSE"
7801  "%vldmxcsr\t%0"
7802  [(set_attr "type" "sse")
7803   (set_attr "atom_sse_attr" "mxcsr")
7804   (set_attr "prefix" "maybe_vex")
7805   (set_attr "memory" "load")])
7806
;; stmxcsr: sets an SI memory operand from a volatile unspec
;; (UNSPECV_STMXCSR) that has no real inputs.
7807(define_insn "sse_stmxcsr"
7808  [(set (match_operand:SI 0 "memory_operand" "=m")
7809	(unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
7810  "TARGET_SSE"
7811  "%vstmxcsr\t%0"
7812  [(set_attr "type" "sse")
7813   (set_attr "atom_sse_attr" "mxcsr")
7814   (set_attr "prefix" "maybe_vex")
7815   (set_attr "memory" "store")])
7816
;; clflush: volatile unspec (UNSPECV_CLFLUSH) of an address operand, which
;; is printed with the %a modifier.
7817(define_insn "sse2_clflush"
7818  [(unspec_volatile [(match_operand 0 "address_operand" "p")]
7819		    UNSPECV_CLFLUSH)]
7820  "TARGET_SSE2"
7821  "clflush\t%a0"
7822  [(set_attr "type" "sse")
7823   (set_attr "atom_sse_attr" "fence")
7824   (set_attr "memory" "unknown")])
7825
;; mwait: volatile unspec (UNSPECV_MWAIT) of two SI registers pinned by
;; constraint to "c" (operand 0) and "a" (operand 1).  The template has no
;; explicit operands; the length is fixed at 3.
7826;; As per AMD and Intel ISA manuals, the first operand is extensions
7827;; and it goes to %ecx. The second operand received is hints and it goes
7828;; to %eax.
7829(define_insn "sse3_mwait"
7830  [(unspec_volatile [(match_operand:SI 0 "register_operand" "c")
7831		     (match_operand:SI 1 "register_operand" "a")]
7832		    UNSPECV_MWAIT)]
7833  "TARGET_SSE3"
7834;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
7835;; Since 32bit register operands are implicitly zero extended to 64bit,
7836;; we only need to set up 32bit registers.
7837  "mwait"
7838  [(set_attr "length" "3")])
7839
;; monitor: volatile unspec (UNSPECV_MONITOR) of a pointer-mode register
;; pinned to "a" and two SI registers pinned to "c" and "d".  The template
;; has no explicit operands; the length is 3, plus 1 when Pmode differs
;; from word_mode.
7840(define_insn "sse3_monitor_<mode>"
7841  [(unspec_volatile [(match_operand:P 0 "register_operand" "a")
7842		     (match_operand:SI 1 "register_operand" "c")
7843		     (match_operand:SI 2 "register_operand" "d")]
7844		    UNSPECV_MONITOR)]
7845  "TARGET_SSE3"
7846;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
7847;; RCX and RDX are used.  Since 32bit register operands are implicitly
7848;; zero extended to 64bit, we only need to set up 32bit registers.
7849  "%^monitor"
7850  [(set (attr "length")
7851     (symbol_ref ("(Pmode != word_mode) + 3")))])
7852
7853;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7854;;
7855;; SSSE3 instructions
7856;;
7857;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7858
;; Code iterator over the four element operations used by the
;; ph<plusminus_mnemonic>w patterns: plus, ss_plus, minus and ss_minus.
7859(define_code_iterator ssse3_plusminus [plus ss_plus minus ss_minus])
7860
;; AVX2 horizontal add/subtract of adjacent HI element pairs on 256-bit
;; vectors (operand 1 a register, operand 2 a register or memory).
;; The RTL is laid out per 128-bit half of the destination, matching the
;; lane-wise behaviour of the 256-bit instruction: the low V8HI holds the
;; pair results of elements 0-7 of operand 1 followed by those of elements
;; 0-7 of operand 2; the high V8HI holds the pair results of elements 8-15
;; of operand 1 followed by those of elements 8-15 of operand 2.
;; Describing it as "all of operand 1, then all of operand 2" would give
;; RTL semantics that differ from what the hardware computes.
7861(define_insn "avx2_ph<plusminus_mnemonic>wv16hi3"
7862  [(set (match_operand:V16HI 0 "register_operand" "=x")
7863	(vec_concat:V16HI
7864	  (vec_concat:V8HI
7865	    (vec_concat:V4HI
7866	      (vec_concat:V2HI
7867		(ssse3_plusminus:HI
7868		  (vec_select:HI
7869		    (match_operand:V16HI 1 "register_operand" "x")
7870		    (parallel [(const_int 0)]))
7871		  (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7872		(ssse3_plusminus:HI
7873		  (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7874		  (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7875	      (vec_concat:V2HI
7876		(ssse3_plusminus:HI
7877		  (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
7878		  (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
7879		(ssse3_plusminus:HI
7880		  (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
7881		  (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
7882	    (vec_concat:V4HI
7883	      (vec_concat:V2HI
7884		(ssse3_plusminus:HI
7885		  (vec_select:HI
7886		    (match_operand:V16HI 2 "nonimmediate_operand" "xm")
7887		    (parallel [(const_int 0)]))
7888		  (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7889		(ssse3_plusminus:HI
7890		  (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7891		  (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
7892	      (vec_concat:V2HI
7893		(ssse3_plusminus:HI
7894		  (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
7895		  (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
7896		(ssse3_plusminus:HI
7897		  (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
7898		  (vec_select:HI (match_dup 2) (parallel [(const_int 7)]))))))
7899	  (vec_concat:V8HI
7900	    (vec_concat:V4HI
7901	      (vec_concat:V2HI
7902		(ssse3_plusminus:HI
7903		  (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
7904		  (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
7905		(ssse3_plusminus:HI
7906		  (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
7907		  (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
7908	      (vec_concat:V2HI
7909		(ssse3_plusminus:HI
7910		  (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
7911		  (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
7912		(ssse3_plusminus:HI
7913		  (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
7914		  (vec_select:HI (match_dup 1) (parallel [(const_int 15)])))))
7915	    (vec_concat:V4HI
7916	      (vec_concat:V2HI
7917		(ssse3_plusminus:HI
7918		  (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
7919		  (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
7920		(ssse3_plusminus:HI
7921		  (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
7922		  (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
7923	      (vec_concat:V2HI
7924		(ssse3_plusminus:HI
7925		  (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
7926		  (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
7927		(ssse3_plusminus:HI
7928		  (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
7929		  (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
7930  "TARGET_AVX2"
7931  "vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
7932  [(set_attr "type" "sseiadd")
7933   (set_attr "prefix_extra" "1")
7934   (set_attr "prefix" "vex")
7935   (set_attr "mode" "OI")])
7936
;; SSSE3 horizontal add/subtract of adjacent HI element pairs on V8HI.
;; The low V4HI of the result holds the four pair results of operand 1,
;; the high V4HI the four pair results of operand 2.  Alternative 0 is the
;; two-operand form with operand 1 tied to the destination (noavx);
;; alternative 1 is the three-operand v-prefixed form (avx).
7937(define_insn "ssse3_ph<plusminus_mnemonic>wv8hi3"
7938  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7939	(vec_concat:V8HI
7940	  (vec_concat:V4HI
7941	    (vec_concat:V2HI
7942	      (ssse3_plusminus:HI
7943		(vec_select:HI
7944		  (match_operand:V8HI 1 "register_operand" "0,x")
7945		  (parallel [(const_int 0)]))
7946		(vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7947	      (ssse3_plusminus:HI
7948		(vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7949		(vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7950	    (vec_concat:V2HI
7951	      (ssse3_plusminus:HI
7952		(vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
7953		(vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
7954	      (ssse3_plusminus:HI
7955		(vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
7956		(vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
7957	  (vec_concat:V4HI
7958	    (vec_concat:V2HI
7959	      (ssse3_plusminus:HI
7960		(vec_select:HI
7961		  (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
7962		  (parallel [(const_int 0)]))
7963		(vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7964	      (ssse3_plusminus:HI
7965		(vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7966		(vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
7967	    (vec_concat:V2HI
7968	      (ssse3_plusminus:HI
7969		(vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
7970		(vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
7971	      (ssse3_plusminus:HI
7972		(vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
7973		(vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
7974  "TARGET_SSSE3"
7975  "@
7976   ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}
7977   vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
7978  [(set_attr "isa" "noavx,avx")
7979   (set_attr "type" "sseiadd")
7980   (set_attr "atom_unit" "complex")
7981   (set_attr "prefix_data16" "1,*")
7982   (set_attr "prefix_extra" "1")
7983   (set_attr "prefix" "orig,vex")
7984   (set_attr "mode" "TI")])
7985
;; MMX-register ("y") form of the horizontal add/subtract on V4HI.  The
;; low V2HI of the result holds the two pair results of operand 1 (tied to
;; the destination), the high V2HI the two pair results of operand 2.
;; prefix_rex is computed from x86_extended_reg_mentioned_p.
7986(define_insn "ssse3_ph<plusminus_mnemonic>wv4hi3"
7987  [(set (match_operand:V4HI 0 "register_operand" "=y")
7988	(vec_concat:V4HI
7989	  (vec_concat:V2HI
7990	    (ssse3_plusminus:HI
7991	      (vec_select:HI
7992		(match_operand:V4HI 1 "register_operand" "0")
7993		(parallel [(const_int 0)]))
7994	      (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7995	    (ssse3_plusminus:HI
7996	      (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7997	      (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7998	  (vec_concat:V2HI
7999	    (ssse3_plusminus:HI
8000	      (vec_select:HI
8001		(match_operand:V4HI 2 "nonimmediate_operand" "ym")
8002		(parallel [(const_int 0)]))
8003	      (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8004	    (ssse3_plusminus:HI
8005	      (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8006	      (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8007  "TARGET_SSSE3"
8008  "ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}"
8009  [(set_attr "type" "sseiadd")
8010   (set_attr "atom_unit" "complex")
8011   (set_attr "prefix_extra" "1")
8012   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8013   (set_attr "mode" "DI")])
8014
;; AVX2 horizontal add/subtract of adjacent SI element pairs on 256-bit
;; vectors (operand 1 a register, operand 2 a register or memory).
;; The RTL is laid out per 128-bit half of the destination, matching the
;; lane-wise behaviour of the 256-bit instruction: the low V4SI holds the
;; pair results of elements 0-3 of operand 1 followed by those of elements
;; 0-3 of operand 2; the high V4SI holds the pair results of elements 4-7
;; of operand 1 followed by those of elements 4-7 of operand 2.
;; Describing it as "all of operand 1, then all of operand 2" would give
;; RTL semantics that differ from what the hardware computes.
8015(define_insn "avx2_ph<plusminus_mnemonic>dv8si3"
8016  [(set (match_operand:V8SI 0 "register_operand" "=x")
8017	(vec_concat:V8SI
8018	  (vec_concat:V4SI
8019	    (vec_concat:V2SI
8020	      (plusminus:SI
8021		(vec_select:SI
8022		  (match_operand:V8SI 1 "register_operand" "x")
8023		  (parallel [(const_int 0)]))
8024		(vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8025	      (plusminus:SI
8026		(vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8027		(vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8028	    (vec_concat:V2SI
8029	      (plusminus:SI
8030		(vec_select:SI
8031		  (match_operand:V8SI 2 "nonimmediate_operand" "xm")
8032		  (parallel [(const_int 0)]))
8033		(vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8034	      (plusminus:SI
8035		(vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8036		(vec_select:SI (match_dup 2) (parallel [(const_int 3)])))))
8037	  (vec_concat:V4SI
8038	    (vec_concat:V2SI
8039	      (plusminus:SI
8040		(vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
8041		(vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
8042	      (plusminus:SI
8043		(vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
8044		(vec_select:SI (match_dup 1) (parallel [(const_int 7)]))))
8045	    (vec_concat:V2SI
8046	      (plusminus:SI
8047		(vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
8048		(vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
8049	      (plusminus:SI
8050		(vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
8051		(vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
8052  "TARGET_AVX2"
8053  "vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
8054  [(set_attr "type" "sseiadd")
8055   (set_attr "prefix_extra" "1")
8056   (set_attr "prefix" "vex")
8057   (set_attr "mode" "OI")])
8058
;; 128-bit horizontal add/subtract of adjacent SImode pairs.
;; Result elements 0-1 are built from pairs (0,1) and (2,3) of operand 1,
;; result elements 2-3 from the same pairs of operand 2.
;; Alternative 0 (isa noavx) is the two-operand form with operand 1 tied
;; to the destination; alternative 1 (isa avx) is the three-operand
;; VEX form.
8059(define_insn "ssse3_ph<plusminus_mnemonic>dv4si3"
8060  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
8061	(vec_concat:V4SI
8062	  (vec_concat:V2SI
8063	    (plusminus:SI
8064	      (vec_select:SI
8065		(match_operand:V4SI 1 "register_operand" "0,x")
8066		(parallel [(const_int 0)]))
8067	      (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8068	    (plusminus:SI
8069	      (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8070	      (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8071	  (vec_concat:V2SI
8072	    (plusminus:SI
8073	      (vec_select:SI
8074		(match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
8075		(parallel [(const_int 0)]))
8076	      (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8077	    (plusminus:SI
8078	      (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8079	      (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8080  "TARGET_SSSE3"
8081  "@
8082   ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}
8083   vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
8084  [(set_attr "isa" "noavx,avx")
8085   (set_attr "type" "sseiadd")
8086   (set_attr "atom_unit" "complex")
8087   (set_attr "prefix_data16" "1,*")
8088   (set_attr "prefix_extra" "1")
8089   (set_attr "prefix" "orig,vex")
8090   (set_attr "mode" "TI")])
8091
;; 64-bit (MMX register class "y") horizontal add/subtract.
;; Result element 0 combines elements 0 and 1 of operand 1; result
;; element 1 combines elements 0 and 1 of operand 2.  Operand 1 is tied
;; to the destination.  prefix_rex is computed per insn by
;; x86_extended_reg_mentioned_p.
8092(define_insn "ssse3_ph<plusminus_mnemonic>dv2si3"
8093  [(set (match_operand:V2SI 0 "register_operand" "=y")
8094	(vec_concat:V2SI
8095	  (plusminus:SI
8096	    (vec_select:SI
8097	      (match_operand:V2SI 1 "register_operand" "0")
8098	      (parallel [(const_int 0)]))
8099	    (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8100	  (plusminus:SI
8101	    (vec_select:SI
8102	      (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8103	      (parallel [(const_int 0)]))
8104	    (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8105  "TARGET_SSSE3"
8106  "ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}"
8107  [(set_attr "type" "sseiadd")
8108   (set_attr "atom_unit" "complex")
8109   (set_attr "prefix_extra" "1")
8110   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8111   (set_attr "mode" "DI")])
8112
;; 256-bit multiply-and-add of byte pairs with signed saturation.
;; For each of the 16 result halfwords i:
;;   result[i] = ss_plus (zext (op1[2i])   * sext (op2[2i]),
;;                        zext (op1[2i+1]) * sext (op2[2i+1]))
;; i.e. operand 1 bytes are treated as unsigned and operand 2 bytes as
;; signed.  The first mult selects the even byte indices, the second
;; the odd ones.
8113(define_insn "avx2_pmaddubsw256"
8114  [(set (match_operand:V16HI 0 "register_operand" "=x")
8115	(ss_plus:V16HI
8116	  (mult:V16HI
8117	    (zero_extend:V16HI
8118	      (vec_select:V16QI
8119		(match_operand:V32QI 1 "register_operand" "x")
8120		(parallel [(const_int 0) (const_int 2)
8121			   (const_int 4) (const_int 6)
8122			   (const_int 8) (const_int 10)
8123			   (const_int 12) (const_int 14)
8124			   (const_int 16) (const_int 18)
8125			   (const_int 20) (const_int 22)
8126			   (const_int 24) (const_int 26)
8127			   (const_int 28) (const_int 30)])))
8128	    (sign_extend:V16HI
8129	      (vec_select:V16QI
8130		(match_operand:V32QI 2 "nonimmediate_operand" "xm")
8131		(parallel [(const_int 0) (const_int 2)
8132			   (const_int 4) (const_int 6)
8133			   (const_int 8) (const_int 10)
8134			   (const_int 12) (const_int 14)
8135			   (const_int 16) (const_int 18)
8136			   (const_int 20) (const_int 22)
8137			   (const_int 24) (const_int 26)
8138			   (const_int 28) (const_int 30)]))))
8139	  (mult:V16HI
8140	    (zero_extend:V16HI
8141	      (vec_select:V16QI (match_dup 1)
8142		(parallel [(const_int 1) (const_int 3)
8143			   (const_int 5) (const_int 7)
8144			   (const_int 9) (const_int 11)
8145			   (const_int 13) (const_int 15)
8146			   (const_int 17) (const_int 19)
8147			   (const_int 21) (const_int 23)
8148			   (const_int 25) (const_int 27)
8149			   (const_int 29) (const_int 31)])))
8150	    (sign_extend:V16HI
8151	      (vec_select:V16QI (match_dup 2)
8152		(parallel [(const_int 1) (const_int 3)
8153			   (const_int 5) (const_int 7)
8154			   (const_int 9) (const_int 11)
8155			   (const_int 13) (const_int 15)
8156			   (const_int 17) (const_int 19)
8157			   (const_int 21) (const_int 23)
8158			   (const_int 25) (const_int 27)
8159			   (const_int 29) (const_int 31)]))))))]
8160  "TARGET_AVX2"
8161  "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
8162  [(set_attr "type" "sseiadd")
8163   (set_attr "prefix_extra" "1")
8164   (set_attr "prefix" "vex")
8165   (set_attr "mode" "OI")])
8166
;; 128-bit multiply-and-add of byte pairs with signed saturation.
;; For each of the 8 result halfwords i:
;;   result[i] = ss_plus (zext (op1[2i])   * sext (op2[2i]),
;;                        zext (op1[2i+1]) * sext (op2[2i+1]))
;; Alternative 0 is the two-operand form (operand 1 tied to the
;; destination, isa noavx); alternative 1 is the VEX three-operand form.
8167(define_insn "ssse3_pmaddubsw128"
8168  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8169	(ss_plus:V8HI
8170	  (mult:V8HI
8171	    (zero_extend:V8HI
8172	      (vec_select:V8QI
8173		(match_operand:V16QI 1 "register_operand" "0,x")
8174		(parallel [(const_int 0) (const_int 2)
8175			   (const_int 4) (const_int 6)
8176			   (const_int 8) (const_int 10)
8177			   (const_int 12) (const_int 14)])))
8178	    (sign_extend:V8HI
8179	      (vec_select:V8QI
8180		(match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")
8181		(parallel [(const_int 0) (const_int 2)
8182			   (const_int 4) (const_int 6)
8183			   (const_int 8) (const_int 10)
8184			   (const_int 12) (const_int 14)]))))
8185	  (mult:V8HI
8186	    (zero_extend:V8HI
8187	      (vec_select:V8QI (match_dup 1)
8188		(parallel [(const_int 1) (const_int 3)
8189			   (const_int 5) (const_int 7)
8190			   (const_int 9) (const_int 11)
8191			   (const_int 13) (const_int 15)])))
8192	    (sign_extend:V8HI
8193	      (vec_select:V8QI (match_dup 2)
8194		(parallel [(const_int 1) (const_int 3)
8195			   (const_int 5) (const_int 7)
8196			   (const_int 9) (const_int 11)
8197			   (const_int 13) (const_int 15)]))))))]
8198  "TARGET_SSSE3"
8199  "@
8200   pmaddubsw\t{%2, %0|%0, %2}
8201   vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
8202  [(set_attr "isa" "noavx,avx")
8203   (set_attr "type" "sseiadd")
8204   (set_attr "atom_unit" "simul")
8205   (set_attr "prefix_data16" "1,*")
8206   (set_attr "prefix_extra" "1")
8207   (set_attr "prefix" "orig,vex")
8208   (set_attr "mode" "TI")])
8209
;; 64-bit (MMX register class "y") multiply-and-add of byte pairs with
;; signed saturation.  For each of the 4 result halfwords i:
;;   result[i] = ss_plus (zext (op1[2i])   * sext (op2[2i]),
;;                        zext (op1[2i+1]) * sext (op2[2i+1]))
;; Operand 1 is tied to the destination.
8210(define_insn "ssse3_pmaddubsw"
8211  [(set (match_operand:V4HI 0 "register_operand" "=y")
8212	(ss_plus:V4HI
8213	  (mult:V4HI
8214	    (zero_extend:V4HI
8215	      (vec_select:V4QI
8216		(match_operand:V8QI 1 "register_operand" "0")
8217		(parallel [(const_int 0) (const_int 2)
8218			   (const_int 4) (const_int 6)])))
8219	    (sign_extend:V4HI
8220	      (vec_select:V4QI
8221		(match_operand:V8QI 2 "nonimmediate_operand" "ym")
8222		(parallel [(const_int 0) (const_int 2)
8223			   (const_int 4) (const_int 6)]))))
8224	  (mult:V4HI
8225	    (zero_extend:V4HI
8226	      (vec_select:V4QI (match_dup 1)
8227		(parallel [(const_int 1) (const_int 3)
8228			   (const_int 5) (const_int 7)])))
8229	    (sign_extend:V4HI
8230	      (vec_select:V4QI (match_dup 2)
8231		(parallel [(const_int 1) (const_int 3)
8232			   (const_int 5) (const_int 7)]))))))]
8233  "TARGET_SSSE3"
8234  "pmaddubsw\t{%2, %0|%0, %2}"
8235  [(set_attr "type" "sseiadd")
8236   (set_attr "atom_unit" "simul")
8237   (set_attr "prefix_extra" "1")
8238   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8239   (set_attr "mode" "DI")])
8240
;; Halfword vector modes for the pmulhrsw expander: V4HI and V8HI are
;; always included, V16HI only when TARGET_AVX2 holds.
8241(define_mode_iterator PMULHRSW
8242  [V4HI V8HI (V16HI "TARGET_AVX2")])
8243
;; Expander for the rounding, scaling high-part multiply:
;;   result = truncate ((((sext (op1) * sext (op2)) >> 14) + 1) >> 1)
;; computed in the double-width vector mode.  Operand 3 is not supplied
;; by the caller; the preparation code sets it to the constant-1 vector
;; and then canonicalizes the two multiply inputs via
;; ix86_fixup_binary_operands_no_copy.
;; The condition is TARGET_SSSE3 because the iterator covers the V4HI and
;; V8HI forms, whose matching insns only require SSSE3; the V16HI entry
;; carries its own TARGET_AVX2 gate inside the PMULHRSW iterator.
8244(define_expand "<ssse3_avx2>_pmulhrsw<mode>3"
8245  [(set (match_operand:PMULHRSW 0 "register_operand")
8246	(truncate:PMULHRSW
8247	  (lshiftrt:<ssedoublemode>
8248	    (plus:<ssedoublemode>
8249	      (lshiftrt:<ssedoublemode>
8250		(mult:<ssedoublemode>
8251		  (sign_extend:<ssedoublemode>
8252		    (match_operand:PMULHRSW 1 "nonimmediate_operand"))
8253		  (sign_extend:<ssedoublemode>
8254		    (match_operand:PMULHRSW 2 "nonimmediate_operand")))
8255		(const_int 14))
8256	      (match_dup 3))
8257	    (const_int 1))))]
8258  "TARGET_SSSE3"
8259{
8260  operands[3] = CONST1_RTX(<MODE>mode);
8261  ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
8262})
8263
;; Matcher for the SSE/AVX pmulhrsw forms over the VI2_AVX2 modes:
;;   result = truncate ((((sext (op1) * sext (op2)) >> 14) + 1) >> 1)
;; Operand 3 must satisfy const1_operand.  The "%" on operand 1 marks
;; operands 1 and 2 as commutative.  Besides TARGET_SSSE3 the insn
;; condition requires ix86_binary_operator_ok.
;; Alternative 0 is the two-operand noavx form, alternative 1 the VEX
;; three-operand form.
8264(define_insn "*<ssse3_avx2>_pmulhrsw<mode>3"
8265  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
8266	(truncate:VI2_AVX2
8267	  (lshiftrt:<ssedoublemode>
8268	    (plus:<ssedoublemode>
8269	      (lshiftrt:<ssedoublemode>
8270		(mult:<ssedoublemode>
8271		  (sign_extend:<ssedoublemode>
8272		    (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x"))
8273		  (sign_extend:<ssedoublemode>
8274		    (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))
8275		(const_int 14))
8276	      (match_operand:VI2_AVX2 3 "const1_operand"))
8277	    (const_int 1))))]
8278  "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
8279  "@
8280   pmulhrsw\t{%2, %0|%0, %2}
8281   vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
8282  [(set_attr "isa" "noavx,avx")
8283   (set_attr "type" "sseimul")
8284   (set_attr "prefix_data16" "1,*")
8285   (set_attr "prefix_extra" "1")
8286   (set_attr "prefix" "orig,vex")
8287   (set_attr "mode" "<sseinsnmode>")])
8288
;; 64-bit (MMX register class "y") matcher for pmulhrsw on V4HI:
;;   result = truncate ((((sext (op1) * sext (op2)) >> 14) + 1) >> 1)
;; with the intermediate arithmetic in V4SI.  Operand 3 must satisfy
;; const1_operand; operands 1 and 2 are commutative ("%").
8289(define_insn "*ssse3_pmulhrswv4hi3"
8290  [(set (match_operand:V4HI 0 "register_operand" "=y")
8291	(truncate:V4HI
8292	  (lshiftrt:V4SI
8293	    (plus:V4SI
8294	      (lshiftrt:V4SI
8295		(mult:V4SI
8296		  (sign_extend:V4SI
8297		    (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
8298		  (sign_extend:V4SI
8299		    (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
8300		(const_int 14))
8301	      (match_operand:V4HI 3 "const1_operand"))
8302	    (const_int 1))))]
8303  "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
8304  "pmulhrsw\t{%2, %0|%0, %2}"
8305  [(set_attr "type" "sseimul")
8306   (set_attr "prefix_extra" "1")
8307   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8308   (set_attr "mode" "DI")])
8309
;; Byte shuffle over the VI1_AVX2 modes, modelled as an opaque unspec
;; (UNSPEC_PSHUFB) of the data operand 1 and the control operand 2.
;; Alternative 0 is the two-operand noavx form (operand 1 tied to the
;; destination); alternative 1 is the VEX three-operand form.
8310(define_insn "<ssse3_avx2>_pshufb<mode>3"
8311  [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
8312	(unspec:VI1_AVX2
8313	  [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
8314	   (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")]
8315	  UNSPEC_PSHUFB))]
8316  "TARGET_SSSE3"
8317  "@
8318   pshufb\t{%2, %0|%0, %2}
8319   vpshufb\t{%2, %1, %0|%0, %1, %2}"
8320  [(set_attr "isa" "noavx,avx")
8321   (set_attr "type" "sselog1")
8322   (set_attr "prefix_data16" "1,*")
8323   (set_attr "prefix_extra" "1")
8324   (set_attr "prefix" "orig,vex")
8325   (set_attr "btver2_decode" "vector,vector")
8326   (set_attr "mode" "<sseinsnmode>")])
8327
;; 64-bit (MMX register class "y") byte shuffle, modelled as
;; UNSPEC_PSHUFB with operand 1 tied to the destination.
;; The ';' after the output template starts an (empty) .md comment and
;; has no effect on the pattern.
8328(define_insn "ssse3_pshufbv8qi3"
8329  [(set (match_operand:V8QI 0 "register_operand" "=y")
8330	(unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
8331		      (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
8332		     UNSPEC_PSHUFB))]
8333  "TARGET_SSSE3"
8334  "pshufb\t{%2, %0|%0, %2}";
8335  [(set_attr "type" "sselog1")
8336   (set_attr "prefix_extra" "1")
8337   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8338   (set_attr "mode" "DI")])
8339
;; psign over the VI124_AVX2 modes, modelled as an opaque unspec
;; (UNSPEC_PSIGN) of operands 1 and 2.  The mnemonic suffix comes from
;; <ssemodesuffix>.  Alternative 0 is the two-operand noavx form,
;; alternative 1 the VEX three-operand form.
8340(define_insn "<ssse3_avx2>_psign<mode>3"
8341  [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
8342	(unspec:VI124_AVX2
8343	  [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
8344	   (match_operand:VI124_AVX2 2 "nonimmediate_operand" "xm,xm")]
8345	  UNSPEC_PSIGN))]
8346  "TARGET_SSSE3"
8347  "@
8348   psign<ssemodesuffix>\t{%2, %0|%0, %2}
8349   vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8350  [(set_attr "isa" "noavx,avx")
8351   (set_attr "type" "sselog1")
8352   (set_attr "prefix_data16" "1,*")
8353   (set_attr "prefix_extra" "1")
8354   (set_attr "prefix" "orig,vex")
8355   (set_attr "mode" "<sseinsnmode>")])
8356
;; 64-bit (MMX register class "y") psign over the MMXMODEI modes,
;; modelled as UNSPEC_PSIGN with operand 1 tied to the destination.
;; The mnemonic suffix comes from <mmxvecsize>.  The ';' after the
;; output template starts an (empty) .md comment.
8357(define_insn "ssse3_psign<mode>3"
8358  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
8359	(unspec:MMXMODEI
8360	  [(match_operand:MMXMODEI 1 "register_operand" "0")
8361	   (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
8362	  UNSPEC_PSIGN))]
8363  "TARGET_SSSE3"
8364  "psign<mmxvecsize>\t{%2, %0|%0, %2}";
8365  [(set_attr "type" "sselog1")
8366   (set_attr "prefix_extra" "1")
8367   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8368   (set_attr "mode" "DI")])
8369
;; palignr over the SSESCALARMODE modes, modelled as UNSPEC_PALIGNR.
;; Operand 3 is held in the RTL as a value accepted by
;; const_0_to_255_mul_8_operand (presumably a bit count that is a
;; multiple of 8 -- confirm against the predicate); the output code
;; divides it by 8 before printing, so the emitted immediate is that
;; value / 8.  which_alternative selects the two-operand noavx template
;; (0) or the VEX three-operand template (1).
8370(define_insn "<ssse3_avx2>_palignr<mode>"
8371  [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x")
8372	(unspec:SSESCALARMODE
8373	  [(match_operand:SSESCALARMODE 1 "register_operand" "0,x")
8374	   (match_operand:SSESCALARMODE 2 "nonimmediate_operand" "xm,xm")
8375	   (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
8376	  UNSPEC_PALIGNR))]
8377  "TARGET_SSSE3"
8378{
8379  operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
8380
8381  switch (which_alternative)
8382    {
8383    case 0:
8384      return "palignr\t{%3, %2, %0|%0, %2, %3}";
8385    case 1:
8386      return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
8387    default:
8388      gcc_unreachable ();
8389    }
8390}
8391  [(set_attr "isa" "noavx,avx")
8392   (set_attr "type" "sseishft")
8393   (set_attr "atom_unit" "sishuf")
8394   (set_attr "prefix_data16" "1,*")
8395   (set_attr "prefix_extra" "1")
8396   (set_attr "length_immediate" "1")
8397   (set_attr "prefix" "orig,vex")
8398   (set_attr "mode" "<sseinsnmode>")])
8399
;; 64-bit (MMX register class "y") palignr on DImode, modelled as
;; UNSPEC_PALIGNR with operand 1 tied to the destination.  As in the
;; vector variant, operand 3 is divided by 8 in the output code before
;; being printed as the immediate.
8400(define_insn "ssse3_palignrdi"
8401  [(set (match_operand:DI 0 "register_operand" "=y")
8402	(unspec:DI [(match_operand:DI 1 "register_operand" "0")
8403		    (match_operand:DI 2 "nonimmediate_operand" "ym")
8404		    (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
8405		   UNSPEC_PALIGNR))]
8406  "TARGET_SSSE3"
8407{
8408  operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
8409  return "palignr\t{%3, %2, %0|%0, %2, %3}";
8410}
8411  [(set_attr "type" "sseishft")
8412   (set_attr "atom_unit" "sishuf")
8413   (set_attr "prefix_extra" "1")
8414   (set_attr "length_immediate" "1")
8415   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8416   (set_attr "mode" "DI")])
8417
;; Element-wise absolute value over the VI124_AVX2 modes (SSE register
;; class "x").  The template starts with %v and the prefix attribute is
;; maybe_vex, so one pattern serves both the legacy and the VEX
;; encoding.  The mnemonic suffix comes from <ssemodesuffix>.
8418(define_insn "abs<mode>2"
8419  [(set (match_operand:VI124_AVX2 0 "register_operand" "=x")
8420	(abs:VI124_AVX2
8421	  (match_operand:VI124_AVX2 1 "nonimmediate_operand" "xm")))]
8422  "TARGET_SSSE3"
8423  "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
8424  [(set_attr "type" "sselog1")
8425   (set_attr "prefix_data16" "1")
8426   (set_attr "prefix_extra" "1")
8427   (set_attr "prefix" "maybe_vex")
8428   (set_attr "mode" "<sseinsnmode>")])
8429
;; Element-wise absolute value over the MMXMODEI modes (MMX register
;; class "y").  Shares the "abs<mode>2" name template with the SSE
;; pattern but iterates over a different mode set.  The mnemonic suffix
;; comes from <mmxvecsize>.  The ';' after the output template starts an
;; (empty) .md comment.
8430(define_insn "abs<mode>2"
8431  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
8432	(abs:MMXMODEI
8433	  (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
8434  "TARGET_SSSE3"
8435  "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
8436  [(set_attr "type" "sselog1")
8437   (set_attr "prefix_rep" "0")
8438   (set_attr "prefix_extra" "1")
8439   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8440   (set_attr "mode" "DI")])
8441
8442;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8443;;
8444;; AMD SSE4A instructions
8445;;
8446;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8447
;; SSE4A store of a scalar float (MODEF modes) from an SSE register to
;; memory, modelled as UNSPEC_MOVNT.  The destination must be a memory
;; operand; the mnemonic suffix comes from <ssemodesuffix>.
8448(define_insn "sse4a_movnt<mode>"
8449  [(set (match_operand:MODEF 0 "memory_operand" "=m")
8450	(unspec:MODEF
8451	  [(match_operand:MODEF 1 "register_operand" "x")]
8452	  UNSPEC_MOVNT))]
8453  "TARGET_SSE4A"
8454  "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
8455  [(set_attr "type" "ssemov")
8456   (set_attr "mode" "<MODE>")])
8457
;; SSE4A UNSPEC_MOVNT store of element 0 of a 128-bit float vector
;; (VF_128 modes) to a scalar-mode memory operand.  The mnemonic suffix
;; comes from <ssescalarmodesuffix>.
8458(define_insn "sse4a_vmmovnt<mode>"
8459  [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
8460	(unspec:<ssescalarmode>
8461	  [(vec_select:<ssescalarmode>
8462	     (match_operand:VF_128 1 "register_operand" "x")
8463	     (parallel [(const_int 0)]))]
8464	  UNSPEC_MOVNT))]
8465  "TARGET_SSE4A"
8466  "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
8467  [(set_attr "type" "ssemov")
8468   (set_attr "mode" "<ssescalarmode>")])
8469
;; SSE4A extrq, immediate form, modelled as UNSPEC_EXTRQI.  Operand 1 is
;; tied to the destination; operands 2 and 3 are mode-less constants
;; accepted by const_0_to_255_operand and are printed as two immediates
;; (hence length_immediate 2).
8470(define_insn "sse4a_extrqi"
8471  [(set (match_operand:V2DI 0 "register_operand" "=x")
8472	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8473		      (match_operand 2 "const_0_to_255_operand")
8474		      (match_operand 3 "const_0_to_255_operand")]
8475		     UNSPEC_EXTRQI))]
8476  "TARGET_SSE4A"
8477  "extrq\t{%3, %2, %0|%0, %2, %3}"
8478  [(set_attr "type" "sse")
8479   (set_attr "prefix_data16" "1")
8480   (set_attr "length_immediate" "2")
8481   (set_attr "mode" "TI")])
8482
;; SSE4A extrq, register form, modelled as UNSPEC_EXTRQ.  Operand 1 is
;; tied to the destination; operand 2 is a V16QI register operand.
8483(define_insn "sse4a_extrq"
8484  [(set (match_operand:V2DI 0 "register_operand" "=x")
8485	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8486		      (match_operand:V16QI 2 "register_operand" "x")]
8487		     UNSPEC_EXTRQ))]
8488  "TARGET_SSE4A"
8489  "extrq\t{%2, %0|%0, %2}"
8490  [(set_attr "type" "sse")
8491   (set_attr "prefix_data16" "1")
8492   (set_attr "mode" "TI")])
8493
;; SSE4A insertq, immediate form, modelled as UNSPEC_INSERTQI.
;; Operand 1 is tied to the destination, operand 2 is the source
;; register, and operands 3 and 4 are mode-less constants accepted by
;; const_0_to_255_operand, printed as two immediates (length_immediate 2).
8494(define_insn "sse4a_insertqi"
8495  [(set (match_operand:V2DI 0 "register_operand" "=x")
8496	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8497		      (match_operand:V2DI 2 "register_operand" "x")
8498		      (match_operand 3 "const_0_to_255_operand")
8499		      (match_operand 4 "const_0_to_255_operand")]
8500		     UNSPEC_INSERTQI))]
8501  "TARGET_SSE4A"
8502  "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
8503  [(set_attr "type" "sseins")
8504   (set_attr "prefix_data16" "0")
8505   (set_attr "prefix_rep" "1")
8506   (set_attr "length_immediate" "2")
8507   (set_attr "mode" "TI")])
8508
;; SSE4A insertq, register form, modelled as UNSPEC_INSERTQ.  Operand 1
;; is tied to the destination; operand 2 is a V2DI register operand.
8509(define_insn "sse4a_insertq"
8510  [(set (match_operand:V2DI 0 "register_operand" "=x")
8511	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8512		      (match_operand:V2DI 2 "register_operand" "x")]
8513		     UNSPEC_INSERTQ))]
8514  "TARGET_SSE4A"
8515  "insertq\t{%2, %0|%0, %2}"
8516  [(set_attr "type" "sseins")
8517   (set_attr "prefix_data16" "0")
8518   (set_attr "prefix_rep" "1")
8519   (set_attr "mode" "TI")])
8520
8521;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8522;;
8523;; Intel SSE4.1 instructions
8524;;
8525;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8526
;; Immediate-controlled float blend over the VF modes, expressed as a
;; vec_merge: operand 2 is the first vec_merge input, operand 1 the
;; second, and operand 3 is the constant selection mask (bounded by the
;; const_0_to_<blendbits>_operand predicate).
;; Alternative 0 is the two-operand noavx form, alternative 1 the VEX
;; three-operand form.
8527(define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
8528  [(set (match_operand:VF 0 "register_operand" "=x,x")
8529	(vec_merge:VF
8530	  (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
8531	  (match_operand:VF 1 "register_operand" "0,x")
8532	  (match_operand:SI 3 "const_0_to_<blendbits>_operand")))]
8533  "TARGET_SSE4_1"
8534  "@
8535   blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
8536   vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8537  [(set_attr "isa" "noavx,avx")
8538   (set_attr "type" "ssemov")
8539   (set_attr "length_immediate" "1")
8540   (set_attr "prefix_data16" "1,*")
8541   (set_attr "prefix_extra" "1")
8542   (set_attr "prefix" "orig,vex")
8543   (set_attr "mode" "<MODE>")])
8544
;; Variable (register-controlled) float blend over the VF modes,
;; modelled as UNSPEC_BLENDV of operands 1, 2 and the selector operand 3.
;; In the noavx alternative the selector uses the "Yz" constraint
;; (presumably the fixed first SSE register -- confirm in
;; constraints.md); in the avx alternative it may be any "x" register.
8545(define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
8546  [(set (match_operand:VF 0 "register_operand" "=x,x")
8547	(unspec:VF
8548	  [(match_operand:VF 1 "register_operand" "0,x")
8549	   (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
8550	   (match_operand:VF 3 "register_operand" "Yz,x")]
8551	  UNSPEC_BLENDV))]
8552  "TARGET_SSE4_1"
8553  "@
8554   blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
8555   vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8556  [(set_attr "isa" "noavx,avx")
8557   (set_attr "type" "ssemov")
8558   (set_attr "length_immediate" "1")
8559   (set_attr "prefix_data16" "1,*")
8560   (set_attr "prefix_extra" "1")
8561   (set_attr "prefix" "orig,vex")
8562   (set_attr "btver2_decode" "vector,vector")
8563   (set_attr "mode" "<MODE>")])
8564
;; Dot product over the VF modes, modelled as UNSPEC_DP of operands 1
;; and 2 with an 8-bit immediate control (operand 3).  Operand 1 carries
;; the "%" commutative marker.  Alternative 0 is the two-operand noavx
;; form, alternative 1 the VEX three-operand form.
8565(define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
8566  [(set (match_operand:VF 0 "register_operand" "=x,x")
8567	(unspec:VF
8568	  [(match_operand:VF 1 "nonimmediate_operand" "%0,x")
8569	   (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
8570	   (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
8571	  UNSPEC_DP))]
8572  "TARGET_SSE4_1"
8573  "@
8574   dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
8575   vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8576  [(set_attr "isa" "noavx,avx")
8577   (set_attr "type" "ssemul")
8578   (set_attr "length_immediate" "1")
8579   (set_attr "prefix_data16" "1,*")
8580   (set_attr "prefix_extra" "1")
8581   (set_attr "prefix" "orig,vex")
8582   (set_attr "btver2_decode" "vector,vector")
8583   (set_attr "mode" "<MODE>")])
8584
;; movntdqa load over the VI8_AVX2 modes, modelled as UNSPEC_MOVNTDQA.
;; The source must be a memory operand and the destination a register.
;; %v / maybe_vex let one pattern cover the legacy and VEX encodings.
8585(define_insn "<sse4_1_avx2>_movntdqa"
8586  [(set (match_operand:VI8_AVX2 0 "register_operand" "=x")
8587	(unspec:VI8_AVX2 [(match_operand:VI8_AVX2 1 "memory_operand" "m")]
8588		     UNSPEC_MOVNTDQA))]
8589  "TARGET_SSE4_1"
8590  "%vmovntdqa\t{%1, %0|%0, %1}"
8591  [(set_attr "type" "ssemov")
8592   (set_attr "prefix_extra" "1")
8593   (set_attr "prefix" "maybe_vex")
8594   (set_attr "mode" "<sseinsnmode>")])
8595
;; mpsadbw over the VI1_AVX2 modes, modelled as UNSPEC_MPSADBW of
;; operands 1 and 2 with an 8-bit immediate control (operand 3).
;; Alternative 0 is the two-operand noavx form, alternative 1 the VEX
;; three-operand form.
8596(define_insn "<sse4_1_avx2>_mpsadbw"
8597  [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
8598	(unspec:VI1_AVX2
8599	  [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
8600	   (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")
8601	   (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
8602	  UNSPEC_MPSADBW))]
8603  "TARGET_SSE4_1"
8604  "@
8605   mpsadbw\t{%3, %2, %0|%0, %2, %3}
8606   vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8607  [(set_attr "isa" "noavx,avx")
8608   (set_attr "type" "sselog1")
8609   (set_attr "length_immediate" "1")
8610   (set_attr "prefix_extra" "1")
8611   (set_attr "prefix" "orig,vex")
8612   (set_attr "btver2_decode" "vector,vector")
8613   (set_attr "mode" "<sseinsnmode>")])
8614
;; 256-bit pack of two V8SI vectors into V16HI with unsigned saturation.
;; The RTL concatenates us_truncate (operand 1) with us_truncate
;; (operand 2).
;; NOTE(review): this describes "all of operand 1, then all of operand 2";
;; confirm that ordering against the per-128-bit-lane behaviour of the
;; 256-bit hardware instruction.
8615(define_insn "avx2_packusdw"
8616  [(set (match_operand:V16HI 0 "register_operand" "=x")
8617	(vec_concat:V16HI
8618	  (us_truncate:V8HI
8619	    (match_operand:V8SI 1 "register_operand" "x"))
8620	  (us_truncate:V8HI
8621	    (match_operand:V8SI 2 "nonimmediate_operand" "xm"))))]
8622  "TARGET_AVX2"
8623  "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
8624  [(set_attr "type" "sselog")
8625   (set_attr "prefix_extra" "1")
8626   (set_attr "prefix" "vex")
8627   (set_attr "mode" "OI")])
8628
;; 128-bit pack of two V4SI vectors into V8HI with unsigned saturation:
;; the low four halfwords are us_truncate (operand 1), the high four are
;; us_truncate (operand 2).  Alternative 0 is the two-operand noavx
;; form, alternative 1 the VEX three-operand form.
8629(define_insn "sse4_1_packusdw"
8630  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8631	(vec_concat:V8HI
8632	  (us_truncate:V4HI
8633	    (match_operand:V4SI 1 "register_operand" "0,x"))
8634	  (us_truncate:V4HI
8635	    (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))))]
8636  "TARGET_SSE4_1"
8637  "@
8638   packusdw\t{%2, %0|%0, %2}
8639   vpackusdw\t{%2, %1, %0|%0, %1, %2}"
8640  [(set_attr "isa" "noavx,avx")
8641   (set_attr "type" "sselog")
8642   (set_attr "prefix_extra" "1")
8643   (set_attr "prefix" "orig,vex")
8644   (set_attr "mode" "TI")])
8645
;; Variable byte blend over the VI1_AVX2 modes, modelled as
;; UNSPEC_BLENDV of operands 1, 2 and the selector operand 3.  In the
;; noavx alternative the selector uses the "Yz" constraint; in the avx
;; alternative it may be any "x" register.  length_immediate is only
;; set for the avx alternative ("*,1").
8646(define_insn "<sse4_1_avx2>_pblendvb"
8647  [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
8648	(unspec:VI1_AVX2
8649	  [(match_operand:VI1_AVX2 1 "register_operand"  "0,x")
8650	   (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")
8651	   (match_operand:VI1_AVX2 3 "register_operand" "Yz,x")]
8652	  UNSPEC_BLENDV))]
8653  "TARGET_SSE4_1"
8654  "@
8655   pblendvb\t{%3, %2, %0|%0, %2, %3}
8656   vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8657  [(set_attr "isa" "noavx,avx")
8658   (set_attr "type" "ssemov")
8659   (set_attr "prefix_extra" "1")
8660   (set_attr "length_immediate" "*,1")
8661   (set_attr "prefix" "orig,vex")
8662   (set_attr "btver2_decode" "vector,vector")
8663   (set_attr "mode" "<sseinsnmode>")])
8664
;; 128-bit immediate-controlled halfword blend, expressed as a
;; vec_merge: operand 2 is the first vec_merge input, operand 1 the
;; second, and operand 3 is the 8-bit constant selection mask.
;; Alternative 0 is the two-operand noavx form, alternative 1 the VEX
;; three-operand form.
8665(define_insn "sse4_1_pblendw"
8666  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8667	(vec_merge:V8HI
8668	  (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8669	  (match_operand:V8HI 1 "register_operand" "0,x")
8670	  (match_operand:SI 3 "const_0_to_255_operand" "n,n")))]
8671  "TARGET_SSE4_1"
8672  "@
8673   pblendw\t{%3, %2, %0|%0, %2, %3}
8674   vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8675  [(set_attr "isa" "noavx,avx")
8676   (set_attr "type" "ssemov")
8677   (set_attr "prefix_extra" "1")
8678   (set_attr "length_immediate" "1")
8679   (set_attr "prefix" "orig,vex")
8680   (set_attr "mode" "TI")])
8681
8682;; The builtin uses an 8-bit immediate.  Expand that.
;; The V16HI vec_merge needs one mask bit per halfword, i.e. 16 bits.
;; The preparation code keeps the low 8 bits of operand 3 and duplicates
;; them into bits 8-15 (val << 8 | val), so the same 8-bit selection is
;; applied to the low and the high eight halfwords.
8683(define_expand "avx2_pblendw"
8684  [(set (match_operand:V16HI 0 "register_operand")
8685	(vec_merge:V16HI
8686	  (match_operand:V16HI 2 "nonimmediate_operand")
8687	  (match_operand:V16HI 1 "register_operand")
8688	  (match_operand:SI 3 "const_0_to_255_operand")))]
8689  "TARGET_AVX2"
8690{
8691  HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff;
8692  operands[3] = GEN_INT (val << 8 | val);
8693})
8694
;; Matcher for the 256-bit halfword blend.  Operand 3 must satisfy
;; avx2_pblendw_operand (presumably a 16-bit mask whose two bytes are
;; equal, as produced by the avx2_pblendw expander -- confirm against
;; the predicate).  The output code masks it back to its low 8 bits
;; before printing the immediate.
8695(define_insn "*avx2_pblendw"
8696  [(set (match_operand:V16HI 0 "register_operand" "=x")
8697	(vec_merge:V16HI
8698	  (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8699	  (match_operand:V16HI 1 "register_operand" "x")
8700	  (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
8701  "TARGET_AVX2"
8702{
8703  operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
8704  return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
8705}
8706  [(set_attr "type" "ssemov")
8707   (set_attr "prefix_extra" "1")
8708   (set_attr "length_immediate" "1")
8709   (set_attr "prefix" "vex")
8710   (set_attr "mode" "OI")])
8711
;; AVX2 immediate-controlled doubleword blend over the VI4_AVX2 modes,
;; expressed as a vec_merge: operand 2 is the first vec_merge input,
;; operand 1 the second, operand 3 the 8-bit constant selection mask.
;; VEX three-operand form only.
8712(define_insn "avx2_pblendd<mode>"
8713  [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
8714	(vec_merge:VI4_AVX2
8715	  (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
8716	  (match_operand:VI4_AVX2 1 "register_operand" "x")
8717	  (match_operand:SI 3 "const_0_to_255_operand" "n")))]
8718  "TARGET_AVX2"
8719  "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8720  [(set_attr "type" "ssemov")
8721   (set_attr "prefix_extra" "1")
8722   (set_attr "length_immediate" "1")
8723   (set_attr "prefix" "vex")
8724   (set_attr "mode" "<sseinsnmode>")])
8725
;; phminposuw on V8HI, modelled as the single-input unspec
;; UNSPEC_PHMINPOSUW.  %v / maybe_vex let one pattern cover the legacy
;; and VEX encodings.
8726(define_insn "sse4_1_phminposuw"
8727  [(set (match_operand:V8HI 0 "register_operand" "=x")
8728	(unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
8729		     UNSPEC_PHMINPOSUW))]
8730  "TARGET_SSE4_1"
8731  "%vphminposuw\t{%1, %0|%0, %1}"
8732  [(set_attr "type" "sselog1")
8733   (set_attr "prefix_extra" "1")
8734   (set_attr "prefix" "maybe_vex")
8735   (set_attr "mode" "TI")])
8736
;; AVX2 sign/zero extension (any_extend) of all 16 bytes of a V16QI
;; operand to V16HI.  <extsuffix> supplies the sign/zero part of the
;; mnemonic.
8737(define_insn "avx2_<code>v16qiv16hi2"
8738  [(set (match_operand:V16HI 0 "register_operand" "=x")
8739	(any_extend:V16HI
8740	  (match_operand:V16QI 1 "nonimmediate_operand" "xm")))]
8741  "TARGET_AVX2"
8742  "vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
8743  [(set_attr "type" "ssemov")
8744   (set_attr "prefix_extra" "1")
8745   (set_attr "prefix" "vex")
8746   (set_attr "mode" "OI")])
8747
;; SSE4.1 sign/zero extension of the low 8 bytes (elements 0-7) of a
;; V16QI operand to V8HI.  Only 64 bits of the source are consumed: the
;; Intel-syntax template prints the operand with the %q modifier and
;; ssememalign is set to 64.
8748(define_insn "sse4_1_<code>v8qiv8hi2"
8749  [(set (match_operand:V8HI 0 "register_operand" "=x")
8750	(any_extend:V8HI
8751	  (vec_select:V8QI
8752	    (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8753	    (parallel [(const_int 0) (const_int 1)
8754		       (const_int 2) (const_int 3)
8755		       (const_int 4) (const_int 5)
8756		       (const_int 6) (const_int 7)]))))]
8757  "TARGET_SSE4_1"
8758  "%vpmov<extsuffix>bw\t{%1, %0|%0, %q1}"
8759  [(set_attr "type" "ssemov")
8760   (set_attr "ssememalign" "64")
8761   (set_attr "prefix_extra" "1")
8762   (set_attr "prefix" "maybe_vex")
8763   (set_attr "mode" "TI")])
8764
;; AVX2 sign/zero extension of the low 8 bytes (elements 0-7) of a
;; V16QI operand to V8SI.  The Intel-syntax template prints the source
;; with the %q modifier.
8765(define_insn "avx2_<code>v8qiv8si2"
8766  [(set (match_operand:V8SI 0 "register_operand" "=x")
8767	(any_extend:V8SI
8768	  (vec_select:V8QI
8769	    (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8770	    (parallel [(const_int 0) (const_int 1)
8771		       (const_int 2) (const_int 3)
8772		       (const_int 4) (const_int 5)
8773		       (const_int 6) (const_int 7)]))))]
8774  "TARGET_AVX2"
8775  "vpmov<extsuffix>bd\t{%1, %0|%0, %q1}"
8776  [(set_attr "type" "ssemov")
8777   (set_attr "prefix_extra" "1")
8778   (set_attr "prefix" "vex")
8779   (set_attr "mode" "OI")])
8780
;; SSE4.1 sign/zero extension of the low 4 bytes (elements 0-3) of a
;; V16QI operand to V4SI.  Only 32 bits of the source are consumed: the
;; Intel-syntax template uses the %k modifier and ssememalign is 32.
8781(define_insn "sse4_1_<code>v4qiv4si2"
8782  [(set (match_operand:V4SI 0 "register_operand" "=x")
8783	(any_extend:V4SI
8784	  (vec_select:V4QI
8785	    (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8786	    (parallel [(const_int 0) (const_int 1)
8787		       (const_int 2) (const_int 3)]))))]
8788  "TARGET_SSE4_1"
8789  "%vpmov<extsuffix>bd\t{%1, %0|%0, %k1}"
8790  [(set_attr "type" "ssemov")
8791   (set_attr "ssememalign" "32")
8792   (set_attr "prefix_extra" "1")
8793   (set_attr "prefix" "maybe_vex")
8794   (set_attr "mode" "TI")])
8795
;; AVX2 sign/zero extension of all 8 halfwords of a V8HI operand to
;; V8SI.
8796(define_insn "avx2_<code>v8hiv8si2"
8797  [(set (match_operand:V8SI 0 "register_operand" "=x")
8798	(any_extend:V8SI
8799	    (match_operand:V8HI 1 "nonimmediate_operand" "xm")))]
8800  "TARGET_AVX2"
8801  "vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
8802  [(set_attr "type" "ssemov")
8803   (set_attr "prefix_extra" "1")
8804   (set_attr "prefix" "vex")
8805   (set_attr "mode" "OI")])
8806
;; SSE4.1 sign/zero extension of the low 4 halfwords (elements 0-3) of
;; a V8HI operand to V4SI.  Only 64 bits of the source are consumed:
;; the Intel-syntax template uses the %q modifier and ssememalign is 64.
8807(define_insn "sse4_1_<code>v4hiv4si2"
8808  [(set (match_operand:V4SI 0 "register_operand" "=x")
8809	(any_extend:V4SI
8810	  (vec_select:V4HI
8811	    (match_operand:V8HI 1 "nonimmediate_operand" "xm")
8812	    (parallel [(const_int 0) (const_int 1)
8813		       (const_int 2) (const_int 3)]))))]
8814  "TARGET_SSE4_1"
8815  "%vpmov<extsuffix>wd\t{%1, %0|%0, %q1}"
8816  [(set_attr "type" "ssemov")
8817   (set_attr "ssememalign" "64")
8818   (set_attr "prefix_extra" "1")
8819   (set_attr "prefix" "maybe_vex")
8820   (set_attr "mode" "TI")])
8821
;; AVX2 sign/zero extension of the low 4 bytes (elements 0-3) of a
;; V16QI operand to V4DI.  The Intel-syntax template prints the source
;; with the %k modifier.
8822(define_insn "avx2_<code>v4qiv4di2"
8823  [(set (match_operand:V4DI 0 "register_operand" "=x")
8824	(any_extend:V4DI
8825	  (vec_select:V4QI
8826	    (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8827	    (parallel [(const_int 0) (const_int 1)
8828		       (const_int 2) (const_int 3)]))))]
8829  "TARGET_AVX2"
8830  "vpmov<extsuffix>bq\t{%1, %0|%0, %k1}"
8831  [(set_attr "type" "ssemov")
8832   (set_attr "prefix_extra" "1")
8833   (set_attr "prefix" "vex")
8834   (set_attr "mode" "OI")])
8835
;; SSE4.1 sign/zero extension of the low 2 bytes (elements 0-1) of a
;; V16QI operand to V2DI.  Only 16 bits of the source are consumed: the
;; Intel-syntax template uses the %w modifier and ssememalign is 16.
8836(define_insn "sse4_1_<code>v2qiv2di2"
8837  [(set (match_operand:V2DI 0 "register_operand" "=x")
8838	(any_extend:V2DI
8839	  (vec_select:V2QI
8840	    (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8841	    (parallel [(const_int 0) (const_int 1)]))))]
8842  "TARGET_SSE4_1"
8843  "%vpmov<extsuffix>bq\t{%1, %0|%0, %w1}"
8844  [(set_attr "type" "ssemov")
8845   (set_attr "ssememalign" "16")
8846   (set_attr "prefix_extra" "1")
8847   (set_attr "prefix" "maybe_vex")
8848   (set_attr "mode" "TI")])
8849
;; AVX2 sign/zero extension of the low 4 halfwords (elements 0-3) of a
;; V8HI operand to V4DI.  The Intel-syntax template prints the source
;; with the %q modifier.
8850(define_insn "avx2_<code>v4hiv4di2"
8851  [(set (match_operand:V4DI 0 "register_operand" "=x")
8852	(any_extend:V4DI
8853	  (vec_select:V4HI
8854	    (match_operand:V8HI 1 "nonimmediate_operand" "xm")
8855	    (parallel [(const_int 0) (const_int 1)
8856		       (const_int 2) (const_int 3)]))))]
8857  "TARGET_AVX2"
8858  "vpmov<extsuffix>wq\t{%1, %0|%0, %q1}"
8859  [(set_attr "type" "ssemov")
8860   (set_attr "prefix_extra" "1")
8861   (set_attr "prefix" "vex")
8862   (set_attr "mode" "OI")])
8863
;; SSE4.1 sign/zero extension of the low 2 halfwords (elements 0-1) of
;; a V8HI operand to V2DI.  Only 32 bits of the source are consumed:
;; the Intel-syntax template uses the %k modifier and ssememalign is 32.
8864(define_insn "sse4_1_<code>v2hiv2di2"
8865  [(set (match_operand:V2DI 0 "register_operand" "=x")
8866	(any_extend:V2DI
8867	  (vec_select:V2HI
8868	    (match_operand:V8HI 1 "nonimmediate_operand" "xm")
8869	    (parallel [(const_int 0) (const_int 1)]))))]
8870  "TARGET_SSE4_1"
8871  "%vpmov<extsuffix>wq\t{%1, %0|%0, %k1}"
8872  [(set_attr "type" "ssemov")
8873   (set_attr "ssememalign" "32")
8874   (set_attr "prefix_extra" "1")
8875   (set_attr "prefix" "maybe_vex")
8876   (set_attr "mode" "TI")])
8877
;; AVX2 sign/zero extension of all 4 doublewords of a V4SI operand to
;; V4DI.  The "prefix" attribute is stated explicitly as "vex", matching
;; the other avx2_<code>* extension patterns in this group, instead of
;; being left to the attribute's default.
8878(define_insn "avx2_<code>v4siv4di2"
8879  [(set (match_operand:V4DI 0 "register_operand" "=x")
8880	(any_extend:V4DI
8881	    (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
8882  "TARGET_AVX2"
8883  "vpmov<extsuffix>dq\t{%1, %0|%0, %1}"
8884  [(set_attr "type" "ssemov")
8885   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
8886   (set_attr "mode" "OI")])
8887
;; SSE4.1 sign/zero extension of the low 2 doublewords (elements 0-1)
;; of a V4SI operand to V2DI.  Only 64 bits of the source are consumed:
;; the Intel-syntax template uses the %q modifier and ssememalign is 64.
8888(define_insn "sse4_1_<code>v2siv2di2"
8889  [(set (match_operand:V2DI 0 "register_operand" "=x")
8890	(any_extend:V2DI
8891	  (vec_select:V2SI
8892	    (match_operand:V4SI 1 "nonimmediate_operand" "xm")
8893	    (parallel [(const_int 0) (const_int 1)]))))]
8894  "TARGET_SSE4_1"
8895  "%vpmov<extsuffix>dq\t{%1, %0|%0, %q1}"
8896  [(set_attr "type" "ssemov")
8897   (set_attr "ssememalign" "64")
8898   (set_attr "prefix_extra" "1")
8899   (set_attr "prefix" "maybe_vex")
8900   (set_attr "mode" "TI")])
8901
8902;; vtestps/vtestpd are very similar to comiss and ucomiss when
8903;; setting FLAGS_REG, but they are not really compare instructions.
;; Flag-setting test over floating-point vector modes (the VF iterator).
;; The only effect modelled is a write of FLAGS_REG in CC mode, described
;; as UNSPEC_VTESTP of the two inputs; there is no register result.
;; Operand 0 must be a register, operand 1 may be a register or memory.
;; Enabled for TARGET_AVX and always VEX-prefixed.
8904(define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
8905  [(set (reg:CC FLAGS_REG)
8906	(unspec:CC [(match_operand:VF 0 "register_operand" "x")
8907		    (match_operand:VF 1 "nonimmediate_operand" "xm")]
8908		   UNSPEC_VTESTP))]
8909  "TARGET_AVX"
8910  "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
8911  [(set_attr "type" "ssecomi")
8912   (set_attr "prefix_extra" "1")
8913   (set_attr "prefix" "vex")
8914   (set_attr "mode" "<MODE>")])
8915
8916;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG,
8917;; but it is not really a compare instruction.
;; 256-bit form: writes FLAGS_REG (CC mode) from UNSPEC_PTEST applied to
;; two V4DI operands and produces no register result.  Operand 0 must be a
;; register; operand 1 may be a register or memory.  Enabled for
;; TARGET_AVX, emitted as vptest with a VEX prefix.
8918(define_insn "avx_ptest256"
8919  [(set (reg:CC FLAGS_REG)
8920	(unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
8921		    (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
8922		   UNSPEC_PTEST))]
8923  "TARGET_AVX"
8924  "vptest\t{%1, %0|%0, %1}"
8925  [(set_attr "type" "ssecomi")
8926   (set_attr "prefix_extra" "1")
8927   (set_attr "prefix" "vex")
8928   (set_attr "btver2_decode" "vector")
8929   (set_attr "mode" "OI")])
8930
;; 128-bit form of the ptest pattern: writes FLAGS_REG (CC mode) from
;; UNSPEC_PTEST applied to two V2DI operands, with no register result.
;; Operand 0 must be a register; operand 1 may be a register or memory.
;; Enabled for TARGET_SSE4_1; the mnemonic carries the %v marker and the
;; prefix attribute is "maybe_vex".
8931(define_insn "sse4_1_ptest"
8932  [(set (reg:CC FLAGS_REG)
8933	(unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
8934		    (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
8935		   UNSPEC_PTEST))]
8936  "TARGET_SSE4_1"
8937  "%vptest\t{%1, %0|%0, %1}"
8938  [(set_attr "type" "ssecomi")
8939   (set_attr "prefix_extra" "1")
8940   (set_attr "prefix" "maybe_vex")
8941   (set_attr "mode" "TI")])
8942
;; Packed rounding for every mode of the VF iterator.  Operand 0 receives
;; UNSPEC_ROUND of operand 1 (register or memory) under the control
;; immediate in operand 2, which the predicate restricts to 0..15.
;; Enabled for TARGET_ROUND.  The prefix_data16 attribute is computed:
;; "*" when TARGET_AVX is in effect, otherwise "1".
8943(define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
8944  [(set (match_operand:VF 0 "register_operand" "=x")
8945	(unspec:VF
8946	  [(match_operand:VF 1 "nonimmediate_operand" "xm")
8947	   (match_operand:SI 2 "const_0_to_15_operand" "n")]
8948	  UNSPEC_ROUND))]
8949  "TARGET_ROUND"
8950  "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8951  [(set_attr "type" "ssecvt")
8952   (set (attr "prefix_data16")
8953     (if_then_else
8954       (match_test "TARGET_AVX")
8955     (const_string "*")
8956     (const_string "1")))
8957   (set_attr "prefix_extra" "1")
8958   (set_attr "length_immediate" "1")
8959   (set_attr "prefix" "maybe_vex")
8960   (set_attr "mode" "<MODE>")])
8961
;; Round a VF1 vector and convert the result to the matching integer
;; vector mode (<sseintvecmode>).
;;   operand 0: integer-vector destination register
;;   operand 1: floating-point source (register or memory)
;;   operand 2: rounding-control immediate, 0..15
;; The expander emits two insns: the packed round pattern into a fresh
;; pseudo, then fix_trunc<mode><sseintvecmodelower>2 from that pseudo into
;; operand 0.  It always finishes with DONE, so the (empty) RTL template
;; is never emitted.
8962(define_expand "<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>"
8963  [(match_operand:<sseintvecmode> 0 "register_operand")
8964   (match_operand:VF1 1 "nonimmediate_operand")
8965   (match_operand:SI 2 "const_0_to_15_operand")]
8966  "TARGET_ROUND"
8967{
8968  rtx tmp = gen_reg_rtx (<MODE>mode);
8969
8970  emit_insn
8971    (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp, operands[1],
8972						       operands[2]));
8973  emit_insn
8974    (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
8975  DONE;
8976})
8977
;; Round two VF2 vectors and pack the truncated integer results into one
;; vector of mode <ssepackfltmode>.
;;   operand 0: packed integer destination register
;;   operands 1, 2: the two floating-point sources (register or memory)
;;   operand 3: rounding-control immediate, 0..15
;; Two strategies:
;;  * V2DF with TARGET_AVX and without TARGET_PREFER_AVX128: operand 1 is
;;    forced into a register, concatenated with operand 2 into a single
;;    V4DF pseudo, rounded once with avx_roundpd256 and converted with
;;    fix_truncv4dfv4si2 straight into operand 0.
;;  * otherwise: each source is rounded separately into its own pseudo
;;    and the pair is combined by vec_pack_sfix_trunc_<mode>.
;; The expander always ends with DONE.
8978(define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>"
8979  [(match_operand:<ssepackfltmode> 0 "register_operand")
8980   (match_operand:VF2 1 "nonimmediate_operand")
8981   (match_operand:VF2 2 "nonimmediate_operand")
8982   (match_operand:SI 3 "const_0_to_15_operand")]
8983  "TARGET_ROUND"
8984{
8985  rtx tmp0, tmp1;
8986
8987  if (<MODE>mode == V2DFmode
8988      && TARGET_AVX && !TARGET_PREFER_AVX128)
8989    {
8990      rtx tmp2 = gen_reg_rtx (V4DFmode);
8991
8992      tmp0 = gen_reg_rtx (V4DFmode);
8993      tmp1 = force_reg (V2DFmode, operands[1]);
8994
8995      emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
8996      emit_insn (gen_avx_roundpd256 (tmp2, tmp0, operands[3]));
8997      emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
8998    }
8999  else
9000    {
9001      tmp0 = gen_reg_rtx (<MODE>mode);
9002      tmp1 = gen_reg_rtx (<MODE>mode);
9003
9004      emit_insn
9005       (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1],
9006							  operands[3]));
9007      emit_insn
9008       (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2],
9009							  operands[3]));
9010      emit_insn
9011       (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
9012    }
9013  DONE;
9014})
9015
;; Scalar rounding on 128-bit float vectors (VF_128).  The result is a
;; vec_merge with mask (const_int 1): element 0 is taken from UNSPEC_ROUND
;; of operand 2 (controlled by the 0..15 immediate in operand 3) and the
;; remaining elements come from operand 1.
;; Two alternatives, selected through the "isa" attribute:
;;   noavx - operand 1 is tied to the destination ("0"); the template
;;           prints only operands 3, 2 and 0.
;;   avx   - operand 1 is an independent register and is printed as an
;;           extra source in the vround template.
9016(define_insn "sse4_1_round<ssescalarmodesuffix>"
9017  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
9018	(vec_merge:VF_128
9019	  (unspec:VF_128
9020	    [(match_operand:VF_128 2 "register_operand" "x,x")
9021	     (match_operand:SI 3 "const_0_to_15_operand" "n,n")]
9022	    UNSPEC_ROUND)
9023	  (match_operand:VF_128 1 "register_operand" "0,x")
9024	  (const_int 1)))]
9025  "TARGET_ROUND"
9026  "@
9027   round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
9028   vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9029  [(set_attr "isa" "noavx,avx")
9030   (set_attr "type" "ssecvt")
9031   (set_attr "length_immediate" "1")
9032   (set_attr "prefix_data16" "1,*")
9033   (set_attr "prefix_extra" "1")
9034   (set_attr "prefix" "orig,vex")
9035   (set_attr "mode" "<MODE>")])
9036
;; Vector round expander for the VF modes, only available when
;; TARGET_ROUND holds and -ftrapping-math is off (!flag_trapping_math).
;;   operand 0: destination register
;;   operand 1: source register
;; The emitted sequence is:
;;   operand 3 = copysign (C, operand 1)     (emitted by the C body)
;;   operand 4 = operand 1 + operand 3       (first set of the template)
;;   operand 0 = UNSPEC_ROUND (operand 4, ROUND_TRUNC)   (second set)
;; where C is a vector constant built from the scalar value
;; dconsthalf - 2**(-p - 1), p being the precision field (fmt->p) of the
;; element mode's real format; the in-body comment describes this value as
;; nextafter (0.5, 0.0).
;; NOTE(review): the "true" passed to ix86_build_const_vector presumably
;; asks for the scalar to be replicated into every element - confirm
;; against that helper's definition.
9037(define_expand "round<mode>2"
9038  [(set (match_dup 4)
9039	(plus:VF
9040	  (match_operand:VF 1 "register_operand")
9041	  (match_dup 3)))
9042   (set (match_operand:VF 0 "register_operand")
9043	(unspec:VF
9044	  [(match_dup 4) (match_dup 5)]
9045	  UNSPEC_ROUND))]
9046  "TARGET_ROUND && !flag_trapping_math"
9047{
9048  enum machine_mode scalar_mode;
9049  const struct real_format *fmt;
9050  REAL_VALUE_TYPE pred_half, half_minus_pred_half;
9051  rtx half, vec_half;
9052
9053  scalar_mode = GET_MODE_INNER (<MODE>mode);
9054
9055  /* load nextafter (0.5, 0.0) */
9056  fmt = REAL_MODE_FORMAT (scalar_mode);
9057  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
9058  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
9059  half = const_double_from_real_value (pred_half, scalar_mode);
9060
9061  vec_half = ix86_build_const_vector (<MODE>mode, true, half);
9062  vec_half = force_reg (<MODE>mode, vec_half);
9063
9064  operands[3] = gen_reg_rtx (<MODE>mode);
9065  emit_insn (gen_copysign<mode>3 (operands[3], vec_half, operands[1]));
9066
9067  operands[4] = gen_reg_rtx (<MODE>mode);
9068  operands[5] = GEN_INT (ROUND_TRUNC);
9069})
9070
;; Round a VF1 vector with round<mode>2 and convert the result to the
;; matching integer vector mode.
;;   operand 0: integer-vector destination register
;;   operand 1: floating-point source register
;; Emits round<mode>2 into a fresh pseudo followed by
;; fix_trunc<mode><sseintvecmodelower>2 into operand 0, then DONE.
;; Shares the enabling condition of round<mode>2
;; (TARGET_ROUND && !flag_trapping_math).
9071(define_expand "round<mode>2_sfix"
9072  [(match_operand:<sseintvecmode> 0 "register_operand")
9073   (match_operand:VF1 1 "register_operand")]
9074  "TARGET_ROUND && !flag_trapping_math"
9075{
9076  rtx tmp = gen_reg_rtx (<MODE>mode);
9077
9078  emit_insn (gen_round<mode>2 (tmp, operands[1]));
9079
9080  emit_insn
9081    (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
9082  DONE;
9083})
9084
;; Round two VF2 vectors with the round<mode>2 expansion and pack the
;; truncated integer results into one vector of mode <ssepackfltmode>.
;;   operand 0: packed integer destination register
;;   operands 1, 2: the two floating-point source registers
;; Two strategies:
;;  * V2DF with TARGET_AVX and without TARGET_PREFER_AVX128: the sources
;;    are concatenated into one V4DF pseudo, rounded once via roundv4df2
;;    and converted with fix_truncv4dfv4si2 into operand 0.
;;  * otherwise: each source is rounded separately and the pair is
;;    combined by vec_pack_sfix_trunc_<mode>.
;; Always ends with DONE.
9085(define_expand "round<mode>2_vec_pack_sfix"
9086  [(match_operand:<ssepackfltmode> 0 "register_operand")
9087   (match_operand:VF2 1 "register_operand")
9088   (match_operand:VF2 2 "register_operand")]
9089  "TARGET_ROUND && !flag_trapping_math"
9090{
9091  rtx tmp0, tmp1;
9092
9093  if (<MODE>mode == V2DFmode
9094      && TARGET_AVX && !TARGET_PREFER_AVX128)
9095    {
9096      rtx tmp2 = gen_reg_rtx (V4DFmode);
9097
9098      tmp0 = gen_reg_rtx (V4DFmode);
9099      tmp1 = force_reg (V2DFmode, operands[1]);
9100
9101      emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
9102      emit_insn (gen_roundv4df2 (tmp2, tmp0));
9103      emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
9104    }
9105  else
9106    {
9107      tmp0 = gen_reg_rtx (<MODE>mode);
9108      tmp1 = gen_reg_rtx (<MODE>mode);
9109
9110      emit_insn (gen_round<mode>2 (tmp0, operands[1]));
9111      emit_insn (gen_round<mode>2 (tmp1, operands[2]));
9112
9113      emit_insn
9114       (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
9115    }
9116  DONE;
9117})
9118
9119;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9120;;
9121;; Intel SSE4.2 string/text processing instructions
9122;;
9123;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9124
;; Combined explicit-length string compare.  The pattern models three
;; results of one UNSPEC_PCMPESTR computation over the same five inputs:
;;   operand 0: SI result, constraint "c"
;;   operand 1: V16QI result, constraint "Yz"
;;   FLAGS_REG in CC mode
;; Inputs: operand 2 (V16QI register), operand 3 (SI, constraint "a"),
;; operand 4 (V16QI register or memory), operand 5 (SI, constraint "d")
;; and operand 6 (immediate 0..255).
;; The output template is "#", so the insn is never printed directly; it
;; only matches while can_create_pseudo_p () holds and is unconditionally
;; split ("&& 1").  The splitter inspects the REG_UNUSED notes on
;; curr_insn to see which results are live and emits:
;;   - sse4_2_pcmpestri when operand 0 is used,
;;   - sse4_2_pcmpestrm when operand 1 is used,
;;   - sse4_2_pcmpestr_cconly when only the flags are used,
;;   - a NOTE_INSN_DELETED note when nothing is used.
;; If both register results are live, both insns are emitted.
;; NOTE(review): the local names ecx/xmm0 suggest "c" and "Yz" denote
;; those hard registers - confirm against the constraint definitions.
9125(define_insn_and_split "sse4_2_pcmpestr"
9126  [(set (match_operand:SI 0 "register_operand" "=c,c")
9127	(unspec:SI
9128	  [(match_operand:V16QI 2 "register_operand" "x,x")
9129	   (match_operand:SI 3 "register_operand" "a,a")
9130	   (match_operand:V16QI 4 "nonimmediate_operand" "x,m")
9131	   (match_operand:SI 5 "register_operand" "d,d")
9132	   (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
9133	  UNSPEC_PCMPESTR))
9134   (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
9135	(unspec:V16QI
9136	  [(match_dup 2)
9137	   (match_dup 3)
9138	   (match_dup 4)
9139	   (match_dup 5)
9140	   (match_dup 6)]
9141	  UNSPEC_PCMPESTR))
9142   (set (reg:CC FLAGS_REG)
9143	(unspec:CC
9144	  [(match_dup 2)
9145	   (match_dup 3)
9146	   (match_dup 4)
9147	   (match_dup 5)
9148	   (match_dup 6)]
9149	  UNSPEC_PCMPESTR))]
9150  "TARGET_SSE4_2
9151   && can_create_pseudo_p ()"
9152  "#"
9153  "&& 1"
9154  [(const_int 0)]
9155{
9156  int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9157  int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9158  int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
9159
9160  if (ecx)
9161    emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
9162				     operands[3], operands[4],
9163				     operands[5], operands[6]));
9164  if (xmm0)
9165    emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
9166				     operands[3], operands[4],
9167				     operands[5], operands[6]));
9168  if (flags && !(ecx || xmm0))
9169    emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
9170					   operands[2], operands[3],
9171					   operands[4], operands[5],
9172					   operands[6]));
9173  if (!(flags || ecx || xmm0))
9174    emit_note (NOTE_INSN_DELETED);
9175
9176  DONE;
9177}
9178  [(set_attr "type" "sselog")
9179   (set_attr "prefix_data16" "1")
9180   (set_attr "prefix_extra" "1")
9181   (set_attr "ssememalign" "8")
9182   (set_attr "length_immediate" "1")
9183   (set_attr "memory" "none,load")
9184   (set_attr "mode" "TI")])
9185
;; Variant of sse4_2_pcmpestr whose second string operand (operand 4) is
;; a memory reference wrapped in UNSPEC_LOADU in all three sets.  It has
;; a single alternative and its "memory" attribute is always "load".
;; Like the aligned form it prints "#", only matches while
;; can_create_pseudo_p () holds, and is split unconditionally.  The split
;; body is the same liveness-driven selection (REG_UNUSED notes for
;; operand 0, operand 1 and FLAGS_REG) and hands the bare memory operand
;; to sse4_2_pcmpestri / sse4_2_pcmpestrm / sse4_2_pcmpestr_cconly, so
;; the UNSPEC_LOADU wrapper does not survive the split.
9186(define_insn_and_split "*sse4_2_pcmpestr_unaligned"
9187  [(set (match_operand:SI 0 "register_operand" "=c")
9188	(unspec:SI
9189	  [(match_operand:V16QI 2 "register_operand" "x")
9190	   (match_operand:SI 3 "register_operand" "a")
9191	   (unspec:V16QI
9192	     [(match_operand:V16QI 4 "memory_operand" "m")]
9193	     UNSPEC_LOADU)
9194	   (match_operand:SI 5 "register_operand" "d")
9195	   (match_operand:SI 6 "const_0_to_255_operand" "n")]
9196	  UNSPEC_PCMPESTR))
9197   (set (match_operand:V16QI 1 "register_operand" "=Yz")
9198	(unspec:V16QI
9199	  [(match_dup 2)
9200	   (match_dup 3)
9201	   (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU)
9202	   (match_dup 5)
9203	   (match_dup 6)]
9204	  UNSPEC_PCMPESTR))
9205   (set (reg:CC FLAGS_REG)
9206	(unspec:CC
9207	  [(match_dup 2)
9208	   (match_dup 3)
9209	   (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU)
9210	   (match_dup 5)
9211	   (match_dup 6)]
9212	  UNSPEC_PCMPESTR))]
9213  "TARGET_SSE4_2
9214   && can_create_pseudo_p ()"
9215  "#"
9216  "&& 1"
9217  [(const_int 0)]
9218{
9219  int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9220  int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9221  int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
9222
9223  if (ecx)
9224    emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
9225				     operands[3], operands[4],
9226				     operands[5], operands[6]));
9227  if (xmm0)
9228    emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
9229				     operands[3], operands[4],
9230				     operands[5], operands[6]));
9231  if (flags && !(ecx || xmm0))
9232    emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
9233					   operands[2], operands[3],
9234					   operands[4], operands[5],
9235					   operands[6]));
9236  if (!(flags || ecx || xmm0))
9237    emit_note (NOTE_INSN_DELETED);
9238
9239  DONE;
9240}
9241  [(set_attr "type" "sselog")
9242   (set_attr "prefix_data16" "1")
9243   (set_attr "prefix_extra" "1")
9244   (set_attr "ssememalign" "8")
9245   (set_attr "length_immediate" "1")
9246   (set_attr "memory" "load")
9247   (set_attr "mode" "TI")])
9248
;; Explicit-length string compare producing an SI result in operand 0
;; (constraint "c") and also writing FLAGS_REG.  Both sets are
;; UNSPEC_PCMPESTR over the same inputs.  Operands 2 and 4 (constraints
;; "a" and "d") are inputs that never appear in the output template;
;; only operands 5, 3 and 1 are printed.  The second alternative takes
;; operand 3 from memory, which the "memory" attribute reports as "load".
9249(define_insn "sse4_2_pcmpestri"
9250  [(set (match_operand:SI 0 "register_operand" "=c,c")
9251	(unspec:SI
9252	  [(match_operand:V16QI 1 "register_operand" "x,x")
9253	   (match_operand:SI 2 "register_operand" "a,a")
9254	   (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
9255	   (match_operand:SI 4 "register_operand" "d,d")
9256	   (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
9257	  UNSPEC_PCMPESTR))
9258   (set (reg:CC FLAGS_REG)
9259	(unspec:CC
9260	  [(match_dup 1)
9261	   (match_dup 2)
9262	   (match_dup 3)
9263	   (match_dup 4)
9264	   (match_dup 5)]
9265	  UNSPEC_PCMPESTR))]
9266  "TARGET_SSE4_2"
9267  "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
9268  [(set_attr "type" "sselog")
9269   (set_attr "prefix_data16" "1")
9270   (set_attr "prefix_extra" "1")
9271   (set_attr "prefix" "maybe_vex")
9272   (set_attr "ssememalign" "8")
9273   (set_attr "length_immediate" "1")
9274   (set_attr "btver2_decode" "vector")
9275   (set_attr "memory" "none,load")
9276   (set_attr "mode" "TI")])
9277
;; Explicit-length string compare producing a V16QI result in operand 0
;; (constraint "Yz") and also writing FLAGS_REG.  Both sets are
;; UNSPEC_PCMPESTR over the same inputs.  Operands 2 and 4 (constraints
;; "a" and "d") are inputs that never appear in the output template;
;; only operands 5, 3 and 1 are printed.  The second alternative takes
;; operand 3 from memory.
9278(define_insn "sse4_2_pcmpestrm"
9279  [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
9280	(unspec:V16QI
9281	  [(match_operand:V16QI 1 "register_operand" "x,x")
9282	   (match_operand:SI 2 "register_operand" "a,a")
9283	   (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
9284	   (match_operand:SI 4 "register_operand" "d,d")
9285	   (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
9286	  UNSPEC_PCMPESTR))
9287   (set (reg:CC FLAGS_REG)
9288	(unspec:CC
9289	  [(match_dup 1)
9290	   (match_dup 2)
9291	   (match_dup 3)
9292	   (match_dup 4)
9293	   (match_dup 5)]
9294	  UNSPEC_PCMPESTR))]
9295  "TARGET_SSE4_2"
9296  "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
9297  [(set_attr "type" "sselog")
9298   (set_attr "prefix_data16" "1")
9299   (set_attr "prefix_extra" "1")
9300   (set_attr "ssememalign" "8")
9301   (set_attr "length_immediate" "1")
9302   (set_attr "prefix" "maybe_vex")
9303   (set_attr "btver2_decode" "vector")
9304   (set_attr "memory" "none,load")
9305   (set_attr "mode" "TI")])
9306
;; Flags-only form of the explicit-length string compare: the sole set is
;; FLAGS_REG, and the two would-be register results are modelled as
;; clobbered scratches (operand 0 in V16QI, operand 1 in SI).
;; Four alternatives:
;;   0, 1 - emit pcmpestrm; the V16QI scratch is "Yz", the SI scratch "X"
;;   2, 3 - emit pcmpestri; the SI scratch is "c", the V16QI scratch "X"
;; Within each pair the second alternative reads operand 4 from memory,
;; as reflected in the per-alternative "memory" attribute.
9307(define_insn "sse4_2_pcmpestr_cconly"
9308  [(set (reg:CC FLAGS_REG)
9309	(unspec:CC
9310	  [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
9311	   (match_operand:SI 3 "register_operand" "a,a,a,a")
9312	   (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
9313	   (match_operand:SI 5 "register_operand" "d,d,d,d")
9314	   (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
9315	  UNSPEC_PCMPESTR))
9316   (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
9317   (clobber (match_scratch:SI    1 "= X, X,c,c"))]
9318  "TARGET_SSE4_2"
9319  "@
9320   %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
9321   %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
9322   %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
9323   %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
9324  [(set_attr "type" "sselog")
9325   (set_attr "prefix_data16" "1")
9326   (set_attr "prefix_extra" "1")
9327   (set_attr "ssememalign" "8")
9328   (set_attr "length_immediate" "1")
9329   (set_attr "memory" "none,load,none,load")
9330   (set_attr "btver2_decode" "vector,vector,vector,vector")
9331   (set_attr "prefix" "maybe_vex")
9332   (set_attr "mode" "TI")])
9333
;; Combined implicit-length string compare.  Three results of one
;; UNSPEC_PCMPISTR computation over the same three inputs are modelled:
;;   operand 0: SI result, constraint "c"
;;   operand 1: V16QI result, constraint "Yz"
;;   FLAGS_REG in CC mode
;; Inputs: operand 2 (V16QI register), operand 3 (V16QI register or
;; memory) and operand 4 (immediate 0..255).
;; The output template is "#"; the insn only matches while
;; can_create_pseudo_p () holds and is unconditionally split.  The
;; splitter reads the REG_UNUSED notes on curr_insn and emits:
;;   - sse4_2_pcmpistri when operand 0 is used,
;;   - sse4_2_pcmpistrm when operand 1 is used,
;;   - sse4_2_pcmpistr_cconly when only the flags are used,
;;   - a NOTE_INSN_DELETED note when nothing is used.
9334(define_insn_and_split "sse4_2_pcmpistr"
9335  [(set (match_operand:SI 0 "register_operand" "=c,c")
9336	(unspec:SI
9337	  [(match_operand:V16QI 2 "register_operand" "x,x")
9338	   (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
9339	   (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
9340	  UNSPEC_PCMPISTR))
9341   (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
9342	(unspec:V16QI
9343	  [(match_dup 2)
9344	   (match_dup 3)
9345	   (match_dup 4)]
9346	  UNSPEC_PCMPISTR))
9347   (set (reg:CC FLAGS_REG)
9348	(unspec:CC
9349	  [(match_dup 2)
9350	   (match_dup 3)
9351	   (match_dup 4)]
9352	  UNSPEC_PCMPISTR))]
9353  "TARGET_SSE4_2
9354   && can_create_pseudo_p ()"
9355  "#"
9356  "&& 1"
9357  [(const_int 0)]
9358{
9359  int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9360  int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9361  int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
9362
9363  if (ecx)
9364    emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
9365				     operands[3], operands[4]));
9366  if (xmm0)
9367    emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
9368				     operands[3], operands[4]));
9369  if (flags && !(ecx || xmm0))
9370    emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
9371					   operands[2], operands[3],
9372					   operands[4]));
9373  if (!(flags || ecx || xmm0))
9374    emit_note (NOTE_INSN_DELETED);
9375
9376  DONE;
9377}
9378  [(set_attr "type" "sselog")
9379   (set_attr "prefix_data16" "1")
9380   (set_attr "prefix_extra" "1")
9381   (set_attr "ssememalign" "8")
9382   (set_attr "length_immediate" "1")
9383   (set_attr "memory" "none,load")
9384   (set_attr "mode" "TI")])
9385
;; Variant of sse4_2_pcmpistr whose second string operand (operand 3) is
;; a memory reference wrapped in UNSPEC_LOADU in all three sets.  It has
;; a single alternative and its "memory" attribute is always "load".
;; It prints "#", only matches while can_create_pseudo_p () holds, and is
;; split unconditionally using the same REG_UNUSED-driven selection as
;; the aligned form.  The generators receive the bare memory operand, so
;; the UNSPEC_LOADU wrapper does not survive the split.
9386(define_insn_and_split "*sse4_2_pcmpistr_unaligned"
9387  [(set (match_operand:SI 0 "register_operand" "=c")
9388	(unspec:SI
9389	  [(match_operand:V16QI 2 "register_operand" "x")
9390	   (unspec:V16QI
9391	     [(match_operand:V16QI 3 "memory_operand" "m")]
9392	     UNSPEC_LOADU)
9393	   (match_operand:SI 4 "const_0_to_255_operand" "n")]
9394	  UNSPEC_PCMPISTR))
9395   (set (match_operand:V16QI 1 "register_operand" "=Yz")
9396	(unspec:V16QI
9397	  [(match_dup 2)
9398	   (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU)
9399	   (match_dup 4)]
9400	  UNSPEC_PCMPISTR))
9401   (set (reg:CC FLAGS_REG)
9402	(unspec:CC
9403	  [(match_dup 2)
9404	   (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU)
9405	   (match_dup 4)]
9406	  UNSPEC_PCMPISTR))]
9407  "TARGET_SSE4_2
9408   && can_create_pseudo_p ()"
9409  "#"
9410  "&& 1"
9411  [(const_int 0)]
9412{
9413  int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9414  int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9415  int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
9416
9417  if (ecx)
9418    emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
9419				     operands[3], operands[4]));
9420  if (xmm0)
9421    emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
9422				     operands[3], operands[4]));
9423  if (flags && !(ecx || xmm0))
9424    emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
9425					   operands[2], operands[3],
9426					   operands[4]));
9427  if (!(flags || ecx || xmm0))
9428    emit_note (NOTE_INSN_DELETED);
9429
9430  DONE;
9431}
9432  [(set_attr "type" "sselog")
9433   (set_attr "prefix_data16" "1")
9434   (set_attr "prefix_extra" "1")
9435   (set_attr "ssememalign" "8")
9436   (set_attr "length_immediate" "1")
9437   (set_attr "memory" "load")
9438   (set_attr "mode" "TI")])
9439
;; Implicit-length string compare producing an SI result in operand 0
;; (constraint "c") and also writing FLAGS_REG; both sets are
;; UNSPEC_PCMPISTR over operands 1-3.  Operand 2 may be a register or
;; memory (second alternative, "memory" attribute "load"); operand 3 is
;; an immediate in 0..255.
9440(define_insn "sse4_2_pcmpistri"
9441  [(set (match_operand:SI 0 "register_operand" "=c,c")
9442	(unspec:SI
9443	  [(match_operand:V16QI 1 "register_operand" "x,x")
9444	   (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
9445	   (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9446	  UNSPEC_PCMPISTR))
9447   (set (reg:CC FLAGS_REG)
9448	(unspec:CC
9449	  [(match_dup 1)
9450	   (match_dup 2)
9451	   (match_dup 3)]
9452	  UNSPEC_PCMPISTR))]
9453  "TARGET_SSE4_2"
9454  "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
9455  [(set_attr "type" "sselog")
9456   (set_attr "prefix_data16" "1")
9457   (set_attr "prefix_extra" "1")
9458   (set_attr "ssememalign" "8")
9459   (set_attr "length_immediate" "1")
9460   (set_attr "prefix" "maybe_vex")
9461   (set_attr "memory" "none,load")
9462   (set_attr "btver2_decode" "vector")
9463   (set_attr "mode" "TI")])
9464
;; Implicit-length string compare producing a V16QI result in operand 0
;; (constraint "Yz") and also writing FLAGS_REG; both sets are
;; UNSPEC_PCMPISTR over operands 1-3.  Operand 2 may be a register or
;; memory (second alternative); operand 3 is an immediate in 0..255.
9465(define_insn "sse4_2_pcmpistrm"
9466  [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
9467	(unspec:V16QI
9468	  [(match_operand:V16QI 1 "register_operand" "x,x")
9469	   (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
9470	   (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9471	  UNSPEC_PCMPISTR))
9472   (set (reg:CC FLAGS_REG)
9473	(unspec:CC
9474	  [(match_dup 1)
9475	   (match_dup 2)
9476	   (match_dup 3)]
9477	  UNSPEC_PCMPISTR))]
9478  "TARGET_SSE4_2"
9479  "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
9480  [(set_attr "type" "sselog")
9481   (set_attr "prefix_data16" "1")
9482   (set_attr "prefix_extra" "1")
9483   (set_attr "ssememalign" "8")
9484   (set_attr "length_immediate" "1")
9485   (set_attr "prefix" "maybe_vex")
9486   (set_attr "memory" "none,load")
9487   (set_attr "btver2_decode" "vector")
9488   (set_attr "mode" "TI")])
9489
;; Flags-only form of the implicit-length string compare: the sole set is
;; FLAGS_REG, and the two would-be register results are modelled as
;; clobbered scratches (operand 0 in V16QI, operand 1 in SI).
;; Four alternatives:
;;   0, 1 - emit pcmpistrm; the V16QI scratch is "Yz", the SI scratch "X"
;;   2, 3 - emit pcmpistri; the SI scratch is "c", the V16QI scratch "X"
;; Within each pair the second alternative reads operand 3 from memory.
9490(define_insn "sse4_2_pcmpistr_cconly"
9491  [(set (reg:CC FLAGS_REG)
9492	(unspec:CC
9493	  [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
9494	   (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
9495	   (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
9496	  UNSPEC_PCMPISTR))
9497   (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
9498   (clobber (match_scratch:SI    1 "= X, X,c,c"))]
9499  "TARGET_SSE4_2"
9500  "@
9501   %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
9502   %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
9503   %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
9504   %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
9505  [(set_attr "type" "sselog")
9506   (set_attr "prefix_data16" "1")
9507   (set_attr "prefix_extra" "1")
9508   (set_attr "ssememalign" "8")
9509   (set_attr "length_immediate" "1")
9510   (set_attr "memory" "none,load,none,load")
9511   (set_attr "prefix" "maybe_vex")
9512   (set_attr "btver2_decode" "vector,vector,vector,vector")
9513   (set_attr "mode" "TI")])
9514
9515;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9516;;
9517;; XOP instructions
9518;;
9519;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9520
;; xop_plus iterates the multiply/accumulate patterns below over plain
;; addition (plus) and saturating signed addition (ss_plus).  The macs
;; and madcs code attributes supply the matching mnemonic stem for each
;; code: "macs"/"madcs" for plus, "macss"/"madcss" for ss_plus.
9521(define_code_iterator xop_plus [plus ss_plus])
9522
9523(define_code_attr macs [(plus "macs") (ss_plus "macss")])
9524(define_code_attr madcs [(plus "madcs") (ss_plus "madcss")])
9525
9526;; XOP parallel integer multiply/add instructions.
9527
;; Element-wise multiply/accumulate over the VI24_128 modes:
;;   operand 0 = xop_plus (operand 1 * operand 2, operand 3)
;; with xop_plus being either plus or ss_plus.  Operand 1 carries the "%"
;; commutativity marker and must end up in a register; operand 2 may be a
;; register or memory; the accumulator (operand 3) must be a register.
9528(define_insn "xop_p<macs><ssemodesuffix><ssemodesuffix>"
9529  [(set (match_operand:VI24_128 0 "register_operand" "=x")
9530	(xop_plus:VI24_128
9531	 (mult:VI24_128
9532	  (match_operand:VI24_128 1 "nonimmediate_operand" "%x")
9533	  (match_operand:VI24_128 2 "nonimmediate_operand" "xm"))
9534	 (match_operand:VI24_128 3 "register_operand" "x")))]
9535  "TARGET_XOP"
9536  "vp<macs><ssemodesuffix><ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9537  [(set_attr "type" "ssemuladd")
9538   (set_attr "mode" "TI")])
9539
;; Widening multiply/accumulate: elements 0 and 2 of each V4SI input are
;; sign-extended to DImode, multiplied pairwise, and combined with the
;; V2DI accumulator in operand 3 using xop_plus (plus or ss_plus).
;; Operand 1 is marked commutative ("%"); operand 2 may be memory.
9540(define_insn "xop_p<macs>dql"
9541  [(set (match_operand:V2DI 0 "register_operand" "=x")
9542	(xop_plus:V2DI
9543	 (mult:V2DI
9544	  (sign_extend:V2DI
9545	   (vec_select:V2SI
9546	    (match_operand:V4SI 1 "nonimmediate_operand" "%x")
9547	    (parallel [(const_int 0) (const_int 2)])))
9548	  (sign_extend:V2DI
9549	   (vec_select:V2SI
9550	    (match_operand:V4SI 2 "nonimmediate_operand" "xm")
9551	    (parallel [(const_int 0) (const_int 2)]))))
9552	 (match_operand:V2DI 3 "register_operand" "x")))]
9553  "TARGET_XOP"
9554  "vp<macs>dql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9555  [(set_attr "type" "ssemuladd")
9556   (set_attr "mode" "TI")])
9557
;; Counterpart of xop_p<macs>dql that selects elements 1 and 3 of each
;; V4SI input: they are sign-extended to DImode, multiplied pairwise, and
;; combined with the V2DI accumulator in operand 3 using xop_plus.
;; Operand 1 is marked commutative ("%"); operand 2 may be memory.
9558(define_insn "xop_p<macs>dqh"
9559  [(set (match_operand:V2DI 0 "register_operand" "=x")
9560	(xop_plus:V2DI
9561	 (mult:V2DI
9562	  (sign_extend:V2DI
9563	   (vec_select:V2SI
9564	    (match_operand:V4SI 1 "nonimmediate_operand" "%x")
9565	    (parallel [(const_int 1) (const_int 3)])))
9566	  (sign_extend:V2DI
9567	   (vec_select:V2SI
9568	    (match_operand:V4SI 2 "nonimmediate_operand" "xm")
9569	    (parallel [(const_int 1) (const_int 3)]))))
9570	 (match_operand:V2DI 3 "register_operand" "x")))]
9571  "TARGET_XOP"
9572  "vp<macs>dqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9573  [(set_attr "type" "ssemuladd")
9574   (set_attr "mode" "TI")])
9575
9576;; XOP parallel integer multiply/add instructions for the intrinsics
;; Widening multiply/accumulate on halfwords: elements 1, 3, 5 and 7 of
;; each V8HI input are sign-extended to SImode, multiplied pairwise, and
;; combined with the V4SI accumulator in operand 3 using xop_plus.
;; Operand 1 is marked commutative ("%"); operand 2 may be memory.
9577(define_insn "xop_p<macs>wd"
9578  [(set (match_operand:V4SI 0 "register_operand" "=x")
9579	(xop_plus:V4SI
9580	 (mult:V4SI
9581	  (sign_extend:V4SI
9582	   (vec_select:V4HI
9583	    (match_operand:V8HI 1 "nonimmediate_operand" "%x")
9584	    (parallel [(const_int 1) (const_int 3)
9585		       (const_int 5) (const_int 7)])))
9586	  (sign_extend:V4SI
9587	   (vec_select:V4HI
9588	    (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9589	    (parallel [(const_int 1) (const_int 3)
9590		       (const_int 5) (const_int 7)]))))
9591	 (match_operand:V4SI 3 "register_operand" "x")))]
9592  "TARGET_XOP"
9593  "vp<macs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9594  [(set_attr "type" "ssemuladd")
9595   (set_attr "mode" "TI")])
9596
;; Multiply/add/accumulate on halfwords.  Two V4SI product vectors are
;; formed from the sign-extended V8HI inputs - one from the even elements
;; (0, 2, 4, 6) and one from the odd elements (1, 3, 5, 7) - and added
;; together with a plain plus.  That sum is then combined with the V4SI
;; accumulator in operand 3 using xop_plus (plus or ss_plus).
;; The odd-element products reuse operands 1 and 2 through match_dup.
9597(define_insn "xop_p<madcs>wd"
9598  [(set (match_operand:V4SI 0 "register_operand" "=x")
9599	(xop_plus:V4SI
9600	 (plus:V4SI
9601	  (mult:V4SI
9602	   (sign_extend:V4SI
9603	    (vec_select:V4HI
9604	     (match_operand:V8HI 1 "nonimmediate_operand" "%x")
9605	     (parallel [(const_int 0) (const_int 2)
9606			(const_int 4) (const_int 6)])))
9607	   (sign_extend:V4SI
9608	    (vec_select:V4HI
9609	     (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9610	     (parallel [(const_int 0) (const_int 2)
9611			(const_int 4) (const_int 6)]))))
9612	  (mult:V4SI
9613	   (sign_extend:V4SI
9614	    (vec_select:V4HI
9615	     (match_dup 1)
9616	     (parallel [(const_int 1) (const_int 3)
9617			(const_int 5) (const_int 7)])))
9618	   (sign_extend:V4SI
9619	    (vec_select:V4HI
9620	     (match_dup 2)
9621	     (parallel [(const_int 1) (const_int 3)
9622			(const_int 5) (const_int 7)])))))
9623	 (match_operand:V4SI 3 "register_operand" "x")))]
9624  "TARGET_XOP"
9625  "vp<madcs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9626  [(set_attr "type" "ssemuladd")
9627   (set_attr "mode" "TI")])
9628
9629;; XOP parallel XMM conditional moves
;; Conditional move over every mode of the V iterator, expressed as an
;; if_then_else whose selector is operand 3, with operand 1 as the "then"
;; value and operand 2 as the "else" value.  The two constraint
;; alternatives allow a memory operand in either operand 2 (selector in a
;; register) or operand 3 (operand 2 in a register), never both.
;; No "mode" attribute is set for this pattern.
9630(define_insn "xop_pcmov_<mode><avxsizesuffix>"
9631  [(set (match_operand:V 0 "register_operand" "=x,x")
9632	(if_then_else:V
9633	  (match_operand:V 3 "nonimmediate_operand" "x,m")
9634	  (match_operand:V 1 "register_operand" "x,x")
9635	  (match_operand:V 2 "nonimmediate_operand" "xm,x")))]
9636  "TARGET_XOP"
9637  "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9638  [(set_attr "type" "sse4arg")])
9639
9640;; XOP horizontal add/subtract instructions
;; Horizontal add, bytes to halfwords: the even-indexed and odd-indexed
;; bytes of the V16QI source are each extended to V8HI (extension code
;; from the any_extend iterator, reflected in the <u> name component) and
;; added, so result element i is the sum of source bytes 2i and 2i+1.
9641(define_insn "xop_phadd<u>bw"
9642  [(set (match_operand:V8HI 0 "register_operand" "=x")
9643	(plus:V8HI
9644	 (any_extend:V8HI
9645	  (vec_select:V8QI
9646	   (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9647	   (parallel [(const_int 0) (const_int 2)
9648		      (const_int 4) (const_int 6)
9649		      (const_int 8) (const_int 10)
9650		      (const_int 12) (const_int 14)])))
9651	 (any_extend:V8HI
9652	  (vec_select:V8QI
9653	   (match_dup 1)
9654	   (parallel [(const_int 1) (const_int 3)
9655		      (const_int 5) (const_int 7)
9656		      (const_int 9) (const_int 11)
9657		      (const_int 13) (const_int 15)])))))]
9658  "TARGET_XOP"
9659  "vphadd<u>bw\t{%1, %0|%0, %1}"
9660  [(set_attr "type" "sseiadd1")])
9661
;; Horizontal add, bytes to words: result element i is the sum of the
;; four extended source bytes 4i .. 4i+3.  The RTL expresses this as
;; ((b[4i] + b[4i+1]) + (b[4i+2] + b[4i+3])), each term being a V4QI
;; selection from the V16QI source extended to V4SI via any_extend.
9662(define_insn "xop_phadd<u>bd"
9663  [(set (match_operand:V4SI 0 "register_operand" "=x")
9664	(plus:V4SI
9665	 (plus:V4SI
9666	  (any_extend:V4SI
9667	   (vec_select:V4QI
9668	    (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9669	    (parallel [(const_int 0) (const_int 4)
9670		       (const_int 8) (const_int 12)])))
9671	  (any_extend:V4SI
9672	   (vec_select:V4QI
9673	    (match_dup 1)
9674	    (parallel [(const_int 1) (const_int 5)
9675		       (const_int 9) (const_int 13)]))))
9676	 (plus:V4SI
9677	  (any_extend:V4SI
9678	   (vec_select:V4QI
9679	    (match_dup 1)
9680	    (parallel [(const_int 2) (const_int 6)
9681		       (const_int 10) (const_int 14)])))
9682	  (any_extend:V4SI
9683	   (vec_select:V4QI
9684	    (match_dup 1)
9685	    (parallel [(const_int 3) (const_int 7)
9686		       (const_int 11) (const_int 15)]))))))]
9687  "TARGET_XOP"
9688  "vphadd<u>bd\t{%1, %0|%0, %1}"
9689  [(set_attr "type" "sseiadd1")])
9690
;; Horizontal add, bytes to quadwords: result element i is the sum of the
;; eight extended source bytes 8i .. 8i+7.  The RTL is a balanced tree of
;; plus operations over eight V2QI selections (indices k and k+8 for
;; k = 0..7), each extended to V2DI via any_extend.
9691(define_insn "xop_phadd<u>bq"
9692  [(set (match_operand:V2DI 0 "register_operand" "=x")
9693	(plus:V2DI
9694	 (plus:V2DI
9695	  (plus:V2DI
9696	   (any_extend:V2DI
9697	    (vec_select:V2QI
9698	     (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9699	     (parallel [(const_int 0) (const_int 8)])))
9700	   (any_extend:V2DI
9701	    (vec_select:V2QI
9702	     (match_dup 1)
9703	     (parallel [(const_int 1) (const_int 9)]))))
9704	  (plus:V2DI
9705	   (any_extend:V2DI
9706	    (vec_select:V2QI
9707	     (match_dup 1)
9708	     (parallel [(const_int 2) (const_int 10)])))
9709	   (any_extend:V2DI
9710	    (vec_select:V2QI
9711	     (match_dup 1)
9712	     (parallel [(const_int 3) (const_int 11)])))))
9713	 (plus:V2DI
9714	  (plus:V2DI
9715	   (any_extend:V2DI
9716	    (vec_select:V2QI
9717	     (match_dup 1)
9718	     (parallel [(const_int 4) (const_int 12)])))
9719	   (any_extend:V2DI
9720	    (vec_select:V2QI
9721	     (match_dup 1)
9722	     (parallel [(const_int 5) (const_int 13)]))))
9723	  (plus:V2DI
9724	   (any_extend:V2DI
9725	    (vec_select:V2QI
9726	     (match_dup 1)
9727	     (parallel [(const_int 6) (const_int 14)])))
9728	   (any_extend:V2DI
9729	    (vec_select:V2QI
9730	     (match_dup 1)
9731	     (parallel [(const_int 7) (const_int 15)])))))))]
9732  "TARGET_XOP"
9733  "vphadd<u>bq\t{%1, %0|%0, %1}"
9734  [(set_attr "type" "sseiadd1")])
9735
;; Horizontal add, halfwords to words: the even-indexed and odd-indexed
;; halfwords of the V8HI source are each extended to V4SI via any_extend
;; and added, so result element i is the sum of source elements 2i and
;; 2i+1.
9736(define_insn "xop_phadd<u>wd"
9737  [(set (match_operand:V4SI 0 "register_operand" "=x")
9738	(plus:V4SI
9739	 (any_extend:V4SI
9740	  (vec_select:V4HI
9741	   (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9742	   (parallel [(const_int 0) (const_int 2)
9743		      (const_int 4) (const_int 6)])))
9744	 (any_extend:V4SI
9745	  (vec_select:V4HI
9746	   (match_dup 1)
9747	   (parallel [(const_int 1) (const_int 3)
9748		      (const_int 5) (const_int 7)])))))]
9749  "TARGET_XOP"
9750  "vphadd<u>wd\t{%1, %0|%0, %1}"
9751  [(set_attr "type" "sseiadd1")])
9752
;; Horizontal add, halfwords to quadwords: result element i is the sum of
;; the four extended source halfwords 4i .. 4i+3, written as
;; ((h[4i] + h[4i+1]) + (h[4i+2] + h[4i+3])) with each term a V2HI
;; selection from the V8HI source extended to V2DI via any_extend.
9753(define_insn "xop_phadd<u>wq"
9754  [(set (match_operand:V2DI 0 "register_operand" "=x")
9755	(plus:V2DI
9756	 (plus:V2DI
9757	  (any_extend:V2DI
9758	   (vec_select:V2HI
9759	    (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9760	    (parallel [(const_int 0) (const_int 4)])))
9761	  (any_extend:V2DI
9762	   (vec_select:V2HI
9763	    (match_dup 1)
9764	    (parallel [(const_int 1) (const_int 5)]))))
9765	 (plus:V2DI
9766	  (any_extend:V2DI
9767	   (vec_select:V2HI
9768	    (match_dup 1)
9769	    (parallel [(const_int 2) (const_int 6)])))
9770	  (any_extend:V2DI
9771	   (vec_select:V2HI
9772	    (match_dup 1)
9773	    (parallel [(const_int 3) (const_int 7)]))))))]
9774  "TARGET_XOP"
9775  "vphadd<u>wq\t{%1, %0|%0, %1}"
9776  [(set_attr "type" "sseiadd1")])
9777
;; Horizontal add, words to quadwords: elements (0, 2) and (1, 3) of the
;; V4SI source are each extended to V2DI via any_extend and added, so
;; result element i is the sum of source elements 2i and 2i+1.
9778(define_insn "xop_phadd<u>dq"
9779  [(set (match_operand:V2DI 0 "register_operand" "=x")
9780	(plus:V2DI
9781	 (any_extend:V2DI
9782	  (vec_select:V2SI
9783	   (match_operand:V4SI 1 "nonimmediate_operand" "xm")
9784	   (parallel [(const_int 0) (const_int 2)])))
9785	 (any_extend:V2DI
9786	  (vec_select:V2SI
9787	   (match_dup 1)
9788	   (parallel [(const_int 1) (const_int 3)])))))]
9789  "TARGET_XOP"
9790  "vphadd<u>dq\t{%1, %0|%0, %1}"
9791  [(set_attr "type" "sseiadd1")])
9792
;; Horizontal subtract, bytes to halfwords (signed only): result element
;; i is sign_extend (byte 2i) minus sign_extend (byte 2i+1) of the V16QI
;; source.
9793(define_insn "xop_phsubbw"
9794  [(set (match_operand:V8HI 0 "register_operand" "=x")
9795	(minus:V8HI
9796	 (sign_extend:V8HI
9797	  (vec_select:V8QI
9798	   (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9799	   (parallel [(const_int 0) (const_int 2)
9800		      (const_int 4) (const_int 6)
9801		      (const_int 8) (const_int 10)
9802		      (const_int 12) (const_int 14)])))
9803	 (sign_extend:V8HI
9804	  (vec_select:V8QI
9805	   (match_dup 1)
9806	   (parallel [(const_int 1) (const_int 3)
9807		      (const_int 5) (const_int 7)
9808		      (const_int 9) (const_int 11)
9809		      (const_int 13) (const_int 15)])))))]
9810  "TARGET_XOP"
9811  "vphsubbw\t{%1, %0|%0, %1}"
9812  [(set_attr "type" "sseiadd1")])
9813
;; Horizontal subtract, halfwords to words (signed only): result element
;; i is sign_extend (element 2i) minus sign_extend (element 2i+1) of the
;; V8HI source.
9814(define_insn "xop_phsubwd"
9815  [(set (match_operand:V4SI 0 "register_operand" "=x")
9816	(minus:V4SI
9817	 (sign_extend:V4SI
9818	  (vec_select:V4HI
9819	   (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9820	   (parallel [(const_int 0) (const_int 2)
9821		      (const_int 4) (const_int 6)])))
9822	 (sign_extend:V4SI
9823	  (vec_select:V4HI
9824	   (match_dup 1)
9825	   (parallel [(const_int 1) (const_int 3)
9826		      (const_int 5) (const_int 7)])))))]
9827  "TARGET_XOP"
9828  "vphsubwd\t{%1, %0|%0, %1}"
9829  [(set_attr "type" "sseiadd1")])
9830
;; XOP horizontal subtract, doubleword to quadword.  Each V2DI result element
;; is the sign-extended even doubleword (0, 2) of operand 1 minus the
;; sign-extended odd doubleword (1, 3) that follows it.
9831(define_insn "xop_phsubdq"
9832  [(set (match_operand:V2DI 0 "register_operand" "=x")
9833	(minus:V2DI
9834	 (sign_extend:V2DI
9835	  (vec_select:V2SI
9836	   (match_operand:V4SI 1 "nonimmediate_operand" "xm")
9837	   (parallel [(const_int 0) (const_int 2)])))
9838	 (sign_extend:V2DI
9839	  (vec_select:V2SI
9840	   (match_dup 1)
9841	   (parallel [(const_int 1) (const_int 3)])))))]
9842  "TARGET_XOP"
9843  "vphsubdq\t{%1, %0|%0, %1}"
9844  [(set_attr "type" "sseiadd1")])
9845
9846;; XOP permute instructions
;; vpperm on three V16QI inputs.  The operation itself is opaque to the RTL
;; (UNSPEC_XOP_PERMUTE).  The two alternatives let either operand 2 or
;; operand 3 live in memory; the insn condition rejects the case where both
;; are memory references.
9847(define_insn "xop_pperm"
9848  [(set (match_operand:V16QI 0 "register_operand" "=x,x")
9849	(unspec:V16QI
9850	  [(match_operand:V16QI 1 "register_operand" "x,x")
9851	   (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
9852	   (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
9853	  UNSPEC_XOP_PERMUTE))]
9854  "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
9855  "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9856  [(set_attr "type" "sse4arg")
9857   (set_attr "mode" "TI")])
9858
9859;; XOP pack instructions that combine two vectors into a smaller vector
;; Pack two V2DI vectors into one V4SI: the result is the concatenation of
;; the truncated operand 1 and the truncated operand 2.  Operand 3 appears
;; in the RTL only as a (use ...); it is the V16QI value handed to vpperm as
;; its last source in the output template.  At most one of operands 2 and 3
;; may be in memory.
9860(define_insn "xop_pperm_pack_v2di_v4si"
9861  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
9862	(vec_concat:V4SI
9863	 (truncate:V2SI
9864	  (match_operand:V2DI 1 "register_operand" "x,x"))
9865	 (truncate:V2SI
9866	  (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
9867   (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
9868  "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
9869  "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9870  [(set_attr "type" "sse4arg")
9871   (set_attr "mode" "TI")])
9872
;; Pack two V4SI vectors into one V8HI: the result is the concatenation of
;; the truncated operand 1 and the truncated operand 2.  Operand 3 (V16QI)
;; is only a (use ...) in the RTL and is passed to vpperm in the template.
;; At most one of operands 2 and 3 may be in memory.
9873(define_insn "xop_pperm_pack_v4si_v8hi"
9874  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9875	(vec_concat:V8HI
9876	 (truncate:V4HI
9877	  (match_operand:V4SI 1 "register_operand" "x,x"))
9878	 (truncate:V4HI
9879	  (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
9880   (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
9881  "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
9882  "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9883  [(set_attr "type" "sse4arg")
9884   (set_attr "mode" "TI")])
9885
;; Pack two V8HI vectors into one V16QI: the result is the concatenation of
;; the truncated operand 1 and the truncated operand 2.  Operand 3 (V16QI)
;; is only a (use ...) in the RTL and is passed to vpperm in the template.
;; At most one of operands 2 and 3 may be in memory.
9886(define_insn "xop_pperm_pack_v8hi_v16qi"
9887  [(set (match_operand:V16QI 0 "register_operand" "=x,x")
9888	(vec_concat:V16QI
9889	 (truncate:V8QI
9890	  (match_operand:V8HI 1 "register_operand" "x,x"))
9891	 (truncate:V8QI
9892	  (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
9893   (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
9894  "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
9895  "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9896  [(set_attr "type" "sse4arg")
9897   (set_attr "mode" "TI")])
9898
9899;; XOP packed rotate instructions
;; Rotate left of a VI_128 vector by a scalar SImode count (XOP only).
;; When the count satisfies const_0_to_<sserotatemax>_operand the C block
;; emits nothing and the rotate pattern below is generated as written.
;; Otherwise the count is broadcast into a vector register and the
;; per-element variable rotate xop_vrotl<mode>3 is emitted instead.
9900(define_expand "rotl<mode>3"
9901  [(set (match_operand:VI_128 0 "register_operand")
9902	(rotate:VI_128
9903	 (match_operand:VI_128 1 "nonimmediate_operand")
9904	 (match_operand:SI 2 "general_operand")))]
9905  "TARGET_XOP"
9906{
9907  /* If we were given a scalar, convert it to parallel */
9908  if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
9909    {
9910      rtvec vs = rtvec_alloc (<ssescalarnum>);
9911      rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
9912      rtx reg = gen_reg_rtx (<MODE>mode);
9913      rtx op2 = operands[2];
9914      int i;
9915
      /* Bring the count into the vector's element mode before broadcasting.  */
9916      if (GET_MODE (op2) != <ssescalarmode>mode)
9917	{
9918	  op2 = gen_reg_rtx (<ssescalarmode>mode);
9919	  convert_move (op2, operands[2], false);
9920	}
9921
      /* Every element of the PARALLEL is the same count value.  */
9922      for (i = 0; i < <ssescalarnum>; i++)
9923	RTVEC_ELT (vs, i) = op2;
9924
9925      emit_insn (gen_vec_init<mode> (reg, par));
9926      emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
9927      DONE;
9928    }
9929})
9930
;; Rotate right of a VI_128 vector by a scalar SImode count (XOP only).
;; When the count satisfies const_0_to_<sserotatemax>_operand the C block
;; emits nothing and the rotatert pattern below is generated as written.
;; Otherwise the count is broadcast into a vector, negated, and passed to
;; xop_vrotl<mode>3, whose RTL rotates right for negative counts.
9931(define_expand "rotr<mode>3"
9932  [(set (match_operand:VI_128 0 "register_operand")
9933	(rotatert:VI_128
9934	 (match_operand:VI_128 1 "nonimmediate_operand")
9935	 (match_operand:SI 2 "general_operand")))]
9936  "TARGET_XOP"
9937{
9938  /* If we were given a scalar, convert it to parallel */
9939  if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
9940    {
9941      rtvec vs = rtvec_alloc (<ssescalarnum>);
9942      rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
9943      rtx neg = gen_reg_rtx (<MODE>mode);
9944      rtx reg = gen_reg_rtx (<MODE>mode);
9945      rtx op2 = operands[2];
9946      int i;
9947
      /* Bring the count into the vector's element mode before broadcasting.  */
9948      if (GET_MODE (op2) != <ssescalarmode>mode)
9949	{
9950	  op2 = gen_reg_rtx (<ssescalarmode>mode);
9951	  convert_move (op2, operands[2], false);
9952	}
9953
9954      for (i = 0; i < <ssescalarnum>; i++)
9955	RTVEC_ELT (vs, i) = op2;
9956
      /* Broadcast the count, then negate it to turn the left rotate
	 into a right rotate.  */
9957      emit_insn (gen_vec_init<mode> (reg, par));
9958      emit_insn (gen_neg<mode>2 (neg, reg));
9959      emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
9960      DONE;
9961    }
9962})
9963
;; Rotate left of every element of operand 1 by the same immediate count
;; (operand 2, restricted by const_0_to_<sserotatemax>_operand).
9964(define_insn "xop_rotl<mode>3"
9965  [(set (match_operand:VI_128 0 "register_operand" "=x")
9966	(rotate:VI_128
9967	 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
9968	 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
9969  "TARGET_XOP"
9970  "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9971  [(set_attr "type" "sseishft")
9972   (set_attr "length_immediate" "1")
9973   (set_attr "mode" "TI")])
9974
;; Rotate right of every element of operand 1 by an immediate count.  The
;; output code emits the same vprot mnemonic as the left rotate, with the
;; immediate replaced by (element size in bits - count), stored in
;; operands[3].
9975(define_insn "xop_rotr<mode>3"
9976  [(set (match_operand:VI_128 0 "register_operand" "=x")
9977	(rotatert:VI_128
9978	 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
9979	 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
9980  "TARGET_XOP"
9981{
9982  operands[3]
9983    = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
9984  return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
9985}
9986  [(set_attr "type" "sseishft")
9987   (set_attr "length_immediate" "1")
9988   (set_attr "mode" "TI")])
9989
;; Per-element variable rotate right (operand 2 is a vector of counts).
;; Implemented by negating the count vector and emitting xop_vrotl<mode>3,
;; whose RTL rotates right for negative counts.
9990(define_expand "vrotr<mode>3"
9991  [(match_operand:VI_128 0 "register_operand")
9992   (match_operand:VI_128 1 "register_operand")
9993   (match_operand:VI_128 2 "register_operand")]
9994  "TARGET_XOP"
9995{
9996  rtx reg = gen_reg_rtx (<MODE>mode);
9997  emit_insn (gen_neg<mode>2 (reg, operands[2]));
9998  emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
9999  DONE;
10000})
10001
;; Per-element variable rotate left (operand 2 is a vector of counts).
;; Forwards all three operands unchanged to xop_vrotl<mode>3.
10002(define_expand "vrotl<mode>3"
10003  [(match_operand:VI_128 0 "register_operand")
10004   (match_operand:VI_128 1 "register_operand")
10005   (match_operand:VI_128 2 "register_operand")]
10006  "TARGET_XOP"
10007{
10008  emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
10009  DONE;
10010})
10011
;; Per-element variable rotate.  As described by the RTL: where the count
;; in operand 2 is >= 0, operand 1 is rotated left by that count; otherwise
;; it is rotated right by the negated count.  Either operand 1 or operand 2
;; may be in memory, but not both (see the insn condition).
10012(define_insn "xop_vrotl<mode>3"
10013  [(set (match_operand:VI_128 0 "register_operand" "=x,x")
10014	(if_then_else:VI_128
10015	 (ge:VI_128
10016	  (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
10017	  (const_int 0))
10018	 (rotate:VI_128
10019	  (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
10020	  (match_dup 2))
10021	 (rotatert:VI_128
10022	  (match_dup 1)
10023	  (neg:VI_128 (match_dup 2)))))]
10024  "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10025  "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10026  [(set_attr "type" "sseishft")
10027   (set_attr "prefix_data16" "0")
10028   (set_attr "prefix_extra" "2")
10029   (set_attr "mode" "TI")])
10030
10031;; XOP packed shift instructions.
;; Per-element variable logical shift right for the VI12_128 modes (XOP
;; only).  Negates the count vector and emits xop_shl<mode>3, whose RTL
;; performs a logical right shift for negative counts.
10032(define_expand "vlshr<mode>3"
10033  [(set (match_operand:VI12_128 0 "register_operand")
10034	(lshiftrt:VI12_128
10035	  (match_operand:VI12_128 1 "register_operand")
10036	  (match_operand:VI12_128 2 "nonimmediate_operand")))]
10037  "TARGET_XOP"
10038{
10039  rtx neg = gen_reg_rtx (<MODE>mode);
10040  emit_insn (gen_neg<mode>2 (neg, operands[2]));
10041  emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
10042  DONE;
10043})
10044
;; Per-element variable logical shift right for the VI48_128 modes.  With
;; AVX2 the C block emits nothing and the lshiftrt pattern below is
;; generated as written.  Without AVX2 (XOP only) the count vector is
;; negated and xop_shl<mode>3 is emitted instead.
10045(define_expand "vlshr<mode>3"
10046  [(set (match_operand:VI48_128 0 "register_operand")
10047	(lshiftrt:VI48_128
10048	  (match_operand:VI48_128 1 "register_operand")
10049	  (match_operand:VI48_128 2 "nonimmediate_operand")))]
10050  "TARGET_AVX2 || TARGET_XOP"
10051{
10052  if (!TARGET_AVX2)
10053    {
10054      rtx neg = gen_reg_rtx (<MODE>mode);
10055      emit_insn (gen_neg<mode>2 (neg, operands[2]));
10056      emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
10057      DONE;
10058    }
10059})
10060
;; Per-element variable logical shift right for the VI48_256 modes (AVX2
;; only).  There is no preparation code: the lshiftrt pattern is generated
;; as written and must be matched by an insn defined elsewhere.
10061(define_expand "vlshr<mode>3"
10062  [(set (match_operand:VI48_256 0 "register_operand")
10063	(lshiftrt:VI48_256
10064	  (match_operand:VI48_256 1 "register_operand")
10065	  (match_operand:VI48_256 2 "nonimmediate_operand")))]
10066  "TARGET_AVX2")
10067
;; Per-element variable arithmetic shift right for the VI128_128 modes (XOP
;; only).  Negates the count vector and emits xop_sha<mode>3, whose RTL
;; performs an arithmetic right shift for negative counts.
10068(define_expand "vashr<mode>3"
10069  [(set (match_operand:VI128_128 0 "register_operand")
10070	(ashiftrt:VI128_128
10071	  (match_operand:VI128_128 1 "register_operand")
10072	  (match_operand:VI128_128 2 "nonimmediate_operand")))]
10073  "TARGET_XOP"
10074{
10075  rtx neg = gen_reg_rtx (<MODE>mode);
10076  emit_insn (gen_neg<mode>2 (neg, operands[2]));
10077  emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], neg));
10078  DONE;
10079})
10080
;; Per-element variable arithmetic shift right for V4SI.  With AVX2 the C
;; block emits nothing and the ashiftrt pattern below is generated as
;; written.  Without AVX2 (XOP only) the count vector is negated and
;; xop_shav4si3 is emitted instead.
10081(define_expand "vashrv4si3"
10082  [(set (match_operand:V4SI 0 "register_operand")
10083	(ashiftrt:V4SI (match_operand:V4SI 1 "register_operand")
10084		       (match_operand:V4SI 2 "nonimmediate_operand")))]
10085  "TARGET_AVX2 || TARGET_XOP"
10086{
10087  if (!TARGET_AVX2)
10088    {
10089      rtx neg = gen_reg_rtx (V4SImode);
10090      emit_insn (gen_negv4si2 (neg, operands[2]));
10091      emit_insn (gen_xop_shav4si3 (operands[0], operands[1], neg));
10092      DONE;
10093    }
10094})
10095
;; Per-element variable arithmetic shift right for V8SI (AVX2 only).  There
;; is no preparation code: the ashiftrt pattern is generated as written and
;; must be matched by an insn defined elsewhere.
10096(define_expand "vashrv8si3"
10097  [(set (match_operand:V8SI 0 "register_operand")
10098	(ashiftrt:V8SI (match_operand:V8SI 1 "register_operand")
10099		       (match_operand:V8SI 2 "nonimmediate_operand")))]
10100  "TARGET_AVX2")
10101
;; Per-element variable shift left for the VI12_128 modes (XOP only).
;; Forwards the operands unchanged to xop_sha<mode>3, whose RTL shifts left
;; for non-negative counts.
10102(define_expand "vashl<mode>3"
10103  [(set (match_operand:VI12_128 0 "register_operand")
10104	(ashift:VI12_128
10105	  (match_operand:VI12_128 1 "register_operand")
10106	  (match_operand:VI12_128 2 "nonimmediate_operand")))]
10107  "TARGET_XOP"
10108{
10109  emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
10110  DONE;
10111})
10112
;; Per-element variable shift left for the VI48_128 modes.  With AVX2 the C
;; block emits nothing and the ashift pattern below is generated as written.
;; Without AVX2 (XOP only) the count vector is forced into a register and
;; xop_sha<mode>3 is emitted instead.
10113(define_expand "vashl<mode>3"
10114  [(set (match_operand:VI48_128 0 "register_operand")
10115	(ashift:VI48_128
10116	  (match_operand:VI48_128 1 "register_operand")
10117	  (match_operand:VI48_128 2 "nonimmediate_operand")))]
10118  "TARGET_AVX2 || TARGET_XOP"
10119{
10120  if (!TARGET_AVX2)
10121    {
10122      operands[2] = force_reg (<MODE>mode, operands[2]);
10123      emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
10124      DONE;
10125    }
10126})
10127
;; Per-element variable shift left for the VI48_256 modes (AVX2 only).
;; There is no preparation code: the ashift pattern is generated as written
;; and must be matched by an insn defined elsewhere.
10128(define_expand "vashl<mode>3"
10129  [(set (match_operand:VI48_256 0 "register_operand")
10130	(ashift:VI48_256
10131	  (match_operand:VI48_256 1 "register_operand")
10132	  (match_operand:VI48_256 2 "nonimmediate_operand")))]
10133  "TARGET_AVX2")
10134
;; XOP per-element arithmetic shift.  As described by the RTL: where the
;; count in operand 2 is >= 0, operand 1 is shifted left by that count;
;; otherwise it is arithmetically shifted right by the negated count.
;; Either operand 1 or operand 2 may be in memory, but not both.
10135(define_insn "xop_sha<mode>3"
10136  [(set (match_operand:VI_128 0 "register_operand" "=x,x")
10137	(if_then_else:VI_128
10138	 (ge:VI_128
10139	  (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
10140	  (const_int 0))
10141	 (ashift:VI_128
10142	  (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
10143	  (match_dup 2))
10144	 (ashiftrt:VI_128
10145	  (match_dup 1)
10146	  (neg:VI_128 (match_dup 2)))))]
10147  "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10148  "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10149  [(set_attr "type" "sseishft")
10150   (set_attr "prefix_data16" "0")
10151   (set_attr "prefix_extra" "2")
10152   (set_attr "mode" "TI")])
10153
;; XOP per-element logical shift.  As described by the RTL: where the count
;; in operand 2 is >= 0, operand 1 is shifted left by that count; otherwise
;; it is logically shifted right by the negated count.  Either operand 1 or
;; operand 2 may be in memory, but not both.
10154(define_insn "xop_shl<mode>3"
10155  [(set (match_operand:VI_128 0 "register_operand" "=x,x")
10156	(if_then_else:VI_128
10157	 (ge:VI_128
10158	  (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
10159	  (const_int 0))
10160	 (ashift:VI_128
10161	  (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
10162	  (match_dup 2))
10163	 (lshiftrt:VI_128
10164	  (match_dup 1)
10165	  (neg:VI_128 (match_dup 2)))))]
10166  "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10167  "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10168  [(set_attr "type" "sseishft")
10169   (set_attr "prefix_data16" "0")
10170   (set_attr "prefix_extra" "2")
10171   (set_attr "mode" "TI")])
10172
;; Shift of a byte-element vector (VI1_AVX2 modes) by a scalar SImode count,
;; for every code in the any_shift iterator.  The expander always emits its
;; own insns (DONE is unconditional):
;;  - With XOP and V16QImode, the count is broadcast to all 16 elements and
;;    xop_shlv16qi3 (for LSHIFTRT) or xop_shav16qi3 (for the other codes) is
;;    emitted.  For codes other than ASHIFT the count is negated first,
;;    either at expand time for a constant or with negv16qi2 at run time.
;;  - Otherwise the work is delegated to ix86_expand_vecop_qihi.
10173(define_expand "<shift_insn><mode>3"
10174  [(set (match_operand:VI1_AVX2 0 "register_operand")
10175	(any_shift:VI1_AVX2
10176	  (match_operand:VI1_AVX2 1 "register_operand")
10177	  (match_operand:SI 2 "nonmemory_operand")))]
10178  "TARGET_SSE2"
10179{
10180  if (TARGET_XOP && <MODE>mode == V16QImode)
10181    {
10182      bool negate = false;
10183      rtx (*gen) (rtx, rtx, rtx);
10184      rtx tmp, par;
10185      int i;
10186
      /* Right shifts are expressed to the XOP insns as negative counts.  */
10187      if (<CODE> != ASHIFT)
10188	{
10189	  if (CONST_INT_P (operands[2]))
10190	    operands[2] = GEN_INT (-INTVAL (operands[2]));
10191	  else
10192	    negate = true;
10193	}
      /* Broadcast the (possibly pre-negated) count to every element.  */
10194      par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
10195      for (i = 0; i < 16; i++)
10196        XVECEXP (par, 0, i) = operands[2];
10197
10198      tmp = gen_reg_rtx (V16QImode);
10199      emit_insn (gen_vec_initv16qi (tmp, par));
10200
      /* A non-constant count is negated after the broadcast.  */
10201      if (negate)
10202	emit_insn (gen_negv16qi2 (tmp, tmp));
10203
10204      gen = (<CODE> == LSHIFTRT ? gen_xop_shlv16qi3 : gen_xop_shav16qi3);
10205      emit_insn (gen (operands[0], operands[1], tmp));
10206    }
10207  else
10208    ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]);
10209  DONE;
10210})
10211
;; Arithmetic shift right of V2DI by a scalar DImode count (XOP only).
;; The count is negated (at expand time when constant, otherwise with
;; negv2di2 after the broadcast), placed in both elements of a V2DI
;; register, and passed to xop_shav2di3, whose RTL shifts right
;; arithmetically for negative counts.
10212(define_expand "ashrv2di3"
10213  [(set (match_operand:V2DI 0 "register_operand")
10214	(ashiftrt:V2DI
10215	  (match_operand:V2DI 1 "register_operand")
10216	  (match_operand:DI 2 "nonmemory_operand")))]
10217  "TARGET_XOP"
10218{
10219  rtx reg = gen_reg_rtx (V2DImode);
10220  rtx par;
10221  bool negate = false;
10222  int i;
10223
10224  if (CONST_INT_P (operands[2]))
10225    operands[2] = GEN_INT (-INTVAL (operands[2]));
10226  else
10227    negate = true;
10228
10229  par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2));
10230  for (i = 0; i < 2; i++)
10231    XVECEXP (par, 0, i) = operands[2];
10232
10233  emit_insn (gen_vec_initv2di (reg, par));
10234
10235  if (negate)
10236    emit_insn (gen_negv2di2 (reg, reg));
10237
10238  emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg));
10239  DONE;
10240})
10241
10242;; XOP FRCZ support
;; Full-vector vfrcz over the FMAMODE modes.  The operation is opaque to
;; the RTL (UNSPEC_FRCZ applied to operand 1).
10243(define_insn "xop_frcz<mode>2"
10244  [(set (match_operand:FMAMODE 0 "register_operand" "=x")
10245	(unspec:FMAMODE
10246	 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
10247	 UNSPEC_FRCZ))]
10248  "TARGET_XOP"
10249  "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
10250  [(set_attr "type" "ssecvt1")
10251   (set_attr "mode" "<MODE>")])
10252
;; Expander for the scalar form of vfrcz on VF_128 vectors.  The vec_merge
;; mask (const_int 1) takes element 0 from the UNSPEC_FRCZ result and the
;; remaining elements from operand 2, which the preparation statement sets
;; to the zero vector of the mode.
10253(define_expand "xop_vmfrcz<mode>2"
10254  [(set (match_operand:VF_128 0 "register_operand")
10255	(vec_merge:VF_128
10256	  (unspec:VF_128
10257	   [(match_operand:VF_128 1 "nonimmediate_operand")]
10258	   UNSPEC_FRCZ)
10259	  (match_dup 2)
10260	  (const_int 1)))]
10261  "TARGET_XOP"
10262  "operands[2] = CONST0_RTX (<MODE>mode);")
10263
;; Insn matched by the xop_vmfrcz<mode>2 expander: element 0 comes from the
;; UNSPEC_FRCZ result, all other elements from operand 2, which must satisfy
;; const0_operand.  Operand 2 has no constraint and is not printed in the
;; output template.
10264(define_insn "*xop_vmfrcz<mode>2"
10265  [(set (match_operand:VF_128 0 "register_operand" "=x")
10266	(vec_merge:VF_128
10267	  (unspec:VF_128
10268	   [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
10269	   UNSPEC_FRCZ)
10270	  (match_operand:VF_128 2 "const0_operand")
10271	  (const_int 1)))]
10272  "TARGET_XOP"
10273  "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
10274  [(set_attr "type" "ssecvt1")
10275   (set_attr "mode" "<MODE>")])
10276
;; XOP vector integer compare.  Operand 1 is the comparison operator itself
;; (any code accepted by ix86_comparison_int_operator) applied to operands 2
;; and 3; %Y1 in the template prints the condition part of the mnemonic
;; (presumably via the i386 operand printer -- confirm there).
10277(define_insn "xop_maskcmp<mode>3"
10278  [(set (match_operand:VI_128 0 "register_operand" "=x")
10279	(match_operator:VI_128 1 "ix86_comparison_int_operator"
10280	 [(match_operand:VI_128 2 "register_operand" "x")
10281	  (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
10282  "TARGET_XOP"
10283  "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
10284  [(set_attr "type" "sse4arg")
10285   (set_attr "prefix_data16" "0")
10286   (set_attr "prefix_rep" "0")
10287   (set_attr "prefix_extra" "2")
10288   (set_attr "length_immediate" "1")
10289   (set_attr "mode" "TI")])
10290
;; Unsigned counterpart of xop_maskcmp<mode>3: operand 1 is a comparison
;; accepted by ix86_comparison_uns_operator, and the template adds the "u"
;; after the condition printed by %Y1.
10291(define_insn "xop_maskcmp_uns<mode>3"
10292  [(set (match_operand:VI_128 0 "register_operand" "=x")
10293	(match_operator:VI_128 1 "ix86_comparison_uns_operator"
10294	 [(match_operand:VI_128 2 "register_operand" "x")
10295	  (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
10296  "TARGET_XOP"
10297  "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
10298  [(set_attr "type" "ssecmp")
10299   (set_attr "prefix_data16" "0")
10300   (set_attr "prefix_rep" "0")
10301   (set_attr "prefix_extra" "2")
10302   (set_attr "length_immediate" "1")
10303   (set_attr "mode" "TI")])
10304
10305;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
10306;; and pcomneu* not to be converted to the signed ones in case somebody needs
10307;; the exact instruction generated for the intrinsic.
;; Same unsigned comparison as xop_maskcmp_uns<mode>3, but wrapped in
;; UNSPEC_XOP_UNSIGNED_CMP so that the comparison is opaque to the RTL
;; optimizers and the requested instruction is emitted unchanged.
10308(define_insn "xop_maskcmp_uns2<mode>3"
10309  [(set (match_operand:VI_128 0 "register_operand" "=x")
10310	(unspec:VI_128
10311	 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
10312	  [(match_operand:VI_128 2 "register_operand" "x")
10313	   (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
10314	 UNSPEC_XOP_UNSIGNED_CMP))]
10315  "TARGET_XOP"
10316  "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
10317  [(set_attr "type" "ssecmp")
10318   (set_attr "prefix_data16" "0")
10319   (set_attr "prefix_extra" "2")
10320   (set_attr "length_immediate" "1")
10321   (set_attr "mode" "TI")])
10322
10323;; Pcomtrue and pcomfalse support.  These are useless instructions, but are
10324;; being added here to be complete.
;; vpcomtrue / vpcomfalse.  Operand 3 is a constant selector: a non-zero
;; value emits vpcomtrue, zero emits vpcomfalse.  Operands 1 and 2 are the
;; two vector sources printed in the template.
10325(define_insn "xop_pcom_tf<mode>3"
10326  [(set (match_operand:VI_128 0 "register_operand" "=x")
10327	(unspec:VI_128
10328	  [(match_operand:VI_128 1 "register_operand" "x")
10329	   (match_operand:VI_128 2 "nonimmediate_operand" "xm")
10330	   (match_operand:SI 3 "const_int_operand" "n")]
10331	  UNSPEC_XOP_TRUEFALSE))]
10332  "TARGET_XOP"
10333{
10334  return ((INTVAL (operands[3]) != 0)
10335	  ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10336	  : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
10337}
10338  [(set_attr "type" "ssecmp")
10339   (set_attr "prefix_data16" "0")
10340   (set_attr "prefix_extra" "2")
10341   (set_attr "length_immediate" "1")
10342   (set_attr "mode" "TI")])
10343
;; vpermil2ps / vpermil2pd.  Operands 1 and 2 are the two floating-point
;; source vectors, operand 3 is the integer selector vector (only this one
;; may be in memory) and operand 4 is a 2-bit immediate.  The operation is
;; opaque to the RTL (UNSPEC_VPERMIL2).
;;
;; Operand 2 must NOT carry the '%' commutativity modifier: '%' would let
;; the register allocator exchange operand 2 with the following operand,
;; which here is the selector in a different (integer) mode and with a
;; different meaning, silently changing the result.
10344(define_insn "xop_vpermil2<mode>3"
10345  [(set (match_operand:VF 0 "register_operand" "=x")
10346	(unspec:VF
10347	  [(match_operand:VF 1 "register_operand" "x")
10348	   (match_operand:VF 2 "nonimmediate_operand" "x")
10349	   (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm")
10350	   (match_operand:SI 4 "const_0_to_3_operand" "n")]
10351	  UNSPEC_VPERMIL2))]
10352  "TARGET_XOP"
10353  "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
10354  [(set_attr "type" "sse4arg")
10355   (set_attr "length_immediate" "1")
10356   (set_attr "mode" "<MODE>")])
10357
10358;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10359
;; AES encryption round (opaque UNSPEC_AESENC on operands 1 and 2).
;; Alternative 0 (isa noavx) ties operand 1 to the destination and emits the
;; two-operand aesenc; alternative 1 (isa avx) emits the three-operand
;; vaesenc.
10360(define_insn "aesenc"
10361  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
10362	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
10363		       (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
10364		      UNSPEC_AESENC))]
10365  "TARGET_AES"
10366  "@
10367   aesenc\t{%2, %0|%0, %2}
10368   vaesenc\t{%2, %1, %0|%0, %1, %2}"
10369  [(set_attr "isa" "noavx,avx")
10370   (set_attr "type" "sselog1")
10371   (set_attr "prefix_extra" "1")
10372   (set_attr "prefix" "orig,vex")
10373   (set_attr "btver2_decode" "double,double")
10374   (set_attr "mode" "TI")])
10375
;; Last AES encryption round (opaque UNSPEC_AESENCLAST on operands 1 and 2).
;; Alternative 0 (isa noavx) ties operand 1 to the destination and emits the
;; two-operand aesenclast; alternative 1 (isa avx) emits the three-operand
;; vaesenclast.
10376(define_insn "aesenclast"
10377  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
10378	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
10379		       (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
10380		      UNSPEC_AESENCLAST))]
10381  "TARGET_AES"
10382  "@
10383   aesenclast\t{%2, %0|%0, %2}
10384   vaesenclast\t{%2, %1, %0|%0, %1, %2}"
10385  [(set_attr "isa" "noavx,avx")
10386   (set_attr "type" "sselog1")
10387   (set_attr "prefix_extra" "1")
10388   (set_attr "prefix" "orig,vex")
10389   (set_attr "btver2_decode" "double,double")
10390   (set_attr "mode" "TI")])
10391
;; AES decryption round (opaque UNSPEC_AESDEC on operands 1 and 2).
;; Alternative 0 (isa noavx) ties operand 1 to the destination and emits the
;; two-operand aesdec; alternative 1 (isa avx) emits the three-operand
;; vaesdec.
10392(define_insn "aesdec"
10393  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
10394	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
10395		       (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
10396		      UNSPEC_AESDEC))]
10397  "TARGET_AES"
10398  "@
10399   aesdec\t{%2, %0|%0, %2}
10400   vaesdec\t{%2, %1, %0|%0, %1, %2}"
10401  [(set_attr "isa" "noavx,avx")
10402   (set_attr "type" "sselog1")
10403   (set_attr "prefix_extra" "1")
10404   (set_attr "prefix" "orig,vex")
10405   (set_attr "btver2_decode" "double,double")
10406   (set_attr "mode" "TI")])
10407
;; Last AES decryption round (opaque UNSPEC_AESDECLAST on operands 1 and 2).
;; Alternative 0 (isa noavx) ties operand 1 to the destination and emits the
;; two-operand aesdeclast; alternative 1 (isa avx) emits the three-operand
;; vaesdeclast.
10408(define_insn "aesdeclast"
10409  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
10410	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
10411		       (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
10412		      UNSPEC_AESDECLAST))]
10413  "TARGET_AES"
10414  "@
10415   aesdeclast\t{%2, %0|%0, %2}
10416   vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
10417  [(set_attr "isa" "noavx,avx")
10418   (set_attr "type" "sselog1")
10419   (set_attr "prefix_extra" "1")
10420   (set_attr "prefix" "orig,vex")
10421   (set_attr "btver2_decode" "double,double")
10422   (set_attr "mode" "TI")])
10423
;; aesimc: opaque UNSPEC_AESIMC of operand 1.  A single alternative serves
;; both encodings; the "%v" in the template together with the "maybe_vex"
;; prefix attribute presumably selects the VEX form when AVX is enabled --
;; confirm against the i386 operand printer.
10424(define_insn "aesimc"
10425  [(set (match_operand:V2DI 0 "register_operand" "=x")
10426	(unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
10427		      UNSPEC_AESIMC))]
10428  "TARGET_AES"
10429  "%vaesimc\t{%1, %0|%0, %1}"
10430  [(set_attr "type" "sselog1")
10431   (set_attr "prefix_extra" "1")
10432   (set_attr "prefix" "maybe_vex")
10433   (set_attr "mode" "TI")])
10434
;; aeskeygenassist: opaque UNSPEC_AESKEYGENASSIST of operand 1 and an 8-bit
;; immediate (operand 2, 0..255).  As in aesimc, "%v" and the "maybe_vex"
;; prefix attribute presumably select the VEX form when AVX is enabled.
10435(define_insn "aeskeygenassist"
10436  [(set (match_operand:V2DI 0 "register_operand" "=x")
10437	(unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
10438		      (match_operand:SI 2 "const_0_to_255_operand" "n")]
10439		     UNSPEC_AESKEYGENASSIST))]
10440  "TARGET_AES"
10441  "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
10442  [(set_attr "type" "sselog1")
10443   (set_attr "prefix_extra" "1")
10444   (set_attr "length_immediate" "1")
10445   (set_attr "prefix" "maybe_vex")
10446   (set_attr "mode" "TI")])
10447
;; pclmulqdq: opaque UNSPEC_PCLMUL of operands 1 and 2 and an 8-bit
;; immediate (operand 3, 0..255).  Alternative 0 (isa noavx) ties operand 1
;; to the destination and emits the legacy form; alternative 1 (isa avx)
;; emits the three-source-operand vpclmulqdq.
10448(define_insn "pclmulqdq"
10449  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
10450	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
10451		      (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")
10452		      (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10453		     UNSPEC_PCLMUL))]
10454  "TARGET_PCLMUL"
10455  "@
10456   pclmulqdq\t{%3, %2, %0|%0, %2, %3}
10457   vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10458  [(set_attr "isa" "noavx,avx")
10459   (set_attr "type" "sselog1")
10460   (set_attr "prefix_extra" "1")
10461   (set_attr "length_immediate" "1")
10462   (set_attr "prefix" "orig,vex")
10463   (set_attr "mode" "TI")])
10464
;; Expander for vzeroall.  The preparation code builds operand 0 as a
;; PARALLEL of nregs + 1 elements: an UNSPECV_VZEROALL unspec_volatile
;; followed by one SET per SSE register that stores the V8SImode zero
;; vector.  nregs is 16 for 64-bit targets and 8 otherwise.
10465(define_expand "avx_vzeroall"
10466  [(match_par_dup 0 [(const_int 0)])]
10467  "TARGET_AVX"
10468{
10469  int nregs = TARGET_64BIT ? 16 : 8;
10470  int regno;
10471
10472  operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
10473
  /* Element 0 is the volatile marker for the instruction itself.  */
10474  XVECEXP (operands[0], 0, 0)
10475    = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
10476			       UNSPECV_VZEROALL);
10477
  /* Elements 1..nregs record that each SSE register becomes zero.  */
10478  for (regno = 0; regno < nregs; regno++)
10479    XVECEXP (operands[0], 0, regno + 1)
10480      = gen_rtx_SET (VOIDmode,
10481		     gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
10482		     CONST0_RTX (V8SImode));
10483})
10484
;; Insn for vzeroall.  It matches a PARALLEL accepted by the
;; vzeroall_operation predicate whose first element is the
;; UNSPECV_VZEROALL unspec_volatile.
10485(define_insn "*avx_vzeroall"
10486  [(match_parallel 0 "vzeroall_operation"
10487    [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
10488  "TARGET_AVX"
10489  "vzeroall"
10490  [(set_attr "type" "sse")
10491   (set_attr "modrm" "0")
10492   (set_attr "memory" "none")
10493   (set_attr "prefix" "vex")
10494   (set_attr "btver2_decode" "vector")
10495   (set_attr "mode" "OI")])
10496
10497;; Clear the upper 128 bits of AVX registers, equivalent to a NOP
10498;; if the upper 128 bits are unused.
;; Insn for vzeroupper.  The pattern is a bare unspec_volatile
;; (UNSPECV_VZEROUPPER) with no operands and no register sets.
10499(define_insn "avx_vzeroupper"
10500  [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)]
10501  "TARGET_AVX"
10502  "vzeroupper"
10503  [(set_attr "type" "sse")
10504   (set_attr "modrm" "0")
10505   (set_attr "memory" "none")
10506   (set_attr "prefix" "vex")
10507   (set_attr "btver2_decode" "vector")
10508   (set_attr "mode" "OI")])
10509
;; Maps an integer vector mode to the 128-bit mode with the same element
;; type: each 256-bit mode maps to its 128-bit counterpart and each 128-bit
;; mode maps to itself.
10510(define_mode_attr AVXTOSSEMODE
10511  [(V4DI "V2DI") (V2DI "V2DI")
10512   (V8SI "V4SI") (V4SI "V4SI")
10513   (V16HI "V8HI") (V8HI "V8HI")
10514   (V32QI "V16QI") (V16QI "V16QI")])
10515
;; vpbroadcast from a 128-bit source: element 0 of operand 1 (whose mode is
;; given by AVXTOSSEMODE) is duplicated into every element of the VI-mode
;; destination.
10516(define_insn "avx2_pbroadcast<mode>"
10517  [(set (match_operand:VI 0 "register_operand" "=x")
10518	(vec_duplicate:VI
10519	  (vec_select:<ssescalarmode>
10520	    (match_operand:<AVXTOSSEMODE> 1 "nonimmediate_operand" "xm")
10521	    (parallel [(const_int 0)]))))]
10522  "TARGET_AVX2"
10523  "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %1}"
10524  [(set_attr "type" "ssemov")
10525   (set_attr "prefix_extra" "1")
10526   (set_attr "prefix" "vex")
10527   (set_attr "mode" "<sseinsnmode>")])
10528
;; vpbroadcast where the source has the same 256-bit mode as the
;; destination: element 0 of operand 1 is duplicated into every element.
;; The template prints operand 1 with the "x" modifier (presumably its
;; 128-bit register name -- confirm against the i386 operand printer).
10529(define_insn "avx2_pbroadcast<mode>_1"
10530  [(set (match_operand:VI_256 0 "register_operand" "=x")
10531	(vec_duplicate:VI_256
10532	  (vec_select:<ssescalarmode>
10533	    (match_operand:VI_256 1 "nonimmediate_operand" "xm")
10534	    (parallel [(const_int 0)]))))]
10535  "TARGET_AVX2"
10536  "vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}"
10537  [(set_attr "type" "ssemov")
10538   (set_attr "prefix_extra" "1")
10539   (set_attr "prefix" "vex")
10540   (set_attr "mode" "<sseinsnmode>")])
10541
;; Variable cross-lane permute for the VI4F_256 modes.  The operation is
;; opaque to the RTL (UNSPEC_VPERMVAR).  Operand 1 is the data vector and
;; may be in memory; operand 2 is a V8SI register.  Note that the template
;; prints operand 2 before operand 1 in Intel syntax.
10542(define_insn "avx2_permvar<mode>"
10543  [(set (match_operand:VI4F_256 0 "register_operand" "=x")
10544	(unspec:VI4F_256
10545	  [(match_operand:VI4F_256 1 "nonimmediate_operand" "xm")
10546	   (match_operand:V8SI 2 "register_operand" "x")]
10547	  UNSPEC_VPERMVAR))]
10548  "TARGET_AVX2"
10549  "vperm<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
10550  [(set_attr "type" "sselog")
10551   (set_attr "prefix" "vex")
10552   (set_attr "mode" "OI")])
10553
;; Immediate-controlled permute for the VI8F_256 modes.  The 8-bit
;; immediate (operand 2) is split into four 2-bit element selectors, lowest
;; bits first, which are passed to avx2_perm<mode>_1 as separate operands.
10554(define_expand "avx2_perm<mode>"
10555  [(match_operand:VI8F_256 0 "register_operand")
10556   (match_operand:VI8F_256 1 "nonimmediate_operand")
10557   (match_operand:SI 2 "const_0_to_255_operand")]
10558  "TARGET_AVX2"
10559{
10560  int mask = INTVAL (operands[2]);
10561  emit_insn (gen_avx2_perm<mode>_1 (operands[0], operands[1],
10562				    GEN_INT ((mask >> 0) & 3),
10563				    GEN_INT ((mask >> 2) & 3),
10564				    GEN_INT ((mask >> 4) & 3),
10565				    GEN_INT ((mask >> 6) & 3)));
10566  DONE;
10567})
10568
;; Insn form of the immediate permute: a vec_select of operand 1 with four
;; constant selectors in the range 0..3 (operands 2-5).  The output code
;; packs the selectors back into one 8-bit immediate, two bits each with
;; operand 2 in the lowest bits, and overwrites operands[2] with it for
;; printing.
10569(define_insn "avx2_perm<mode>_1"
10570  [(set (match_operand:VI8F_256 0 "register_operand" "=x")
10571	(vec_select:VI8F_256
10572	  (match_operand:VI8F_256 1 "nonimmediate_operand" "xm")
10573	  (parallel [(match_operand 2 "const_0_to_3_operand")
10574		     (match_operand 3 "const_0_to_3_operand")
10575		     (match_operand 4 "const_0_to_3_operand")
10576		     (match_operand 5 "const_0_to_3_operand")])))]
10577  "TARGET_AVX2"
10578{
10579  int mask = 0;
10580  mask |= INTVAL (operands[2]) << 0;
10581  mask |= INTVAL (operands[3]) << 2;
10582  mask |= INTVAL (operands[4]) << 4;
10583  mask |= INTVAL (operands[5]) << 6;
10584  operands[2] = GEN_INT (mask);
10585  return "vperm<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
10586}
10587  [(set_attr "type" "sselog")
10588   (set_attr "prefix" "vex")
10589   (set_attr "mode" "<sseinsnmode>")])
10590
;; vperm2i128 on two V4DI sources with an 8-bit immediate (operand 3,
;; 0..255).  The operation is opaque to the RTL (UNSPEC_VPERMTI); only
;; operand 2 may be in memory.
10591(define_insn "avx2_permv2ti"
10592  [(set (match_operand:V4DI 0 "register_operand" "=x")
10593	(unspec:V4DI
10594	  [(match_operand:V4DI 1 "register_operand" "x")
10595	   (match_operand:V4DI 2 "nonimmediate_operand" "xm")
10596	   (match_operand:SI 3 "const_0_to_255_operand" "n")]
10597	  UNSPEC_VPERMTI))]
10598  "TARGET_AVX2"
10599  "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10600  [(set_attr "type" "sselog")
10601   (set_attr "prefix" "vex")
10602   (set_attr "mode" "OI")])
10603
;; vbroadcastsd from a register: element 0 of the V2DF register operand 1
;; is duplicated into all four elements of the V4DF destination.
10604(define_insn "avx2_vec_dupv4df"
10605  [(set (match_operand:V4DF 0 "register_operand" "=x")
10606	(vec_duplicate:V4DF
10607	  (vec_select:DF
10608	    (match_operand:V2DF 1 "register_operand" "x")
10609	    (parallel [(const_int 0)]))))]
10610  "TARGET_AVX2"
10611  "vbroadcastsd\t{%1, %0|%0, %1}"
10612  [(set_attr "type" "sselog1")
10613   (set_attr "prefix" "vex")
10614   (set_attr "mode" "V4DF")])
10615
10616;; Modes handled by AVX vec_dup patterns.
;; These are the four 256-bit vector modes with 32-bit or 64-bit elements.
10617(define_mode_iterator AVX_VEC_DUP_MODE
10618  [V8SI V8SF V4DI V4DF])
10619
;; Broadcast a scalar into every element of a 256-bit vector.
;;  - Alternative 0: memory source, vbroadcast directly from memory.
;;  - Alternative 1 (isa avx2): register source, vbroadcast with operand 1
;;    printed through the "x" modifier.
;;  - Alternative 2 (isa noavx2): register source, emitted as "#", i.e. the
;;    insn must be split before output (presumably by the register-source
;;    vec_duplicate define_split later in this file).
10620(define_insn "vec_dup<mode>"
10621  [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x,x")
10622	(vec_duplicate:AVX_VEC_DUP_MODE
10623	  (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,x,?x")))]
10624  "TARGET_AVX"
10625  "@
10626   vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
10627   vbroadcast<ssescalarmodesuffix>\t{%x1, %0|%0, %x1}
10628   #"
10629  [(set_attr "type" "ssemov")
10630   (set_attr "prefix_extra" "1")
10631   (set_attr "prefix" "vex")
10632   (set_attr "isa" "*,avx2,noavx2")
10633   (set_attr "mode" "V8SF")])
10634
;; vbroadcasti128: the 256-bit result is the half-width memory operand 1
;; concatenated with itself.  Only a memory source is accepted.
10635(define_insn "avx2_vbroadcasti128_<mode>"
10636  [(set (match_operand:VI_256 0 "register_operand" "=x")
10637	(vec_concat:VI_256
10638	  (match_operand:<ssehalfvecmode> 1 "memory_operand" "m")
10639	  (match_dup 1)))]
10640  "TARGET_AVX2"
10641  "vbroadcasti128\t{%1, %0|%0, %1}"
10642  [(set_attr "type" "ssemov")
10643   (set_attr "prefix_extra" "1")
10644   (set_attr "prefix" "vex")
10645   (set_attr "mode" "OI")])
10646
;; After reload, on AVX targets without AVX2, split a broadcast from a
;; register into two steps: duplicate the scalar into a half-width vector,
;; then concatenate that half with itself.  Operand 2 is the destination
;; hard register viewed in the half-width mode, so the first step writes the
;; low half of the final destination.
10647(define_split
10648  [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand")
10649	(vec_duplicate:AVX_VEC_DUP_MODE
10650	  (match_operand:<ssescalarmode> 1 "register_operand")))]
10651  "TARGET_AVX && !TARGET_AVX2 && reload_completed"
10652  [(set (match_dup 2)
10653	(vec_duplicate:<ssehalfvecmode> (match_dup 1)))
10654   (set (match_dup 0)
10655	(vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
10656  "operands[2] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[0]));")
10657
;; Build a 256-bit vector from a half-width operand concatenated with
;; itself.
;;  - Alternative 0: memory source, a single vbroadcast.
;;  - Alternative 1: source tied to the destination register ("0"); vinsert
;;    with immediate 1 copies it into the other half.
;;  - Alternative 2: source in another register; vperm2 with immediate 0,
;;    printing operand 1 through the "t" modifier.
;; <i128> is a mode attribute defined elsewhere in the file; presumably it
;; selects the integer or floating-point mnemonic suffix -- confirm.
10658(define_insn "avx_vbroadcastf128_<mode>"
10659  [(set (match_operand:V_256 0 "register_operand" "=x,x,x")
10660	(vec_concat:V_256
10661	  (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
10662	  (match_dup 1)))]
10663  "TARGET_AVX"
10664  "@
10665   vbroadcast<i128>\t{%1, %0|%0, %1}
10666   vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
10667   vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
10668  [(set_attr "type" "ssemov,sselog1,sselog1")
10669   (set_attr "prefix_extra" "1")
10670   (set_attr "length_immediate" "0,1,1")
10671   (set_attr "prefix" "vex")
10672   (set_attr "mode" "<sseinsnmode>")])
10673
10674;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
10675;; If it so happens that the input is in memory, use vbroadcast.
10676;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; V4SF broadcast expressed as a vec_select whose selector is accepted by
;; avx_vbroadcast_operand; operand 3 is the index of the broadcast element.
;;  - Alternatives 0 and 1 (memory source): the address is adjusted to the
;;    selected SFmode element (elt * 4 bytes) and vbroadcastss loads it.
;;  - Alternative 2 (register source): vpermilps with immediate elt * 0x55,
;;    which repeats elt in all four 2-bit fields of the immediate.
10677(define_insn "*avx_vperm_broadcast_v4sf"
10678  [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
10679	(vec_select:V4SF
10680	  (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
10681	  (match_parallel 2 "avx_vbroadcast_operand"
10682	    [(match_operand 3 "const_int_operand" "C,n,n")])))]
10683  "TARGET_AVX"
10684{
10685  int elt = INTVAL (operands[3]);
10686  switch (which_alternative)
10687    {
10688    case 0:
10689    case 1:
10690      operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
10691      return "vbroadcastss\t{%1, %0|%0, %1}";
10692    case 2:
10693      operands[2] = GEN_INT (elt * 0x55);
10694      return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
10695    default:
10696      gcc_unreachable ();
10697    }
10698}
10699  [(set_attr "type" "ssemov,ssemov,sselog1")
10700   (set_attr "prefix_extra" "1")
10701   (set_attr "length_immediate" "0,0,1")
10702   (set_attr "prefix" "vex")
10703   (set_attr "mode" "SF,SF,V4SF")])
;; 256-bit float broadcast written as a vec_select whose parallel
;; satisfies avx_vbroadcast_operand; operand 3 is the first selector of
;; that parallel.  The insn is always output as "#" and split after
;; reload; the split is not enabled for V4DFmode when TARGET_AVX2 is set.
;;   - Register source: with TARGET_AVX2 and element 0, a single
;;     gen_vec_dup<mode> of the low scalar is emitted.  Otherwise
;;     gen_avx_vpermil<mode> replicates the element within its lane and
;;     gen_avx_vperm2f128<mode>3 then copies that lane to both halves.
;;   - Non-register source: operand 1 is re-addressed to the selected
;;     scalar and the vec_duplicate replacement template is used.
10705(define_insn_and_split "*avx_vperm_broadcast_<mode>"
10706  [(set (match_operand:VF_256 0 "register_operand" "=x,x,x")
10707	(vec_select:VF_256
10708	  (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?x")
10709	  (match_parallel 2 "avx_vbroadcast_operand"
10710	    [(match_operand 3 "const_int_operand" "C,n,n")])))]
10711  "TARGET_AVX"
10712  "#"
10713  "&& reload_completed && (<MODE>mode != V4DFmode || !TARGET_AVX2)"
10714  [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
10715{
10716  rtx op0 = operands[0], op1 = operands[1];
10717  int elt = INTVAL (operands[3]);
10718
10719  if (REG_P (op1))
10720    {
10721      int mask;
10722
  /* With AVX2, element 0 of a register is handled by one vec_dup
     of the low scalar part of the source.  */
10723      if (TARGET_AVX2 && elt == 0)
10724	{
10725	  emit_insn (gen_vec_dup<mode> (op0, gen_lowpart (<ssescalarmode>mode,
10726							  op1)));
10727	  DONE;
10728	}
10729
10730      /* Shuffle element we care about into all elements of the 128-bit lane.
10731	 The other lane gets shuffled too, but we don't care.  */
10732      if (<MODE>mode == V4DFmode)
10733	mask = (elt & 1 ? 15 : 0);
10734      else
10735	mask = (elt & 3) * 0x55;
10736      emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
10737
10738      /* Shuffle the lane we care about into both lanes of the dest.  */
10739      mask = (elt / (<ssescalarnum> / 2)) * 0x11;
10740      emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
10741      DONE;
10742    }
10743
  /* Source is not a register: narrow operand 1 to the selected scalar
     element so that the vec_duplicate replacement template applies.  */
10744  operands[1] = adjust_address_nv (op1, <ssescalarmode>mode,
10745				   elt * GET_MODE_SIZE (<ssescalarmode>mode));
10746})
10747
;; Expander for immediate-controlled vpermil on the VF2 modes.  The 8-bit
;; immediate in operand 2 is rewritten into the equivalent vec_select
;; parallel, one selector per vector element.
(define_expand "avx_vpermil<mode>"
  [(set (match_operand:VF2 0 "register_operand")
	(vec_select:VF2
	  (match_operand:VF2 1 "nonimmediate_operand")
	  (match_operand:SI 2 "const_0_to_255_operand")))]
  "TARGET_AVX"
{
  const int imm = INTVAL (operands[2]);
  rtx sel[<ssescalarnum>];
  int i;

  /* Bit I of the immediate picks the low or high element of the
     two-element group that holds element I; (i & ~1) is the index of
     the first element of that group (0 for elements 0-1, 2 for 2-3).  */
  for (i = 0; i < <ssescalarnum>; ++i)
    sel[i] = GEN_INT (((imm >> i) & 1) + (i & ~1));

  operands[2]
    = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, sel));
})
10769
;; Expander for immediate-controlled vpermil on the VF1 modes.  The 8-bit
;; immediate in operand 2 is rewritten into the equivalent vec_select
;; parallel, one selector per vector element.
(define_expand "avx_vpermil<mode>"
  [(set (match_operand:VF1 0 "register_operand")
	(vec_select:VF1
	  (match_operand:VF1 1 "nonimmediate_operand")
	  (match_operand:SI 2 "const_0_to_255_operand")))]
  "TARGET_AVX"
{
  const int imm = INTVAL (operands[2]);
  rtx sel[<ssescalarnum>];
  int i;

  /* Each group of four elements reuses the same four 2-bit fields of
     the immediate; (i & ~3) is the index of the first element of the
     group (0 for elements 0-3, 4 for elements 4-7).  */
  for (i = 0; i < <ssescalarnum>; ++i)
    sel[i] = GEN_INT (((imm >> (2 * (i & 3))) & 3) + (i & ~3));

  operands[2]
    = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, sel));
})
10795
;; Immediate form of vpermil, represented as a vec_select.  The insn
;; condition accepts only parallels for which avx_vpermilp_parallel
;; returns non-zero; the output code calls it again and uses the return
;; value minus one as the immediate, overwriting operand 2 with it.
10796(define_insn "*avx_vpermilp<mode>"
10797  [(set (match_operand:VF 0 "register_operand" "=x")
10798	(vec_select:VF
10799	  (match_operand:VF 1 "nonimmediate_operand" "xm")
10800	  (match_parallel 2 ""
10801	    [(match_operand 3 "const_int_operand")])))]
10802  "TARGET_AVX
10803   && avx_vpermilp_parallel (operands[2], <MODE>mode)"
10804{
10805  int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
10806  operands[2] = GEN_INT (mask);
10807  return "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
10808}
10809  [(set_attr "type" "sselog")
10810   (set_attr "prefix_extra" "1")
10811   (set_attr "length_immediate" "1")
10812   (set_attr "prefix" "vex")
10813   (set_attr "mode" "<MODE>")])
10814
;; Variable form of vpermil: the control (operand 2) is an integer vector
;; of mode <sseintvecmode> in a register or in memory, and the data
;; (operand 1) must be a register.  The operation is kept opaque as
;; UNSPEC_VPERMIL rather than being expressed as a vec_select.
10815(define_insn "avx_vpermilvar<mode>3"
10816  [(set (match_operand:VF 0 "register_operand" "=x")
10817	(unspec:VF
10818	  [(match_operand:VF 1 "register_operand" "x")
10819	   (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "xm")]
10820	  UNSPEC_VPERMIL))]
10821  "TARGET_AVX"
10822  "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10823  [(set_attr "type" "sselog")
10824   (set_attr "prefix_extra" "1")
10825   (set_attr "prefix" "vex")
10826   (set_attr "btver2_decode" "vector")
10827   (set_attr "mode" "<MODE>")])
10828
;; Expander for the two-source 128-bit lane permute.  When neither bit 3
;; nor bit 7 of the immediate is set, the operation is emitted as a
;; vec_select from the vec_concat of both sources.  Otherwise control
;; falls through and the UNSPEC_VPERMIL2F128 template below is emitted.
(define_expand "avx_vperm2f128<mode>3"
  [(set (match_operand:AVX256MODE2P 0 "register_operand")
	(unspec:AVX256MODE2P
	  [(match_operand:AVX256MODE2P 1 "register_operand")
	   (match_operand:AVX256MODE2P 2 "nonimmediate_operand")
	   (match_operand:SI 3 "const_0_to_255_operand")]
	  UNSPEC_VPERMIL2F128))]
  "TARGET_AVX"
{
  const int imm = INTVAL (operands[3]);

  if (!(imm & 0x88))
    {
      const int nelt = <ssescalarnum>;
      const int half = nelt / 2;
      /* First selected element for the low and the high result half.  */
      const int lo_base = (imm & 3) * half;
      const int hi_base = ((imm >> 4) & 3) * half;
      rtx sel[<ssescalarnum>];
      rtx pair, order, pick;
      int k;

      for (k = 0; k < half; ++k)
	{
	  sel[k] = GEN_INT (lo_base + k);
	  sel[half + k] = GEN_INT (hi_base + k);
	}

      pair = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
				 operands[1], operands[2]);
      order = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, sel));
      pick = gen_rtx_VEC_SELECT (<MODE>mode, pair, order);
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], pick));
      DONE;
    }
})
10861
10862;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
10863;; means that in order to represent this properly in rtl we'd have to
10864;; nest *another* vec_concat with a zero operand and do the select from
10865;; a 4x wide vector.  That doesn't seem very nice.
;; General vperm2 form: accepts any immediate in 0..255 and keeps the
;; operation opaque as UNSPEC_VPERMIL2F128.  Operand 1 must be a
;; register; operand 2 may be a register or memory.
10866(define_insn "*avx_vperm2f128<mode>_full"
10867  [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
10868	(unspec:AVX256MODE2P
10869	  [(match_operand:AVX256MODE2P 1 "register_operand" "x")
10870	   (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
10871	   (match_operand:SI 3 "const_0_to_255_operand" "n")]
10872	  UNSPEC_VPERMIL2F128))]
10873  "TARGET_AVX"
10874  "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10875  [(set_attr "type" "sselog")
10876   (set_attr "prefix_extra" "1")
10877   (set_attr "length_immediate" "1")
10878   (set_attr "prefix" "vex")
10879   (set_attr "mode" "<sseinsnmode>")])
10880
;; vperm2 expressed as a vec_select from the vec_concat of both sources.
;; The insn condition accepts only parallels for which
;; avx_vperm2f128_parallel returns non-zero; that value minus one is the
;; immediate.  Immediates 0x12 and 0x20 are output as vinsert<i128> with
;; immediate 0 and 1 respectively; any other value is output as vperm2<i128>.
(define_insn "*avx_vperm2f128<mode>_nozero"
  [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
	(vec_select:AVX256MODE2P
	  (vec_concat:<ssedoublevecmode>
	    (match_operand:AVX256MODE2P 1 "register_operand" "x")
	    (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
	  (match_parallel 3 ""
	    [(match_operand 4 "const_int_operand")])))]
  "TARGET_AVX
   && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
{
  const int imm = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;

  switch (imm)
    {
    case 0x12:
      return "vinsert<i128>\t{$0, %x2, %1, %0|%0, %1, %x2, 0}";

    case 0x20:
      return "vinsert<i128>\t{$1, %x2, %1, %0|%0, %1, %x2, 1}";

    default:
      /* Generic case: print the recovered immediate as operand 3.  */
      operands[3] = GEN_INT (imm);
      return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
    }
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])
10905
;; Insert the half-width operand 2 into one half of operand 1, giving
;; operand 0.  Operand 3 (0 or 1) selects the generator: 0 dispatches to
;; gen_vec_set_lo_<mode>, 1 to gen_vec_set_hi_<mode>.
(define_expand "avx_vinsertf128<mode>"
  [(match_operand:V_256 0 "register_operand")
   (match_operand:V_256 1 "register_operand")
   (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
   (match_operand:SI 3 "const_0_to_1_operand")]
  "TARGET_AVX"
{
  rtx (*gen_set_half) (rtx, rtx, rtx);

  if (INTVAL (operands[3]) == 0)
    gen_set_half = gen_vec_set_lo_<mode>;
  else if (INTVAL (operands[3]) == 1)
    gen_set_half = gen_vec_set_hi_<mode>;
  else
    gcc_unreachable ();

  emit_insn (gen_set_half (operands[0], operands[1], operands[2]));
  DONE;
})
10930
;; AVX2 only: replace the low V2DI half of a V4DI register.  The result
;; is operand 2 concatenated with elements 2 and 3 of operand 1; it is
;; output as vinserti128 with immediate 0.
10931(define_insn "avx2_vec_set_lo_v4di"
10932  [(set (match_operand:V4DI 0 "register_operand" "=x")
10933	(vec_concat:V4DI
10934	  (match_operand:V2DI 2 "nonimmediate_operand" "xm")
10935	  (vec_select:V2DI
10936	    (match_operand:V4DI 1 "register_operand" "x")
10937	    (parallel [(const_int 2) (const_int 3)]))))]
10938  "TARGET_AVX2"
10939  "vinserti128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
10940  [(set_attr "type" "sselog")
10941   (set_attr "prefix_extra" "1")
10942   (set_attr "length_immediate" "1")
10943   (set_attr "prefix" "vex")
10944   (set_attr "mode" "OI")])
10945
;; AVX2 only: replace the high V2DI half of a V4DI register.  The result
;; is elements 0 and 1 of operand 1 concatenated with operand 2; it is
;; output as vinserti128 with immediate 1.
10946(define_insn "avx2_vec_set_hi_v4di"
10947  [(set (match_operand:V4DI 0 "register_operand" "=x")
10948	(vec_concat:V4DI
10949	  (vec_select:V2DI
10950	    (match_operand:V4DI 1 "register_operand" "x")
10951	    (parallel [(const_int 0) (const_int 1)]))
10952	  (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
10953  "TARGET_AVX2"
10954  "vinserti128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
10955  [(set_attr "type" "sselog")
10956   (set_attr "prefix_extra" "1")
10957   (set_attr "length_immediate" "1")
10958   (set_attr "prefix" "vex")
10959   (set_attr "mode" "OI")])
10960
;; Replace the low half of a VI8F_256 vector: the result is operand 2
;; (half-width, register or memory) concatenated with elements 2 and 3
;; of operand 1.  Output as vinsert<i128> with immediate 0.
10961(define_insn "vec_set_lo_<mode>"
10962  [(set (match_operand:VI8F_256 0 "register_operand" "=x")
10963	(vec_concat:VI8F_256
10964	  (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
10965	  (vec_select:<ssehalfvecmode>
10966	    (match_operand:VI8F_256 1 "register_operand" "x")
10967	    (parallel [(const_int 2) (const_int 3)]))))]
10968  "TARGET_AVX"
10969  "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
10970  [(set_attr "type" "sselog")
10971   (set_attr "prefix_extra" "1")
10972   (set_attr "length_immediate" "1")
10973   (set_attr "prefix" "vex")
10974   (set_attr "mode" "<sseinsnmode>")])
10975
;; Replace the high half of a VI8F_256 vector: the result is elements 0
;; and 1 of operand 1 concatenated with operand 2 (half-width, register
;; or memory).  Output as vinsert<i128> with immediate 1.
10976(define_insn "vec_set_hi_<mode>"
10977  [(set (match_operand:VI8F_256 0 "register_operand" "=x")
10978	(vec_concat:VI8F_256
10979	  (vec_select:<ssehalfvecmode>
10980	    (match_operand:VI8F_256 1 "register_operand" "x")
10981	    (parallel [(const_int 0) (const_int 1)]))
10982	  (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
10983  "TARGET_AVX"
10984  "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
10985  [(set_attr "type" "sselog")
10986   (set_attr "prefix_extra" "1")
10987   (set_attr "length_immediate" "1")
10988   (set_attr "prefix" "vex")
10989   (set_attr "mode" "<sseinsnmode>")])
10990
;; Replace the low half of a VI4F_256 vector: the result is operand 2
;; (half-width, register or memory) concatenated with elements 4..7 of
;; operand 1.  Output as vinsert<i128> with immediate 0.
10991(define_insn "vec_set_lo_<mode>"
10992  [(set (match_operand:VI4F_256 0 "register_operand" "=x")
10993	(vec_concat:VI4F_256
10994	  (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
10995	  (vec_select:<ssehalfvecmode>
10996	    (match_operand:VI4F_256 1 "register_operand" "x")
10997	    (parallel [(const_int 4) (const_int 5)
10998		       (const_int 6) (const_int 7)]))))]
10999  "TARGET_AVX"
11000  "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11001  [(set_attr "type" "sselog")
11002   (set_attr "prefix_extra" "1")
11003   (set_attr "length_immediate" "1")
11004   (set_attr "prefix" "vex")
11005   (set_attr "mode" "<sseinsnmode>")])
11006
;; Replace the high half of a VI4F_256 vector: the result is elements
;; 0..3 of operand 1 concatenated with operand 2 (half-width, register
;; or memory).  Output as vinsert<i128> with immediate 1.
11007(define_insn "vec_set_hi_<mode>"
11008  [(set (match_operand:VI4F_256 0 "register_operand" "=x")
11009	(vec_concat:VI4F_256
11010	  (vec_select:<ssehalfvecmode>
11011	    (match_operand:VI4F_256 1 "register_operand" "x")
11012	    (parallel [(const_int 0) (const_int 1)
11013		       (const_int 2) (const_int 3)]))
11014	  (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
11015  "TARGET_AVX"
11016  "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11017  [(set_attr "type" "sselog")
11018   (set_attr "prefix_extra" "1")
11019   (set_attr "length_immediate" "1")
11020   (set_attr "prefix" "vex")
11021   (set_attr "mode" "<sseinsnmode>")])
11022
;; Replace the low V8HI half of a V16HI vector: the result is operand 2
;; concatenated with elements 8..15 of operand 1.  Output with the
;; "vinsert%~128" template and immediate 0; the %~ operand code is
;; expanded by the target's operand printer, which is not part of this
;; pattern.
11023(define_insn "vec_set_lo_v16hi"
11024  [(set (match_operand:V16HI 0 "register_operand" "=x")
11025	(vec_concat:V16HI
11026	  (match_operand:V8HI 2 "nonimmediate_operand" "xm")
11027	  (vec_select:V8HI
11028	    (match_operand:V16HI 1 "register_operand" "x")
11029	    (parallel [(const_int 8) (const_int 9)
11030		       (const_int 10) (const_int 11)
11031		       (const_int 12) (const_int 13)
11032		       (const_int 14) (const_int 15)]))))]
11033  "TARGET_AVX"
11034  "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11035  [(set_attr "type" "sselog")
11036   (set_attr "prefix_extra" "1")
11037   (set_attr "length_immediate" "1")
11038   (set_attr "prefix" "vex")
11039   (set_attr "mode" "OI")])
11040
;; Replace the high V8HI half of a V16HI vector: the result is elements
;; 0..7 of operand 1 concatenated with operand 2.  Output with the
;; "vinsert%~128" template and immediate 1.
11041(define_insn "vec_set_hi_v16hi"
11042  [(set (match_operand:V16HI 0 "register_operand" "=x")
11043	(vec_concat:V16HI
11044	  (vec_select:V8HI
11045	    (match_operand:V16HI 1 "register_operand" "x")
11046	    (parallel [(const_int 0) (const_int 1)
11047		       (const_int 2) (const_int 3)
11048		       (const_int 4) (const_int 5)
11049		       (const_int 6) (const_int 7)]))
11050	  (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
11051  "TARGET_AVX"
11052  "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11053  [(set_attr "type" "sselog")
11054   (set_attr "prefix_extra" "1")
11055   (set_attr "length_immediate" "1")
11056   (set_attr "prefix" "vex")
11057   (set_attr "mode" "OI")])
11058
;; Replace the low V16QI half of a V32QI vector: the result is operand 2
;; concatenated with elements 16..31 of operand 1.  Output with the
;; "vinsert%~128" template and immediate 0.
11059(define_insn "vec_set_lo_v32qi"
11060  [(set (match_operand:V32QI 0 "register_operand" "=x")
11061	(vec_concat:V32QI
11062	  (match_operand:V16QI 2 "nonimmediate_operand" "xm")
11063	  (vec_select:V16QI
11064	    (match_operand:V32QI 1 "register_operand" "x")
11065	    (parallel [(const_int 16) (const_int 17)
11066		       (const_int 18) (const_int 19)
11067		       (const_int 20) (const_int 21)
11068		       (const_int 22) (const_int 23)
11069		       (const_int 24) (const_int 25)
11070		       (const_int 26) (const_int 27)
11071		       (const_int 28) (const_int 29)
11072		       (const_int 30) (const_int 31)]))))]
11073  "TARGET_AVX"
11074  "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11075  [(set_attr "type" "sselog")
11076   (set_attr "prefix_extra" "1")
11077   (set_attr "length_immediate" "1")
11078   (set_attr "prefix" "vex")
11079   (set_attr "mode" "OI")])
11080
;; Replace the high V16QI half of a V32QI vector: the result is elements
;; 0..15 of operand 1 concatenated with operand 2.  Output with the
;; "vinsert%~128" template and immediate 1.
11081(define_insn "vec_set_hi_v32qi"
11082  [(set (match_operand:V32QI 0 "register_operand" "=x")
11083	(vec_concat:V32QI
11084	  (vec_select:V16QI
11085	    (match_operand:V32QI 1 "register_operand" "x")
11086	    (parallel [(const_int 0) (const_int 1)
11087		       (const_int 2) (const_int 3)
11088		       (const_int 4) (const_int 5)
11089		       (const_int 6) (const_int 7)
11090		       (const_int 8) (const_int 9)
11091		       (const_int 10) (const_int 11)
11092		       (const_int 12) (const_int 13)
11093		       (const_int 14) (const_int 15)]))
11094	  (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
11095  "TARGET_AVX"
11096  "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11097  [(set_attr "type" "sselog")
11098   (set_attr "prefix_extra" "1")
11099   (set_attr "length_immediate" "1")
11100   (set_attr "prefix" "vex")
11101   (set_attr "mode" "OI")])
11102
;; Masked vector load, modelled as UNSPEC_MASKMOV.  Operand 1 is the
;; memory source and operand 2 is the mask, a register of the matching
;; integer vector mode.  Note the operand numbering: the mask is listed
;; first inside the unspec but is operand 2.
11103(define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
11104  [(set (match_operand:V48_AVX2 0 "register_operand" "=x")
11105	(unspec:V48_AVX2
11106	  [(match_operand:<sseintvecmode> 2 "register_operand" "x")
11107	   (match_operand:V48_AVX2 1 "memory_operand" "m")]
11108	  UNSPEC_MASKMOV))]
11109  "TARGET_AVX"
11110  "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
11111  [(set_attr "type" "sselog1")
11112   (set_attr "prefix_extra" "1")
11113   (set_attr "prefix" "vex")
11114   (set_attr "btver2_decode" "vector")
11115   (set_attr "mode" "<sseinsnmode>")])
11116
;; Masked vector store, modelled as UNSPEC_MASKMOV.  Operand 1 is the
;; mask register and operand 2 the data register.  The memory
;; destination is also an input: it is constrained "+m" and repeated
;; inside the unspec with match_dup 0.
11117(define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
11118  [(set (match_operand:V48_AVX2 0 "memory_operand" "+m")
11119	(unspec:V48_AVX2
11120	  [(match_operand:<sseintvecmode> 1 "register_operand" "x")
11121	   (match_operand:V48_AVX2 2 "register_operand" "x")
11122	   (match_dup 0)]
11123	  UNSPEC_MASKMOV))]
11124  "TARGET_AVX"
11125  "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11126  [(set_attr "type" "sselog1")
11127   (set_attr "prefix_extra" "1")
11128   (set_attr "prefix" "vex")
11129   (set_attr "btver2_decode" "vector")
11130   (set_attr "mode" "<sseinsnmode>")])
11131
;; Cast from a half-width vector to the 256-bit mode, kept as UNSPEC_CAST
;; until reload has completed and then split into a single move.  The
;; move is done in one common mode: a register destination is re-viewed
;; in the half-width mode; otherwise (memory destination) the source
;; register is re-viewed in the full mode.
(define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
  [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
	(unspec:AVX256MODE2P
	  [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
	  UNSPEC_CAST))]
  "TARGET_AVX"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx dst = operands[0], src = operands[1];

  if (!REG_P (dst))
    src = gen_rtx_REG (<MODE>mode, REGNO (src));
  else
    dst = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (dst));

  emit_move_insn (dst, src);
  DONE;
})
11151
;; vec_init expander for the 256-bit vector modes (V_256).  All work is
;; delegated to ix86_expand_vector_init, called with false as its first
;; argument, operand 0 as the destination and operand 1 as the
;; initializer.
11152(define_expand "vec_init<mode>"
11153  [(match_operand:V_256 0 "register_operand")
11154   (match_operand 1)]
11155  "TARGET_AVX"
11156{
11157  ix86_expand_vector_init (false, operands[0], operands[1]);
11158  DONE;
11159})
11160
;; AVX2 expander that extracts one V2DI half of a V4DI register.
;; Operand 2 (0 or 1) selects the generator: 0 dispatches to
;; gen_vec_extract_lo_v4di, 1 to gen_vec_extract_hi_v4di.
(define_expand "avx2_extracti128"
  [(match_operand:V2DI 0 "nonimmediate_operand")
   (match_operand:V4DI 1 "register_operand")
   (match_operand:SI 2 "const_0_to_1_operand")]
  "TARGET_AVX2"
{
  rtx (*gen_half) (rtx, rtx);

  if (INTVAL (operands[2]) == 0)
    gen_half = gen_vec_extract_lo_v4di;
  else if (INTVAL (operands[2]) == 1)
    gen_half = gen_vec_extract_hi_v4di;
  else
    gcc_unreachable ();

  emit_insn (gen_half (operands[0], operands[1]));
  DONE;
})
11184
;; AVX2 expander that inserts the V2DI operand 2 into one half of the
;; V4DI operand 1, giving operand 0.  Operand 3 (0 or 1) selects the
;; generator: 0 dispatches to gen_avx2_vec_set_lo_v4di, 1 to
;; gen_avx2_vec_set_hi_v4di.
(define_expand "avx2_inserti128"
  [(match_operand:V4DI 0 "register_operand")
   (match_operand:V4DI 1 "register_operand")
   (match_operand:V2DI 2 "nonimmediate_operand")
   (match_operand:SI 3 "const_0_to_1_operand")]
  "TARGET_AVX2"
{
  rtx (*gen_set_half) (rtx, rtx, rtx);

  if (INTVAL (operands[3]) == 0)
    gen_set_half = gen_avx2_vec_set_lo_v4di;
  else if (INTVAL (operands[3]) == 1)
    gen_set_half = gen_avx2_vec_set_hi_v4di;
  else
    gcc_unreachable ();

  emit_insn (gen_set_half (operands[0], operands[1], operands[2]));
  DONE;
})
11209
;; AVX2 arithmetic right shift (ashiftrt) of a VI4_AVX2 vector where the
;; shift count, operand 2, is itself a vector of the same mode in a
;; register or in memory.  Output as vpsravd.
11210(define_insn "avx2_ashrv<mode>"
11211  [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
11212	(ashiftrt:VI4_AVX2
11213	  (match_operand:VI4_AVX2 1 "register_operand" "x")
11214	  (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")))]
11215  "TARGET_AVX2"
11216  "vpsravd\t{%2, %1, %0|%0, %1, %2}"
11217  [(set_attr "type" "sseishft")
11218   (set_attr "prefix" "vex")
11219   (set_attr "mode" "<sseinsnmode>")])
11220
;; AVX2 shifts covered by the any_lshift code iterator, on VI48_AVX2
;; vectors, where the shift count (operand 2) is a vector of the same
;; mode.  The mnemonic is assembled from <vshift> and <ssemodesuffix>.
11221(define_insn "avx2_<shift_insn>v<mode>"
11222  [(set (match_operand:VI48_AVX2 0 "register_operand" "=x")
11223	(any_lshift:VI48_AVX2
11224	  (match_operand:VI48_AVX2 1 "register_operand" "x")
11225	  (match_operand:VI48_AVX2 2 "nonimmediate_operand" "xm")))]
11226  "TARGET_AVX2"
11227  "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11228  [(set_attr "type" "sseishft")
11229   (set_attr "prefix" "vex")
11230   (set_attr "mode" "<sseinsnmode>")])
11231
;; Concatenate two half-width vectors into a V_256 register.
;;   Alternative 0: operand 2 is a register or memory; output as
;;     vinsert<i128> with immediate 1.
;;   Alternative 1: operand 2 satisfies constraint "C"; output as a
;;     vmovaps, vmovapd or vmovdqa of operand 1 into the %x0 form of the
;;     destination, chosen by the insn's mode attribute.
(define_insn "avx_vec_concat<mode>"
  [(set (match_operand:V_256 0 "register_operand" "=x,x")
	(vec_concat:V_256
	  (match_operand:<ssehalfvecmode> 1 "register_operand" "x,x")
	  (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,C")))]
  "TARGET_AVX"
{
  if (which_alternative == 0)
    return "vinsert<i128>\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";

  if (which_alternative == 1)
    switch (get_attr_mode (insn))
      {
      case MODE_V8SF:
	return "vmovaps\t{%1, %x0|%x0, %1}";

      case MODE_V4DF:
	return "vmovapd\t{%1, %x0|%x0, %1}";

      default:
	return "vmovdqa\t{%1, %x0|%x0, %1}";
      }

  gcc_unreachable ();
}
  [(set_attr "type" "sselog,ssemov")
   (set_attr "prefix_extra" "1,*")
   (set_attr "length_immediate" "1,*")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])
11262
;; F16C vcvtph2ps, 128-bit register form.  The V8HI source register is
;; modelled as a V8SF UNSPEC_VCVTPH2PS from which the low four elements
;; are selected to form the V4SF result.
11263(define_insn "vcvtph2ps"
11264  [(set (match_operand:V4SF 0 "register_operand" "=x")
11265	(vec_select:V4SF
11266	  (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "x")]
11267		       UNSPEC_VCVTPH2PS)
11268	  (parallel [(const_int 0) (const_int 1)
11269		     (const_int 2) (const_int 3)])))]
11270  "TARGET_F16C"
11271  "vcvtph2ps\t{%1, %0|%0, %1}"
11272  [(set_attr "type" "ssecvt")
11273   (set_attr "prefix" "vex")
11274   (set_attr "mode" "V4SF")])
11275
;; F16C vcvtph2ps with a memory source: a V4HI memory operand is
;; converted directly to a V4SF register.
;; NOTE(review): the "mode" attribute below is V8SF although the
;; destination is V4SF and the register form "vcvtph2ps" uses V4SF --
;; confirm this is intentional.
11276(define_insn "*vcvtph2ps_load"
11277  [(set (match_operand:V4SF 0 "register_operand" "=x")
11278	(unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
11279		     UNSPEC_VCVTPH2PS))]
11280  "TARGET_F16C"
11281  "vcvtph2ps\t{%1, %0|%0, %1}"
11282  [(set_attr "type" "ssecvt")
11283   (set_attr "prefix" "vex")
11284   (set_attr "mode" "V8SF")])
11285
;; F16C vcvtph2ps, 256-bit form: a V8HI register or memory operand is
;; converted to a V8SF register.
11286(define_insn "vcvtph2ps256"
11287  [(set (match_operand:V8SF 0 "register_operand" "=x")
11288	(unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
11289		     UNSPEC_VCVTPH2PS))]
11290  "TARGET_F16C"
11291  "vcvtph2ps\t{%1, %0|%0, %1}"
11292  [(set_attr "type" "ssecvt")
11293   (set_attr "prefix" "vex")
11294   (set_attr "btver2_decode" "double")
11295   (set_attr "mode" "V8SF")])
11296
;; F16C vcvtps2ph expander, 128-bit register form.  The V8HI result is
;; the V4HI conversion of operand 1 (operand 2 is an immediate in
;; 0..255) concatenated with operand 3, which the preparation statement
;; sets to the V4HImode zero constant.
11297(define_expand "vcvtps2ph"
11298  [(set (match_operand:V8HI 0 "register_operand")
11299	(vec_concat:V8HI
11300	  (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
11301			(match_operand:SI 2 "const_0_to_255_operand")]
11302		       UNSPEC_VCVTPS2PH)
11303	  (match_dup 3)))]
11304  "TARGET_F16C"
11305  "operands[3] = CONST0_RTX (V4HImode);")
11306
;; Insn matching the RTL produced by the "vcvtps2ph" expander: a V4HI
;; UNSPEC_VCVTPS2PH of a V4SF register and an immediate, concatenated
;; with a V4HI operand that must satisfy const0_operand.
11307(define_insn "*vcvtps2ph"
11308  [(set (match_operand:V8HI 0 "register_operand" "=x")
11309	(vec_concat:V8HI
11310	  (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
11311			(match_operand:SI 2 "const_0_to_255_operand" "N")]
11312		       UNSPEC_VCVTPS2PH)
11313	  (match_operand:V4HI 3 "const0_operand")))]
11314  "TARGET_F16C"
11315  "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
11316  [(set_attr "type" "ssecvt")
11317   (set_attr "prefix" "vex")
11318   (set_attr "mode" "V4SF")])
11319
;; F16C vcvtps2ph with a memory destination: the V4HI result of
;; converting a V4SF register is stored directly to memory, so no
;; vec_concat with zero is involved.
11320(define_insn "*vcvtps2ph_store"
11321  [(set (match_operand:V4HI 0 "memory_operand" "=m")
11322	(unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
11323		      (match_operand:SI 2 "const_0_to_255_operand" "N")]
11324		     UNSPEC_VCVTPS2PH))]
11325  "TARGET_F16C"
11326  "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
11327  [(set_attr "type" "ssecvt")
11328   (set_attr "prefix" "vex")
11329   (set_attr "mode" "V4SF")])
11330
;; F16C vcvtps2ph, 256-bit form: a V8SF register is converted to a V8HI
;; result that may be a register or memory.  Operand 2 is an immediate
;; in 0..255.
11331(define_insn "vcvtps2ph256"
11332  [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
11333	(unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
11334		      (match_operand:SI 2 "const_0_to_255_operand" "N")]
11335		     UNSPEC_VCVTPS2PH))]
11336  "TARGET_F16C"
11337  "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
11338  [(set_attr "type" "ssecvt")
11339   (set_attr "prefix" "vex")
11340   (set_attr "btver2_decode" "vector")
11341   (set_attr "mode" "V8SF")])
11342
11343;; For gather* insn patterns
;; Vector modes that the gather patterns below are instantiated for.
11344(define_mode_iterator VEC_GATHER_MODE
11345		      [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
;; Mode of the vector operand inside the UNSPEC_VSIBADDR address of the
;; gathersi patterns: V8SI for the 8-element modes, V4SI for the rest.
11346(define_mode_attr VEC_GATHER_IDXSI
11347		      [(V2DI "V4SI") (V2DF "V4SI")
11348		       (V4DI "V4SI") (V4DF "V4SI")
11349		       (V4SI "V4SI") (V4SF "V4SI")
11350		       (V8SI "V8SI") (V8SF "V8SI")])
;; Mode of the vector operand inside the UNSPEC_VSIBADDR address of the
;; gatherdi patterns: always a DImode-element vector.
11351(define_mode_attr VEC_GATHER_IDXDI
11352		      [(V2DI "V2DI") (V2DF "V2DI")
11353		       (V4DI "V4DI") (V4DF "V4DI")
11354		       (V4SI "V2DI") (V4SF "V2DI")
11355		       (V8SI "V4DI") (V8SF "V4DI")])
;; Mode of the source and mask operands of the gatherdi patterns.  It is
;; the gathered mode itself except for V8SI and V8SF, which map to the
;; half-width V4SI and V4SF.
11356(define_mode_attr VEC_GATHER_SRCDI
11357		      [(V2DI "V2DI") (V2DF "V2DF")
11358		       (V4DI "V4DI") (V4DF "V4DF")
11359		       (V4SI "V4SI") (V4SF "V4SF")
11360		       (V8SI "V4SI") (V8SF "V4SF")])
11361
;; Expander for the gathersi patterns.  Operand 1 is the source vector
;; and operand 4 the mask.  Operands 2, 3 and 5 are the pieces of the
;; address: a vsib_address_operand, a <VEC_GATHER_IDXSI> vector register
;; and a constant accepted by const1248_operand.  The preparation
;; statement bundles them into a Pmode UNSPEC_VSIBADDR, stored as
;; operand 7 for the match_par_dup.  Operand 6 is a scratch that is
;; clobbered.
11362(define_expand "avx2_gathersi<mode>"
11363  [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
11364		   (unspec:VEC_GATHER_MODE
11365		     [(match_operand:VEC_GATHER_MODE 1 "register_operand")
11366		      (mem:<ssescalarmode>
11367			(match_par_dup 7
11368			  [(match_operand 2 "vsib_address_operand")
11369			   (match_operand:<VEC_GATHER_IDXSI>
11370			      3 "register_operand")
11371			   (match_operand:SI 5 "const1248_operand")]))
11372		      (mem:BLK (scratch))
11373		      (match_operand:VEC_GATHER_MODE 4 "register_operand")]
11374		     UNSPEC_GATHER))
11375	      (clobber (match_scratch:VEC_GATHER_MODE 6))])]
11376  "TARGET_AVX2"
11377{
11378  operands[7]
11379    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
11380					operands[5]), UNSPEC_VSIBADDR);
11381})
11382
;; Gather with the <VEC_GATHER_IDXSI> address vector, merging form.
;; Operand 2 (the source) is tied to the destination by constraint "0"
;; and operand 5 (the mask) is tied to the clobbered scratch operand 1
;; by constraint "1".  Both the destination and the scratch are
;; earlyclobber ("=&x").  The template prints the mask/scratch register,
;; the VSIB memory operand 7 and the destination.
11383(define_insn "*avx2_gathersi<mode>"
11384  [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
11385	(unspec:VEC_GATHER_MODE
11386	  [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
11387	   (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
11388	     [(unspec:P
11389		[(match_operand:P 3 "vsib_address_operand" "p")
11390		 (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
11391		 (match_operand:SI 6 "const1248_operand" "n")]
11392		UNSPEC_VSIBADDR)])
11393	   (mem:BLK (scratch))
11394	   (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
11395	  UNSPEC_GATHER))
11396   (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
11397  "TARGET_AVX2"
11398  "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
11399  [(set_attr "type" "ssemov")
11400   (set_attr "prefix" "vex")
11401   (set_attr "mode" "<sseinsnmode>")])
11402
;; Variant of "*avx2_gathersi<mode>" with (pc) in the position where that
;; pattern has its source operand, so the destination is not tied to
;; any input.  The mask (operand 4) is still tied to the clobbered
;; scratch operand 1; the remaining operands are renumbered accordingly.
11403(define_insn "*avx2_gathersi<mode>_2"
11404  [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
11405	(unspec:VEC_GATHER_MODE
11406	  [(pc)
11407	   (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
11408	     [(unspec:P
11409		[(match_operand:P 2 "vsib_address_operand" "p")
11410		 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
11411		 (match_operand:SI 5 "const1248_operand" "n")]
11412		UNSPEC_VSIBADDR)])
11413	   (mem:BLK (scratch))
11414	   (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
11415	  UNSPEC_GATHER))
11416   (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
11417  "TARGET_AVX2"
11418  "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
11419  [(set_attr "type" "ssemov")
11420   (set_attr "prefix" "vex")
11421   (set_attr "mode" "<sseinsnmode>")])
11422
;; Expander for the gatherdi patterns.  Operand 1 (source) and operand 4
;; (mask) use mode <VEC_GATHER_SRCDI>, which differs from the
;; destination mode for V8SI and V8SF.  Operands 2, 3 and 5 are the
;; pieces of the address: a vsib_address_operand, a <VEC_GATHER_IDXDI>
;; vector register and a constant accepted by const1248_operand.  The
;; preparation statement bundles them into a Pmode UNSPEC_VSIBADDR,
;; stored as operand 7 for the match_par_dup.  Operand 6 is a scratch
;; that is clobbered.
11423(define_expand "avx2_gatherdi<mode>"
11424  [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
11425		   (unspec:VEC_GATHER_MODE
11426		     [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
11427		      (mem:<ssescalarmode>
11428			(match_par_dup 7
11429			  [(match_operand 2 "vsib_address_operand")
11430			   (match_operand:<VEC_GATHER_IDXDI>
11431			      3 "register_operand")
11432			   (match_operand:SI 5 "const1248_operand")]))
11433		      (mem:BLK (scratch))
11434		      (match_operand:<VEC_GATHER_SRCDI>
11435			4 "register_operand")]
11436		     UNSPEC_GATHER))
11437	      (clobber (match_scratch:VEC_GATHER_MODE 6))])]
11438  "TARGET_AVX2"
11439{
11440  operands[7]
11441    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
11442					operands[5]), UNSPEC_VSIBADDR);
11443})
11444
;; Gather with the <VEC_GATHER_IDXDI> address vector, merging form.
;; Operand 2 (source, <VEC_GATHER_SRCDI>) is tied to the destination and
;; operand 5 (mask, <VEC_GATHER_SRCDI>) is tied to the clobbered scratch
;; operand 1.  The template prints operands 5 and 2 rather than 1 and 0.
;; NOTE(review): presumably so that the registers are printed in
;; <VEC_GATHER_SRCDI> mode when it is narrower than the destination
;; mode -- confirm.
11445(define_insn "*avx2_gatherdi<mode>"
11446  [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
11447	(unspec:VEC_GATHER_MODE
11448	  [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
11449	   (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
11450	     [(unspec:P
11451		[(match_operand:P 3 "vsib_address_operand" "p")
11452		 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
11453		 (match_operand:SI 6 "const1248_operand" "n")]
11454		UNSPEC_VSIBADDR)])
11455	   (mem:BLK (scratch))
11456	   (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
11457	  UNSPEC_GATHER))
11458   (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
11459  "TARGET_AVX2"
11460  "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
11461  [(set_attr "type" "ssemov")
11462   (set_attr "prefix" "vex")
11463   (set_attr "mode" "<sseinsnmode>")])
11464
;; Variant of "*avx2_gatherdi<mode>" with (pc) in the position where that
;; pattern has its source operand, so the destination is not tied to
;; any input.  The mask (operand 4) is still tied to the clobbered
;; scratch operand 1.
11465(define_insn "*avx2_gatherdi<mode>_2"
11466  [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
11467	(unspec:VEC_GATHER_MODE
11468	  [(pc)
11469	   (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
11470	     [(unspec:P
11471		[(match_operand:P 2 "vsib_address_operand" "p")
11472		 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
11473		 (match_operand:SI 5 "const1248_operand" "n")]
11474		UNSPEC_VSIBADDR)])
11475	   (mem:BLK (scratch))
11476	   (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
11477	  UNSPEC_GATHER))
11478   (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
11479  "TARGET_AVX2"
11480{
  /* When the mask mode differs from the destination mode (V8SI and
     V8SF, per VEC_GATHER_SRCDI), print the destination with the %x
     operand code instead of in its own mode.  */
11481  if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
11482    return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
11483  return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
11484}
11485  [(set_attr "type" "ssemov")
11486   (set_attr "prefix" "vex")
11487   (set_attr "mode" "<sseinsnmode>")])
11488
;; gatherdi for the VI4F_256 modes where only the low four elements of
;; the gathered vector are kept: the destination has mode
;; <VEC_GATHER_SRCDI> and is a vec_select of elements 0..3 of the
;; UNSPEC_GATHER.  Merging form: operand 2 (source) is tied to the
;; destination and operand 5 (mask) to the clobbered scratch operand 1.
11489(define_insn "*avx2_gatherdi<mode>_3"
11490  [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
11491	(vec_select:<VEC_GATHER_SRCDI>
11492	  (unspec:VI4F_256
11493	    [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
11494	     (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
11495	       [(unspec:P
11496		  [(match_operand:P 3 "vsib_address_operand" "p")
11497		   (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
11498		   (match_operand:SI 6 "const1248_operand" "n")]
11499		  UNSPEC_VSIBADDR)])
11500	     (mem:BLK (scratch))
11501	     (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
11502	     UNSPEC_GATHER)
11503	  (parallel [(const_int 0) (const_int 1)
11504		     (const_int 2) (const_int 3)])))
11505   (clobber (match_scratch:VI4F_256 1 "=&x"))]
11506  "TARGET_AVX2"
11507  "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
11508  [(set_attr "type" "ssemov")
11509   (set_attr "prefix" "vex")
11510   (set_attr "mode" "<sseinsnmode>")])
11511
;; Variant of "*avx2_gatherdi<mode>_3" with (pc) in the position where
;; that pattern has its source operand, so the destination is not tied
;; to any input.  The mask (operand 4) is still tied to the clobbered
;; scratch operand 1, and the result is again elements 0..3 of the
;; VI4F_256 UNSPEC_GATHER.
11512(define_insn "*avx2_gatherdi<mode>_4"
11513  [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
11514	(vec_select:<VEC_GATHER_SRCDI>
11515	  (unspec:VI4F_256
11516	    [(pc)
11517	     (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
11518	       [(unspec:P
11519		  [(match_operand:P 2 "vsib_address_operand" "p")
11520		   (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
11521		   (match_operand:SI 5 "const1248_operand" "n")]
11522		  UNSPEC_VSIBADDR)])
11523	     (mem:BLK (scratch))
11524	     (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
11525	    UNSPEC_GATHER)
11526	  (parallel [(const_int 0) (const_int 1)
11527		     (const_int 2) (const_int 3)])))
11528   (clobber (match_scratch:VI4F_256 1 "=&x"))]
11529  "TARGET_AVX2"
11530  "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
11531  [(set_attr "type" "ssemov")
11532   (set_attr "prefix" "vex")
11533   (set_attr "mode" "<sseinsnmode>")])
11534