xref: /dflybsd-src/contrib/gcc-8.0/gcc/config/i386/mmx.md (revision 38fd149817dfbff97799f62fcb70be98c4e32523)
1*38fd1498Szrj;; GCC machine description for MMX and 3dNOW! instructions
2*38fd1498Szrj;; Copyright (C) 2005-2018 Free Software Foundation, Inc.
3*38fd1498Szrj;;
4*38fd1498Szrj;; This file is part of GCC.
5*38fd1498Szrj;;
6*38fd1498Szrj;; GCC is free software; you can redistribute it and/or modify
7*38fd1498Szrj;; it under the terms of the GNU General Public License as published by
8*38fd1498Szrj;; the Free Software Foundation; either version 3, or (at your option)
9*38fd1498Szrj;; any later version.
10*38fd1498Szrj;;
11*38fd1498Szrj;; GCC is distributed in the hope that it will be useful,
12*38fd1498Szrj;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13*38fd1498Szrj;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14*38fd1498Szrj;; GNU General Public License for more details.
15*38fd1498Szrj;;
16*38fd1498Szrj;; You should have received a copy of the GNU General Public License
17*38fd1498Szrj;; along with GCC; see the file COPYING3.  If not see
18*38fd1498Szrj;; <http://www.gnu.org/licenses/>.
19*38fd1498Szrj
20*38fd1498Szrj;; The MMX and 3dNOW! patterns are in the same file because they use
21*38fd1498Szrj;; the same register file, and 3dNOW! adds a number of extensions to
22*38fd1498Szrj;; the base integer MMX isa.
23*38fd1498Szrj
24*38fd1498Szrj;; Note!  Except for the basic move instructions, *all* of these
25*38fd1498Szrj;; patterns are outside the normal optabs namespace.  This is because
26*38fd1498Szrj;; use of these registers requires the insertion of emms or femms
27*38fd1498Szrj;; instructions to return to normal fpu mode.  The compiler doesn't
28*38fd1498Szrj;; know how to do that itself, which means it's up to the user.  Which
29*38fd1498Szrj;; means that we should never use any of these patterns except at the
30*38fd1498Szrj;; direction of the user via a builtin.
31*38fd1498Szrj
;; Unspec codes used by patterns in this file whose operation is not
;; expressed with ordinary RTL codes: the movntq store and the
;; pfrcp / pfrcpit1 / pfrcpit2 / pfrsqrt / pfrsqit1 instructions.
32*38fd1498Szrj(define_c_enum "unspec" [
33*38fd1498Szrj  UNSPEC_MOVNTQ
34*38fd1498Szrj  UNSPEC_PFRCP
35*38fd1498Szrj  UNSPEC_PFRCPIT1
36*38fd1498Szrj  UNSPEC_PFRCPIT2
37*38fd1498Szrj  UNSPEC_PFRSQRT
38*38fd1498Szrj  UNSPEC_PFRSQIT1
39*38fd1498Szrj])
40*38fd1498Szrj
;; Volatile unspec codes.  NOTE(review): presumably consumed by the
;; emms/femms patterns, which are not visible in this part of the file.
41*38fd1498Szrj(define_c_enum "unspecv" [
42*38fd1498Szrj  UNSPECV_EMMS
43*38fd1498Szrj  UNSPECV_FEMMS
44*38fd1498Szrj])
45*38fd1498Szrj
46*38fd1498Szrj;; 8 byte integral modes handled by MMX (and by extension, SSE)
;; MMXMODEI covers the byte, word and doubleword element vectors;
;; MMXMODEI8 is the same set plus the single 64-bit element vector V1DI.
47*38fd1498Szrj(define_mode_iterator MMXMODEI [V8QI V4HI V2SI])
48*38fd1498Szrj(define_mode_iterator MMXMODEI8 [V8QI V4HI V2SI V1DI])
49*38fd1498Szrj
50*38fd1498Szrj;; All 8-byte vector modes handled by MMX
;; Integer vectors (V8QI, V4HI, V2SI, V1DI) plus the float vector V2SF;
;; this is the iterator the move patterns below are instantiated over.
51*38fd1498Szrj(define_mode_iterator MMXMODE [V8QI V4HI V2SI V1DI V2SF])
52*38fd1498Szrj
53*38fd1498Szrj;; Mix-n-match
;; Subsets of the integer vector modes for patterns that only exist for
;; some element widths.
54*38fd1498Szrj(define_mode_iterator MMXMODE12 [V8QI V4HI])
55*38fd1498Szrj(define_mode_iterator MMXMODE24 [V4HI V2SI])
56*38fd1498Szrj(define_mode_iterator MMXMODE248 [V4HI V2SI V1DI])
57*38fd1498Szrj
58*38fd1498Szrj;; Mapping from integer vector mode to mnemonic suffix
;; Expands to the element-size letter appended to a mnemonic, e.g. the
;; integer add/sub pattern below builds "p<...><mmxvecsize>".
59*38fd1498Szrj(define_mode_attr mmxvecsize [(V8QI "b") (V4HI "w") (V2SI "d") (V1DI "q")])
60*38fd1498Szrj
61*38fd1498Szrj;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
62*38fd1498Szrj;;
63*38fd1498Szrj;; Move patterns
64*38fd1498Szrj;;
65*38fd1498Szrj;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
66*38fd1498Szrj
67*38fd1498Szrj;; All of these patterns are enabled for MMX as well as 3dNOW.
68*38fd1498Szrj;; This is essential for maintaining stable calling conventions.
69*38fd1498Szrj
;; Standard move expander for every mode in MMXMODE.  All of the work is
;; delegated to ix86_expand_vector_move; DONE means the RTL template
;; above the condition is never emitted directly.
70*38fd1498Szrj(define_expand "mov<mode>"
71*38fd1498Szrj  [(set (match_operand:MMXMODE 0 "nonimmediate_operand")
72*38fd1498Szrj	(match_operand:MMXMODE 1 "nonimmediate_operand"))]
73*38fd1498Szrj  "TARGET_MMX"
74*38fd1498Szrj{
75*38fd1498Szrj  ix86_expand_vector_move (<MODE>mode, operands);
76*38fd1498Szrj  DONE;
77*38fd1498Szrj})
78*38fd1498Szrj
;; The move insn for all MMXMODE modes.  It has 19 constraint
;; alternatives (numbered 0-18); the attribute section at the bottom
;; refers to them by number, so the order of the constraint strings and
;; the alternative lists must be kept in sync.
;;
;;   alt 0-1    type "multi", isa "nox64": output "#", i.e. must be split
;;   alt 2-4    type "imov", isa "x64": integer mov (alt 2 uses SImode movl)
;;   alt 5      type "mmx": pxor of the destination with itself
;;   alt 6-10   type "mmxmov": movq (or movd, see below)
;;   alt 11     type "sselog1": standard_sse_constant_opcode
;;   alt 12-16  type "ssemov" (the default): opcode chosen from attr "mode"
;;   alt 17-18  type "ssecvt": movq2dq / movdq2q
;;
;; The insn condition rejects memory-to-memory moves.
79*38fd1498Szrj(define_insn "*mov<mode>_internal"
80*38fd1498Szrj  [(set (match_operand:MMXMODE 0 "nonimmediate_operand"
81*38fd1498Szrj    "=r ,o ,r,r ,m ,?!y,!y,?!y,m  ,r   ,?!Ym,v,v,v,m,r ,Yi,!Ym,*Yi")
82*38fd1498Szrj	(match_operand:MMXMODE 1 "vector_move_operand"
83*38fd1498Szrj    "rCo,rC,C,rm,rC,C  ,!y,m  ,?!y,?!Yn,r   ,C,v,m,v,Yj,r ,*Yj,!Yn"))]
84*38fd1498Szrj  "TARGET_MMX
85*38fd1498Szrj   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
86*38fd1498Szrj{
87*38fd1498Szrj  switch (get_attr_type (insn))
88*38fd1498Szrj    {
89*38fd1498Szrj    case TYPE_MULTI:
90*38fd1498Szrj      return "#";
91*38fd1498Szrj
92*38fd1498Szrj    case TYPE_IMOV:
93*38fd1498Szrj      if (get_attr_mode (insn) == MODE_SI)
94*38fd1498Szrj	return "mov{l}\t{%1, %k0|%k0, %1}";
95*38fd1498Szrj      else
96*38fd1498Szrj	return "mov{q}\t{%1, %0|%0, %1}";
97*38fd1498Szrj
98*38fd1498Szrj    case TYPE_MMX:
99*38fd1498Szrj      return "pxor\t%0, %0";
100*38fd1498Szrj
101*38fd1498Szrj    case TYPE_MMXMOV:
102*38fd1498Szrj      /* Handle broken assemblers that require movd instead of movq.  */
103*38fd1498Szrj      if (!HAVE_AS_IX86_INTERUNIT_MOVQ
104*38fd1498Szrj	  && (GENERAL_REG_P (operands[0]) || GENERAL_REG_P (operands[1])))
105*38fd1498Szrj	return "movd\t{%1, %0|%0, %1}";
106*38fd1498Szrj      return "movq\t{%1, %0|%0, %1}";
107*38fd1498Szrj
108*38fd1498Szrj    case TYPE_SSECVT:
109*38fd1498Szrj      if (SSE_REG_P (operands[0]))
110*38fd1498Szrj	return "movq2dq\t{%1, %0|%0, %1}";
111*38fd1498Szrj      else
112*38fd1498Szrj	return "movdq2q\t{%1, %0|%0, %1}";
113*38fd1498Szrj
114*38fd1498Szrj    case TYPE_SSELOG1:
115*38fd1498Szrj      return standard_sse_constant_opcode (insn, operands);
116*38fd1498Szrj
117*38fd1498Szrj    case TYPE_SSEMOV:
118*38fd1498Szrj      switch (get_attr_mode (insn))
119*38fd1498Szrj	{
120*38fd1498Szrj	case MODE_DI:
121*38fd1498Szrj	  /* Handle broken assemblers that require movd instead of movq.  */
122*38fd1498Szrj	  if (!HAVE_AS_IX86_INTERUNIT_MOVQ
123*38fd1498Szrj	      && (GENERAL_REG_P (operands[0]) || GENERAL_REG_P (operands[1])))
124*38fd1498Szrj	    return "%vmovd\t{%1, %0|%0, %1}";
125*38fd1498Szrj	  return "%vmovq\t{%1, %0|%0, %1}";
126*38fd1498Szrj	case MODE_TI:
127*38fd1498Szrj	  return "%vmovdqa\t{%1, %0|%0, %1}";
128*38fd1498Szrj	case MODE_XI:
129*38fd1498Szrj	  return "vmovdqa64\t{%g1, %g0|%g0, %g1}";
130*38fd1498Szrj
131*38fd1498Szrj	case MODE_V2SF:
132*38fd1498Szrj	  if (TARGET_AVX && REG_P (operands[0]))
133*38fd1498Szrj	    return "vmovlps\t{%1, %0, %0|%0, %0, %1}";
134*38fd1498Szrj	  return "%vmovlps\t{%1, %0|%0, %1}";
135*38fd1498Szrj	case MODE_V4SF:
136*38fd1498Szrj	  return "%vmovaps\t{%1, %0|%0, %1}";
137*38fd1498Szrj
138*38fd1498Szrj	default:
139*38fd1498Szrj	  gcc_unreachable ();
140*38fd1498Szrj	}
141*38fd1498Szrj
142*38fd1498Szrj    default:
143*38fd1498Szrj      gcc_unreachable ();
144*38fd1498Szrj    }
145*38fd1498Szrj}
146*38fd1498Szrj  [(set (attr "isa")
147*38fd1498Szrj     (cond [(eq_attr "alternative" "0,1")
148*38fd1498Szrj	      (const_string "nox64")
149*38fd1498Szrj	    (eq_attr "alternative" "2,3,4,9,10,15,16")
150*38fd1498Szrj	      (const_string "x64")
151*38fd1498Szrj	   ]
152*38fd1498Szrj	   (const_string "*")))
153*38fd1498Szrj   (set (attr "type")
154*38fd1498Szrj     (cond [(eq_attr "alternative" "0,1")
155*38fd1498Szrj	      (const_string "multi")
156*38fd1498Szrj	    (eq_attr "alternative" "2,3,4")
157*38fd1498Szrj	      (const_string "imov")
158*38fd1498Szrj	    (eq_attr "alternative" "5")
159*38fd1498Szrj	      (const_string "mmx")
160*38fd1498Szrj	    (eq_attr "alternative" "6,7,8,9,10")
161*38fd1498Szrj	      (const_string "mmxmov")
162*38fd1498Szrj	    (eq_attr "alternative" "11")
163*38fd1498Szrj	      (const_string "sselog1")
164*38fd1498Szrj	    (eq_attr "alternative" "17,18")
165*38fd1498Szrj	      (const_string "ssecvt")
166*38fd1498Szrj	   ]
167*38fd1498Szrj	   (const_string "ssemov")))
168*38fd1498Szrj   (set (attr "prefix_rex")
169*38fd1498Szrj     (if_then_else (eq_attr "alternative" "9,10,15,16")
170*38fd1498Szrj       (const_string "1")
171*38fd1498Szrj       (const_string "*")))
172*38fd1498Szrj   (set (attr "prefix")
173*38fd1498Szrj     (if_then_else (eq_attr "type" "sselog1,ssemov")
174*38fd1498Szrj       (const_string "maybe_vex")
175*38fd1498Szrj       (const_string "orig")))
176*38fd1498Szrj   (set (attr "prefix_data16")
177*38fd1498Szrj     (if_then_else
178*38fd1498Szrj       (and (eq_attr "type" "ssemov") (eq_attr "mode" "DI"))
179*38fd1498Szrj       (const_string "1")
180*38fd1498Szrj       (const_string "*")))
181*38fd1498Szrj   (set (attr "mode")
182*38fd1498Szrj     (cond [(eq_attr "alternative" "2")
183*38fd1498Szrj	      (const_string "SI")
184*38fd1498Szrj	    (eq_attr "alternative" "11,12")
185*38fd1498Szrj	      (cond [(ior (match_operand 0 "ext_sse_reg_operand")
186*38fd1498Szrj			  (match_operand 1 "ext_sse_reg_operand"))
187*38fd1498Szrj			(const_string "XI")
188*38fd1498Szrj		     (match_test "<MODE>mode == V2SFmode")
189*38fd1498Szrj		       (const_string "V4SF")
190*38fd1498Szrj		     (ior (not (match_test "TARGET_SSE2"))
191*38fd1498Szrj			  (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
192*38fd1498Szrj		       (const_string "V4SF")
193*38fd1498Szrj		     (match_test "TARGET_AVX")
194*38fd1498Szrj		       (const_string "TI")
195*38fd1498Szrj		     (match_test "optimize_function_for_size_p (cfun)")
196*38fd1498Szrj		       (const_string "V4SF")
197*38fd1498Szrj		    ]
198*38fd1498Szrj		    (const_string "TI"))
199*38fd1498Szrj
200*38fd1498Szrj	    (and (eq_attr "alternative" "13,14")
201*38fd1498Szrj	    	 (ior (match_test "<MODE>mode == V2SFmode")
202*38fd1498Szrj		      (not (match_test "TARGET_SSE2"))))
203*38fd1498Szrj	      (const_string "V2SF")
204*38fd1498Szrj	   ]
205*38fd1498Szrj	   (const_string "DI")))])
206*38fd1498Szrj
;; After reload on non-64-bit targets, hand MMXMODE moves whose operands
;; satisfy the *_gr_operand predicates to ix86_split_long_move.  The
;; replacement template is a dummy; the C code emits everything and
;; finishes with DONE.
207*38fd1498Szrj(define_split
208*38fd1498Szrj  [(set (match_operand:MMXMODE 0 "nonimmediate_gr_operand")
209*38fd1498Szrj        (match_operand:MMXMODE 1 "general_gr_operand"))]
210*38fd1498Szrj  "!TARGET_64BIT && reload_completed"
211*38fd1498Szrj  [(const_int 0)]
212*38fd1498Szrj  "ix86_split_long_move (operands); DONE;")
213*38fd1498Szrj
;; Misaligned move expander.  It is expanded exactly like mov<mode>:
;; everything is delegated to ix86_expand_vector_move.
214*38fd1498Szrj(define_expand "movmisalign<mode>"
215*38fd1498Szrj  [(set (match_operand:MMXMODE 0 "nonimmediate_operand")
216*38fd1498Szrj	(match_operand:MMXMODE 1 "nonimmediate_operand"))]
217*38fd1498Szrj  "TARGET_MMX"
218*38fd1498Szrj{
219*38fd1498Szrj  ix86_expand_vector_move (<MODE>mode, operands);
220*38fd1498Szrj  DONE;
221*38fd1498Szrj})
222*38fd1498Szrj
;; movntq: store a DImode value from an MMX register ("y") to memory.
;; Wrapped in UNSPEC_MOVNTQ so it is not treated as an ordinary store.
;; Available with either TARGET_SSE or TARGET_3DNOW_A.
223*38fd1498Szrj(define_insn "sse_movntq"
224*38fd1498Szrj  [(set (match_operand:DI 0 "memory_operand" "=m")
225*38fd1498Szrj	(unspec:DI [(match_operand:DI 1 "register_operand" "y")]
226*38fd1498Szrj		   UNSPEC_MOVNTQ))]
227*38fd1498Szrj  "TARGET_SSE || TARGET_3DNOW_A"
228*38fd1498Szrj  "movntq\t{%1, %0|%0, %1}"
229*38fd1498Szrj  [(set_attr "type" "mmxmov")
230*38fd1498Szrj   (set_attr "mode" "DI")])
231*38fd1498Szrj
232*38fd1498Szrj;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
233*38fd1498Szrj;;
234*38fd1498Szrj;; Parallel single-precision floating point arithmetic
235*38fd1498Szrj;;
236*38fd1498Szrj;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
237*38fd1498Szrj
;; Expander for V2SF addition.  The preparation statement passes the
;; operands through ix86_fixup_binary_operands_no_copy before the
;; template is emitted; the result is matched by *mmx_addv2sf3.
238*38fd1498Szrj(define_expand "mmx_addv2sf3"
239*38fd1498Szrj  [(set (match_operand:V2SF 0 "register_operand")
240*38fd1498Szrj	(plus:V2SF
241*38fd1498Szrj	  (match_operand:V2SF 1 "nonimmediate_operand")
242*38fd1498Szrj	  (match_operand:V2SF 2 "nonimmediate_operand")))]
243*38fd1498Szrj  "TARGET_3DNOW"
244*38fd1498Szrj  "ix86_fixup_binary_operands_no_copy (PLUS, V2SFmode, operands);")
245*38fd1498Szrj
;; pfadd: operand 1 is tied to the destination ("0") and marked
;; commutative ("%"); operand 2 is an MMX register or memory.
246*38fd1498Szrj(define_insn "*mmx_addv2sf3"
247*38fd1498Szrj  [(set (match_operand:V2SF 0 "register_operand" "=y")
248*38fd1498Szrj	(plus:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "%0")
249*38fd1498Szrj		   (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
250*38fd1498Szrj  "TARGET_3DNOW && ix86_binary_operator_ok (PLUS, V2SFmode, operands)"
251*38fd1498Szrj  "pfadd\t{%2, %0|%0, %2}"
252*38fd1498Szrj  [(set_attr "type" "mmxadd")
253*38fd1498Szrj   (set_attr "prefix_extra" "1")
254*38fd1498Szrj   (set_attr "mode" "V2SF")])
255*38fd1498Szrj
;; Expander for operand 1 - operand 2.  The minuend (operand 1) must be
;; a register; the subtrahend may be a register or memory.  There is no
;; preparation code, so the template is emitted as written.
256*38fd1498Szrj(define_expand "mmx_subv2sf3"
257*38fd1498Szrj  [(set (match_operand:V2SF 0 "register_operand")
258*38fd1498Szrj        (minus:V2SF (match_operand:V2SF 1 "register_operand")
259*38fd1498Szrj		    (match_operand:V2SF 2 "nonimmediate_operand")))]
260*38fd1498Szrj  "TARGET_3DNOW")
261*38fd1498Szrj
;; Reversed subtraction: note the operand numbers are swapped inside the
;; minus, so this computes operand 2 - operand 1.  Here operand 2 must
;; be a register and operand 1 may be a register or memory.
262*38fd1498Szrj(define_expand "mmx_subrv2sf3"
263*38fd1498Szrj  [(set (match_operand:V2SF 0 "register_operand")
264*38fd1498Szrj        (minus:V2SF (match_operand:V2SF 2 "register_operand")
265*38fd1498Szrj		    (match_operand:V2SF 1 "nonimmediate_operand")))]
266*38fd1498Szrj  "TARGET_3DNOW")
267*38fd1498Szrj
;; V2SF subtraction, operand 1 - operand 2.  Subtraction is not
;; commutative, so each operand order gets its own alternative:
;;   alternative 0: operand 1 is tied to the destination -> pfsub
;;   alternative 1: operand 2 is tied to the destination -> pfsubr
;; Neither alternative can encode two memory sources, so the insn
;; condition rejects that combination.  The check must look at the two
;; source operands: operand 0 is a register_operand, so testing it with
;; MEM_P can never reject anything.
(define_insn "*mmx_subv2sf3"
  [(set (match_operand:V2SF 0 "register_operand" "=y,y")
        (minus:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "0,ym")
		    (match_operand:V2SF 2 "nonimmediate_operand" "ym,0")))]
  "TARGET_3DNOW && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   pfsub\t{%2, %0|%0, %2}
   pfsubr\t{%1, %0|%0, %1}"
  [(set_attr "type" "mmxadd")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "V2SF")])
279*38fd1498Szrj
;; Expander for V2SF multiplication.  The preparation statement passes
;; the operands through ix86_fixup_binary_operands_no_copy; the result
;; is matched by *mmx_mulv2sf3.
280*38fd1498Szrj(define_expand "mmx_mulv2sf3"
281*38fd1498Szrj  [(set (match_operand:V2SF 0 "register_operand")
282*38fd1498Szrj	(mult:V2SF (match_operand:V2SF 1 "nonimmediate_operand")
283*38fd1498Szrj		   (match_operand:V2SF 2 "nonimmediate_operand")))]
284*38fd1498Szrj  "TARGET_3DNOW"
285*38fd1498Szrj  "ix86_fixup_binary_operands_no_copy (MULT, V2SFmode, operands);")
286*38fd1498Szrj
;; pfmul: operand 1 is tied to the destination ("0") and marked
;; commutative ("%"); operand 2 is an MMX register or memory.
287*38fd1498Szrj(define_insn "*mmx_mulv2sf3"
288*38fd1498Szrj  [(set (match_operand:V2SF 0 "register_operand" "=y")
289*38fd1498Szrj	(mult:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "%0")
290*38fd1498Szrj		   (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
291*38fd1498Szrj  "TARGET_3DNOW && ix86_binary_operator_ok (MULT, V2SFmode, operands)"
292*38fd1498Szrj  "pfmul\t{%2, %0|%0, %2}"
293*38fd1498Szrj  [(set_attr "type" "mmxmul")
294*38fd1498Szrj   (set_attr "prefix_extra" "1")
295*38fd1498Szrj   (set_attr "mode" "V2SF")])
296*38fd1498Szrj
;; Expander for V2SF signed min/max.  Two strategies:
;;  - when only finite values are assumed and the sign of zero does not
;;    matter, fix up the operands and emit the commutative smaxmin
;;    template;
;;  - otherwise force operand 1 into a register and emit the
;;    order-sensitive mmx_ieee_* insn instead of the template.
(define_expand "mmx_<code>v2sf3"
  [(set (match_operand:V2SF 0 "register_operand")
        (smaxmin:V2SF
	  (match_operand:V2SF 1 "nonimmediate_operand")
	  (match_operand:V2SF 2 "nonimmediate_operand")))]
  "TARGET_3DNOW"
{
  if (flag_finite_math_only && !flag_signed_zeros)
    {
      /* Relaxed FP semantics: the commutative template is good enough.  */
      ix86_fixup_binary_operands_no_copy (<CODE>, V2SFmode, operands);
    }
  else
    {
      /* NaNs or signed zeros must be honoured: use the IEEE variant,
	 whose first input has to live in a register.  */
      operands[1] = force_reg (V2SFmode, operands[1]);
      emit_insn (gen_mmx_ieee_<maxmin_float>v2sf3
		 (operands[0], operands[1], operands[2]));
      DONE;
    }
})
314*38fd1498Szrj
315*38fd1498Szrj;; These versions of the min/max patterns are intentionally ignorant of
316*38fd1498Szrj;; their behavior wrt -0.0 and NaN (via the commutative operand mark).
317*38fd1498Szrj;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator
318*38fd1498Szrj;; are undefined in this condition, we're certain this is correct.
319*38fd1498Szrj
;; Commutative min/max insn ("%" on operand 1).  The mnemonic is built
;; as "pf" followed by the <maxmin_float> code attribute.
320*38fd1498Szrj(define_insn "*mmx_<code>v2sf3"
321*38fd1498Szrj  [(set (match_operand:V2SF 0 "register_operand" "=y")
322*38fd1498Szrj        (smaxmin:V2SF
323*38fd1498Szrj	  (match_operand:V2SF 1 "nonimmediate_operand" "%0")
324*38fd1498Szrj	  (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
325*38fd1498Szrj  "TARGET_3DNOW && ix86_binary_operator_ok (<CODE>, V2SFmode, operands)"
326*38fd1498Szrj  "pf<maxmin_float>\t{%2, %0|%0, %2}"
327*38fd1498Szrj  [(set_attr "type" "mmxadd")
328*38fd1498Szrj   (set_attr "prefix_extra" "1")
329*38fd1498Szrj   (set_attr "mode" "V2SF")])
330*38fd1498Szrj
331*38fd1498Szrj;; These versions of the min/max patterns implement exactly the operations
332*38fd1498Szrj;;   min = (op1 < op2 ? op1 : op2)
333*38fd1498Szrj;;   max = (!(op1 < op2) ? op1 : op2)
334*38fd1498Szrj;; Their operands are not commutative, and thus they may be used in the
335*38fd1498Szrj;; presence of -0.0 and NaN.
336*38fd1498Szrj
;; Order-sensitive min/max, modelled as an unspec over the IEEE_MAXMIN
;; iterator so the operands are never swapped.  Operand 1 must be the
;; destination register (constraint "0", no "%").
337*38fd1498Szrj(define_insn "mmx_ieee_<ieee_maxmin>v2sf3"
338*38fd1498Szrj  [(set (match_operand:V2SF 0 "register_operand" "=y")
339*38fd1498Szrj        (unspec:V2SF
340*38fd1498Szrj	  [(match_operand:V2SF 1 "register_operand" "0")
341*38fd1498Szrj	   (match_operand:V2SF 2 "nonimmediate_operand" "ym")]
342*38fd1498Szrj	  IEEE_MAXMIN))]
343*38fd1498Szrj  "TARGET_3DNOW"
344*38fd1498Szrj  "pf<ieee_maxmin>\t{%2, %0|%0, %2}"
345*38fd1498Szrj  [(set_attr "type" "mmxadd")
346*38fd1498Szrj   (set_attr "prefix_extra" "1")
347*38fd1498Szrj   (set_attr "mode" "V2SF")])
348*38fd1498Szrj
;; pfrcp: one-input instruction; the source may be an MMX register or
;; memory.  The result is described only as the opaque UNSPEC_PFRCP.
349*38fd1498Szrj(define_insn "mmx_rcpv2sf2"
350*38fd1498Szrj  [(set (match_operand:V2SF 0 "register_operand" "=y")
351*38fd1498Szrj        (unspec:V2SF [(match_operand:V2SF 1 "nonimmediate_operand" "ym")]
352*38fd1498Szrj		     UNSPEC_PFRCP))]
353*38fd1498Szrj  "TARGET_3DNOW"
354*38fd1498Szrj  "pfrcp\t{%1, %0|%0, %1}"
355*38fd1498Szrj  [(set_attr "type" "mmx")
356*38fd1498Szrj   (set_attr "prefix_extra" "1")
357*38fd1498Szrj   (set_attr "mode" "V2SF")])
358*38fd1498Szrj
;; pfrcpit1: two-input instruction with operand 1 tied to the
;; destination.  The result is described only as UNSPEC_PFRCPIT1.
359*38fd1498Szrj(define_insn "mmx_rcpit1v2sf3"
360*38fd1498Szrj  [(set (match_operand:V2SF 0 "register_operand" "=y")
361*38fd1498Szrj	(unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0")
362*38fd1498Szrj		      (match_operand:V2SF 2 "nonimmediate_operand" "ym")]
363*38fd1498Szrj		     UNSPEC_PFRCPIT1))]
364*38fd1498Szrj  "TARGET_3DNOW"
365*38fd1498Szrj  "pfrcpit1\t{%2, %0|%0, %2}"
366*38fd1498Szrj  [(set_attr "type" "mmx")
367*38fd1498Szrj   (set_attr "prefix_extra" "1")
368*38fd1498Szrj   (set_attr "mode" "V2SF")])
369*38fd1498Szrj
;; pfrcpit2: two-input instruction with operand 1 tied to the
;; destination.  The result is described only as UNSPEC_PFRCPIT2.
370*38fd1498Szrj(define_insn "mmx_rcpit2v2sf3"
371*38fd1498Szrj  [(set (match_operand:V2SF 0 "register_operand" "=y")
372*38fd1498Szrj	(unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0")
373*38fd1498Szrj		      (match_operand:V2SF 2 "nonimmediate_operand" "ym")]
374*38fd1498Szrj		     UNSPEC_PFRCPIT2))]
375*38fd1498Szrj  "TARGET_3DNOW"
376*38fd1498Szrj  "pfrcpit2\t{%2, %0|%0, %2}"
377*38fd1498Szrj  [(set_attr "type" "mmx")
378*38fd1498Szrj   (set_attr "prefix_extra" "1")
379*38fd1498Szrj   (set_attr "mode" "V2SF")])
380*38fd1498Szrj
;; pfrsqrt: one-input instruction; the source may be an MMX register or
;; memory.  The result is described only as the opaque UNSPEC_PFRSQRT.
381*38fd1498Szrj(define_insn "mmx_rsqrtv2sf2"
382*38fd1498Szrj  [(set (match_operand:V2SF 0 "register_operand" "=y")
383*38fd1498Szrj	(unspec:V2SF [(match_operand:V2SF 1 "nonimmediate_operand" "ym")]
384*38fd1498Szrj		     UNSPEC_PFRSQRT))]
385*38fd1498Szrj  "TARGET_3DNOW"
386*38fd1498Szrj  "pfrsqrt\t{%1, %0|%0, %1}"
387*38fd1498Szrj  [(set_attr "type" "mmx")
388*38fd1498Szrj   (set_attr "prefix_extra" "1")
389*38fd1498Szrj   (set_attr "mode" "V2SF")])
390*38fd1498Szrj
;; pfrsqit1: two-input instruction with operand 1 tied to the
;; destination.  The result is described only as UNSPEC_PFRSQIT1.
391*38fd1498Szrj(define_insn "mmx_rsqit1v2sf3"
392*38fd1498Szrj  [(set (match_operand:V2SF 0 "register_operand" "=y")
393*38fd1498Szrj	(unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0")
394*38fd1498Szrj		      (match_operand:V2SF 2 "nonimmediate_operand" "ym")]
395*38fd1498Szrj		     UNSPEC_PFRSQIT1))]
396*38fd1498Szrj  "TARGET_3DNOW"
397*38fd1498Szrj  "pfrsqit1\t{%2, %0|%0, %2}"
398*38fd1498Szrj  [(set_attr "type" "mmx")
399*38fd1498Szrj   (set_attr "prefix_extra" "1")
400*38fd1498Szrj   (set_attr "mode" "V2SF")])
401*38fd1498Szrj
;; Horizontal add (pfacc):
;;   result[0] = op1[0] + op1[1]
;;   result[1] = op2[0] + op2[1]
;; Operand 1 is tied to the destination.
402*38fd1498Szrj(define_insn "mmx_haddv2sf3"
403*38fd1498Szrj  [(set (match_operand:V2SF 0 "register_operand" "=y")
404*38fd1498Szrj	(vec_concat:V2SF
405*38fd1498Szrj	  (plus:SF
406*38fd1498Szrj	    (vec_select:SF
407*38fd1498Szrj	      (match_operand:V2SF 1 "register_operand" "0")
408*38fd1498Szrj	      (parallel [(const_int  0)]))
409*38fd1498Szrj	    (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
410*38fd1498Szrj	  (plus:SF
411*38fd1498Szrj            (vec_select:SF
412*38fd1498Szrj	      (match_operand:V2SF 2 "nonimmediate_operand" "ym")
413*38fd1498Szrj	      (parallel [(const_int  0)]))
414*38fd1498Szrj	    (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))))]
415*38fd1498Szrj  "TARGET_3DNOW"
416*38fd1498Szrj  "pfacc\t{%2, %0|%0, %2}"
417*38fd1498Szrj  [(set_attr "type" "mmxadd")
418*38fd1498Szrj   (set_attr "prefix_extra" "1")
419*38fd1498Szrj   (set_attr "mode" "V2SF")])
420*38fd1498Szrj
;; Horizontal subtract (pfnacc):
;;   result[0] = op1[0] - op1[1]
;;   result[1] = op2[0] - op2[1]
;; Operand 1 is tied to the destination.  Requires TARGET_3DNOW_A.
421*38fd1498Szrj(define_insn "mmx_hsubv2sf3"
422*38fd1498Szrj  [(set (match_operand:V2SF 0 "register_operand" "=y")
423*38fd1498Szrj	(vec_concat:V2SF
424*38fd1498Szrj	  (minus:SF
425*38fd1498Szrj	    (vec_select:SF
426*38fd1498Szrj	      (match_operand:V2SF 1 "register_operand" "0")
427*38fd1498Szrj	      (parallel [(const_int  0)]))
428*38fd1498Szrj	    (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
429*38fd1498Szrj	  (minus:SF
430*38fd1498Szrj            (vec_select:SF
431*38fd1498Szrj	      (match_operand:V2SF 2 "nonimmediate_operand" "ym")
432*38fd1498Szrj	      (parallel [(const_int  0)]))
433*38fd1498Szrj	    (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))))]
434*38fd1498Szrj  "TARGET_3DNOW_A"
435*38fd1498Szrj  "pfnacc\t{%2, %0|%0, %2}"
436*38fd1498Szrj  [(set_attr "type" "mmxadd")
437*38fd1498Szrj   (set_attr "prefix_extra" "1")
438*38fd1498Szrj   (set_attr "mode" "V2SF")])
439*38fd1498Szrj
;; pfpnacc is a *horizontal* mixed accumulate, in the same family as
;; pfacc and pfnacc above:
;;   result[0] = op1[0] - op1[1]
;;   result[1] = op2[0] + op2[1]
;; The RTL template must say exactly that.  Describing it as an
;; element-wise merge of (op1 + op2) and (op1 - op2) does not match
;; what the instruction computes and would let the RTL optimizers
;; simplify or constant-fold it incorrectly.  The pattern name and its
;; three operands are unchanged, so gen_mmx_addsubv2sf3 callers are
;; unaffected.
(define_insn "mmx_addsubv2sf3"
  [(set (match_operand:V2SF 0 "register_operand" "=y")
	(vec_concat:V2SF
	  (minus:SF
	    (vec_select:SF
	      (match_operand:V2SF 1 "register_operand" "0")
	      (parallel [(const_int  0)]))
	    (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
	  (plus:SF
	    (vec_select:SF
	      (match_operand:V2SF 2 "nonimmediate_operand" "ym")
	      (parallel [(const_int  0)]))
	    (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))))]
  "TARGET_3DNOW_A"
  "pfpnacc\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmxadd")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "V2SF")])
453*38fd1498Szrj
454*38fd1498Szrj;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
455*38fd1498Szrj;;
456*38fd1498Szrj;; Parallel single-precision floating point comparisons
457*38fd1498Szrj;;
458*38fd1498Szrj;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
459*38fd1498Szrj
;; Expander for V2SF equality comparison producing a V2SI result.  The
;; operands are passed through ix86_fixup_binary_operands_no_copy; the
;; result is matched by *mmx_eqv2sf3.
460*38fd1498Szrj(define_expand "mmx_eqv2sf3"
461*38fd1498Szrj  [(set (match_operand:V2SI 0 "register_operand")
462*38fd1498Szrj	(eq:V2SI (match_operand:V2SF 1 "nonimmediate_operand")
463*38fd1498Szrj		 (match_operand:V2SF 2 "nonimmediate_operand")))]
464*38fd1498Szrj  "TARGET_3DNOW"
465*38fd1498Szrj  "ix86_fixup_binary_operands_no_copy (EQ, V2SFmode, operands);")
466*38fd1498Szrj
;; pfcmpeq: equality is symmetric, so operand 1 carries the commutative
;; marker "%" in addition to being tied to the destination.
467*38fd1498Szrj(define_insn "*mmx_eqv2sf3"
468*38fd1498Szrj  [(set (match_operand:V2SI 0 "register_operand" "=y")
469*38fd1498Szrj	(eq:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "%0")
470*38fd1498Szrj		 (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
471*38fd1498Szrj  "TARGET_3DNOW && ix86_binary_operator_ok (EQ, V2SFmode, operands)"
472*38fd1498Szrj  "pfcmpeq\t{%2, %0|%0, %2}"
473*38fd1498Szrj  [(set_attr "type" "mmxcmp")
474*38fd1498Szrj   (set_attr "prefix_extra" "1")
475*38fd1498Szrj   (set_attr "mode" "V2SF")])
476*38fd1498Szrj
;; pfcmpgt: operand 1 > operand 2, V2SI result.  Not commutative, so
;; operand 1 must be the destination register (no "%").
477*38fd1498Szrj(define_insn "mmx_gtv2sf3"
478*38fd1498Szrj  [(set (match_operand:V2SI 0 "register_operand" "=y")
479*38fd1498Szrj	(gt:V2SI (match_operand:V2SF 1 "register_operand" "0")
480*38fd1498Szrj		 (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
481*38fd1498Szrj  "TARGET_3DNOW"
482*38fd1498Szrj  "pfcmpgt\t{%2, %0|%0, %2}"
483*38fd1498Szrj  [(set_attr "type" "mmxcmp")
484*38fd1498Szrj   (set_attr "prefix_extra" "1")
485*38fd1498Szrj   (set_attr "mode" "V2SF")])
486*38fd1498Szrj
;; pfcmpge: operand 1 >= operand 2, V2SI result.  Not commutative, so
;; operand 1 must be the destination register (no "%").
487*38fd1498Szrj(define_insn "mmx_gev2sf3"
488*38fd1498Szrj  [(set (match_operand:V2SI 0 "register_operand" "=y")
489*38fd1498Szrj	(ge:V2SI (match_operand:V2SF 1 "register_operand" "0")
490*38fd1498Szrj		 (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
491*38fd1498Szrj  "TARGET_3DNOW"
492*38fd1498Szrj  "pfcmpge\t{%2, %0|%0, %2}"
493*38fd1498Szrj  [(set_attr "type" "mmxcmp")
494*38fd1498Szrj   (set_attr "prefix_extra" "1")
495*38fd1498Szrj   (set_attr "mode" "V2SF")])
496*38fd1498Szrj
497*38fd1498Szrj;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
498*38fd1498Szrj;;
499*38fd1498Szrj;; Parallel single-precision floating point conversion operations
500*38fd1498Szrj;;
501*38fd1498Szrj;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
502*38fd1498Szrj
;; pf2id: V2SF -> V2SI conversion, expressed with the RTL "fix" code.
503*38fd1498Szrj(define_insn "mmx_pf2id"
504*38fd1498Szrj  [(set (match_operand:V2SI 0 "register_operand" "=y")
505*38fd1498Szrj	(fix:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "ym")))]
506*38fd1498Szrj  "TARGET_3DNOW"
507*38fd1498Szrj  "pf2id\t{%1, %0|%0, %1}"
508*38fd1498Szrj  [(set_attr "type" "mmxcvt")
509*38fd1498Szrj   (set_attr "prefix_extra" "1")
510*38fd1498Szrj   (set_attr "mode" "V2SF")])
511*38fd1498Szrj
;; pf2iw: V2SF -> integer conversion whose result is saturated to 16
;; bits (ss_truncate to V2HI) and then sign-extended back to V2SI.
;; Requires TARGET_3DNOW_A.
512*38fd1498Szrj(define_insn "mmx_pf2iw"
513*38fd1498Szrj  [(set (match_operand:V2SI 0 "register_operand" "=y")
514*38fd1498Szrj	(sign_extend:V2SI
515*38fd1498Szrj	  (ss_truncate:V2HI
516*38fd1498Szrj	    (fix:V2SI
517*38fd1498Szrj	      (match_operand:V2SF 1 "nonimmediate_operand" "ym")))))]
518*38fd1498Szrj  "TARGET_3DNOW_A"
519*38fd1498Szrj  "pf2iw\t{%1, %0|%0, %1}"
520*38fd1498Szrj  [(set_attr "type" "mmxcvt")
521*38fd1498Szrj   (set_attr "prefix_extra" "1")
522*38fd1498Szrj   (set_attr "mode" "V2SF")])
523*38fd1498Szrj
;; pi2fw: only the low 16 bits of each V2SI element are used (truncate
;; to V2HI, then sign_extend) before the conversion to float.
;; Requires TARGET_3DNOW_A.
524*38fd1498Szrj(define_insn "mmx_pi2fw"
525*38fd1498Szrj  [(set (match_operand:V2SF 0 "register_operand" "=y")
526*38fd1498Szrj	(float:V2SF
527*38fd1498Szrj	  (sign_extend:V2SI
528*38fd1498Szrj	    (truncate:V2HI
529*38fd1498Szrj	      (match_operand:V2SI 1 "nonimmediate_operand" "ym")))))]
530*38fd1498Szrj  "TARGET_3DNOW_A"
531*38fd1498Szrj  "pi2fw\t{%1, %0|%0, %1}"
532*38fd1498Szrj  [(set_attr "type" "mmxcvt")
533*38fd1498Szrj   (set_attr "prefix_extra" "1")
534*38fd1498Szrj   (set_attr "mode" "V2SF")])
535*38fd1498Szrj
;; pi2fd: V2SI -> V2SF conversion, expressed with the RTL "float" code.
536*38fd1498Szrj(define_insn "mmx_floatv2si2"
537*38fd1498Szrj  [(set (match_operand:V2SF 0 "register_operand" "=y")
538*38fd1498Szrj	(float:V2SF (match_operand:V2SI 1 "nonimmediate_operand" "ym")))]
539*38fd1498Szrj  "TARGET_3DNOW"
540*38fd1498Szrj  "pi2fd\t{%1, %0|%0, %1}"
541*38fd1498Szrj  [(set_attr "type" "mmxcvt")
542*38fd1498Szrj   (set_attr "prefix_extra" "1")
543*38fd1498Szrj   (set_attr "mode" "V2SF")])
544*38fd1498Szrj
545*38fd1498Szrj;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
546*38fd1498Szrj;;
547*38fd1498Szrj;; Parallel single-precision floating point element swizzling
548*38fd1498Szrj;;
549*38fd1498Szrj;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
550*38fd1498Szrj
;; pswapd: exchange the two elements of a V2SF vector (the selector is
;; [1, 0]).  Requires TARGET_3DNOW_A.
551*38fd1498Szrj(define_insn "mmx_pswapdv2sf2"
552*38fd1498Szrj  [(set (match_operand:V2SF 0 "register_operand" "=y")
553*38fd1498Szrj	(vec_select:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "ym")
554*38fd1498Szrj			 (parallel [(const_int 1) (const_int 0)])))]
555*38fd1498Szrj  "TARGET_3DNOW_A"
556*38fd1498Szrj  "pswapd\t{%1, %0|%0, %1}"
557*38fd1498Szrj  [(set_attr "type" "mmxcvt")
558*38fd1498Szrj   (set_attr "prefix_extra" "1")
559*38fd1498Szrj   (set_attr "mode" "V2SF")])
560*38fd1498Szrj
;; Broadcast an SF value into both elements.  The source is tied to the
;; destination register, so the output only names %0: punpckldq of the
;; register with itself.
561*38fd1498Szrj(define_insn "*vec_dupv2sf"
562*38fd1498Szrj  [(set (match_operand:V2SF 0 "register_operand" "=y")
563*38fd1498Szrj	(vec_duplicate:V2SF
564*38fd1498Szrj	  (match_operand:SF 1 "register_operand" "0")))]
565*38fd1498Szrj  "TARGET_MMX"
566*38fd1498Szrj  "punpckldq\t%0, %0"
567*38fd1498Szrj  [(set_attr "type" "mmxcvt")
568*38fd1498Szrj   (set_attr "mode" "DI")])
569*38fd1498Szrj
;; Build a V2SF from two SF values; only used when SSE is unavailable.
;;   alternative 0: low element already in the destination, high element
;;                  from an MMX register or memory -> punpckldq
;;   alternative 1: high element is constraint "C", low element from a
;;                  general register or memory -> movd of operand 1 only
570*38fd1498Szrj(define_insn "*mmx_concatv2sf"
571*38fd1498Szrj  [(set (match_operand:V2SF 0 "register_operand"     "=y,y")
572*38fd1498Szrj	(vec_concat:V2SF
573*38fd1498Szrj	  (match_operand:SF 1 "nonimmediate_operand" " 0,rm")
574*38fd1498Szrj	  (match_operand:SF 2 "vector_move_operand"  "ym,C")))]
575*38fd1498Szrj  "TARGET_MMX && !TARGET_SSE"
576*38fd1498Szrj  "@
577*38fd1498Szrj   punpckldq\t{%2, %0|%0, %2}
578*38fd1498Szrj   movd\t{%1, %0|%0, %1}"
579*38fd1498Szrj  [(set_attr "type" "mmxcvt,mmxmov")
580*38fd1498Szrj   (set_attr "mode" "DI")])
581*38fd1498Szrj
;; Set one element of a V2SF vector.  Operand 0 is the vector, operand 1
;; the new SF value, operand 2 the constant element index.  Delegated to
;; ix86_expand_vector_set, whose first argument is passed as false here.
582*38fd1498Szrj(define_expand "vec_setv2sf"
583*38fd1498Szrj  [(match_operand:V2SF 0 "register_operand")
584*38fd1498Szrj   (match_operand:SF 1 "register_operand")
585*38fd1498Szrj   (match_operand 2 "const_int_operand")]
586*38fd1498Szrj  "TARGET_MMX"
587*38fd1498Szrj{
588*38fd1498Szrj  ix86_expand_vector_set (false, operands[0], operands[1],
589*38fd1498Szrj			  INTVAL (operands[2]));
590*38fd1498Szrj  DONE;
591*38fd1498Szrj})
592*38fd1498Szrj
593*38fd1498Szrj;; Avoid combining registers from different units in a single alternative,
594*38fd1498Szrj;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract element 0.  Never output directly ("#"): after reload it is
;; split into a plain SF move from the low part of operand 1.
595*38fd1498Szrj(define_insn_and_split "*vec_extractv2sf_0"
596*38fd1498Szrj  [(set (match_operand:SF 0 "nonimmediate_operand"     "=x, m,y ,m,f,r")
597*38fd1498Szrj	(vec_select:SF
598*38fd1498Szrj	  (match_operand:V2SF 1 "nonimmediate_operand" " xm,x,ym,y,m,m")
599*38fd1498Szrj	  (parallel [(const_int 0)])))]
600*38fd1498Szrj  "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
601*38fd1498Szrj  "#"
602*38fd1498Szrj  "&& reload_completed"
603*38fd1498Szrj  [(set (match_dup 0) (match_dup 1))]
604*38fd1498Szrj  "operands[1] = gen_lowpart (SFmode, operands[1]);")
605*38fd1498Szrj
606*38fd1498Szrj;; Avoid combining registers from different units in a single alternative,
607*38fd1498Szrj;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract element 1.  Seven alternatives; the per-alternative attribute
;; lists below are positional and must stay in the same order:
;;   0    source tied to the destination -> punpckhdq
;;   1    isa sse3                       -> movshdup
;;   2    isa noavx                      -> shufps with immediate 0xe5
;;   3-6  offsettable memory source ("o"), output "#": these have to be
;;        split into a plain load
608*38fd1498Szrj(define_insn "*vec_extractv2sf_1"
609*38fd1498Szrj  [(set (match_operand:SF 0 "nonimmediate_operand"     "=y,x,x,y,x,f,r")
610*38fd1498Szrj	(vec_select:SF
611*38fd1498Szrj	  (match_operand:V2SF 1 "nonimmediate_operand" " 0,x,x,o,o,o,o")
612*38fd1498Szrj	  (parallel [(const_int 1)])))]
613*38fd1498Szrj  "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
614*38fd1498Szrj  "@
615*38fd1498Szrj   punpckhdq\t%0, %0
616*38fd1498Szrj   %vmovshdup\t{%1, %0|%0, %1}
617*38fd1498Szrj   shufps\t{$0xe5, %1, %0|%0, %1, 0xe5}
618*38fd1498Szrj   #
619*38fd1498Szrj   #
620*38fd1498Szrj   #
621*38fd1498Szrj   #"
622*38fd1498Szrj  [(set_attr "isa" "*,sse3,noavx,*,*,*,*")
623*38fd1498Szrj   (set_attr "type" "mmxcvt,sse,sseshuf1,mmxmov,ssemov,fmov,imov")
624*38fd1498Szrj   (set (attr "length_immediate")
625*38fd1498Szrj     (if_then_else (eq_attr "alternative" "2")
626*38fd1498Szrj		   (const_string "1")
627*38fd1498Szrj		   (const_string "*")))
628*38fd1498Szrj   (set (attr "prefix_rep")
629*38fd1498Szrj     (if_then_else (eq_attr "alternative" "1")
630*38fd1498Szrj		   (const_string "1")
631*38fd1498Szrj		   (const_string "*")))
632*38fd1498Szrj   (set_attr "prefix" "orig,maybe_vex,orig,orig,orig,orig,orig")
633*38fd1498Szrj   (set_attr "mode" "DI,V4SF,V4SF,SF,SF,SF,SF")])
634*38fd1498Szrj
;; After reload, extracting element 1 from a V2SF in memory into a
;; register becomes a plain SF load from the same address plus 4 bytes.
635*38fd1498Szrj(define_split
636*38fd1498Szrj  [(set (match_operand:SF 0 "register_operand")
637*38fd1498Szrj	(vec_select:SF
638*38fd1498Szrj	  (match_operand:V2SF 1 "memory_operand")
639*38fd1498Szrj	  (parallel [(const_int 1)])))]
640*38fd1498Szrj  "TARGET_MMX && reload_completed"
641*38fd1498Szrj  [(set (match_dup 0) (match_dup 1))]
642*38fd1498Szrj  "operands[1] = adjust_address (operands[1], SFmode, 4);")
643*38fd1498Szrj
;; Extract one SF element from a V2SF vector.  Operand 0 is the result,
;; operand 1 the vector, operand 2 the constant element index.
;; Delegated to ix86_expand_vector_extract (first argument false).
644*38fd1498Szrj(define_expand "vec_extractv2sfsf"
645*38fd1498Szrj  [(match_operand:SF 0 "register_operand")
646*38fd1498Szrj   (match_operand:V2SF 1 "register_operand")
647*38fd1498Szrj   (match_operand 2 "const_int_operand")]
648*38fd1498Szrj  "TARGET_MMX"
649*38fd1498Szrj{
650*38fd1498Szrj  ix86_expand_vector_extract (false, operands[0], operands[1],
651*38fd1498Szrj			      INTVAL (operands[2]));
652*38fd1498Szrj  DONE;
653*38fd1498Szrj})
654*38fd1498Szrj
;; Expander: initialize a V2SF register (operand 0) from operand 1, which
;; carries no mode or predicate here.  The work is delegated to
;; ix86_expand_vector_init.  Note the condition is TARGET_SSE, not
;; TARGET_MMX.
655*38fd1498Szrj(define_expand "vec_initv2sfsf"
656*38fd1498Szrj  [(match_operand:V2SF 0 "register_operand")
657*38fd1498Szrj   (match_operand 1)]
658*38fd1498Szrj  "TARGET_SSE"
659*38fd1498Szrj{
660*38fd1498Szrj  ix86_expand_vector_init (false, operands[0], operands[1]);
661*38fd1498Szrj  DONE;
662*38fd1498Szrj})
663*38fd1498Szrj
664*38fd1498Szrj;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
665*38fd1498Szrj;;
666*38fd1498Szrj;; Parallel integral arithmetic
667*38fd1498Szrj;;
668*38fd1498Szrj;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
669*38fd1498Szrj
;; Expander for element-wise add/subtract (the plusminus code iterator)
;; over the MMXMODEI8 modes.  Enabled for TARGET_MMX, or for V1DImode when
;; TARGET_SSE2 is set.  The preparation statement runs the operands through
;; ix86_fixup_binary_operands_no_copy before the RTL template is used.
670*38fd1498Szrj(define_expand "mmx_<plusminus_insn><mode>3"
671*38fd1498Szrj  [(set (match_operand:MMXMODEI8 0 "register_operand")
672*38fd1498Szrj	(plusminus:MMXMODEI8
673*38fd1498Szrj	  (match_operand:MMXMODEI8 1 "nonimmediate_operand")
674*38fd1498Szrj	  (match_operand:MMXMODEI8 2 "nonimmediate_operand")))]
675*38fd1498Szrj  "TARGET_MMX || (TARGET_SSE2 && <MODE>mode == V1DImode)"
676*38fd1498Szrj  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
677*38fd1498Szrj
;; Insn for element-wise add/subtract over the MMXMODEI8 modes.  Operand 1
;; is tied to the destination ("0"); the <comm> attribute is defined outside
;; this chunk and presumably supplies the "%" commutativity marker for plus
;; only -- confirm.  Operand 2 is an MMX register or memory ("ym").  The
;; condition also requires ix86_binary_operator_ok.  The mnemonic is built
;; from <plusminus_mnemonic> and <mmxvecsize>.
678*38fd1498Szrj(define_insn "*mmx_<plusminus_insn><mode>3"
679*38fd1498Szrj  [(set (match_operand:MMXMODEI8 0 "register_operand" "=y")
680*38fd1498Szrj        (plusminus:MMXMODEI8
681*38fd1498Szrj	  (match_operand:MMXMODEI8 1 "nonimmediate_operand" "<comm>0")
682*38fd1498Szrj	  (match_operand:MMXMODEI8 2 "nonimmediate_operand" "ym")))]
683*38fd1498Szrj  "(TARGET_MMX || (TARGET_SSE2 && <MODE>mode == V1DImode))
684*38fd1498Szrj   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
685*38fd1498Szrj  "p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}"
686*38fd1498Szrj  [(set_attr "type" "mmxadd")
687*38fd1498Szrj   (set_attr "mode" "DI")])
688*38fd1498Szrj
;; Expander for the saturating add/subtract codes (the sat_plusminus
;; iterator) over the MMXMODE12 modes.  Requires TARGET_MMX.  Operands are
;; first passed through ix86_fixup_binary_operands_no_copy.
689*38fd1498Szrj(define_expand "mmx_<plusminus_insn><mode>3"
690*38fd1498Szrj  [(set (match_operand:MMXMODE12 0 "register_operand")
691*38fd1498Szrj	(sat_plusminus:MMXMODE12
692*38fd1498Szrj	  (match_operand:MMXMODE12 1 "nonimmediate_operand")
693*38fd1498Szrj	  (match_operand:MMXMODE12 2 "nonimmediate_operand")))]
694*38fd1498Szrj  "TARGET_MMX"
695*38fd1498Szrj  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
696*38fd1498Szrj
;; Insn for saturating add/subtract over the MMXMODE12 modes.  Operand 1 is
;; tied to the destination (the <comm> marker is defined outside this
;; chunk); operand 2 is an MMX register or memory.  The output template has
;; the same form as the non-saturating insn, with the mnemonic taken from
;; <plusminus_mnemonic> and <mmxvecsize>.
697*38fd1498Szrj(define_insn "*mmx_<plusminus_insn><mode>3"
698*38fd1498Szrj  [(set (match_operand:MMXMODE12 0 "register_operand" "=y")
699*38fd1498Szrj        (sat_plusminus:MMXMODE12
700*38fd1498Szrj	  (match_operand:MMXMODE12 1 "nonimmediate_operand" "<comm>0")
701*38fd1498Szrj	  (match_operand:MMXMODE12 2 "nonimmediate_operand" "ym")))]
702*38fd1498Szrj  "TARGET_MMX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
703*38fd1498Szrj  "p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}"
704*38fd1498Szrj  [(set_attr "type" "mmxadd")
705*38fd1498Szrj   (set_attr "mode" "DI")])
706*38fd1498Szrj
;; Expander for a V4HI element-wise multiply whose result is also V4HI.
;; Requires TARGET_MMX.  Operands are first passed through
;; ix86_fixup_binary_operands_no_copy.
707*38fd1498Szrj(define_expand "mmx_mulv4hi3"
708*38fd1498Szrj  [(set (match_operand:V4HI 0 "register_operand")
709*38fd1498Szrj        (mult:V4HI (match_operand:V4HI 1 "nonimmediate_operand")
710*38fd1498Szrj		   (match_operand:V4HI 2 "nonimmediate_operand")))]
711*38fd1498Szrj  "TARGET_MMX"
712*38fd1498Szrj  "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
713*38fd1498Szrj
;; Insn emitting pmullw for the V4HI multiply.  Operand 1 is commutative
;; with operand 2 and tied to the destination ("%0"); operand 2 is an MMX
;; register or memory ("ym").
714*38fd1498Szrj(define_insn "*mmx_mulv4hi3"
715*38fd1498Szrj  [(set (match_operand:V4HI 0 "register_operand" "=y")
716*38fd1498Szrj        (mult:V4HI (match_operand:V4HI 1 "nonimmediate_operand" "%0")
717*38fd1498Szrj		   (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
718*38fd1498Szrj  "TARGET_MMX && ix86_binary_operator_ok (MULT, V4HImode, operands)"
719*38fd1498Szrj  "pmullw\t{%2, %0|%0, %2}"
720*38fd1498Szrj  [(set_attr "type" "mmxmul")
721*38fd1498Szrj   (set_attr "mode" "DI")])
722*38fd1498Szrj
;; Expander for the signed high-part multiply: both V4HI inputs are
;; sign-extended to V4SI, multiplied, shifted right logically by 16 and
;; truncated back to V4HI.  Requires TARGET_MMX.
723*38fd1498Szrj(define_expand "mmx_smulv4hi3_highpart"
724*38fd1498Szrj  [(set (match_operand:V4HI 0 "register_operand")
725*38fd1498Szrj	(truncate:V4HI
726*38fd1498Szrj	  (lshiftrt:V4SI
727*38fd1498Szrj	    (mult:V4SI
728*38fd1498Szrj	      (sign_extend:V4SI
729*38fd1498Szrj		(match_operand:V4HI 1 "nonimmediate_operand"))
730*38fd1498Szrj	      (sign_extend:V4SI
731*38fd1498Szrj		(match_operand:V4HI 2 "nonimmediate_operand")))
732*38fd1498Szrj	    (const_int 16))))]
733*38fd1498Szrj  "TARGET_MMX"
734*38fd1498Szrj  "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
735*38fd1498Szrj
;; Insn emitting pmulhw for the signed high-part multiply RTL (sign-extend,
;; multiply, shift right by 16, truncate).  Operand 1 is commutative and
;; tied to the destination; operand 2 is an MMX register or memory.
736*38fd1498Szrj(define_insn "*mmx_smulv4hi3_highpart"
737*38fd1498Szrj  [(set (match_operand:V4HI 0 "register_operand" "=y")
738*38fd1498Szrj	(truncate:V4HI
739*38fd1498Szrj	  (lshiftrt:V4SI
740*38fd1498Szrj	    (mult:V4SI
741*38fd1498Szrj	      (sign_extend:V4SI
742*38fd1498Szrj		(match_operand:V4HI 1 "nonimmediate_operand" "%0"))
743*38fd1498Szrj	      (sign_extend:V4SI
744*38fd1498Szrj		(match_operand:V4HI 2 "nonimmediate_operand" "ym")))
745*38fd1498Szrj	    (const_int 16))))]
746*38fd1498Szrj  "TARGET_MMX && ix86_binary_operator_ok (MULT, V4HImode, operands)"
747*38fd1498Szrj  "pmulhw\t{%2, %0|%0, %2}"
748*38fd1498Szrj  [(set_attr "type" "mmxmul")
749*38fd1498Szrj   (set_attr "mode" "DI")])
750*38fd1498Szrj
;; Expander for the unsigned high-part multiply: same shape as the signed
;; variant but with zero_extend.  Enabled for TARGET_SSE or TARGET_3DNOW_A
;; rather than plain TARGET_MMX.
751*38fd1498Szrj(define_expand "mmx_umulv4hi3_highpart"
752*38fd1498Szrj  [(set (match_operand:V4HI 0 "register_operand")
753*38fd1498Szrj	(truncate:V4HI
754*38fd1498Szrj	  (lshiftrt:V4SI
755*38fd1498Szrj	    (mult:V4SI
756*38fd1498Szrj	      (zero_extend:V4SI
757*38fd1498Szrj		(match_operand:V4HI 1 "nonimmediate_operand"))
758*38fd1498Szrj	      (zero_extend:V4SI
759*38fd1498Szrj		(match_operand:V4HI 2 "nonimmediate_operand")))
760*38fd1498Szrj	    (const_int 16))))]
761*38fd1498Szrj  "TARGET_SSE || TARGET_3DNOW_A"
762*38fd1498Szrj  "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
763*38fd1498Szrj
;; Insn emitting pmulhuw for the unsigned high-part multiply RTL
;; (zero-extend, multiply, shift right by 16, truncate).  Enabled for
;; TARGET_SSE or TARGET_3DNOW_A.  Operand 1 is commutative and tied to the
;; destination; operand 2 is an MMX register or memory.
764*38fd1498Szrj(define_insn "*mmx_umulv4hi3_highpart"
765*38fd1498Szrj  [(set (match_operand:V4HI 0 "register_operand" "=y")
766*38fd1498Szrj	(truncate:V4HI
767*38fd1498Szrj	  (lshiftrt:V4SI
768*38fd1498Szrj	    (mult:V4SI
769*38fd1498Szrj	      (zero_extend:V4SI
770*38fd1498Szrj		(match_operand:V4HI 1 "nonimmediate_operand" "%0"))
771*38fd1498Szrj	      (zero_extend:V4SI
772*38fd1498Szrj		(match_operand:V4HI 2 "nonimmediate_operand" "ym")))
773*38fd1498Szrj	  (const_int 16))))]
774*38fd1498Szrj  "(TARGET_SSE || TARGET_3DNOW_A)
775*38fd1498Szrj   && ix86_binary_operator_ok (MULT, V4HImode, operands)"
776*38fd1498Szrj  "pmulhuw\t{%2, %0|%0, %2}"
777*38fd1498Szrj  [(set_attr "type" "mmxmul")
778*38fd1498Szrj   (set_attr "mode" "DI")])
779*38fd1498Szrj
;; Expander for multiply-and-add of adjacent halfwords.  Elements 0 and 2
;; of each V4HI input are sign-extended and multiplied, likewise elements
;; 1 and 3, and the two V2SI products are added.  The match_dup forms reuse
;; operands 1 and 2 for the odd-element selection.  Requires TARGET_MMX.
780*38fd1498Szrj(define_expand "mmx_pmaddwd"
781*38fd1498Szrj  [(set (match_operand:V2SI 0 "register_operand")
782*38fd1498Szrj        (plus:V2SI
783*38fd1498Szrj	  (mult:V2SI
784*38fd1498Szrj	    (sign_extend:V2SI
785*38fd1498Szrj	      (vec_select:V2HI
786*38fd1498Szrj		(match_operand:V4HI 1 "nonimmediate_operand")
787*38fd1498Szrj		(parallel [(const_int 0) (const_int 2)])))
788*38fd1498Szrj	    (sign_extend:V2SI
789*38fd1498Szrj	      (vec_select:V2HI
790*38fd1498Szrj		(match_operand:V4HI 2 "nonimmediate_operand")
791*38fd1498Szrj		(parallel [(const_int 0) (const_int 2)]))))
792*38fd1498Szrj	  (mult:V2SI
793*38fd1498Szrj	    (sign_extend:V2SI
794*38fd1498Szrj	      (vec_select:V2HI (match_dup 1)
795*38fd1498Szrj		(parallel [(const_int 1) (const_int 3)])))
796*38fd1498Szrj	    (sign_extend:V2SI
797*38fd1498Szrj	      (vec_select:V2HI (match_dup 2)
798*38fd1498Szrj		(parallel [(const_int 1) (const_int 3)]))))))]
799*38fd1498Szrj  "TARGET_MMX"
800*38fd1498Szrj  "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
801*38fd1498Szrj
;; Insn emitting pmaddwd.  The RTL is the sum of the products of the even
;; (0, 2) and odd (1, 3) sign-extended halfword elements of operands 1 and
;; 2.  Operand 1 is commutative and tied to the destination; operand 2 is
;; an MMX register or memory.
802*38fd1498Szrj(define_insn "*mmx_pmaddwd"
803*38fd1498Szrj  [(set (match_operand:V2SI 0 "register_operand" "=y")
804*38fd1498Szrj        (plus:V2SI
805*38fd1498Szrj	  (mult:V2SI
806*38fd1498Szrj	    (sign_extend:V2SI
807*38fd1498Szrj	      (vec_select:V2HI
808*38fd1498Szrj		(match_operand:V4HI 1 "nonimmediate_operand" "%0")
809*38fd1498Szrj		(parallel [(const_int 0) (const_int 2)])))
810*38fd1498Szrj	    (sign_extend:V2SI
811*38fd1498Szrj	      (vec_select:V2HI
812*38fd1498Szrj		(match_operand:V4HI 2 "nonimmediate_operand" "ym")
813*38fd1498Szrj		(parallel [(const_int 0) (const_int 2)]))))
814*38fd1498Szrj	  (mult:V2SI
815*38fd1498Szrj	    (sign_extend:V2SI
816*38fd1498Szrj	      (vec_select:V2HI (match_dup 1)
817*38fd1498Szrj		(parallel [(const_int 1) (const_int 3)])))
818*38fd1498Szrj	    (sign_extend:V2SI
819*38fd1498Szrj	      (vec_select:V2HI (match_dup 2)
820*38fd1498Szrj		(parallel [(const_int 1) (const_int 3)]))))))]
821*38fd1498Szrj  "TARGET_MMX && ix86_binary_operator_ok (MULT, V4HImode, operands)"
822*38fd1498Szrj  "pmaddwd\t{%2, %0|%0, %2}"
823*38fd1498Szrj  [(set_attr "type" "mmxmul")
824*38fd1498Szrj   (set_attr "mode" "DI")])
825*38fd1498Szrj
;; Expander for the rounding signed high-part multiply: the sign-extended
;; V4SI product has 32768 added to every element before the shift right by
;; 16 and the truncation to V4HI.  Requires TARGET_3DNOW.
826*38fd1498Szrj(define_expand "mmx_pmulhrwv4hi3"
827*38fd1498Szrj  [(set (match_operand:V4HI 0 "register_operand")
828*38fd1498Szrj	(truncate:V4HI
829*38fd1498Szrj	  (lshiftrt:V4SI
830*38fd1498Szrj	    (plus:V4SI
831*38fd1498Szrj	      (mult:V4SI
832*38fd1498Szrj	        (sign_extend:V4SI
833*38fd1498Szrj		  (match_operand:V4HI 1 "nonimmediate_operand"))
834*38fd1498Szrj	        (sign_extend:V4SI
835*38fd1498Szrj		  (match_operand:V4HI 2 "nonimmediate_operand")))
836*38fd1498Szrj	      (const_vector:V4SI [(const_int 32768) (const_int 32768)
837*38fd1498Szrj				  (const_int 32768) (const_int 32768)]))
838*38fd1498Szrj	    (const_int 16))))]
839*38fd1498Szrj  "TARGET_3DNOW"
840*38fd1498Szrj  "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
841*38fd1498Szrj
;; Insn emitting pmulhrw for the rounding high-part multiply RTL (product
;; plus 32768 per element, shifted right by 16, truncated).  Requires
;; TARGET_3DNOW.  Sets the prefix_extra attribute to 1 in addition to the
;; usual type/mode attributes.
842*38fd1498Szrj(define_insn "*mmx_pmulhrwv4hi3"
843*38fd1498Szrj  [(set (match_operand:V4HI 0 "register_operand" "=y")
844*38fd1498Szrj	(truncate:V4HI
845*38fd1498Szrj	  (lshiftrt:V4SI
846*38fd1498Szrj	    (plus:V4SI
847*38fd1498Szrj	      (mult:V4SI
848*38fd1498Szrj	        (sign_extend:V4SI
849*38fd1498Szrj		  (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
850*38fd1498Szrj	        (sign_extend:V4SI
851*38fd1498Szrj		  (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
852*38fd1498Szrj	      (const_vector:V4SI [(const_int 32768) (const_int 32768)
853*38fd1498Szrj				  (const_int 32768) (const_int 32768)]))
854*38fd1498Szrj	    (const_int 16))))]
855*38fd1498Szrj  "TARGET_3DNOW && ix86_binary_operator_ok (MULT, V4HImode, operands)"
856*38fd1498Szrj  "pmulhrw\t{%2, %0|%0, %2}"
857*38fd1498Szrj  [(set_attr "type" "mmxmul")
858*38fd1498Szrj   (set_attr "prefix_extra" "1")
859*38fd1498Szrj   (set_attr "mode" "DI")])
860*38fd1498Szrj
;; Expander for an unsigned widening multiply of element 0 of two V2SI
;; operands: each selected SI element is zero-extended to V1DI and the two
;; are multiplied.  Requires TARGET_SSE2.  The operand fixup is done in
;; V2SImode.
861*38fd1498Szrj(define_expand "sse2_umulv1siv1di3"
862*38fd1498Szrj  [(set (match_operand:V1DI 0 "register_operand")
863*38fd1498Szrj        (mult:V1DI
864*38fd1498Szrj	  (zero_extend:V1DI
865*38fd1498Szrj	    (vec_select:V1SI
866*38fd1498Szrj	      (match_operand:V2SI 1 "nonimmediate_operand")
867*38fd1498Szrj	      (parallel [(const_int 0)])))
868*38fd1498Szrj	  (zero_extend:V1DI
869*38fd1498Szrj	    (vec_select:V1SI
870*38fd1498Szrj	      (match_operand:V2SI 2 "nonimmediate_operand")
871*38fd1498Szrj	      (parallel [(const_int 0)])))))]
872*38fd1498Szrj  "TARGET_SSE2"
873*38fd1498Szrj  "ix86_fixup_binary_operands_no_copy (MULT, V2SImode, operands);")
874*38fd1498Szrj
;; Insn emitting pmuludq on MMX registers: the zero-extended element 0 of
;; operand 1 times the zero-extended element 0 of operand 2, giving a V1DI
;; result.  Requires TARGET_SSE2.  Operand 1 is commutative and tied to the
;; destination.
875*38fd1498Szrj(define_insn "*sse2_umulv1siv1di3"
876*38fd1498Szrj  [(set (match_operand:V1DI 0 "register_operand" "=y")
877*38fd1498Szrj        (mult:V1DI
878*38fd1498Szrj	  (zero_extend:V1DI
879*38fd1498Szrj	    (vec_select:V1SI
880*38fd1498Szrj	      (match_operand:V2SI 1 "nonimmediate_operand" "%0")
881*38fd1498Szrj	      (parallel [(const_int 0)])))
882*38fd1498Szrj	  (zero_extend:V1DI
883*38fd1498Szrj	    (vec_select:V1SI
884*38fd1498Szrj	      (match_operand:V2SI 2 "nonimmediate_operand" "ym")
885*38fd1498Szrj	      (parallel [(const_int 0)])))))]
886*38fd1498Szrj  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2SImode, operands)"
887*38fd1498Szrj  "pmuludq\t{%2, %0|%0, %2}"
888*38fd1498Szrj  [(set_attr "type" "mmxmul")
889*38fd1498Szrj   (set_attr "mode" "DI")])
890*38fd1498Szrj
;; Expander for the V4HI max/min codes of the smaxmin iterator.  Enabled
;; for TARGET_SSE or TARGET_3DNOW_A.  Operands are first passed through
;; ix86_fixup_binary_operands_no_copy.
891*38fd1498Szrj(define_expand "mmx_<code>v4hi3"
892*38fd1498Szrj  [(set (match_operand:V4HI 0 "register_operand")
893*38fd1498Szrj        (smaxmin:V4HI
894*38fd1498Szrj	  (match_operand:V4HI 1 "nonimmediate_operand")
895*38fd1498Szrj	  (match_operand:V4HI 2 "nonimmediate_operand")))]
896*38fd1498Szrj  "TARGET_SSE || TARGET_3DNOW_A"
897*38fd1498Szrj  "ix86_fixup_binary_operands_no_copy (<CODE>, V4HImode, operands);")
898*38fd1498Szrj
;; Insn for the V4HI smaxmin codes; the mnemonic is "p", the <maxmin_int>
;; attribute, then "w".  Operand 1 is commutative and tied to the
;; destination; operand 2 is an MMX register or memory.
899*38fd1498Szrj(define_insn "*mmx_<code>v4hi3"
900*38fd1498Szrj  [(set (match_operand:V4HI 0 "register_operand" "=y")
901*38fd1498Szrj        (smaxmin:V4HI
902*38fd1498Szrj	  (match_operand:V4HI 1 "nonimmediate_operand" "%0")
903*38fd1498Szrj	  (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
904*38fd1498Szrj  "(TARGET_SSE || TARGET_3DNOW_A)
905*38fd1498Szrj   && ix86_binary_operator_ok (<CODE>, V4HImode, operands)"
906*38fd1498Szrj  "p<maxmin_int>w\t{%2, %0|%0, %2}"
907*38fd1498Szrj  [(set_attr "type" "mmxadd")
908*38fd1498Szrj   (set_attr "mode" "DI")])
909*38fd1498Szrj
;; Expander for the V8QI max/min codes of the umaxmin iterator.  Enabled
;; for TARGET_SSE or TARGET_3DNOW_A.  Operands are first passed through
;; ix86_fixup_binary_operands_no_copy.
910*38fd1498Szrj(define_expand "mmx_<code>v8qi3"
911*38fd1498Szrj  [(set (match_operand:V8QI 0 "register_operand")
912*38fd1498Szrj        (umaxmin:V8QI
913*38fd1498Szrj	  (match_operand:V8QI 1 "nonimmediate_operand")
914*38fd1498Szrj	  (match_operand:V8QI 2 "nonimmediate_operand")))]
915*38fd1498Szrj  "TARGET_SSE || TARGET_3DNOW_A"
916*38fd1498Szrj  "ix86_fixup_binary_operands_no_copy (<CODE>, V8QImode, operands);")
917*38fd1498Szrj
;; Insn for the V8QI umaxmin codes; the mnemonic is "p", the <maxmin_int>
;; attribute, then "b".  Operand 1 is commutative and tied to the
;; destination; operand 2 is an MMX register or memory.
918*38fd1498Szrj(define_insn "*mmx_<code>v8qi3"
919*38fd1498Szrj  [(set (match_operand:V8QI 0 "register_operand" "=y")
920*38fd1498Szrj        (umaxmin:V8QI
921*38fd1498Szrj	  (match_operand:V8QI 1 "nonimmediate_operand" "%0")
922*38fd1498Szrj	  (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
923*38fd1498Szrj  "(TARGET_SSE || TARGET_3DNOW_A)
924*38fd1498Szrj   && ix86_binary_operator_ok (<CODE>, V8QImode, operands)"
925*38fd1498Szrj  "p<maxmin_int>b\t{%2, %0|%0, %2}"
926*38fd1498Szrj  [(set_attr "type" "mmxadd")
927*38fd1498Szrj   (set_attr "mode" "DI")])
928*38fd1498Szrj
;; Arithmetic right shift (psra<mmxvecsize>) over the MMXMODE24 modes.
;; Operand 1 must be a register tied to the destination.  Operand 2 is a
;; DImode shift count, either an MMX register or a constant accepted by the
;; "N" constraint.  length_immediate is 1 when operand 2 is a const_int and
;; 0 otherwise.
929*38fd1498Szrj(define_insn "mmx_ashr<mode>3"
930*38fd1498Szrj  [(set (match_operand:MMXMODE24 0 "register_operand" "=y")
931*38fd1498Szrj        (ashiftrt:MMXMODE24
932*38fd1498Szrj	  (match_operand:MMXMODE24 1 "register_operand" "0")
933*38fd1498Szrj	  (match_operand:DI 2 "nonmemory_operand" "yN")))]
934*38fd1498Szrj  "TARGET_MMX"
935*38fd1498Szrj  "psra<mmxvecsize>\t{%2, %0|%0, %2}"
936*38fd1498Szrj  [(set_attr "type" "mmxshft")
937*38fd1498Szrj   (set (attr "length_immediate")
938*38fd1498Szrj     (if_then_else (match_operand 2 "const_int_operand")
939*38fd1498Szrj       (const_string "1")
940*38fd1498Szrj       (const_string "0")))
941*38fd1498Szrj   (set_attr "mode" "DI")])
942*38fd1498Szrj
;; Shifts for the codes of the any_lshift iterator over the MMXMODE248
;; modes; the mnemonic is "p", the <vshift> attribute, then <mmxvecsize>.
;; Operand handling mirrors mmx_ashr<mode>3: operand 1 is tied to the
;; destination, and operand 2 is a DImode count in an MMX register or an
;; "N" constant.  length_immediate is 1 only for a const_int count.
943*38fd1498Szrj(define_insn "mmx_<shift_insn><mode>3"
944*38fd1498Szrj  [(set (match_operand:MMXMODE248 0 "register_operand" "=y")
945*38fd1498Szrj        (any_lshift:MMXMODE248
946*38fd1498Szrj	  (match_operand:MMXMODE248 1 "register_operand" "0")
947*38fd1498Szrj	  (match_operand:DI 2 "nonmemory_operand" "yN")))]
948*38fd1498Szrj  "TARGET_MMX"
949*38fd1498Szrj  "p<vshift><mmxvecsize>\t{%2, %0|%0, %2}"
950*38fd1498Szrj  [(set_attr "type" "mmxshft")
951*38fd1498Szrj   (set (attr "length_immediate")
952*38fd1498Szrj     (if_then_else (match_operand 2 "const_int_operand")
953*38fd1498Szrj       (const_string "1")
954*38fd1498Szrj       (const_string "0")))
955*38fd1498Szrj   (set_attr "mode" "DI")])
956*38fd1498Szrj
957*38fd1498Szrj;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
958*38fd1498Szrj;;
959*38fd1498Szrj;; Parallel integral comparisons
960*38fd1498Szrj;;
961*38fd1498Szrj;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
962*38fd1498Szrj
;; Expander for element-wise equality comparison over the MMXMODEI modes.
;; Requires TARGET_MMX.  Operands are first passed through
;; ix86_fixup_binary_operands_no_copy with code EQ.
963*38fd1498Szrj(define_expand "mmx_eq<mode>3"
964*38fd1498Szrj  [(set (match_operand:MMXMODEI 0 "register_operand")
965*38fd1498Szrj        (eq:MMXMODEI
966*38fd1498Szrj	  (match_operand:MMXMODEI 1 "nonimmediate_operand")
967*38fd1498Szrj	  (match_operand:MMXMODEI 2 "nonimmediate_operand")))]
968*38fd1498Szrj  "TARGET_MMX"
969*38fd1498Szrj  "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
970*38fd1498Szrj
;; Insn emitting pcmpeq<mmxvecsize> for the equality comparison.  Operand 1
;; is commutative and tied to the destination; operand 2 is an MMX register
;; or memory.
971*38fd1498Szrj(define_insn "*mmx_eq<mode>3"
972*38fd1498Szrj  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
973*38fd1498Szrj        (eq:MMXMODEI
974*38fd1498Szrj	  (match_operand:MMXMODEI 1 "nonimmediate_operand" "%0")
975*38fd1498Szrj	  (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))]
976*38fd1498Szrj  "TARGET_MMX && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
977*38fd1498Szrj  "pcmpeq<mmxvecsize>\t{%2, %0|%0, %2}"
978*38fd1498Szrj  [(set_attr "type" "mmxcmp")
979*38fd1498Szrj   (set_attr "mode" "DI")])
980*38fd1498Szrj
;; Insn emitting pcmpgt<mmxvecsize> for the element-wise greater-than
;; comparison.  Unlike the equality pattern there is no commutativity
;; marker and no expander: operand 1 must be a register tied to the
;; destination, and operand 2 is an MMX register or memory.
981*38fd1498Szrj(define_insn "mmx_gt<mode>3"
982*38fd1498Szrj  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
983*38fd1498Szrj        (gt:MMXMODEI
984*38fd1498Szrj	  (match_operand:MMXMODEI 1 "register_operand" "0")
985*38fd1498Szrj	  (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))]
986*38fd1498Szrj  "TARGET_MMX"
987*38fd1498Szrj  "pcmpgt<mmxvecsize>\t{%2, %0|%0, %2}"
988*38fd1498Szrj  [(set_attr "type" "mmxcmp")
989*38fd1498Szrj   (set_attr "mode" "DI")])
990*38fd1498Szrj
991*38fd1498Szrj;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
992*38fd1498Szrj;;
993*38fd1498Szrj;; Parallel integral logical operations
994*38fd1498Szrj;;
995*38fd1498Szrj;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
996*38fd1498Szrj
;; Insn emitting pandn: the result is (NOT operand 1) AND operand 2.  The
;; complemented operand 1 is the register tied to the destination; operand
;; 2 is an MMX register or memory.
997*38fd1498Szrj(define_insn "mmx_andnot<mode>3"
998*38fd1498Szrj  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
999*38fd1498Szrj	(and:MMXMODEI
1000*38fd1498Szrj	  (not:MMXMODEI (match_operand:MMXMODEI 1 "register_operand" "0"))
1001*38fd1498Szrj	  (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))]
1002*38fd1498Szrj  "TARGET_MMX"
1003*38fd1498Szrj  "pandn\t{%2, %0|%0, %2}"
1004*38fd1498Szrj  [(set_attr "type" "mmxadd")
1005*38fd1498Szrj   (set_attr "mode" "DI")])
1006*38fd1498Szrj
;; Expander for the bitwise codes of the any_logic iterator over the
;; MMXMODEI modes.  Requires TARGET_MMX.  Operands are first passed through
;; ix86_fixup_binary_operands_no_copy.
1007*38fd1498Szrj(define_expand "mmx_<code><mode>3"
1008*38fd1498Szrj  [(set (match_operand:MMXMODEI 0 "register_operand")
1009*38fd1498Szrj	(any_logic:MMXMODEI
1010*38fd1498Szrj	  (match_operand:MMXMODEI 1 "nonimmediate_operand")
1011*38fd1498Szrj	  (match_operand:MMXMODEI 2 "nonimmediate_operand")))]
1012*38fd1498Szrj  "TARGET_MMX"
1013*38fd1498Szrj  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
1014*38fd1498Szrj
;; Insn for the any_logic codes; the mnemonic is "p" followed by the
;; <logic> attribute.  Operand 1 is commutative and tied to the
;; destination; operand 2 is an MMX register or memory.
1015*38fd1498Szrj(define_insn "*mmx_<code><mode>3"
1016*38fd1498Szrj  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
1017*38fd1498Szrj        (any_logic:MMXMODEI
1018*38fd1498Szrj	  (match_operand:MMXMODEI 1 "nonimmediate_operand" "%0")
1019*38fd1498Szrj	  (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))]
1020*38fd1498Szrj  "TARGET_MMX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1021*38fd1498Szrj  "p<logic>\t{%2, %0|%0, %2}"
1022*38fd1498Szrj  [(set_attr "type" "mmxadd")
1023*38fd1498Szrj   (set_attr "mode" "DI")])
1024*38fd1498Szrj
1025*38fd1498Szrj;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1026*38fd1498Szrj;;
1027*38fd1498Szrj;; Parallel integral element swizzling
1028*38fd1498Szrj;;
1029*38fd1498Szrj;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1030*38fd1498Szrj
;; Insn emitting packsswb: operands 1 and 2 (V4HI) are each narrowed to
;; V4QI with ss_truncate and concatenated into a V8QI result, operand 1's
;; half first.  Operand 1 is tied to the destination.
1031*38fd1498Szrj(define_insn "mmx_packsswb"
1032*38fd1498Szrj  [(set (match_operand:V8QI 0 "register_operand" "=y")
1033*38fd1498Szrj	(vec_concat:V8QI
1034*38fd1498Szrj	  (ss_truncate:V4QI
1035*38fd1498Szrj	    (match_operand:V4HI 1 "register_operand" "0"))
1036*38fd1498Szrj	  (ss_truncate:V4QI
1037*38fd1498Szrj	    (match_operand:V4HI 2 "nonimmediate_operand" "ym"))))]
1038*38fd1498Szrj  "TARGET_MMX"
1039*38fd1498Szrj  "packsswb\t{%2, %0|%0, %2}"
1040*38fd1498Szrj  [(set_attr "type" "mmxshft")
1041*38fd1498Szrj   (set_attr "mode" "DI")])
1042*38fd1498Szrj
;; Insn emitting packssdw: operands 1 and 2 (V2SI) are each narrowed to
;; V2HI with ss_truncate and concatenated into a V4HI result, operand 1's
;; half first.  Operand 1 is tied to the destination.
1043*38fd1498Szrj(define_insn "mmx_packssdw"
1044*38fd1498Szrj  [(set (match_operand:V4HI 0 "register_operand" "=y")
1045*38fd1498Szrj	(vec_concat:V4HI
1046*38fd1498Szrj	  (ss_truncate:V2HI
1047*38fd1498Szrj	    (match_operand:V2SI 1 "register_operand" "0"))
1048*38fd1498Szrj	  (ss_truncate:V2HI
1049*38fd1498Szrj	    (match_operand:V2SI 2 "nonimmediate_operand" "ym"))))]
1050*38fd1498Szrj  "TARGET_MMX"
1051*38fd1498Szrj  "packssdw\t{%2, %0|%0, %2}"
1052*38fd1498Szrj  [(set_attr "type" "mmxshft")
1053*38fd1498Szrj   (set_attr "mode" "DI")])
1054*38fd1498Szrj
;; Insn emitting packuswb: same shape as mmx_packsswb but the V4HI inputs
;; are narrowed to V4QI with us_truncate instead of ss_truncate.
1055*38fd1498Szrj(define_insn "mmx_packuswb"
1056*38fd1498Szrj  [(set (match_operand:V8QI 0 "register_operand" "=y")
1057*38fd1498Szrj	(vec_concat:V8QI
1058*38fd1498Szrj	  (us_truncate:V4QI
1059*38fd1498Szrj	    (match_operand:V4HI 1 "register_operand" "0"))
1060*38fd1498Szrj	  (us_truncate:V4QI
1061*38fd1498Szrj	    (match_operand:V4HI 2 "nonimmediate_operand" "ym"))))]
1062*38fd1498Szrj  "TARGET_MMX"
1063*38fd1498Szrj  "packuswb\t{%2, %0|%0, %2}"
1064*38fd1498Szrj  [(set_attr "type" "mmxshft")
1065*38fd1498Szrj   (set_attr "mode" "DI")])
1066*38fd1498Szrj
;; Insn emitting punpckhbw: from the 16-element concatenation of operands
;; 1 and 2, select elements 4,12,5,13,6,14,7,15.  That interleaves the
;; upper four bytes of operand 1 with the upper four bytes of operand 2.
1067*38fd1498Szrj(define_insn "mmx_punpckhbw"
1068*38fd1498Szrj  [(set (match_operand:V8QI 0 "register_operand" "=y")
1069*38fd1498Szrj	(vec_select:V8QI
1070*38fd1498Szrj	  (vec_concat:V16QI
1071*38fd1498Szrj	    (match_operand:V8QI 1 "register_operand" "0")
1072*38fd1498Szrj	    (match_operand:V8QI 2 "nonimmediate_operand" "ym"))
1073*38fd1498Szrj          (parallel [(const_int 4) (const_int 12)
1074*38fd1498Szrj                     (const_int 5) (const_int 13)
1075*38fd1498Szrj                     (const_int 6) (const_int 14)
1076*38fd1498Szrj                     (const_int 7) (const_int 15)])))]
1077*38fd1498Szrj  "TARGET_MMX"
1078*38fd1498Szrj  "punpckhbw\t{%2, %0|%0, %2}"
1079*38fd1498Szrj  [(set_attr "type" "mmxcvt")
1080*38fd1498Szrj   (set_attr "mode" "DI")])
1081*38fd1498Szrj
;; Insn emitting punpcklbw: from the 16-element concatenation of operands
;; 1 and 2, select elements 0,8,1,9,2,10,3,11.  That interleaves the lower
;; four bytes of operand 1 with the lower four bytes of operand 2.
;; The Intel-syntax half of the template prints operand 2 with the %k
;; modifier.  NOTE(review): presumably because only the low 32 bits of the
;; source are read -- confirm.
1082*38fd1498Szrj(define_insn "mmx_punpcklbw"
1083*38fd1498Szrj  [(set (match_operand:V8QI 0 "register_operand" "=y")
1084*38fd1498Szrj	(vec_select:V8QI
1085*38fd1498Szrj	  (vec_concat:V16QI
1086*38fd1498Szrj	    (match_operand:V8QI 1 "register_operand" "0")
1087*38fd1498Szrj	    (match_operand:V8QI 2 "nonimmediate_operand" "ym"))
1088*38fd1498Szrj          (parallel [(const_int 0) (const_int 8)
1089*38fd1498Szrj                     (const_int 1) (const_int 9)
1090*38fd1498Szrj                     (const_int 2) (const_int 10)
1091*38fd1498Szrj                     (const_int 3) (const_int 11)])))]
1092*38fd1498Szrj  "TARGET_MMX"
1093*38fd1498Szrj  "punpcklbw\t{%2, %0|%0, %k2}"
1094*38fd1498Szrj  [(set_attr "type" "mmxcvt")
1095*38fd1498Szrj   (set_attr "mode" "DI")])
1096*38fd1498Szrj
;; Insn emitting punpckhwd: from the 8-element concatenation of operands 1
;; and 2, select elements 2,6,3,7.  That interleaves the upper two
;; halfwords of operand 1 with the upper two halfwords of operand 2.
1097*38fd1498Szrj(define_insn "mmx_punpckhwd"
1098*38fd1498Szrj  [(set (match_operand:V4HI 0 "register_operand" "=y")
1099*38fd1498Szrj	(vec_select:V4HI
1100*38fd1498Szrj	  (vec_concat:V8HI
1101*38fd1498Szrj	    (match_operand:V4HI 1 "register_operand" "0")
1102*38fd1498Szrj	    (match_operand:V4HI 2 "nonimmediate_operand" "ym"))
1103*38fd1498Szrj          (parallel [(const_int 2) (const_int 6)
1104*38fd1498Szrj                     (const_int 3) (const_int 7)])))]
1105*38fd1498Szrj  "TARGET_MMX"
1106*38fd1498Szrj  "punpckhwd\t{%2, %0|%0, %2}"
1107*38fd1498Szrj  [(set_attr "type" "mmxcvt")
1108*38fd1498Szrj   (set_attr "mode" "DI")])
1109*38fd1498Szrj
;; Insn emitting punpcklwd: from the 8-element concatenation of operands 1
;; and 2, select elements 0,4,1,5.  That interleaves the lower two
;; halfwords of operand 1 with the lower two halfwords of operand 2.
;; The Intel-syntax half of the template prints operand 2 with the %k
;; modifier.
1110*38fd1498Szrj(define_insn "mmx_punpcklwd"
1111*38fd1498Szrj  [(set (match_operand:V4HI 0 "register_operand" "=y")
1112*38fd1498Szrj	(vec_select:V4HI
1113*38fd1498Szrj	  (vec_concat:V8HI
1114*38fd1498Szrj	    (match_operand:V4HI 1 "register_operand" "0")
1115*38fd1498Szrj	    (match_operand:V4HI 2 "nonimmediate_operand" "ym"))
1116*38fd1498Szrj          (parallel [(const_int 0) (const_int 4)
1117*38fd1498Szrj                     (const_int 1) (const_int 5)])))]
1118*38fd1498Szrj  "TARGET_MMX"
1119*38fd1498Szrj  "punpcklwd\t{%2, %0|%0, %k2}"
1120*38fd1498Szrj  [(set_attr "type" "mmxcvt")
1121*38fd1498Szrj   (set_attr "mode" "DI")])
1122*38fd1498Szrj
;; Insn emitting punpckhdq: from the 4-element concatenation of operands 1
;; and 2, select elements 1 and 3, i.e. the upper SI element of each
;; operand.
1123*38fd1498Szrj(define_insn "mmx_punpckhdq"
1124*38fd1498Szrj  [(set (match_operand:V2SI 0 "register_operand" "=y")
1125*38fd1498Szrj	(vec_select:V2SI
1126*38fd1498Szrj	  (vec_concat:V4SI
1127*38fd1498Szrj	    (match_operand:V2SI 1 "register_operand" "0")
1128*38fd1498Szrj	    (match_operand:V2SI 2 "nonimmediate_operand" "ym"))
1129*38fd1498Szrj	  (parallel [(const_int 1)
1130*38fd1498Szrj		     (const_int 3)])))]
1131*38fd1498Szrj  "TARGET_MMX"
1132*38fd1498Szrj  "punpckhdq\t{%2, %0|%0, %2}"
1133*38fd1498Szrj  [(set_attr "type" "mmxcvt")
1134*38fd1498Szrj   (set_attr "mode" "DI")])
1135*38fd1498Szrj
;; Insn emitting punpckldq: from the 4-element concatenation of operands 1
;; and 2, select elements 0 and 2, i.e. the lower SI element of each
;; operand.  The Intel-syntax half of the template prints operand 2 with
;; the %k modifier.
1136*38fd1498Szrj(define_insn "mmx_punpckldq"
1137*38fd1498Szrj  [(set (match_operand:V2SI 0 "register_operand" "=y")
1138*38fd1498Szrj	(vec_select:V2SI
1139*38fd1498Szrj	  (vec_concat:V4SI
1140*38fd1498Szrj	    (match_operand:V2SI 1 "register_operand" "0")
1141*38fd1498Szrj	    (match_operand:V2SI 2 "nonimmediate_operand" "ym"))
1142*38fd1498Szrj	  (parallel [(const_int 0)
1143*38fd1498Szrj		     (const_int 2)])))]
1144*38fd1498Szrj  "TARGET_MMX"
1145*38fd1498Szrj  "punpckldq\t{%2, %0|%0, %k2}"
1146*38fd1498Szrj  [(set_attr "type" "mmxcvt")
1147*38fd1498Szrj   (set_attr "mode" "DI")])
1148*38fd1498Szrj
;; Expander for inserting a halfword into a V4HI register.  Operand 2
;; arrives as SImode and operand 3 as an element index in 0..3.  The
;; preparation code replaces operand 2 by its HImode low part and turns
;; the index into the one-hot mask (1 << index) used by vec_merge.
1149*38fd1498Szrj(define_expand "mmx_pinsrw"
1150*38fd1498Szrj  [(set (match_operand:V4HI 0 "register_operand")
1151*38fd1498Szrj        (vec_merge:V4HI
1152*38fd1498Szrj          (vec_duplicate:V4HI
1153*38fd1498Szrj            (match_operand:SI 2 "nonimmediate_operand"))
1154*38fd1498Szrj	  (match_operand:V4HI 1 "register_operand")
1155*38fd1498Szrj          (match_operand:SI 3 "const_0_to_3_operand")))]
1156*38fd1498Szrj  "TARGET_SSE || TARGET_3DNOW_A"
1157*38fd1498Szrj{
1158*38fd1498Szrj  operands[2] = gen_lowpart (HImode, operands[2]);
1159*38fd1498Szrj  operands[3] = GEN_INT (1 << INTVAL (operands[3]));
1160*38fd1498Szrj})
1161*38fd1498Szrj
;; Insn emitting pinsrw.  Operand 3 is the vec_merge mask; the condition
;; accepts it only when it is a power of two whose log2 is below the V4HI
;; element count.  The output code converts the mask back to an element
;; index with exact_log2, overwriting operands[3].  A memory source is
;; printed as-is; a register source is printed with the %k modifier.
1162*38fd1498Szrj(define_insn "*mmx_pinsrw"
1163*38fd1498Szrj  [(set (match_operand:V4HI 0 "register_operand" "=y")
1164*38fd1498Szrj        (vec_merge:V4HI
1165*38fd1498Szrj          (vec_duplicate:V4HI
1166*38fd1498Szrj            (match_operand:HI 2 "nonimmediate_operand" "rm"))
1167*38fd1498Szrj	  (match_operand:V4HI 1 "register_operand" "0")
1168*38fd1498Szrj          (match_operand:SI 3 "const_int_operand")))]
1169*38fd1498Szrj  "(TARGET_SSE || TARGET_3DNOW_A)
1170*38fd1498Szrj   && ((unsigned) exact_log2 (INTVAL (operands[3]))
1171*38fd1498Szrj       < GET_MODE_NUNITS (V4HImode))"
1172*38fd1498Szrj{
1173*38fd1498Szrj  operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
1174*38fd1498Szrj  if (MEM_P (operands[2]))
1175*38fd1498Szrj    return "pinsrw\t{%3, %2, %0|%0, %2, %3}";
1176*38fd1498Szrj  else
1177*38fd1498Szrj    return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
1178*38fd1498Szrj}
1179*38fd1498Szrj  [(set_attr "type" "mmxcvt")
1180*38fd1498Szrj   (set_attr "length_immediate" "1")
1181*38fd1498Szrj   (set_attr "mode" "DI")])
1182*38fd1498Szrj
;; Insn emitting pextrw: select the HI element of a V4HI MMX register
;; indexed by operand 2 (a constant in 0..3) and zero-extend it into an
;; SImode general register ("r").
1183*38fd1498Szrj(define_insn "mmx_pextrw"
1184*38fd1498Szrj  [(set (match_operand:SI 0 "register_operand" "=r")
1185*38fd1498Szrj        (zero_extend:SI
1186*38fd1498Szrj	  (vec_select:HI
1187*38fd1498Szrj	    (match_operand:V4HI 1 "register_operand" "y")
1188*38fd1498Szrj	    (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]))))]
1189*38fd1498Szrj  "TARGET_SSE || TARGET_3DNOW_A"
1190*38fd1498Szrj  "pextrw\t{%2, %1, %0|%0, %1, %2}"
1191*38fd1498Szrj  [(set_attr "type" "mmxcvt")
1192*38fd1498Szrj   (set_attr "length_immediate" "1")
1193*38fd1498Szrj   (set_attr "mode" "DI")])
1194*38fd1498Szrj
;; Expander for the V4HI shuffle.  Operand 2 is the packed shuffle
;; immediate; it is split into four 2-bit selectors (bits 0-1, 2-3, 4-5,
;; 6-7), which are passed to mmx_pshufw_1 as separate operands.  Bits above
;; bit 7 of the immediate are ignored by the masking.
1195*38fd1498Szrj(define_expand "mmx_pshufw"
1196*38fd1498Szrj  [(match_operand:V4HI 0 "register_operand")
1197*38fd1498Szrj   (match_operand:V4HI 1 "nonimmediate_operand")
1198*38fd1498Szrj   (match_operand:SI 2 "const_int_operand")]
1199*38fd1498Szrj  "TARGET_SSE || TARGET_3DNOW_A"
1200*38fd1498Szrj{
1201*38fd1498Szrj  int mask = INTVAL (operands[2]);
1202*38fd1498Szrj  emit_insn (gen_mmx_pshufw_1 (operands[0], operands[1],
1203*38fd1498Szrj                               GEN_INT ((mask >> 0) & 3),
1204*38fd1498Szrj                               GEN_INT ((mask >> 2) & 3),
1205*38fd1498Szrj                               GEN_INT ((mask >> 4) & 3),
1206*38fd1498Szrj                               GEN_INT ((mask >> 6) & 3)));
1207*38fd1498Szrj  DONE;
1208*38fd1498Szrj})
1209*38fd1498Szrj
;; Insn emitting pshufw.  Operands 2..5 are the per-element selectors
;; (each 0..3) of the vec_select.  The output code packs them back into a
;; single immediate (operand 2 in bits 0-1 up to operand 5 in bits 6-7) and
;; stores it in operands[2] for printing.
1210*38fd1498Szrj(define_insn "mmx_pshufw_1"
1211*38fd1498Szrj  [(set (match_operand:V4HI 0 "register_operand" "=y")
1212*38fd1498Szrj        (vec_select:V4HI
1213*38fd1498Szrj          (match_operand:V4HI 1 "nonimmediate_operand" "ym")
1214*38fd1498Szrj          (parallel [(match_operand 2 "const_0_to_3_operand")
1215*38fd1498Szrj                     (match_operand 3 "const_0_to_3_operand")
1216*38fd1498Szrj                     (match_operand 4 "const_0_to_3_operand")
1217*38fd1498Szrj                     (match_operand 5 "const_0_to_3_operand")])))]
1218*38fd1498Szrj  "TARGET_SSE || TARGET_3DNOW_A"
1219*38fd1498Szrj{
1220*38fd1498Szrj  int mask = 0;
1221*38fd1498Szrj  mask |= INTVAL (operands[2]) << 0;
1222*38fd1498Szrj  mask |= INTVAL (operands[3]) << 2;
1223*38fd1498Szrj  mask |= INTVAL (operands[4]) << 4;
1224*38fd1498Szrj  mask |= INTVAL (operands[5]) << 6;
1225*38fd1498Szrj  operands[2] = GEN_INT (mask);
1226*38fd1498Szrj
1227*38fd1498Szrj  return "pshufw\t{%2, %1, %0|%0, %1, %2}";
1228*38fd1498Szrj}
1229*38fd1498Szrj  [(set_attr "type" "mmxcvt")
1230*38fd1498Szrj   (set_attr "length_immediate" "1")
1231*38fd1498Szrj   (set_attr "mode" "DI")])
1232*38fd1498Szrj
;; Insn emitting pswapd: the result is operand 1 with its two SI elements
;; exchanged (selection order 1, 0).  Requires TARGET_3DNOW_A.  The source
;; may be an MMX register or memory and is not tied to the destination.
1233*38fd1498Szrj(define_insn "mmx_pswapdv2si2"
1234*38fd1498Szrj  [(set (match_operand:V2SI 0 "register_operand" "=y")
1235*38fd1498Szrj	(vec_select:V2SI
1236*38fd1498Szrj	  (match_operand:V2SI 1 "nonimmediate_operand" "ym")
1237*38fd1498Szrj	  (parallel [(const_int 1) (const_int 0)])))]
1238*38fd1498Szrj  "TARGET_3DNOW_A"
1239*38fd1498Szrj  "pswapd\t{%1, %0|%0, %1}"
1240*38fd1498Szrj  [(set_attr "type" "mmxcvt")
1241*38fd1498Szrj   (set_attr "prefix_extra" "1")
1242*38fd1498Szrj   (set_attr "mode" "DI")])
1243*38fd1498Szrj
;; Broadcast the low halfword (truncate:HI) of an SImode value to all four
;; V4HI elements.  The source is tied to the destination register, so the
;; template shuffles %0 onto itself with pshufw and immediate 0.
1244*38fd1498Szrj(define_insn "*vec_dupv4hi"
1245*38fd1498Szrj  [(set (match_operand:V4HI 0 "register_operand" "=y")
1246*38fd1498Szrj	(vec_duplicate:V4HI
1247*38fd1498Szrj	  (truncate:HI
1248*38fd1498Szrj	    (match_operand:SI 1 "register_operand" "0"))))]
1249*38fd1498Szrj  "TARGET_SSE || TARGET_3DNOW_A"
1250*38fd1498Szrj  "pshufw\t{$0, %0, %0|%0, %0, 0}"
1251*38fd1498Szrj  [(set_attr "type" "mmxcvt")
1252*38fd1498Szrj   (set_attr "length_immediate" "1")
1253*38fd1498Szrj   (set_attr "mode" "DI")])
1254*38fd1498Szrj
;; Broadcast an SImode value to both V2SI elements.  The source is tied to
;; the destination register, so the template unpacks %0 with itself using
;; punpckldq.
1255*38fd1498Szrj(define_insn "*vec_dupv2si"
1256*38fd1498Szrj  [(set (match_operand:V2SI 0 "register_operand" "=y")
1257*38fd1498Szrj	(vec_duplicate:V2SI
1258*38fd1498Szrj	  (match_operand:SI 1 "register_operand" "0")))]
1259*38fd1498Szrj  "TARGET_MMX"
1260*38fd1498Szrj  "punpckldq\t%0, %0"
1261*38fd1498Szrj  [(set_attr "type" "mmxcvt")
1262*38fd1498Szrj   (set_attr "mode" "DI")])
1263*38fd1498Szrj
;; Build a V2SI from two SImode values; only enabled for TARGET_MMX without
;; TARGET_SSE.  Two alternatives:
;;   0: operand 1 tied to the destination, operand 2 in an MMX register or
;;      memory -> punpckldq.
;;   1: operand 1 in a general register or memory, operand 2 matching the
;;      "C" constraint -> movd of operand 1 only.
;; NOTE(review): "C" is presumably the all-zero constant, which would make
;; the movd alone sufficient -- confirm against the constraint definitions.
1264*38fd1498Szrj(define_insn "*mmx_concatv2si"
1265*38fd1498Szrj  [(set (match_operand:V2SI 0 "register_operand"     "=y,y")
1266*38fd1498Szrj	(vec_concat:V2SI
1267*38fd1498Szrj	  (match_operand:SI 1 "nonimmediate_operand" " 0,rm")
1268*38fd1498Szrj	  (match_operand:SI 2 "vector_move_operand"  "ym,C")))]
1269*38fd1498Szrj  "TARGET_MMX && !TARGET_SSE"
1270*38fd1498Szrj  "@
1271*38fd1498Szrj   punpckldq\t{%2, %0|%0, %2}
1272*38fd1498Szrj   movd\t{%1, %0|%0, %1}"
1273*38fd1498Szrj  [(set_attr "type" "mmxcvt,mmxmov")
1274*38fd1498Szrj   (set_attr "mode" "DI")])
1275*38fd1498Szrj
;; Expander: store the SImode register operand 1 into the element of the
;; V2SI register operand 0 selected by the constant index in operand 2.
;; All work is delegated to ix86_expand_vector_set, called with `false' as
;; its first argument.
1276*38fd1498Szrj(define_expand "vec_setv2si"
1277*38fd1498Szrj  [(match_operand:V2SI 0 "register_operand")
1278*38fd1498Szrj   (match_operand:SI 1 "register_operand")
1279*38fd1498Szrj   (match_operand 2 "const_int_operand")]
1280*38fd1498Szrj  "TARGET_MMX"
1281*38fd1498Szrj{
1282*38fd1498Szrj  ix86_expand_vector_set (false, operands[0], operands[1],
1283*38fd1498Szrj			  INTVAL (operands[2]));
1284*38fd1498Szrj  DONE;
1285*38fd1498Szrj})
1286*38fd1498Szrj
1287*38fd1498Szrj;; Avoid combining registers from different units in a single alternative,
1288*38fd1498Szrj;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract element 0 of a V2SI.  The insn has no assembly of its own
;; ("#"); once reload has completed it is split into a plain SImode move
;; whose source is the low part of operand 1 (gen_lowpart).  The insn
;; condition rejects the memory-to-memory form.
1289*38fd1498Szrj(define_insn_and_split "*vec_extractv2si_0"
1290*38fd1498Szrj  [(set (match_operand:SI 0 "nonimmediate_operand"     "=x,m,y, m,r")
1291*38fd1498Szrj	(vec_select:SI
1292*38fd1498Szrj	  (match_operand:V2SI 1 "nonimmediate_operand" "xm,x,ym,y,m")
1293*38fd1498Szrj	  (parallel [(const_int 0)])))]
1294*38fd1498Szrj  "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
1295*38fd1498Szrj  "#"
1296*38fd1498Szrj  "&& reload_completed"
1297*38fd1498Szrj  [(set (match_dup 0) (match_dup 1))]
1298*38fd1498Szrj  "operands[1] = gen_lowpart (SImode, operands[1]);")
1299*38fd1498Szrj
1300*38fd1498Szrj;; Avoid combining registers from different units in a single alternative,
1301*38fd1498Szrj;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract element 1 (the upper half) of a V2SI into an SImode destination.
;;   alt 0: in place in one register: punpckhdq %0, %0.
;;   alt 1: SSE2: pshufd with selector 0xe5 places element 1 of the
;;	    source in element 0 of the destination; every result element
;;	    is taken from the source, so source and destination may differ.
;;   alt 2: SSE without AVX: shufps with selector 0xe5.  shufps takes its
;;	    two low result elements from the *destination* register, so
;;	    the input must be tied to the output ("0") and the register is
;;	    shuffled with itself; using a separate source register would
;;	    return stale data from the destination.
;;   alt 3-5: source is offsettable memory; emitted as "#" and left to a
;;	    post-reload split.
1302*38fd1498Szrj(define_insn "*vec_extractv2si_1"
1303*38fd1498Szrj  [(set (match_operand:SI 0 "nonimmediate_operand"     "=y,x,x,y,x,r")
1304*38fd1498Szrj	(vec_select:SI
1305*38fd1498Szrj	  (match_operand:V2SI 1 "nonimmediate_operand" " 0,x,0,o,o,o")
1306*38fd1498Szrj	  (parallel [(const_int 1)])))]
1307*38fd1498Szrj  "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
1308*38fd1498Szrj  "@
1309*38fd1498Szrj   punpckhdq\t%0, %0
1310*38fd1498Szrj   %vpshufd\t{$0xe5, %1, %0|%0, %1, 0xe5}
1311*38fd1498Szrj   shufps\t{$0xe5, %0, %0|%0, %0, 0xe5}
1312*38fd1498Szrj   #
1313*38fd1498Szrj   #
1314*38fd1498Szrj   #"
1315*38fd1498Szrj  [(set_attr "isa" "*,sse2,noavx,*,*,*")
1316*38fd1498Szrj   (set_attr "type" "mmxcvt,sseshuf1,sseshuf1,mmxmov,ssemov,imov")
1317*38fd1498Szrj   (set (attr "length_immediate")
1318*38fd1498Szrj     (if_then_else (eq_attr "alternative" "1,2")
1319*38fd1498Szrj		   (const_string "1")
1320*38fd1498Szrj		   (const_string "*")))
1321*38fd1498Szrj   (set_attr "prefix" "orig,maybe_vex,orig,orig,orig,orig")
1322*38fd1498Szrj   (set_attr "mode" "DI,TI,V4SF,SI,SI,SI")])
1323*38fd1498Szrj
;; After reload, extracting element 1 of a V2SI that lives in memory into
;; a register is rewritten as an SImode load from byte offset 4 of that
;; memory operand.
1324*38fd1498Szrj(define_split
1325*38fd1498Szrj  [(set (match_operand:SI 0 "register_operand")
1326*38fd1498Szrj	(vec_select:SI
1327*38fd1498Szrj	  (match_operand:V2SI 1 "memory_operand")
1328*38fd1498Szrj	  (parallel [(const_int 1)])))]
1329*38fd1498Szrj  "TARGET_MMX && reload_completed"
1330*38fd1498Szrj  [(set (match_dup 0) (match_dup 1))]
1331*38fd1498Szrj  "operands[1] = adjust_address (operands[1], SImode, 4);")
1332*38fd1498Szrj
;; 64-bit only: zero-extend to DImode an element (index 0 or 1, operand 2)
;; selected from a V2SI in offsettable memory.  Emitted as "#" and split
;; after reload into a zero-extending SImode load.
1333*38fd1498Szrj(define_insn_and_split "*vec_extractv2si_zext_mem"
1334*38fd1498Szrj  [(set (match_operand:DI 0 "register_operand" "=y,x,r")
1335*38fd1498Szrj	(zero_extend:DI
1336*38fd1498Szrj	  (vec_select:SI
1337*38fd1498Szrj	    (match_operand:V2SI 1 "memory_operand" "o,o,o")
1338*38fd1498Szrj	    (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))))]
1339*38fd1498Szrj  "TARGET_64BIT && TARGET_MMX"
1340*38fd1498Szrj  "#"
1341*38fd1498Szrj  "&& reload_completed"
1342*38fd1498Szrj  [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
1343*38fd1498Szrj{
  /* Element index times the 4-byte element size gives the byte offset
     of the selected SImode word.  */
1344*38fd1498Szrj  operands[1] = adjust_address (operands[1], SImode, INTVAL (operands[2]) * 4);
1345*38fd1498Szrj})
1346*38fd1498Szrj
;; Expander: read element number operand 2 (a constant integer) of V2SI
;; operand 1 into SImode operand 0.  All work is delegated to
;; ix86_expand_vector_extract; the leading `false' argument is
;; interpreted by that helper, which is defined outside this file.
1347*38fd1498Szrj(define_expand "vec_extractv2sisi"
1348*38fd1498Szrj  [(match_operand:SI 0 "register_operand")
1349*38fd1498Szrj   (match_operand:V2SI 1 "register_operand")
1350*38fd1498Szrj   (match_operand 2 "const_int_operand")]
1351*38fd1498Szrj  "TARGET_MMX"
1352*38fd1498Szrj{
1353*38fd1498Szrj  ix86_expand_vector_extract (false, operands[0], operands[1],
1354*38fd1498Szrj			      INTVAL (operands[2]));
1355*38fd1498Szrj  DONE;
1356*38fd1498Szrj})
1357*38fd1498Szrj
;; Expander: initialize V2SI operand 0 from operand 1 (which has no
;; mode or predicate here) by calling ix86_expand_vector_init.
;; Note the condition is TARGET_SSE, not TARGET_MMX.
1358*38fd1498Szrj(define_expand "vec_initv2sisi"
1359*38fd1498Szrj  [(match_operand:V2SI 0 "register_operand")
1360*38fd1498Szrj   (match_operand 1)]
1361*38fd1498Szrj  "TARGET_SSE"
1362*38fd1498Szrj{
1363*38fd1498Szrj  ix86_expand_vector_init (false, operands[0], operands[1]);
1364*38fd1498Szrj  DONE;
1365*38fd1498Szrj})
1366*38fd1498Szrj
;; Expander: store HImode operand 1 into element number operand 2 (a
;; constant integer) of V4HI operand 0, via ix86_expand_vector_set.
1367*38fd1498Szrj(define_expand "vec_setv4hi"
1368*38fd1498Szrj  [(match_operand:V4HI 0 "register_operand")
1369*38fd1498Szrj   (match_operand:HI 1 "register_operand")
1370*38fd1498Szrj   (match_operand 2 "const_int_operand")]
1371*38fd1498Szrj  "TARGET_MMX"
1372*38fd1498Szrj{
1373*38fd1498Szrj  ix86_expand_vector_set (false, operands[0], operands[1],
1374*38fd1498Szrj			  INTVAL (operands[2]));
1375*38fd1498Szrj  DONE;
1376*38fd1498Szrj})
1377*38fd1498Szrj
;; Expander: read element number operand 2 (a constant integer) of V4HI
;; operand 1 into HImode operand 0, via ix86_expand_vector_extract.
1378*38fd1498Szrj(define_expand "vec_extractv4hihi"
1379*38fd1498Szrj  [(match_operand:HI 0 "register_operand")
1380*38fd1498Szrj   (match_operand:V4HI 1 "register_operand")
1381*38fd1498Szrj   (match_operand 2 "const_int_operand")]
1382*38fd1498Szrj  "TARGET_MMX"
1383*38fd1498Szrj{
1384*38fd1498Szrj  ix86_expand_vector_extract (false, operands[0], operands[1],
1385*38fd1498Szrj			      INTVAL (operands[2]));
1386*38fd1498Szrj  DONE;
1387*38fd1498Szrj})
1388*38fd1498Szrj
;; Expander: initialize V4HI operand 0 from operand 1 (which has no
;; mode or predicate here) by calling ix86_expand_vector_init.
;; Note the condition is TARGET_SSE, not TARGET_MMX.
1389*38fd1498Szrj(define_expand "vec_initv4hihi"
1390*38fd1498Szrj  [(match_operand:V4HI 0 "register_operand")
1391*38fd1498Szrj   (match_operand 1)]
1392*38fd1498Szrj  "TARGET_SSE"
1393*38fd1498Szrj{
1394*38fd1498Szrj  ix86_expand_vector_init (false, operands[0], operands[1]);
1395*38fd1498Szrj  DONE;
1396*38fd1498Szrj})
1397*38fd1498Szrj
;; Expander: store QImode operand 1 into element number operand 2 (a
;; constant integer) of V8QI operand 0, via ix86_expand_vector_set.
1398*38fd1498Szrj(define_expand "vec_setv8qi"
1399*38fd1498Szrj  [(match_operand:V8QI 0 "register_operand")
1400*38fd1498Szrj   (match_operand:QI 1 "register_operand")
1401*38fd1498Szrj   (match_operand 2 "const_int_operand")]
1402*38fd1498Szrj  "TARGET_MMX"
1403*38fd1498Szrj{
1404*38fd1498Szrj  ix86_expand_vector_set (false, operands[0], operands[1],
1405*38fd1498Szrj			  INTVAL (operands[2]));
1406*38fd1498Szrj  DONE;
1407*38fd1498Szrj})
1408*38fd1498Szrj
;; Expander: read element number operand 2 (a constant integer) of V8QI
;; operand 1 into QImode operand 0, via ix86_expand_vector_extract.
1409*38fd1498Szrj(define_expand "vec_extractv8qiqi"
1410*38fd1498Szrj  [(match_operand:QI 0 "register_operand")
1411*38fd1498Szrj   (match_operand:V8QI 1 "register_operand")
1412*38fd1498Szrj   (match_operand 2 "const_int_operand")]
1413*38fd1498Szrj  "TARGET_MMX"
1414*38fd1498Szrj{
1415*38fd1498Szrj  ix86_expand_vector_extract (false, operands[0], operands[1],
1416*38fd1498Szrj			      INTVAL (operands[2]));
1417*38fd1498Szrj  DONE;
1418*38fd1498Szrj})
1419*38fd1498Szrj
;; Expander: initialize V8QI operand 0 from operand 1 (which has no
;; mode or predicate here) by calling ix86_expand_vector_init.
;; Note the condition is TARGET_SSE, not TARGET_MMX.
1420*38fd1498Szrj(define_expand "vec_initv8qiqi"
1421*38fd1498Szrj  [(match_operand:V8QI 0 "register_operand")
1422*38fd1498Szrj   (match_operand 1)]
1423*38fd1498Szrj  "TARGET_SSE"
1424*38fd1498Szrj{
1425*38fd1498Szrj  ix86_expand_vector_init (false, operands[0], operands[1]);
1426*38fd1498Szrj  DONE;
1427*38fd1498Szrj})
1428*38fd1498Szrj
1429*38fd1498Szrj;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1430*38fd1498Szrj;;
1431*38fd1498Szrj;; Miscellaneous
1432*38fd1498Szrj;;
1433*38fd1498Szrj;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1434*38fd1498Szrj
;; Expander for the unsigned rounding average of two V8QI vectors.
;; The RTL spells the operation out in the wider V8HI mode:
;; zero-extend both inputs, add them, add a vector of ones, shift right
;; by one and truncate back to V8QI, i.e. (a + b + 1) >> 1 per element.
;; The preparation statement hands the operands to
;; ix86_fixup_binary_operands_no_copy (defined outside this file) as a
;; PLUS operation.
1435*38fd1498Szrj(define_expand "mmx_uavgv8qi3"
1436*38fd1498Szrj  [(set (match_operand:V8QI 0 "register_operand")
1437*38fd1498Szrj	(truncate:V8QI
1438*38fd1498Szrj	  (lshiftrt:V8HI
1439*38fd1498Szrj	    (plus:V8HI
1440*38fd1498Szrj	      (plus:V8HI
1441*38fd1498Szrj		(zero_extend:V8HI
1442*38fd1498Szrj		  (match_operand:V8QI 1 "nonimmediate_operand"))
1443*38fd1498Szrj		(zero_extend:V8HI
1444*38fd1498Szrj		  (match_operand:V8QI 2 "nonimmediate_operand")))
1445*38fd1498Szrj	      (const_vector:V8HI [(const_int 1) (const_int 1)
1446*38fd1498Szrj				  (const_int 1) (const_int 1)
1447*38fd1498Szrj				  (const_int 1) (const_int 1)
1448*38fd1498Szrj				  (const_int 1) (const_int 1)]))
1449*38fd1498Szrj	    (const_int 1))))]
1450*38fd1498Szrj  "TARGET_SSE || TARGET_3DNOW"
1451*38fd1498Szrj  "ix86_fixup_binary_operands_no_copy (PLUS, V8QImode, operands);")
1452*38fd1498Szrj
;; Insn for the V8QI unsigned rounding average, (a + b + 1) >> 1 per
;; element.  Operand 1 is tied to the destination and marked commutative
;; ("%0"); operand 2 may be an MMX register or memory.  The mnemonic is
;; chosen at output time: pavgb when TARGET_SSE or TARGET_3DNOW_A is set,
;; otherwise pavgusb.  The "prefix_extra" attribute is "1" in exactly the
;; pavgusb case.
1453*38fd1498Szrj(define_insn "*mmx_uavgv8qi3"
1454*38fd1498Szrj  [(set (match_operand:V8QI 0 "register_operand" "=y")
1455*38fd1498Szrj	(truncate:V8QI
1456*38fd1498Szrj	  (lshiftrt:V8HI
1457*38fd1498Szrj	    (plus:V8HI
1458*38fd1498Szrj	      (plus:V8HI
1459*38fd1498Szrj		(zero_extend:V8HI
1460*38fd1498Szrj		  (match_operand:V8QI 1 "nonimmediate_operand" "%0"))
1461*38fd1498Szrj		(zero_extend:V8HI
1462*38fd1498Szrj		  (match_operand:V8QI 2 "nonimmediate_operand" "ym")))
1463*38fd1498Szrj	      (const_vector:V8HI [(const_int 1) (const_int 1)
1464*38fd1498Szrj				  (const_int 1) (const_int 1)
1465*38fd1498Szrj				  (const_int 1) (const_int 1)
1466*38fd1498Szrj				  (const_int 1) (const_int 1)]))
1467*38fd1498Szrj	    (const_int 1))))]
1468*38fd1498Szrj  "(TARGET_SSE || TARGET_3DNOW)
1469*38fd1498Szrj   && ix86_binary_operator_ok (PLUS, V8QImode, operands)"
1470*38fd1498Szrj{
1471*38fd1498Szrj  /* These two instructions have the same operation, but their encoding
1472*38fd1498Szrj     is different.  Prefer the one that is de facto standard.  */
1473*38fd1498Szrj  if (TARGET_SSE || TARGET_3DNOW_A)
1474*38fd1498Szrj    return "pavgb\t{%2, %0|%0, %2}";
1475*38fd1498Szrj  else
1476*38fd1498Szrj    return "pavgusb\t{%2, %0|%0, %2}";
1477*38fd1498Szrj}
1478*38fd1498Szrj  [(set_attr "type" "mmxshft")
1479*38fd1498Szrj   (set (attr "prefix_extra")
1480*38fd1498Szrj     (if_then_else
1481*38fd1498Szrj       (not (ior (match_test "TARGET_SSE")
1482*38fd1498Szrj		 (match_test "TARGET_3DNOW_A")))
1483*38fd1498Szrj       (const_string "1")
1484*38fd1498Szrj       (const_string "*")))
1485*38fd1498Szrj   (set_attr "mode" "DI")])
1486*38fd1498Szrj
;; Expander for the unsigned rounding average of two V4HI vectors,
;; written out in V4SI: zero-extend, add, add a vector of ones, shift
;; right by one, truncate -- (a + b + 1) >> 1 per element.  The
;; preparation statement hands the operands to
;; ix86_fixup_binary_operands_no_copy (defined outside this file) as a
;; PLUS operation.
1487*38fd1498Szrj(define_expand "mmx_uavgv4hi3"
1488*38fd1498Szrj  [(set (match_operand:V4HI 0 "register_operand")
1489*38fd1498Szrj	(truncate:V4HI
1490*38fd1498Szrj	  (lshiftrt:V4SI
1491*38fd1498Szrj	    (plus:V4SI
1492*38fd1498Szrj	      (plus:V4SI
1493*38fd1498Szrj		(zero_extend:V4SI
1494*38fd1498Szrj		  (match_operand:V4HI 1 "nonimmediate_operand"))
1495*38fd1498Szrj		(zero_extend:V4SI
1496*38fd1498Szrj		  (match_operand:V4HI 2 "nonimmediate_operand")))
1497*38fd1498Szrj	      (const_vector:V4SI [(const_int 1) (const_int 1)
1498*38fd1498Szrj				  (const_int 1) (const_int 1)]))
1499*38fd1498Szrj	    (const_int 1))))]
1500*38fd1498Szrj  "TARGET_SSE || TARGET_3DNOW_A"
1501*38fd1498Szrj  "ix86_fixup_binary_operands_no_copy (PLUS, V4HImode, operands);")
1502*38fd1498Szrj
;; Insn for the V4HI unsigned rounding average, (a + b + 1) >> 1 per
;; element, emitted as pavgw.  Operand 1 is tied to the destination and
;; marked commutative ("%0"); operand 2 may be an MMX register or memory.
1503*38fd1498Szrj(define_insn "*mmx_uavgv4hi3"
1504*38fd1498Szrj  [(set (match_operand:V4HI 0 "register_operand" "=y")
1505*38fd1498Szrj	(truncate:V4HI
1506*38fd1498Szrj	  (lshiftrt:V4SI
1507*38fd1498Szrj	    (plus:V4SI
1508*38fd1498Szrj	      (plus:V4SI
1509*38fd1498Szrj		(zero_extend:V4SI
1510*38fd1498Szrj		  (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
1511*38fd1498Szrj		(zero_extend:V4SI
1512*38fd1498Szrj		  (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
1513*38fd1498Szrj	      (const_vector:V4SI [(const_int 1) (const_int 1)
1514*38fd1498Szrj				  (const_int 1) (const_int 1)]))
1515*38fd1498Szrj	    (const_int 1))))]
1516*38fd1498Szrj  "(TARGET_SSE || TARGET_3DNOW_A)
1517*38fd1498Szrj   && ix86_binary_operator_ok (PLUS, V4HImode, operands)"
1518*38fd1498Szrj  "pavgw\t{%2, %0|%0, %2}"
1519*38fd1498Szrj  [(set_attr "type" "mmxshft")
1520*38fd1498Szrj   (set_attr "mode" "DI")])
1521*38fd1498Szrj
;; psadbw on MMX registers.  The operation is modelled as an opaque
;; UNSPEC_PSADBW of two V8QI inputs producing a V1DI result; operand 1
;; is tied to the destination, operand 2 may be an MMX register or memory.
1522*38fd1498Szrj(define_insn "mmx_psadbw"
1523*38fd1498Szrj  [(set (match_operand:V1DI 0 "register_operand" "=y")
1524*38fd1498Szrj        (unspec:V1DI [(match_operand:V8QI 1 "register_operand" "0")
1525*38fd1498Szrj		      (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
1526*38fd1498Szrj		     UNSPEC_PSADBW))]
1527*38fd1498Szrj  "TARGET_SSE || TARGET_3DNOW_A"
1528*38fd1498Szrj  "psadbw\t{%2, %0|%0, %2}"
1529*38fd1498Szrj  [(set_attr "type" "mmxshft")
1530*38fd1498Szrj   (set_attr "mode" "DI")])
1531*38fd1498Szrj
;; pmovmskb from an MMX register ("y") into a general register ("r").
;; The operation is modelled as an opaque UNSPEC_MOVMSK of the V8QI
;; source producing an SImode result.
1532*38fd1498Szrj(define_insn "mmx_pmovmskb"
1533*38fd1498Szrj  [(set (match_operand:SI 0 "register_operand" "=r")
1534*38fd1498Szrj	(unspec:SI [(match_operand:V8QI 1 "register_operand" "y")]
1535*38fd1498Szrj		   UNSPEC_MOVMSK))]
1536*38fd1498Szrj  "TARGET_SSE || TARGET_3DNOW_A"
1537*38fd1498Szrj  "pmovmskb\t{%1, %0|%0, %1}"
1538*38fd1498Szrj  [(set_attr "type" "mmxcvt")
1539*38fd1498Szrj   (set_attr "mode" "DI")])
1540*38fd1498Szrj
;; Expander for maskmovq.  The V8QI memory destination (operand 0) also
;; appears as an input of the UNSPEC_MASKMOV via match_dup, so the RTL
;; records that the new memory contents depend on the old ones as well
;; as on register operands 1 and 2.  There is no preparation code.
1541*38fd1498Szrj(define_expand "mmx_maskmovq"
1542*38fd1498Szrj  [(set (match_operand:V8QI 0 "memory_operand")
1543*38fd1498Szrj	(unspec:V8QI [(match_operand:V8QI 1 "register_operand")
1544*38fd1498Szrj		      (match_operand:V8QI 2 "register_operand")
1545*38fd1498Szrj		      (match_dup 0)]
1546*38fd1498Szrj		     UNSPEC_MASKMOV))]
1547*38fd1498Szrj  "TARGET_SSE || TARGET_3DNOW_A")
1548*38fd1498Szrj
;; Insn for maskmovq.  The destination is memory addressed by a
;; pointer-mode register that must satisfy constraint "D"; that address
;; register is not printed in the assembly template, only the two MMX
;; register operands are.  The previous memory contents are an input of
;; the unspec (mem of match_dup 0).
1549*38fd1498Szrj(define_insn "*mmx_maskmovq"
1550*38fd1498Szrj  [(set (mem:V8QI (match_operand:P 0 "register_operand" "D"))
1551*38fd1498Szrj	(unspec:V8QI [(match_operand:V8QI 1 "register_operand" "y")
1552*38fd1498Szrj		      (match_operand:V8QI 2 "register_operand" "y")
1553*38fd1498Szrj		      (mem:V8QI (match_dup 0))]
1554*38fd1498Szrj		     UNSPEC_MASKMOV))]
1555*38fd1498Szrj  "TARGET_SSE || TARGET_3DNOW_A"
1556*38fd1498Szrj  ;; @@@ check ordering of operands in intel/nonintel syntax
1557*38fd1498Szrj  "maskmovq\t{%2, %1|%1, %2}"
1558*38fd1498Szrj  [(set_attr "type" "mmxcvt")
1559*38fd1498Szrj   (set_attr "znver1_decode" "vector")
1560*38fd1498Szrj   (set_attr "mode" "DI")])
1561*38fd1498Szrj
;; Expander for emms.  The preparation code builds the whole insn body
;; by hand as a 17-element PARALLEL:
;;   element 0      unspec_volatile UNSPECV_EMMS
;;   elements 1-8   clobbers of the eight XFmode registers starting at
;;		    FIRST_STACK_REG
;;   elements 9-16  clobbers of the eight DImode registers starting at
;;		    FIRST_MMX_REG
1562*38fd1498Szrj(define_expand "mmx_emms"
1563*38fd1498Szrj  [(match_par_dup 0 [(const_int 0)])]
1564*38fd1498Szrj  "TARGET_MMX"
1565*38fd1498Szrj{
1566*38fd1498Szrj  int regno;
1567*38fd1498Szrj
  /* 1 unspec_volatile + 8 stack-register clobbers + 8 MMX clobbers.  */
1568*38fd1498Szrj  operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (17));
1569*38fd1498Szrj
1570*38fd1498Szrj  XVECEXP (operands[0], 0, 0)
1571*38fd1498Szrj    = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
1572*38fd1498Szrj			       UNSPECV_EMMS);
1573*38fd1498Szrj
1574*38fd1498Szrj  for (regno = 0; regno < 8; regno++)
1575*38fd1498Szrj    {
      /* Slots 1..8: clobber each register from FIRST_STACK_REG up.  */
1576*38fd1498Szrj      XVECEXP (operands[0], 0, regno + 1)
1577*38fd1498Szrj	= gen_rtx_CLOBBER (VOIDmode,
1578*38fd1498Szrj			   gen_rtx_REG (XFmode, FIRST_STACK_REG + regno));
1579*38fd1498Szrj
      /* Slots 9..16: clobber each register from FIRST_MMX_REG up.  */
1580*38fd1498Szrj      XVECEXP (operands[0], 0, regno + 9)
1581*38fd1498Szrj	= gen_rtx_CLOBBER (VOIDmode,
1582*38fd1498Szrj			   gen_rtx_REG (DImode, FIRST_MMX_REG + regno));
1583*38fd1498Szrj    }
1584*38fd1498Szrj})
1585*38fd1498Szrj
;; Insn that emits emms.  It matches a PARALLEL accepted by the
;; "emms_operation" predicate (defined outside this file) whose first
;; element is the UNSPECV_EMMS unspec_volatile.  The attributes mark the
;; instruction as having no modrm byte and no memory access.
1586*38fd1498Szrj(define_insn "*mmx_emms"
1587*38fd1498Szrj  [(match_parallel 0 "emms_operation"
1588*38fd1498Szrj    [(unspec_volatile [(const_int 0)] UNSPECV_EMMS)])]
1589*38fd1498Szrj  "TARGET_MMX"
1590*38fd1498Szrj  "emms"
1591*38fd1498Szrj  [(set_attr "type" "mmx")
1592*38fd1498Szrj   (set_attr "modrm" "0")
1593*38fd1498Szrj   (set_attr "memory" "none")])
1594*38fd1498Szrj
;; Expander for femms (3dNOW! only).  The preparation code builds the
;; whole insn body by hand as a 17-element PARALLEL:
;;   element 0      unspec_volatile UNSPECV_FEMMS
;;   elements 1-8   clobbers of the eight XFmode registers starting at
;;		    FIRST_STACK_REG
;;   elements 9-16  clobbers of the eight DImode registers starting at
;;		    FIRST_MMX_REG
1595*38fd1498Szrj(define_expand "mmx_femms"
1596*38fd1498Szrj  [(match_par_dup 0 [(const_int 0)])]
1597*38fd1498Szrj  "TARGET_3DNOW"
1598*38fd1498Szrj{
1599*38fd1498Szrj  int regno;
1600*38fd1498Szrj
  /* 1 unspec_volatile + 8 stack-register clobbers + 8 MMX clobbers.  */
1601*38fd1498Szrj  operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (17));
1602*38fd1498Szrj
1603*38fd1498Szrj  XVECEXP (operands[0], 0, 0)
1604*38fd1498Szrj    = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
1605*38fd1498Szrj			       UNSPECV_FEMMS);
1606*38fd1498Szrj
1607*38fd1498Szrj  for (regno = 0; regno < 8; regno++)
1608*38fd1498Szrj    {
      /* Slots 1..8: clobber each register from FIRST_STACK_REG up.  */
1609*38fd1498Szrj      XVECEXP (operands[0], 0, regno + 1)
1610*38fd1498Szrj	= gen_rtx_CLOBBER (VOIDmode,
1611*38fd1498Szrj			   gen_rtx_REG (XFmode, FIRST_STACK_REG + regno));
1612*38fd1498Szrj
      /* Slots 9..16: clobber each register from FIRST_MMX_REG up.  */
1613*38fd1498Szrj      XVECEXP (operands[0], 0, regno + 9)
1614*38fd1498Szrj	= gen_rtx_CLOBBER (VOIDmode,
1615*38fd1498Szrj			   gen_rtx_REG (DImode, FIRST_MMX_REG + regno));
1616*38fd1498Szrj    }
1617*38fd1498Szrj})
1618*38fd1498Szrj
;; Insn that emits femms (3dNOW! only).  It matches a PARALLEL accepted
;; by the "emms_operation" predicate (defined outside this file) whose
;; first element is the UNSPECV_FEMMS unspec_volatile.  The attributes
;; mark the instruction as having no modrm byte and no memory access.
1619*38fd1498Szrj(define_insn "*mmx_femms"
1620*38fd1498Szrj  [(match_parallel 0 "emms_operation"
1621*38fd1498Szrj    [(unspec_volatile [(const_int 0)] UNSPECV_FEMMS)])]
1622*38fd1498Szrj  "TARGET_3DNOW"
1623*38fd1498Szrj  "femms"
1624*38fd1498Szrj  [(set_attr "type" "mmx")
1625*38fd1498Szrj   (set_attr "modrm" "0")
1626*38fd1498Szrj   (set_attr "memory" "none")])
1627