xref: /minix3/external/bsd/llvm/dist/llvm/lib/Target/X86/X86InstrSSE.td (revision 0a6a1f1d05b60e214de2f05a7310ddd1f0e590e7)
1f4a2713aSLionel Sambuc//===-- X86InstrSSE.td - SSE Instruction Set ---------------*- tablegen -*-===//
2f4a2713aSLionel Sambuc//
3f4a2713aSLionel Sambuc//                     The LLVM Compiler Infrastructure
4f4a2713aSLionel Sambuc//
5f4a2713aSLionel Sambuc// This file is distributed under the University of Illinois Open Source
6f4a2713aSLionel Sambuc// License. See LICENSE.TXT for details.
7f4a2713aSLionel Sambuc//
8f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===//
9f4a2713aSLionel Sambuc//
10f4a2713aSLionel Sambuc// This file describes the X86 SSE instruction set, defining the instructions,
11f4a2713aSLionel Sambuc// and properties of the instructions which are needed for code generation,
12f4a2713aSLionel Sambuc// machine code emission, and analysis.
13f4a2713aSLionel Sambuc//
14f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===//
15f4a2713aSLionel Sambuc
// OpndItins - Bundles the itinerary classes for the two operand forms of an
// instruction (register/register and register/memory) with the scheduling
// write used by the InstrSchedModel.
class OpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm> {
  // Itinerary for the register/register form.
  InstrItinClass rr = arg_rr;
  // Itinerary for the register/memory form.
  InstrItinClass rm = arg_rm;
  // InstrSchedModel info. WriteFAdd is only the default; individual defs
  // override it with `let Sched = ... in`.
  X86FoldableSchedWrite Sched = WriteFAdd;
}
22f4a2713aSLionel Sambuc
// SizeItins - Pairs two OpndItins records: `s` and `d`.
// NOTE(review): the defs below always pass an F32 record as `s` and an F64
// record as `d`, so these presumably mean single/double precision.
class SizeItins<OpndItins arg_s, OpndItins arg_d> {
  OpndItins s = arg_s;
  OpndItins d = arg_d;
}
27f4a2713aSLionel Sambuc
28f4a2713aSLionel Sambuc
// ShiftOpndItins - Itinerary classes for an instruction with three operand
// forms: rr, rm and ri. Unlike OpndItins it carries no Sched field.
class ShiftOpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm,
                     InstrItinClass arg_ri> {
  InstrItinClass rr = arg_rr;
  InstrItinClass rm = arg_rm;
  InstrItinClass ri = arg_ri;
}
35f4a2713aSLionel Sambuc
36f4a2713aSLionel Sambuc
37f4a2713aSLionel Sambuc// scalar
// Scalar ALU itineraries, scheduled as WriteFAdd.
let Sched = WriteFAdd in {
  def SSE_ALU_F32S : OpndItins<IIC_SSE_ALU_F32S_RR, IIC_SSE_ALU_F32S_RM>;
  def SSE_ALU_F64S : OpndItins<IIC_SSE_ALU_F64S_RR, IIC_SSE_ALU_F64S_RM>;
}

// F32/F64 pair of the scalar ALU itineraries.
def SSE_ALU_ITINS_S : SizeItins<SSE_ALU_F32S, SSE_ALU_F64S>;
51f4a2713aSLionel Sambuc
// Scalar multiply itineraries, scheduled as WriteFMul.
let Sched = WriteFMul in {
  // Both forms use the F32S itinerary classes. The memory form used to name
  // IIC_SSE_MUL_F64S_RM, a copy/paste slip from the F64S def below.
  // NOTE(review): assumes IIC_SSE_MUL_F32S_RM is declared with the other
  // itinerary classes -- confirm in the target's schedule description.
  def SSE_MUL_F32S : OpndItins<
    IIC_SSE_MUL_F32S_RR, IIC_SSE_MUL_F32S_RM
  >;

  def SSE_MUL_F64S : OpndItins<
    IIC_SSE_MUL_F64S_RR, IIC_SSE_MUL_F64S_RM
  >;
}

// F32/F64 pair of the scalar multiply itineraries.
def SSE_MUL_ITINS_S : SizeItins<
  SSE_MUL_F32S, SSE_MUL_F64S
>;
65f4a2713aSLionel Sambuc
// Scalar divide itineraries, scheduled as WriteFDiv.
let Sched = WriteFDiv in {
  // Both forms use the F32S itinerary classes. The memory form used to name
  // IIC_SSE_DIV_F64S_RM, a copy/paste slip from the F64S def below.
  // NOTE(review): assumes IIC_SSE_DIV_F32S_RM is declared with the other
  // itinerary classes -- confirm in the target's schedule description.
  def SSE_DIV_F32S : OpndItins<
    IIC_SSE_DIV_F32S_RR, IIC_SSE_DIV_F32S_RM
  >;

  def SSE_DIV_F64S : OpndItins<
    IIC_SSE_DIV_F64S_RR, IIC_SSE_DIV_F64S_RM
  >;
}

// F32/F64 pair of the scalar divide itineraries.
def SSE_DIV_ITINS_S : SizeItins<
  SSE_DIV_F32S, SSE_DIV_F64S
>;
79f4a2713aSLionel Sambuc
80f4a2713aSLionel Sambuc// parallel
// Packed ALU itineraries, scheduled as WriteFAdd.
let Sched = WriteFAdd in {
  def SSE_ALU_F32P : OpndItins<IIC_SSE_ALU_F32P_RR, IIC_SSE_ALU_F32P_RM>;
  def SSE_ALU_F64P : OpndItins<IIC_SSE_ALU_F64P_RR, IIC_SSE_ALU_F64P_RM>;
}

// F32/F64 pair of the packed ALU itineraries.
def SSE_ALU_ITINS_P : SizeItins<SSE_ALU_F32P, SSE_ALU_F64P>;
94f4a2713aSLionel Sambuc
// Packed multiply itineraries, scheduled as WriteFMul.
let Sched = WriteFMul in {
  // Both forms use the F32P itinerary classes. The memory form used to name
  // IIC_SSE_MUL_F64P_RM, a copy/paste slip from the F64P def below.
  // NOTE(review): assumes IIC_SSE_MUL_F32P_RM is declared with the other
  // itinerary classes -- confirm in the target's schedule description.
  def SSE_MUL_F32P : OpndItins<
    IIC_SSE_MUL_F32P_RR, IIC_SSE_MUL_F32P_RM
  >;

  def SSE_MUL_F64P : OpndItins<
    IIC_SSE_MUL_F64P_RR, IIC_SSE_MUL_F64P_RM
  >;
}

// F32/F64 pair of the packed multiply itineraries.
def SSE_MUL_ITINS_P : SizeItins<
  SSE_MUL_F32P, SSE_MUL_F64P
>;
108f4a2713aSLionel Sambuc
// Packed divide itineraries, scheduled as WriteFDiv.
let Sched = WriteFDiv in {
  // Both forms use the F32P itinerary classes. The memory form used to name
  // IIC_SSE_DIV_F64P_RM, a copy/paste slip from the F64P def below.
  // NOTE(review): assumes IIC_SSE_DIV_F32P_RM is declared with the other
  // itinerary classes -- confirm in the target's schedule description.
  def SSE_DIV_F32P : OpndItins<
    IIC_SSE_DIV_F32P_RR, IIC_SSE_DIV_F32P_RM
  >;

  def SSE_DIV_F64P : OpndItins<
    IIC_SSE_DIV_F64P_RR, IIC_SSE_DIV_F64P_RM
  >;
}

// F32/F64 pair of the packed divide itineraries.
def SSE_DIV_ITINS_P : SizeItins<
  SSE_DIV_F32P, SSE_DIV_F64P
>;
122f4a2713aSLionel Sambuc
// Packed bitwise itineraries. Both records name the same itinerary classes;
// they differ only in the scheduling write.
let Sched = WriteVecLogic in
def SSE_VEC_BIT_ITINS_P : OpndItins<IIC_SSE_BIT_P_RR, IIC_SSE_BIT_P_RM>;

// No `let Sched` here, so this record keeps OpndItins' default write.
def SSE_BIT_ITINS_P : OpndItins<IIC_SSE_BIT_P_RR, IIC_SSE_BIT_P_RM>;
131f4a2713aSLionel Sambuc
// Packed integer ALU itineraries, scheduled as WriteVecALU.
let Sched = WriteVecALU in {
  def SSE_INTALU_ITINS_P
      : OpndItins<IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM>;
  def SSE_INTALUQ_ITINS_P
      : OpndItins<IIC_SSE_INTALUQ_P_RR, IIC_SSE_INTALUQ_P_RM>;
}
141f4a2713aSLionel Sambuc
// Packed integer multiply itinerary, scheduled as WriteVecIMul.
let Sched = WriteVecIMul in
def SSE_INTMUL_ITINS_P : OpndItins<IIC_SSE_INTMUL_P_RR, IIC_SSE_INTMUL_P_RM>;

// Packed integer shift itineraries (rr, rm and ri forms).
def SSE_INTSHIFT_ITINS_P
    : ShiftOpndItins<IIC_SSE_INTSH_P_RR, IIC_SSE_INTSH_P_RM,
                     IIC_SSE_INTSH_P_RI>;
150f4a2713aSLionel Sambuc
// Move and DPPD itineraries. None of these override Sched, so they keep
// OpndItins' default write.
def SSE_MOVA_ITINS : OpndItins<IIC_SSE_MOVA_P_RR, IIC_SSE_MOVA_P_RM>;

def SSE_MOVU_ITINS : OpndItins<IIC_SSE_MOVU_P_RR, IIC_SSE_MOVU_P_RM>;

def SSE_DPPD_ITINS : OpndItins<IIC_SSE_DPPD_RR, IIC_SSE_DPPD_RM>;
162f4a2713aSLionel Sambuc
// DPPS itineraries. The memory form used to name IIC_SSE_DPPD_RM, a
// copy/paste slip from SSE_DPPD_ITINS; both forms now use the DPPS classes.
// NOTE(review): assumes IIC_SSE_DPPS_RM is declared with the other itinerary
// classes -- confirm in the target's schedule description.
def SSE_DPPS_ITINS : OpndItins<
  IIC_SSE_DPPS_RR, IIC_SSE_DPPS_RM
>;
166f4a2713aSLionel Sambuc
// Generic fallback itineraries (plain ALU reg / mem classes).
def DEFAULT_ITINS : OpndItins<IIC_ALU_NONMEM, IIC_ALU_MEM>;

// EXTRACTPS / INSERTPS itineraries; both keep OpndItins' default write.
def SSE_EXTRACT_ITINS
    : OpndItins<IIC_SSE_EXTRACTPS_RR, IIC_SSE_EXTRACTPS_RM>;

def SSE_INSERT_ITINS
    : OpndItins<IIC_SSE_INSERTPS_RR, IIC_SSE_INSERTPS_RM>;

// MPSADBW itineraries, scheduled as WriteMPSAD.
let Sched = WriteMPSAD in
def SSE_MPSADBW_ITINS : OpndItins<IIC_SSE_MPSADBW_RR, IIC_SSE_MPSADBW_RM>;

// PMULLD itineraries, scheduled as WriteVecIMul.
let Sched = WriteVecIMul in
def SSE_PMULLD_ITINS : OpndItins<IIC_SSE_PMULLD_RR, IIC_SSE_PMULLD_RM>;
188f4a2713aSLionel Sambuc
// Definitions for backward compatibility.
// The instructions mapped onto these definitions use a different itinerary
// than the actual scheduling model.
// Each record below pairs an existing itinerary (the generic ALU classes or
// the packed integer ALU classes) with a specific scheduling write.
let Sched = WriteShuffle in
def DEFAULT_ITINS_SHUFFLESCHED : OpndItins<IIC_ALU_NONMEM, IIC_ALU_MEM>;

let Sched = WriteVecIMul in
def DEFAULT_ITINS_VECIMULSCHED : OpndItins<IIC_ALU_NONMEM, IIC_ALU_MEM>;

let Sched = WriteShuffle in
def SSE_INTALU_ITINS_SHUFF_P
    : OpndItins<IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM>;

let Sched = WriteMPSAD in
def DEFAULT_ITINS_MPSADSCHED : OpndItins<IIC_ALU_NONMEM, IIC_ALU_MEM>;

let Sched = WriteFBlend in
def DEFAULT_ITINS_FBLENDSCHED : OpndItins<IIC_ALU_NONMEM, IIC_ALU_MEM>;

let Sched = WriteBlend in
def DEFAULT_ITINS_BLENDSCHED : OpndItins<IIC_ALU_NONMEM, IIC_ALU_MEM>;

let Sched = WriteVarBlend in
def DEFAULT_ITINS_VARBLENDSCHED : OpndItins<IIC_ALU_NONMEM, IIC_ALU_MEM>;

let Sched = WriteFBlend in
def SSE_INTALU_ITINS_FBLEND_P
    : OpndItins<IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM>;

let Sched = WriteBlend in
def SSE_INTALU_ITINS_BLEND_P
    : OpndItins<IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM>;
236*0a6a1f1dSLionel Sambuc
237f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===//
238f4a2713aSLionel Sambuc// SSE 1 & 2 Instructions Classes
239f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===//
240f4a2713aSLionel Sambuc
/// sse12_fp_scalar - SSE 1 & 2 scalar instructions class.
///
/// Defines two records per instantiation:
///   rr - register/register form, marked commutable.
///   rm - register/memory form; $src2 is matched through `load`.
/// Is2Addr picks the asm string: the two-operand one (without $src1) when
/// set, the three-operand one otherwise.
multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           RegisterClass RC, X86MemOperand x86memop,
                           OpndItins itins,
                           bit Is2Addr = 1> {
  let isCommutable = 1 in
  def rr : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
              !if(Is2Addr,
                  !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                  !strconcat(OpcodeStr,
                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
              [(set RC:$dst, (OpNode RC:$src1, RC:$src2))], itins.rr>,
           Sched<[itins.Sched]>;

  // The memory form uses the folded variant of the scheduling write.
  def rm : SI<opc, MRMSrcMem, (outs RC:$dst),
              (ins RC:$src1, x86memop:$src2),
              !if(Is2Addr,
                  !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                  !strconcat(OpcodeStr,
                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
              [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))],
              itins.rm>,
           Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
261f4a2713aSLionel Sambuc
/// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class.
///
/// Defines rr_Int and rm_Int, both marked isCodeGenOnly. Each pattern selects
/// the intrinsic record named "int_x86_sse" # SSEVer # "_" # OpcodeStr #
/// FPSizeStr, looked up with !cast<Intrinsic>. `asm` is the printed mnemonic;
/// Is2Addr picks the two- or three-operand asm string.
multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
                             string asm, string SSEVer, string FPSizeStr,
                             Operand memopr, ComplexPattern mem_cpat,
                             OpndItins itins,
                             bit Is2Addr = 1> {
  let isCodeGenOnly = 1 in {
    def rr_Int : SI<opc, MRMSrcReg, (outs RC:$dst),
                    (ins RC:$src1, RC:$src2),
                    !if(Is2Addr,
                        !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
                        !strconcat(asm,
                            "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
                    [(set RC:$dst,
                          (!cast<Intrinsic>(
                               !strconcat("int_x86_sse", SSEVer, "_",
                                          OpcodeStr, FPSizeStr))
                           RC:$src1, RC:$src2))],
                    itins.rr>,
                 Sched<[itins.Sched]>;

    // The memory operand is matched through the mem_cpat complex pattern.
    def rm_Int : SI<opc, MRMSrcMem, (outs RC:$dst),
                    (ins RC:$src1, memopr:$src2),
                    !if(Is2Addr,
                        !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
                        !strconcat(asm,
                            "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
                    [(set RC:$dst,
                          (!cast<Intrinsic>(
                               !strconcat("int_x86_sse", SSEVer, "_",
                                          OpcodeStr, FPSizeStr))
                           RC:$src1, mem_cpat:$src2))],
                    itins.rm>,
                 Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
287f4a2713aSLionel Sambuc
/// sse12_fp_packed - SSE 1 & 2 packed instructions class.
///
/// Defines two records per instantiation:
///   rr - register/register form, commutable; the result is typed as `vt`.
///   rm - register/memory form, flagged mayLoad; $src2 is matched through
///        `mem_frag`.
/// `d` is the execution domain passed on to PI. Is2Addr picks the two- or
/// three-operand asm string.
multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           RegisterClass RC, ValueType vt,
                           X86MemOperand x86memop, PatFrag mem_frag,
                           Domain d, OpndItins itins, bit Is2Addr = 1> {
  let isCommutable = 1 in
  def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
              !if(Is2Addr,
                  !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                  !strconcat(OpcodeStr,
                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
              [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))],
              itins.rr, d>,
           Sched<[itins.Sched]>;

  let mayLoad = 1 in
  def rm : PI<opc, MRMSrcMem, (outs RC:$dst),
              (ins RC:$src1, x86memop:$src2),
              !if(Is2Addr,
                  !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                  !strconcat(OpcodeStr,
                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
              [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))],
              itins.rm, d>,
           Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
309f4a2713aSLionel Sambuc
/// sse12_fp_packed_logical_rm - SSE 1 & 2 packed instructions class.
///
/// The caller supplies the selection patterns for the rr and rm forms
/// (pat_rr / pat_rm). Both forms use NoItinerary and the WriteVecLogic
/// scheduling writes. Only the rr form is marked commutable and free of
/// side effects.
multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d,
                                      string OpcodeStr, X86MemOperand x86memop,
                                      list<dag> pat_rr, list<dag> pat_rm,
                                      bit Is2Addr = 1> {
  let isCommutable = 1, hasSideEffects = 0 in
  def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
              !if(Is2Addr,
                  !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                  !strconcat(OpcodeStr,
                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
              pat_rr, NoItinerary, d>,
           Sched<[WriteVecLogic]>;

  def rm : PI<opc, MRMSrcMem, (outs RC:$dst),
              (ins RC:$src1, x86memop:$src2),
              !if(Is2Addr,
                  !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                  !strconcat(OpcodeStr,
                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
              pat_rm, NoItinerary, d>,
           Sched<[WriteVecLogicLd, ReadAfterLd]>;
}
329f4a2713aSLionel Sambuc
330f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===//
331f4a2713aSLionel Sambuc//  Non-instruction patterns
332f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===//
333f4a2713aSLionel Sambuc
334f4a2713aSLionel Sambuc// A vector extract of the first f32/f64 position is a subregister copy
// Element 0 of a v4f32 is read by copying the register into FR32.
def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
          (COPY_TO_REGCLASS (v4f32 VR128:$src), FR32)>;

// Element 0 of a v2f64 is read by copying the register into FR64.
def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
          (COPY_TO_REGCLASS (v2f64 VR128:$src), FR64)>;
339f4a2713aSLionel Sambuc
340f4a2713aSLionel Sambuc// A 128-bit subvector extract from the first 256-bit vector position
341f4a2713aSLionel Sambuc// is a subregister copy that needs no instruction.
// 32-bit elements.
def : Pat<(v4i32 (extract_subvector (v8i32 VR256:$src), (iPTR 0))),
          (v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm))>;
def : Pat<(v4f32 (extract_subvector (v8f32 VR256:$src), (iPTR 0))),
          (v4f32 (EXTRACT_SUBREG (v8f32 VR256:$src), sub_xmm))>;

// 64-bit elements.
def : Pat<(v2i64 (extract_subvector (v4i64 VR256:$src), (iPTR 0))),
          (v2i64 (EXTRACT_SUBREG (v4i64 VR256:$src), sub_xmm))>;
def : Pat<(v2f64 (extract_subvector (v4f64 VR256:$src), (iPTR 0))),
          (v2f64 (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm))>;

// 16-bit and 8-bit elements.
def : Pat<(v8i16 (extract_subvector (v16i16 VR256:$src), (iPTR 0))),
          (v8i16 (EXTRACT_SUBREG (v16i16 VR256:$src), sub_xmm))>;
def : Pat<(v16i8 (extract_subvector (v32i8 VR256:$src), (iPTR 0))),
          (v16i8 (EXTRACT_SUBREG (v32i8 VR256:$src), sub_xmm))>;
356f4a2713aSLionel Sambuc
357f4a2713aSLionel Sambuc// A 128-bit subvector insert to the first 256-bit vector position
358f4a2713aSLionel Sambuc// is a subregister copy that needs no instruction.
let AddedComplexity = 25 in { // to give priority over vinsertf128rm
  // Each pattern inserts the 128-bit source into the sub_xmm subregister of
  // an IMPLICIT_DEF 256-bit value of the matching element type.
  def : Pat<(insert_subvector undef, (v2i64 VR128:$src), (iPTR 0)),
            (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
  def : Pat<(insert_subvector undef, (v2f64 VR128:$src), (iPTR 0)),
            (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
  def : Pat<(insert_subvector undef, (v4i32 VR128:$src), (iPTR 0)),
            (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
  def : Pat<(insert_subvector undef, (v4f32 VR128:$src), (iPTR 0)),
            (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
  def : Pat<(insert_subvector undef, (v8i16 VR128:$src), (iPTR 0)),
            (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
  def : Pat<(insert_subvector undef, (v16i8 VR128:$src), (iPTR 0)),
            (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
}
373f4a2713aSLionel Sambuc
374f4a2713aSLionel Sambuc// Implicitly promote a 32-bit scalar to a vector.
// FR32 source: both result types are produced by a copy into VR128.
def : Pat<(v4f32 (scalar_to_vector FR32:$src)),
          (COPY_TO_REGCLASS FR32:$src, VR128)>;
def : Pat<(v8f32 (scalar_to_vector FR32:$src)),
          (COPY_TO_REGCLASS FR32:$src, VR128)>;

// Implicitly promote a 64-bit scalar to a vector.
// FR64 source: both result types are produced by a copy into VR128.
def : Pat<(v2f64 (scalar_to_vector FR64:$src)),
          (COPY_TO_REGCLASS FR64:$src, VR128)>;
def : Pat<(v4f64 (scalar_to_vector FR64:$src)),
          (COPY_TO_REGCLASS FR64:$src, VR128)>;
384f4a2713aSLionel Sambuc
385f4a2713aSLionel Sambuc// Bitcasts between 128-bit vector types. Return the original type since
386f4a2713aSLionel Sambuc// no instruction is needed for the conversion
let Predicates = [HasSSE2] in {
  // To v2i64.
  def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
  def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>;
  def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>;
  def : Pat<(v2i64 (bitconvert (v2f64 VR128:$src))), (v2i64 VR128:$src)>;
  def : Pat<(v2i64 (bitconvert (v4f32 VR128:$src))), (v2i64 VR128:$src)>;

  // To v4i32.
  def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v2f64 VR128:$src))), (v4i32 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>;

  // To v8i16.
  def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v2f64 VR128:$src))), (v8i16 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v4f32 VR128:$src))), (v8i16 VR128:$src)>;

  // To v16i8.
  def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v8i16 VR128:$src))), (v16i8 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v2f64 VR128:$src))), (v16i8 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v4f32 VR128:$src))), (v16i8 VR128:$src)>;

  // To v4f32.
  def : Pat<(v4f32 (bitconvert (v2i64 VR128:$src))), (v4f32 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v8i16 VR128:$src))), (v4f32 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v16i8 VR128:$src))), (v4f32 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v2f64 VR128:$src))), (v4f32 VR128:$src)>;

  // To v2f64.
  def : Pat<(v2f64 (bitconvert (v2i64 VR128:$src))), (v2f64 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v4i32 VR128:$src))), (v2f64 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>;
}
419f4a2713aSLionel Sambuc
420f4a2713aSLionel Sambuc// Bitcasts between 256-bit vector types. Return the original type since
421f4a2713aSLionel Sambuc// no instruction is needed for the conversion
let Predicates = [HasAVX] in {
  // To v4f64.
  def : Pat<(v4f64  (bitconvert (v8f32 VR256:$src))),  (v4f64 VR256:$src)>;
  def : Pat<(v4f64  (bitconvert (v8i32 VR256:$src))),  (v4f64 VR256:$src)>;
  def : Pat<(v4f64  (bitconvert (v4i64 VR256:$src))),  (v4f64 VR256:$src)>;
  def : Pat<(v4f64  (bitconvert (v16i16 VR256:$src))), (v4f64 VR256:$src)>;
  def : Pat<(v4f64  (bitconvert (v32i8 VR256:$src))),  (v4f64 VR256:$src)>;

  // To v8f32.
  def : Pat<(v8f32  (bitconvert (v8i32 VR256:$src))),  (v8f32 VR256:$src)>;
  def : Pat<(v8f32  (bitconvert (v4i64 VR256:$src))),  (v8f32 VR256:$src)>;
  def : Pat<(v8f32  (bitconvert (v4f64 VR256:$src))),  (v8f32 VR256:$src)>;
  def : Pat<(v8f32  (bitconvert (v32i8 VR256:$src))),  (v8f32 VR256:$src)>;
  def : Pat<(v8f32  (bitconvert (v16i16 VR256:$src))), (v8f32 VR256:$src)>;

  // To v4i64.
  def : Pat<(v4i64  (bitconvert (v8f32 VR256:$src))),  (v4i64 VR256:$src)>;
  def : Pat<(v4i64  (bitconvert (v8i32 VR256:$src))),  (v4i64 VR256:$src)>;
  def : Pat<(v4i64  (bitconvert (v4f64 VR256:$src))),  (v4i64 VR256:$src)>;
  def : Pat<(v4i64  (bitconvert (v32i8 VR256:$src))),  (v4i64 VR256:$src)>;
  def : Pat<(v4i64  (bitconvert (v16i16 VR256:$src))), (v4i64 VR256:$src)>;

  // To v32i8.
  def : Pat<(v32i8  (bitconvert (v4f64 VR256:$src))),  (v32i8 VR256:$src)>;
  def : Pat<(v32i8  (bitconvert (v4i64 VR256:$src))),  (v32i8 VR256:$src)>;
  def : Pat<(v32i8  (bitconvert (v8f32 VR256:$src))),  (v32i8 VR256:$src)>;
  def : Pat<(v32i8  (bitconvert (v8i32 VR256:$src))),  (v32i8 VR256:$src)>;
  def : Pat<(v32i8  (bitconvert (v16i16 VR256:$src))), (v32i8 VR256:$src)>;

  // To v8i32.
  def : Pat<(v8i32  (bitconvert (v32i8 VR256:$src))),  (v8i32 VR256:$src)>;
  def : Pat<(v8i32  (bitconvert (v16i16 VR256:$src))), (v8i32 VR256:$src)>;
  def : Pat<(v8i32  (bitconvert (v8f32 VR256:$src))),  (v8i32 VR256:$src)>;
  def : Pat<(v8i32  (bitconvert (v4i64 VR256:$src))),  (v8i32 VR256:$src)>;
  def : Pat<(v8i32  (bitconvert (v4f64 VR256:$src))),  (v8i32 VR256:$src)>;

  // To v16i16.
  def : Pat<(v16i16 (bitconvert (v8f32 VR256:$src))),  (v16i16 VR256:$src)>;
  def : Pat<(v16i16 (bitconvert (v8i32 VR256:$src))),  (v16i16 VR256:$src)>;
  def : Pat<(v16i16 (bitconvert (v4i64 VR256:$src))),  (v16i16 VR256:$src)>;
  def : Pat<(v16i16 (bitconvert (v4f64 VR256:$src))),  (v16i16 VR256:$src)>;
  def : Pat<(v16i16 (bitconvert (v32i8 VR256:$src))),  (v16i16 VR256:$src)>;
}
454f4a2713aSLionel Sambuc
455f4a2713aSLionel Sambuc// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
456f4a2713aSLionel Sambuc// This is expanded by ExpandPostRAPseudos.
let isPseudo = 1, isReMaterializable = 1, isAsCheapAsAMove = 1,
    canFoldAsLoad = 1, SchedRW = [WriteZero] in {
  // Scalar f32 +0.0; requires SSE1.
  def FsFLD0SS : I<0, Pseudo, (outs FR32:$dst), (ins), "",
                   [(set FR32:$dst, fp32imm0)]>,
                 Requires<[HasSSE1]>;

  // Scalar f64 +0.0; requires SSE2.
  def FsFLD0SD : I<0, Pseudo, (outs FR64:$dst), (ins), "",
                   [(set FR64:$dst, fpimm0)]>,
                 Requires<[HasSSE2]>;
}
464f4a2713aSLionel Sambuc
465f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===//
466f4a2713aSLionel Sambuc// AVX & SSE - Zero/One Vectors
467f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===//
468f4a2713aSLionel Sambuc
469f4a2713aSLionel Sambuc// Alias instruction that maps zero vector to pxor / xorp* for sse.
470f4a2713aSLionel Sambuc// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
471f4a2713aSLionel Sambuc// swizzled by ExecutionDepsFix to pxor.
472f4a2713aSLionel Sambuc// We set canFoldAsLoad because this can be converted to a constant-pool
473f4a2713aSLionel Sambuc// load of an all-zeros value if folding it would be beneficial.
let isPseudo = 1, isReMaterializable = 1, isAsCheapAsAMove = 1,
    canFoldAsLoad = 1, SchedRW = [WriteZero] in
def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "",
               [(set VR128:$dst, (v4f32 immAllZerosV))]>;

// The remaining 128-bit all-zeros vector types select the same pseudo.
def : Pat<(v2f64 immAllZerosV), (V_SET0)>;
def : Pat<(v4i32 immAllZerosV), (V_SET0)>;
def : Pat<(v2i64 immAllZerosV), (V_SET0)>;
def : Pat<(v8i16 immAllZerosV), (V_SET0)>;
def : Pat<(v16i8 immAllZerosV), (V_SET0)>;
485f4a2713aSLionel Sambuc
486f4a2713aSLionel Sambuc
487f4a2713aSLionel Sambuc// The same as done above but for AVX.  The 256-bit AVX1 ISA doesn't support PI,
488f4a2713aSLionel Sambuc// and doesn't need it because on sandy bridge the register is set to zero
489f4a2713aSLionel Sambuc// at the rename stage without using any execution unit, so SET0PSY
490f4a2713aSLionel Sambuc// and SET0PDY can be used for vector int instructions without penalty
491f4a2713aSLionel Sambuclet isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
492f4a2713aSLionel Sambuc    isPseudo = 1, Predicates = [HasAVX], SchedRW = [WriteZero] in {
493f4a2713aSLionel Sambucdef AVX_SET0 : I<0, Pseudo, (outs VR256:$dst), (ins), "",
494f4a2713aSLionel Sambuc                 [(set VR256:$dst, (v8f32 immAllZerosV))]>;
495f4a2713aSLionel Sambuc}
496f4a2713aSLionel Sambuc
// Additional all-zeros patterns that select AVX_SET0: v4f64 whenever HasAVX
// holds, and the 256-bit integer types only when HasAVX2 holds.
497f4a2713aSLionel Sambuclet Predicates = [HasAVX] in
498f4a2713aSLionel Sambuc  def : Pat<(v4f64 immAllZerosV), (AVX_SET0)>;
499f4a2713aSLionel Sambuc
500f4a2713aSLionel Sambuclet Predicates = [HasAVX2] in {
501f4a2713aSLionel Sambuc  def : Pat<(v4i64 immAllZerosV), (AVX_SET0)>;
502f4a2713aSLionel Sambuc  def : Pat<(v8i32 immAllZerosV), (AVX_SET0)>;
503f4a2713aSLionel Sambuc  def : Pat<(v16i16 immAllZerosV), (AVX_SET0)>;
504f4a2713aSLionel Sambuc  def : Pat<(v32i8 immAllZerosV), (AVX_SET0)>;
505f4a2713aSLionel Sambuc}
506f4a2713aSLionel Sambuc
507f4a2713aSLionel Sambuc// AVX1 has no support for 256-bit integer instructions, but since the 128-bit
508f4a2713aSLionel Sambuc// VPXOR instruction writes zero to its upper part, it's safe to build zeros.
//
// Each 256-bit integer type gets two patterns: the plain all-zeros vector and
// the same value reached through a bitcast from a v8f32 all-zeros vector.
// Both place a 128-bit V_SET0 in the sub_xmm subregister via SUBREG_TO_REG.
509f4a2713aSLionel Sambuclet Predicates = [HasAVX1Only] in {
510f4a2713aSLionel Sambucdef : Pat<(v32i8 immAllZerosV), (SUBREG_TO_REG (i8 0), (V_SET0), sub_xmm)>;
511f4a2713aSLionel Sambucdef : Pat<(bc_v32i8 (v8f32 immAllZerosV)),
512f4a2713aSLionel Sambuc          (SUBREG_TO_REG (i8 0), (V_SET0), sub_xmm)>;
513f4a2713aSLionel Sambuc
514f4a2713aSLionel Sambucdef : Pat<(v16i16 immAllZerosV), (SUBREG_TO_REG (i16 0), (V_SET0), sub_xmm)>;
515f4a2713aSLionel Sambucdef : Pat<(bc_v16i16 (v8f32 immAllZerosV)),
516f4a2713aSLionel Sambuc          (SUBREG_TO_REG (i16 0), (V_SET0), sub_xmm)>;
517f4a2713aSLionel Sambuc
518f4a2713aSLionel Sambucdef : Pat<(v8i32 immAllZerosV), (SUBREG_TO_REG (i32 0), (V_SET0), sub_xmm)>;
519f4a2713aSLionel Sambucdef : Pat<(bc_v8i32 (v8f32 immAllZerosV)),
520f4a2713aSLionel Sambuc          (SUBREG_TO_REG (i32 0), (V_SET0), sub_xmm)>;
521f4a2713aSLionel Sambuc
522f4a2713aSLionel Sambucdef : Pat<(v4i64 immAllZerosV), (SUBREG_TO_REG (i64 0), (V_SET0), sub_xmm)>;
523f4a2713aSLionel Sambucdef : Pat<(bc_v4i64 (v8f32 immAllZerosV)),
524f4a2713aSLionel Sambuc          (SUBREG_TO_REG (i64 0), (V_SET0), sub_xmm)>;
525f4a2713aSLionel Sambuc}
526f4a2713aSLionel Sambuc
527f4a2713aSLionel Sambuc// We set canFoldAsLoad because this can be converted to a constant-pool
528f4a2713aSLionel Sambuc// load of an all-ones value if folding it would be beneficial.
//
// V_SETALLONES writes a VR128 register and matches the v4i32 all-ones vector.
// AVX2_SETALLONES writes a VR256 register, matches the v8i32 all-ones vector,
// and is additionally guarded by HasAVX2.  Both are pseudos scheduled with the
// same WriteZero class used for the zeroing pseudos.
529f4a2713aSLionel Sambuclet isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
530f4a2713aSLionel Sambuc    isPseudo = 1, SchedRW = [WriteZero] in {
531f4a2713aSLionel Sambuc  def V_SETALLONES : I<0, Pseudo, (outs VR128:$dst), (ins), "",
532f4a2713aSLionel Sambuc                       [(set VR128:$dst, (v4i32 immAllOnesV))]>;
533f4a2713aSLionel Sambuc  let Predicates = [HasAVX2] in
534f4a2713aSLionel Sambuc  def AVX2_SETALLONES : I<0, Pseudo, (outs VR256:$dst), (ins), "",
535f4a2713aSLionel Sambuc                          [(set VR256:$dst, (v8i32 immAllOnesV))]>;
536f4a2713aSLionel Sambuc}
537f4a2713aSLionel Sambuc
538f4a2713aSLionel Sambuc
539f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===//
540f4a2713aSLionel Sambuc// SSE 1 & 2 - Move FP Scalar Instructions
541f4a2713aSLionel Sambuc//
542f4a2713aSLionel Sambuc// Move Instructions. Register-to-register movss/movsd is not used for FR32/64
543f4a2713aSLionel Sambuc// register copies because it's a partial register update; Register-to-register
544f4a2713aSLionel Sambuc// movss/movsd is not modeled as an INSERT_SUBREG because INSERT_SUBREG requires
545f4a2713aSLionel Sambuc// that the insert be implementable in terms of a copy, and, as just mentioned, we
546f4a2713aSLionel Sambuc// don't use movss/movsd for copies.
547f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===//
548f4a2713aSLionel Sambuc
// sse12_move_rr - Register-to-register forms of a scalar FP move.
//   RC       - register class of the scalar source operand ($src2).
//   OpNode   - DAG node matched by the `rr` selection pattern.
//   vt       - vector value type of the matched result.
//   x86memop - not referenced anywhere inside this multiclass.
//   base_opc - mnemonic; asm_opr is appended to it to form the asm string.
// Defines `rr` (opcode 0x10, MRMSrcReg, with a selection pattern) and
// `rr_REV` (opcode 0x11, MRMDestReg, empty pattern list).
549f4a2713aSLionel Sambucmulticlass sse12_move_rr<RegisterClass RC, SDNode OpNode, ValueType vt,
550f4a2713aSLionel Sambuc                         X86MemOperand x86memop, string base_opc,
551f4a2713aSLionel Sambuc                         string asm_opr> {
552f4a2713aSLionel Sambuc  def rr : SI<0x10, MRMSrcReg, (outs VR128:$dst),
553f4a2713aSLionel Sambuc              (ins VR128:$src1, RC:$src2),
554f4a2713aSLionel Sambuc              !strconcat(base_opc, asm_opr),
555f4a2713aSLionel Sambuc              [(set VR128:$dst, (vt (OpNode VR128:$src1,
556f4a2713aSLionel Sambuc                                 (scalar_to_vector RC:$src2))))],
557*0a6a1f1dSLionel Sambuc              IIC_SSE_MOV_S_RR>, Sched<[WriteFShuffle]>;
558f4a2713aSLionel Sambuc
559f4a2713aSLionel Sambuc  // For the disassembler: same operands and asm string as `rr`, but using the
  // 0x11 / MRMDestReg encoding and carrying no selection pattern.
560*0a6a1f1dSLionel Sambuc  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
561f4a2713aSLionel Sambuc  def rr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
562f4a2713aSLionel Sambuc                  (ins VR128:$src1, RC:$src2),
563f4a2713aSLionel Sambuc                  !strconcat(base_opc, asm_opr),
564*0a6a1f1dSLionel Sambuc                  [], IIC_SSE_MOV_S_RR>, Sched<[WriteFShuffle]>;
565f4a2713aSLionel Sambuc}
566f4a2713aSLionel Sambuc
// sse12_move - For one scalar move mnemonic (OpcodeStr), instantiates the
// register-register forms via sse12_move_rr and a store form (`mr`, opcode
// 0x11, MRMDestMem) in two flavours: a V-prefixed, VEX-tagged AVX flavour and
// a plain SSE flavour.
//   RC / OpNode / vt / x86memop are forwarded to sse12_move_rr; RC and
//   x86memop also type the operands of the store forms.
567f4a2713aSLionel Sambucmulticlass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt,
568f4a2713aSLionel Sambuc                      X86MemOperand x86memop, string OpcodeStr> {
569f4a2713aSLionel Sambuc  // AVX: three-operand asm string ($src2, $src1, $dst).
570f4a2713aSLionel Sambuc  defm V#NAME : sse12_move_rr<RC, OpNode, vt, x86memop, OpcodeStr,
571f4a2713aSLionel Sambuc                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}">,
572f4a2713aSLionel Sambuc                              VEX_4V, VEX_LIG;
573f4a2713aSLionel Sambuc
574f4a2713aSLionel Sambuc  def V#NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
575f4a2713aSLionel Sambuc                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
576f4a2713aSLionel Sambuc                     [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
577f4a2713aSLionel Sambuc                     VEX, VEX_LIG, Sched<[WriteStore]>;
578f4a2713aSLionel Sambuc  // SSE1 & 2: $src1 is tied to $dst, so only two operands are printed.
579f4a2713aSLionel Sambuc  let Constraints = "$src1 = $dst" in {
580f4a2713aSLionel Sambuc    defm NAME : sse12_move_rr<RC, OpNode, vt, x86memop, OpcodeStr,
581f4a2713aSLionel Sambuc                              "\t{$src2, $dst|$dst, $src2}">;
582f4a2713aSLionel Sambuc  }
583f4a2713aSLionel Sambuc
584f4a2713aSLionel Sambuc  def NAME#mr   : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
585f4a2713aSLionel Sambuc                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
586f4a2713aSLionel Sambuc                     [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
587f4a2713aSLionel Sambuc                  Sched<[WriteStore]>;
588f4a2713aSLionel Sambuc}
589f4a2713aSLionel Sambuc
590f4a2713aSLionel Sambuc// Loading from memory automatically zeroing upper bits.
// sse12_move_rm - Defines the load forms (`rm`, opcode 0x10, MRMSrcMem) of a
// scalar move: a V-prefixed, VEX-tagged AVX flavour and a plain SSE flavour.
//   RC        - register class of the loaded result.
//   x86memop  - memory operand type of the source.
//   mem_pat   - load fragment matched by the selection pattern.
//   OpcodeStr - mnemonic used in the asm string.
591f4a2713aSLionel Sambucmulticlass sse12_move_rm<RegisterClass RC, X86MemOperand x86memop,
592f4a2713aSLionel Sambuc                         PatFrag mem_pat, string OpcodeStr> {
593f4a2713aSLionel Sambuc  def V#NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
594f4a2713aSLionel Sambuc                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
595f4a2713aSLionel Sambuc                     [(set RC:$dst, (mem_pat addr:$src))],
596f4a2713aSLionel Sambuc                     IIC_SSE_MOV_S_RM>, VEX, VEX_LIG, Sched<[WriteLoad]>;
597f4a2713aSLionel Sambuc  def NAME#rm   : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
598f4a2713aSLionel Sambuc                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
599f4a2713aSLionel Sambuc                     [(set RC:$dst, (mem_pat addr:$src))],
600f4a2713aSLionel Sambuc                     IIC_SSE_MOV_S_RM>, Sched<[WriteLoad]>;
601f4a2713aSLionel Sambuc}
602f4a2713aSLionel Sambuc
// Instantiate the scalar moves.  The first pair of defms creates the
// register-register and store forms (sse12_move); the second pair creates the
// load forms (sse12_move_rm), which are marked foldable-as-load and
// rematerializable.  The MOVSD load forms additionally get AddedComplexity 20.
603f4a2713aSLionel Sambucdefm MOVSS : sse12_move<FR32, X86Movss, v4f32, f32mem, "movss">, XS;
604f4a2713aSLionel Sambucdefm MOVSD : sse12_move<FR64, X86Movsd, v2f64, f64mem, "movsd">, XD;
605f4a2713aSLionel Sambuc
606f4a2713aSLionel Sambuclet canFoldAsLoad = 1, isReMaterializable = 1 in {
607f4a2713aSLionel Sambuc  defm MOVSS : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS;
608f4a2713aSLionel Sambuc
609f4a2713aSLionel Sambuc  let AddedComplexity = 20 in
610f4a2713aSLionel Sambuc    defm MOVSD : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD;
611f4a2713aSLionel Sambuc}
612f4a2713aSLionel Sambuc
613f4a2713aSLionel Sambuc// Patterns
// Selection patterns, guarded by UseAVX, that map scalar-load, extract-store
// and low-element shuffle DAGs onto the VMOVSS / VMOVSD instruction forms.
614f4a2713aSLionel Sambuclet Predicates = [UseAVX] in {
615f4a2713aSLionel Sambuc  let AddedComplexity = 20 in {
616f4a2713aSLionel Sambuc  // MOVSSrm zeros the high parts of the register; represent this
617f4a2713aSLionel Sambuc  // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
  // NOTE(review): the 128-bit patterns directly below use COPY_TO_REGCLASS;
  // only the 256-bit forms further down use SUBREG_TO_REG.
618f4a2713aSLionel Sambuc  def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
619f4a2713aSLionel Sambuc            (COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128)>;
620f4a2713aSLionel Sambuc  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
621f4a2713aSLionel Sambuc            (COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128)>;
622f4a2713aSLionel Sambuc  def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
623f4a2713aSLionel Sambuc            (COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128)>;
624f4a2713aSLionel Sambuc
625f4a2713aSLionel Sambuc  // MOVSDrm zeros the high parts of the register; represent this
626f4a2713aSLionel Sambuc  // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
  // NOTE(review): as above, these 128-bit patterns use COPY_TO_REGCLASS.
627f4a2713aSLionel Sambuc  def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
628f4a2713aSLionel Sambuc            (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;
629f4a2713aSLionel Sambuc  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
630f4a2713aSLionel Sambuc            (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;
631f4a2713aSLionel Sambuc  def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
632f4a2713aSLionel Sambuc            (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;
633f4a2713aSLionel Sambuc  def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
634f4a2713aSLionel Sambuc            (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;
635f4a2713aSLionel Sambuc  def : Pat<(v2f64 (X86vzload addr:$src)),
636f4a2713aSLionel Sambuc            (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;
637f4a2713aSLionel Sambuc
638f4a2713aSLionel Sambuc  // Represent the same patterns above but in the form they appear for
639f4a2713aSLionel Sambuc  // 256-bit types
640f4a2713aSLionel Sambuc  def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
641f4a2713aSLionel Sambuc                   (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
642f4a2713aSLionel Sambuc            (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>;
643f4a2713aSLionel Sambuc  def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
644f4a2713aSLionel Sambuc                   (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
645f4a2713aSLionel Sambuc            (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>;
646f4a2713aSLionel Sambuc  def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
647f4a2713aSLionel Sambuc                   (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
648f4a2713aSLionel Sambuc            (SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_xmm)>;
649f4a2713aSLionel Sambuc  }
  // Unlike the three 256-bit load patterns above, this v4i64 form sits outside
  // the AddedComplexity = 20 block.  NOTE(review): confirm that is intended.
650f4a2713aSLionel Sambuc  def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
651f4a2713aSLionel Sambuc                   (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
652f4a2713aSLionel Sambuc            (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_xmm)>;
653f4a2713aSLionel Sambuc
654f4a2713aSLionel Sambuc  // Extract and store: storing element 0 of a vector becomes a scalar store
  // of the same register viewed through the FR32 / FR64 register class.
655f4a2713aSLionel Sambuc  def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
656f4a2713aSLionel Sambuc                   addr:$dst),
657f4a2713aSLionel Sambuc            (VMOVSSmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128:$src), FR32))>;
658f4a2713aSLionel Sambuc  def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
659f4a2713aSLionel Sambuc                   addr:$dst),
660f4a2713aSLionel Sambuc            (VMOVSDmr addr:$dst, (COPY_TO_REGCLASS (v2f64 VR128:$src), FR64))>;
661f4a2713aSLionel Sambuc
662f4a2713aSLionel Sambuc  // Shuffle with VMOVSS
663f4a2713aSLionel Sambuc  def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)),
664f4a2713aSLionel Sambuc            (VMOVSSrr (v4i32 VR128:$src1),
665f4a2713aSLionel Sambuc                      (COPY_TO_REGCLASS (v4i32 VR128:$src2), FR32))>;
666f4a2713aSLionel Sambuc  def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
667f4a2713aSLionel Sambuc            (VMOVSSrr (v4f32 VR128:$src1),
668f4a2713aSLionel Sambuc                      (COPY_TO_REGCLASS (v4f32 VR128:$src2), FR32))>;
669f4a2713aSLionel Sambuc
670f4a2713aSLionel Sambuc  // 256-bit variants: operate on the sub_xmm halves of both sources and wrap
  // the 128-bit result back into a 256-bit register with SUBREG_TO_REG.
671f4a2713aSLionel Sambuc  def : Pat<(v8i32 (X86Movss VR256:$src1, VR256:$src2)),
672f4a2713aSLionel Sambuc            (SUBREG_TO_REG (i32 0),
673f4a2713aSLionel Sambuc              (VMOVSSrr (EXTRACT_SUBREG (v8i32 VR256:$src1), sub_xmm),
674f4a2713aSLionel Sambuc                        (EXTRACT_SUBREG (v8i32 VR256:$src2), sub_xmm)),
675f4a2713aSLionel Sambuc              sub_xmm)>;
676f4a2713aSLionel Sambuc  def : Pat<(v8f32 (X86Movss VR256:$src1, VR256:$src2)),
677f4a2713aSLionel Sambuc            (SUBREG_TO_REG (i32 0),
678f4a2713aSLionel Sambuc              (VMOVSSrr (EXTRACT_SUBREG (v8f32 VR256:$src1), sub_xmm),
679f4a2713aSLionel Sambuc                        (EXTRACT_SUBREG (v8f32 VR256:$src2), sub_xmm)),
680f4a2713aSLionel Sambuc              sub_xmm)>;
681f4a2713aSLionel Sambuc
682f4a2713aSLionel Sambuc  // Shuffle with VMOVSD
683f4a2713aSLionel Sambuc  def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)),
684f4a2713aSLionel Sambuc            (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
685f4a2713aSLionel Sambuc  def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
686f4a2713aSLionel Sambuc            (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
687f4a2713aSLionel Sambuc  def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)),
688f4a2713aSLionel Sambuc            (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
689f4a2713aSLionel Sambuc  def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)),
690f4a2713aSLionel Sambuc            (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
691f4a2713aSLionel Sambuc
692f4a2713aSLionel Sambuc  // 256-bit variants
  // NOTE(review): these pass (i32 0) to SUBREG_TO_REG although the elements
  // are 64-bit, whereas the v4i64 load pattern earlier in this block passes
  // (i64 0) -- confirm whether the immediate's type matters here.
693f4a2713aSLionel Sambuc  def : Pat<(v4i64 (X86Movsd VR256:$src1, VR256:$src2)),
694f4a2713aSLionel Sambuc            (SUBREG_TO_REG (i32 0),
695f4a2713aSLionel Sambuc              (VMOVSDrr (EXTRACT_SUBREG (v4i64 VR256:$src1), sub_xmm),
696f4a2713aSLionel Sambuc                        (EXTRACT_SUBREG (v4i64 VR256:$src2), sub_xmm)),
697f4a2713aSLionel Sambuc              sub_xmm)>;
698f4a2713aSLionel Sambuc  def : Pat<(v4f64 (X86Movsd VR256:$src1, VR256:$src2)),
699f4a2713aSLionel Sambuc            (SUBREG_TO_REG (i32 0),
700f4a2713aSLionel Sambuc              (VMOVSDrr (EXTRACT_SUBREG (v4f64 VR256:$src1), sub_xmm),
701f4a2713aSLionel Sambuc                        (EXTRACT_SUBREG (v4f64 VR256:$src2), sub_xmm)),
702f4a2713aSLionel Sambuc              sub_xmm)>;
703f4a2713aSLionel Sambuc
704f4a2713aSLionel Sambuc  // FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem
705f4a2713aSLionel Sambuc  // is during lowering, where it's not possible to recognize the fold cause
706f4a2713aSLionel Sambuc  // it has two uses through a bitcast. One use disappears at isel time and the
707f4a2713aSLionel Sambuc  // fold opportunity reappears.
708f4a2713aSLionel Sambuc  def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)),
709f4a2713aSLionel Sambuc            (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
710f4a2713aSLionel Sambuc  def : Pat<(v2i64 (X86Movlpd VR128:$src1, VR128:$src2)),
711f4a2713aSLionel Sambuc            (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
712f4a2713aSLionel Sambuc  def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)),
713f4a2713aSLionel Sambuc            (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
714f4a2713aSLionel Sambuc  def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)),
715f4a2713aSLionel Sambuc            (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
716f4a2713aSLionel Sambuc}
717f4a2713aSLionel Sambuc
// Selection patterns, guarded by UseSSE1, that map zero-extending moves,
// scalar loads, extract-store and low-element shuffles onto the MOVSS forms.
718f4a2713aSLionel Sambuclet Predicates = [UseSSE1] in {
  // An inner `let Predicates` replaces the enclosing list instead of
  // extending it, so UseSSE1 has to be repeated next to NoSSE41 for these
  // patterns to stay guarded by both predicates.
719*0a6a1f1dSLionel Sambuc  let Predicates = [UseSSE1, NoSSE41], AddedComplexity = 15 in {
720f4a2713aSLionel Sambuc  // Move scalar to XMM zero-extended, zeroing a VR128 then do a
721f4a2713aSLionel Sambuc  // MOVSS to the lower bits.
722f4a2713aSLionel Sambuc  def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
723f4a2713aSLionel Sambuc            (MOVSSrr (v4f32 (V_SET0)), FR32:$src)>;
724f4a2713aSLionel Sambuc  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
725f4a2713aSLionel Sambuc            (MOVSSrr (v4f32 (V_SET0)), (COPY_TO_REGCLASS VR128:$src, FR32))>;
726f4a2713aSLionel Sambuc  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
727f4a2713aSLionel Sambuc            (MOVSSrr (v4i32 (V_SET0)), (COPY_TO_REGCLASS VR128:$src, FR32))>;
728f4a2713aSLionel Sambuc  }
729f4a2713aSLionel Sambuc
730f4a2713aSLionel Sambuc  let AddedComplexity = 20 in {
731f4a2713aSLionel Sambuc  // MOVSSrm already zeros the high parts of the register.
732f4a2713aSLionel Sambuc  def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
733f4a2713aSLionel Sambuc            (COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>;
734f4a2713aSLionel Sambuc  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
735f4a2713aSLionel Sambuc            (COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>;
736f4a2713aSLionel Sambuc  def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
737f4a2713aSLionel Sambuc            (COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>;
738f4a2713aSLionel Sambuc  }
739f4a2713aSLionel Sambuc
740f4a2713aSLionel Sambuc  // Extract and store.
741f4a2713aSLionel Sambuc  def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
742f4a2713aSLionel Sambuc                   addr:$dst),
743f4a2713aSLionel Sambuc            (MOVSSmr addr:$dst, (COPY_TO_REGCLASS VR128:$src, FR32))>;
744f4a2713aSLionel Sambuc
745f4a2713aSLionel Sambuc  // Shuffle with MOVSS
746f4a2713aSLionel Sambuc  def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)),
747f4a2713aSLionel Sambuc            (MOVSSrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR32))>;
748f4a2713aSLionel Sambuc  def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
749f4a2713aSLionel Sambuc            (MOVSSrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR32))>;
750f4a2713aSLionel Sambuc}
751f4a2713aSLionel Sambuc
// Selection patterns, guarded by UseSSE2, that map zero-extending moves,
// scalar loads, extract-store and low-element shuffles onto the MOVSD forms.
752f4a2713aSLionel Sambuclet Predicates = [UseSSE2] in {
  // An inner `let Predicates` replaces the enclosing list instead of
  // extending it, so UseSSE2 has to be repeated next to NoSSE41 for this
  // pattern to stay guarded by both predicates.
753*0a6a1f1dSLionel Sambuc  let Predicates = [UseSSE2, NoSSE41], AddedComplexity = 15 in {
754f4a2713aSLionel Sambuc  // Move scalar to XMM zero-extended, zeroing a VR128 then do a
755f4a2713aSLionel Sambuc  // MOVSD to the lower bits.
756f4a2713aSLionel Sambuc  def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
757f4a2713aSLionel Sambuc            (MOVSDrr (v2f64 (V_SET0)), FR64:$src)>;
758f4a2713aSLionel Sambuc  }
759f4a2713aSLionel Sambuc
760f4a2713aSLionel Sambuc  let AddedComplexity = 20 in {
761f4a2713aSLionel Sambuc  // MOVSDrm already zeros the high parts of the register.
762f4a2713aSLionel Sambuc  def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
763f4a2713aSLionel Sambuc            (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
764f4a2713aSLionel Sambuc  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
765f4a2713aSLionel Sambuc            (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
766f4a2713aSLionel Sambuc  def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
767f4a2713aSLionel Sambuc            (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
768f4a2713aSLionel Sambuc  def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
769f4a2713aSLionel Sambuc            (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
770f4a2713aSLionel Sambuc  def : Pat<(v2f64 (X86vzload addr:$src)),
771f4a2713aSLionel Sambuc            (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
772f4a2713aSLionel Sambuc  }
773f4a2713aSLionel Sambuc
774f4a2713aSLionel Sambuc  // Extract and store.
775f4a2713aSLionel Sambuc  def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
776f4a2713aSLionel Sambuc                   addr:$dst),
777f4a2713aSLionel Sambuc            (MOVSDmr addr:$dst, (COPY_TO_REGCLASS VR128:$src, FR64))>;
778f4a2713aSLionel Sambuc
779f4a2713aSLionel Sambuc  // Shuffle with MOVSD
780f4a2713aSLionel Sambuc  def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)),
781f4a2713aSLionel Sambuc            (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
782f4a2713aSLionel Sambuc  def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
783f4a2713aSLionel Sambuc            (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
784f4a2713aSLionel Sambuc  def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)),
785f4a2713aSLionel Sambuc            (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
786f4a2713aSLionel Sambuc  def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)),
787f4a2713aSLionel Sambuc            (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
788f4a2713aSLionel Sambuc
789f4a2713aSLionel Sambuc  // FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem
790f4a2713aSLionel Sambuc  // is during lowering, where it's not possible to recognize the fold cause
791f4a2713aSLionel Sambuc  // it has two uses through a bitcast. One use disappears at isel time and the
792f4a2713aSLionel Sambuc  // fold opportunity reappears.
793f4a2713aSLionel Sambuc  def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)),
794f4a2713aSLionel Sambuc            (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
795f4a2713aSLionel Sambuc  def : Pat<(v2i64 (X86Movlpd VR128:$src1, VR128:$src2)),
796f4a2713aSLionel Sambuc            (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
797f4a2713aSLionel Sambuc  def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)),
798f4a2713aSLionel Sambuc            (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
799f4a2713aSLionel Sambuc  def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)),
800f4a2713aSLionel Sambuc            (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
801f4a2713aSLionel Sambuc}
802f4a2713aSLionel Sambuc
803f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===//
804f4a2713aSLionel Sambuc// SSE 1 & 2 - Move Aligned/Unaligned FP Instructions
805f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===//
806f4a2713aSLionel Sambuc
// sse12_mov_packed - Register-register (`rr`) and load (`rm`) forms of a
// packed FP move.
//   opc      - opcode byte shared by both forms.
//   RC       - register class of source and destination.
//   x86memop - memory operand type of the load form.
//   ld_frag  - load fragment matched by the `rm` selection pattern.
//   asm      - mnemonic used in the asm string.
//   d        - execution domain passed to PI.
//   itins    - itinerary pair; itins.rr is used for `rr`, itins.rm for `rm`.
//   IsReMaterializable - value given to isReMaterializable on `rm`
//                        (defaults to 1).
807f4a2713aSLionel Sambucmulticlass sse12_mov_packed<bits<8> opc, RegisterClass RC,
808f4a2713aSLionel Sambuc                            X86MemOperand x86memop, PatFrag ld_frag,
809f4a2713aSLionel Sambuc                            string asm, Domain d,
810f4a2713aSLionel Sambuc                            OpndItins itins,
811f4a2713aSLionel Sambuc                            bit IsReMaterializable = 1> {
// The register-register form has no selection pattern, so hasSideEffects is
// stated explicitly.
812*0a6a1f1dSLionel Sambuclet hasSideEffects = 0 in
813f4a2713aSLionel Sambuc  def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
814f4a2713aSLionel Sambuc              !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], itins.rr, d>,
815*0a6a1f1dSLionel Sambuc           Sched<[WriteFShuffle]>;
816f4a2713aSLionel Sambuclet canFoldAsLoad = 1, isReMaterializable = IsReMaterializable in
817f4a2713aSLionel Sambuc  def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
818f4a2713aSLionel Sambuc              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
819f4a2713aSLionel Sambuc                   [(set RC:$dst, (ld_frag addr:$src))], itins.rm, d>,
820f4a2713aSLionel Sambuc           Sched<[WriteLoad]>;
821f4a2713aSLionel Sambuc}
822f4a2713aSLionel Sambuc
// VEX-tagged packed moves, guarded by HasAVX and NoVLX.  Aligned forms use
// opcode 0x28 with the alignedload* fragments; unaligned forms use opcode 0x10
// with the plain load* fragments.  The Y-suffixed variants operate on VR256 /
// f256mem and add VEX_L.  The two MOVUPD variants pass 0 for
// IsReMaterializable; every other instantiation keeps the default of 1.
823*0a6a1f1dSLionel Sambuclet Predicates = [HasAVX, NoVLX] in {
824f4a2713aSLionel Sambucdefm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
825f4a2713aSLionel Sambuc                              "movaps", SSEPackedSingle, SSE_MOVA_ITINS>,
826*0a6a1f1dSLionel Sambuc                              PS, VEX;
827f4a2713aSLionel Sambucdefm VMOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64,
828f4a2713aSLionel Sambuc                              "movapd", SSEPackedDouble, SSE_MOVA_ITINS>,
829*0a6a1f1dSLionel Sambuc                              PD, VEX;
830f4a2713aSLionel Sambucdefm VMOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32,
831f4a2713aSLionel Sambuc                              "movups", SSEPackedSingle, SSE_MOVU_ITINS>,
832*0a6a1f1dSLionel Sambuc                              PS, VEX;
833f4a2713aSLionel Sambucdefm VMOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
834f4a2713aSLionel Sambuc                              "movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>,
835*0a6a1f1dSLionel Sambuc                              PD, VEX;
836f4a2713aSLionel Sambuc
837f4a2713aSLionel Sambucdefm VMOVAPSY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv8f32,
838f4a2713aSLionel Sambuc                              "movaps", SSEPackedSingle, SSE_MOVA_ITINS>,
839*0a6a1f1dSLionel Sambuc                              PS, VEX, VEX_L;
840f4a2713aSLionel Sambucdefm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64,
841f4a2713aSLionel Sambuc                              "movapd", SSEPackedDouble, SSE_MOVA_ITINS>,
842*0a6a1f1dSLionel Sambuc                              PD, VEX, VEX_L;
843f4a2713aSLionel Sambucdefm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32,
844f4a2713aSLionel Sambuc                              "movups", SSEPackedSingle, SSE_MOVU_ITINS>,
845*0a6a1f1dSLionel Sambuc                              PS, VEX, VEX_L;
846f4a2713aSLionel Sambucdefm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64,
847f4a2713aSLionel Sambuc                              "movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>,
848*0a6a1f1dSLionel Sambuc                              PD, VEX, VEX_L;
849*0a6a1f1dSLionel Sambuc}
850*0a6a1f1dSLionel Sambuc
// Non-VEX single-precision packed moves, guarded by UseSSE1: aligned (opcode
// 0x28, alignedloadv4f32) and unaligned (opcode 0x10, loadv4f32).
851*0a6a1f1dSLionel Sambuclet Predicates = [UseSSE1] in {
852f4a2713aSLionel Sambucdefm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
853f4a2713aSLionel Sambuc                              "movaps", SSEPackedSingle, SSE_MOVA_ITINS>,
854*0a6a1f1dSLionel Sambuc                              PS;
855f4a2713aSLionel Sambucdefm MOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32,
856f4a2713aSLionel Sambuc                              "movups", SSEPackedSingle, SSE_MOVU_ITINS>,
857*0a6a1f1dSLionel Sambuc                              PS;
858*0a6a1f1dSLionel Sambuc}
// Non-VEX double-precision packed moves, guarded by UseSSE2: aligned (opcode
// 0x28, alignedloadv2f64) and unaligned (opcode 0x10, loadv2f64).  MOVUPD
// passes 0 for IsReMaterializable.
859*0a6a1f1dSLionel Sambuclet Predicates = [UseSSE2] in {
860*0a6a1f1dSLionel Sambucdefm MOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64,
861*0a6a1f1dSLionel Sambuc                              "movapd", SSEPackedDouble, SSE_MOVA_ITINS>,
862*0a6a1f1dSLionel Sambuc                              PD;
863f4a2713aSLionel Sambucdefm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
864f4a2713aSLionel Sambuc                              "movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>,
865*0a6a1f1dSLionel Sambuc                              PD;
866*0a6a1f1dSLionel Sambuc}
867f4a2713aSLionel Sambuc
// VEX-tagged packed store forms (MRMDestMem), guarded by HasAVX and NoVLX and
// scheduled as WriteStore.  Aligned stores use opcode 0x29 and match
// alignedstore (128-bit) or alignedstore256 (256-bit); unaligned stores use
// opcode 0x11 and match a plain store.  The Y-suffixed variants add VEX_L.
868*0a6a1f1dSLionel Sambuclet SchedRW = [WriteStore], Predicates = [HasAVX, NoVLX]  in {
869f4a2713aSLionel Sambucdef VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
870f4a2713aSLionel Sambuc                   "movaps\t{$src, $dst|$dst, $src}",
871f4a2713aSLionel Sambuc                   [(alignedstore (v4f32 VR128:$src), addr:$dst)],
872f4a2713aSLionel Sambuc                   IIC_SSE_MOVA_P_MR>, VEX;
873f4a2713aSLionel Sambucdef VMOVAPDmr : VPDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
874f4a2713aSLionel Sambuc                   "movapd\t{$src, $dst|$dst, $src}",
875f4a2713aSLionel Sambuc                   [(alignedstore (v2f64 VR128:$src), addr:$dst)],
876f4a2713aSLionel Sambuc                   IIC_SSE_MOVA_P_MR>, VEX;
877f4a2713aSLionel Sambucdef VMOVUPSmr : VPSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
878f4a2713aSLionel Sambuc                   "movups\t{$src, $dst|$dst, $src}",
879f4a2713aSLionel Sambuc                   [(store (v4f32 VR128:$src), addr:$dst)],
880f4a2713aSLionel Sambuc                   IIC_SSE_MOVU_P_MR>, VEX;
881f4a2713aSLionel Sambucdef VMOVUPDmr : VPDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
882f4a2713aSLionel Sambuc                   "movupd\t{$src, $dst|$dst, $src}",
883f4a2713aSLionel Sambuc                   [(store (v2f64 VR128:$src), addr:$dst)],
884f4a2713aSLionel Sambuc                   IIC_SSE_MOVU_P_MR>, VEX;
885f4a2713aSLionel Sambucdef VMOVAPSYmr : VPSI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
886f4a2713aSLionel Sambuc                   "movaps\t{$src, $dst|$dst, $src}",
887f4a2713aSLionel Sambuc                   [(alignedstore256 (v8f32 VR256:$src), addr:$dst)],
888f4a2713aSLionel Sambuc                   IIC_SSE_MOVA_P_MR>, VEX, VEX_L;
889f4a2713aSLionel Sambucdef VMOVAPDYmr : VPDI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
890f4a2713aSLionel Sambuc                   "movapd\t{$src, $dst|$dst, $src}",
891f4a2713aSLionel Sambuc                   [(alignedstore256 (v4f64 VR256:$src), addr:$dst)],
892f4a2713aSLionel Sambuc                   IIC_SSE_MOVA_P_MR>, VEX, VEX_L;
893f4a2713aSLionel Sambucdef VMOVUPSYmr : VPSI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
894f4a2713aSLionel Sambuc                   "movups\t{$src, $dst|$dst, $src}",
895f4a2713aSLionel Sambuc                   [(store (v8f32 VR256:$src), addr:$dst)],
896f4a2713aSLionel Sambuc                   IIC_SSE_MOVU_P_MR>, VEX, VEX_L;
897f4a2713aSLionel Sambucdef VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
898f4a2713aSLionel Sambuc                   "movupd\t{$src, $dst|$dst, $src}",
899f4a2713aSLionel Sambuc                   [(store (v4f64 VR256:$src), addr:$dst)],
900f4a2713aSLionel Sambuc                   IIC_SSE_MOVU_P_MR>, VEX, VEX_L;
901f4a2713aSLionel Sambuc} // SchedRW
902f4a2713aSLionel Sambuc
903f4a2713aSLionel Sambuc// For disassembler
// Register-to-register packed moves in the MRMDestReg encoding (opcodes 0x29
// for the aligned mnemonics, 0x11 for the unaligned ones).  They carry no
// selection patterns and are marked isCodeGenOnly with ForceDisassemble set.
// The Y-suffixed variants operate on VR256 and add VEX_L.
904*0a6a1f1dSLionel Sambuclet isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
905*0a6a1f1dSLionel Sambuc    SchedRW = [WriteFShuffle] in {
906f4a2713aSLionel Sambuc  def VMOVAPSrr_REV : VPSI<0x29, MRMDestReg, (outs VR128:$dst),
907f4a2713aSLionel Sambuc                          (ins VR128:$src),
908f4a2713aSLionel Sambuc                          "movaps\t{$src, $dst|$dst, $src}", [],
909f4a2713aSLionel Sambuc                          IIC_SSE_MOVA_P_RR>, VEX;
910f4a2713aSLionel Sambuc  def VMOVAPDrr_REV : VPDI<0x29, MRMDestReg, (outs VR128:$dst),
911f4a2713aSLionel Sambuc                           (ins VR128:$src),
912f4a2713aSLionel Sambuc                           "movapd\t{$src, $dst|$dst, $src}", [],
913f4a2713aSLionel Sambuc                           IIC_SSE_MOVA_P_RR>, VEX;
914f4a2713aSLionel Sambuc  def VMOVUPSrr_REV : VPSI<0x11, MRMDestReg, (outs VR128:$dst),
915f4a2713aSLionel Sambuc                           (ins VR128:$src),
916f4a2713aSLionel Sambuc                           "movups\t{$src, $dst|$dst, $src}", [],
917f4a2713aSLionel Sambuc                           IIC_SSE_MOVU_P_RR>, VEX;
918f4a2713aSLionel Sambuc  def VMOVUPDrr_REV : VPDI<0x11, MRMDestReg, (outs VR128:$dst),
919f4a2713aSLionel Sambuc                           (ins VR128:$src),
920f4a2713aSLionel Sambuc                           "movupd\t{$src, $dst|$dst, $src}", [],
921f4a2713aSLionel Sambuc                           IIC_SSE_MOVU_P_RR>, VEX;
922f4a2713aSLionel Sambuc  def VMOVAPSYrr_REV : VPSI<0x29, MRMDestReg, (outs VR256:$dst),
923f4a2713aSLionel Sambuc                            (ins VR256:$src),
924f4a2713aSLionel Sambuc                            "movaps\t{$src, $dst|$dst, $src}", [],
925f4a2713aSLionel Sambuc                            IIC_SSE_MOVA_P_RR>, VEX, VEX_L;
926f4a2713aSLionel Sambuc  def VMOVAPDYrr_REV : VPDI<0x29, MRMDestReg, (outs VR256:$dst),
927f4a2713aSLionel Sambuc                            (ins VR256:$src),
928f4a2713aSLionel Sambuc                            "movapd\t{$src, $dst|$dst, $src}", [],
929f4a2713aSLionel Sambuc                            IIC_SSE_MOVA_P_RR>, VEX, VEX_L;
930f4a2713aSLionel Sambuc  def VMOVUPSYrr_REV : VPSI<0x11, MRMDestReg, (outs VR256:$dst),
931f4a2713aSLionel Sambuc                            (ins VR256:$src),
932f4a2713aSLionel Sambuc                            "movups\t{$src, $dst|$dst, $src}", [],
933f4a2713aSLionel Sambuc                            IIC_SSE_MOVU_P_RR>, VEX, VEX_L;
934f4a2713aSLionel Sambuc  def VMOVUPDYrr_REV : VPDI<0x11, MRMDestReg, (outs VR256:$dst),
935f4a2713aSLionel Sambuc                            (ins VR256:$src),
936f4a2713aSLionel Sambuc                            "movupd\t{$src, $dst|$dst, $src}", [],
937f4a2713aSLionel Sambuc                            IIC_SSE_MOVU_P_RR>, VEX, VEX_L;
938f4a2713aSLionel Sambuc}
939f4a2713aSLionel Sambuc
// With AVX, an X86vzmovl of a 128-bit vector inserted at index 0 of an undef
// 256-bit vector is selected as a 128-bit VMOVAPSrr whose result is placed in
// the sub_xmm subregister through SUBREG_TO_REG. The same output is used for
// all four result types (v8i32, v4i64, v8f32, v4f64).
// NOTE(review): presumably this relies on the VEX-encoded 128-bit move
// clearing the upper half of the 256-bit register -- confirm.
let Predicates = [HasAVX] in {
def : Pat<(v8i32 (X86vzmovl
                  (insert_subvector undef, (v4i32 VR128:$src), (iPTR 0)))),
          (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
def : Pat<(v4i64 (X86vzmovl
                  (insert_subvector undef, (v2i64 VR128:$src), (iPTR 0)))),
          (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
def : Pat<(v8f32 (X86vzmovl
                  (insert_subvector undef, (v4f32 VR128:$src), (iPTR 0)))),
          (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
def : Pat<(v4f64 (X86vzmovl
                  (insert_subvector undef, (v2f64 VR128:$src), (iPTR 0)))),
          (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
}
954f4a2713aSLionel Sambuc
955f4a2713aSLionel Sambuc
// The 256-bit AVX storeu intrinsics map directly onto the VMOVUPS/VMOVUPD
// 256-bit store instructions. No Predicates list is attached to these two
// patterns here.
def : Pat<(int_x86_avx_storeu_ps_256 addr:$dst, VR256:$src),
          (VMOVUPSYmr addr:$dst, VR256:$src)>;
def : Pat<(int_x86_avx_storeu_pd_256 addr:$dst, VR256:$src),
          (VMOVUPDYmr addr:$dst, VR256:$src)>;
960f4a2713aSLionel Sambuc
// 128-bit packed FP stores without a VEX modifier, all scheduled as
// WriteStore. MOVAPS/MOVAPD (opcode 0x29) are selected only for
// `alignedstore`; MOVUPS/MOVUPD (opcode 0x11) are selected for a plain
// `store`.
let SchedRW = [WriteStore] in {
def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movaps\t{$src, $dst|$dst, $src}",
                   [(alignedstore (v4f32 VR128:$src), addr:$dst)],
                   IIC_SSE_MOVA_P_MR>;
def MOVAPDmr : PDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movapd\t{$src, $dst|$dst, $src}",
                   [(alignedstore (v2f64 VR128:$src), addr:$dst)],
                   IIC_SSE_MOVA_P_MR>;
def MOVUPSmr : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movups\t{$src, $dst|$dst, $src}",
                   [(store (v4f32 VR128:$src), addr:$dst)],
                   IIC_SSE_MOVU_P_MR>;
def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movupd\t{$src, $dst|$dst, $src}",
                   [(store (v2f64 VR128:$src), addr:$dst)],
                   IIC_SSE_MOVU_P_MR>;
} // SchedRW
979f4a2713aSLionel Sambuc
// For disassembler
// Register-to-register moves (no VEX modifier) that use the store-direction
// opcodes (0x29 for MOVAPx, 0x11 for MOVUPx) in MRMDestReg form. They carry no
// selection pattern ([]), are isCodeGenOnly, and set ForceDisassemble.
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
    SchedRW = [WriteFShuffle] in {
  def MOVAPSrr_REV : PSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                         "movaps\t{$src, $dst|$dst, $src}", [],
                         IIC_SSE_MOVA_P_RR>;
  def MOVAPDrr_REV : PDI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                         "movapd\t{$src, $dst|$dst, $src}", [],
                         IIC_SSE_MOVA_P_RR>;
  def MOVUPSrr_REV : PSI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                         "movups\t{$src, $dst|$dst, $src}", [],
                         IIC_SSE_MOVU_P_RR>;
  def MOVUPDrr_REV : PDI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                         "movupd\t{$src, $dst|$dst, $src}", [],
                         IIC_SSE_MOVU_P_RR>;
}
996f4a2713aSLionel Sambuc
// With AVX, the 128-bit SSE/SSE2 storeu intrinsics select the VEX-encoded
// VMOVUPSmr / VMOVUPDmr stores.
let Predicates = [HasAVX] in {
  def : Pat<(int_x86_sse_storeu_ps addr:$dst, VR128:$src),
            (VMOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(int_x86_sse2_storeu_pd addr:$dst, VR128:$src),
            (VMOVUPDmr addr:$dst, VR128:$src)>;
}
1003f4a2713aSLionel Sambuc
// Under UseSSE1 / UseSSE2, the same storeu intrinsics select the MOVUPSmr /
// MOVUPDmr stores instead of the V-prefixed ones.
let Predicates = [UseSSE1] in
  def : Pat<(int_x86_sse_storeu_ps addr:$dst, VR128:$src),
            (MOVUPSmr addr:$dst, VR128:$src)>;
let Predicates = [UseSSE2] in
  def : Pat<(int_x86_sse2_storeu_pd addr:$dst, VR128:$src),
            (MOVUPDmr addr:$dst, VR128:$src)>;
1010f4a2713aSLionel Sambuc
// Use vmovaps/vmovups for AVX integer load/store.
// Aligned loads/stores select the VMOVAPS forms and plain loads/stores select
// the VMOVUPS forms, for every 128-bit and 256-bit integer vector type listed
// below. The whole group is guarded by [HasAVX, NoVLX].
let Predicates = [HasAVX, NoVLX] in {
  // 128-bit load/store
  def : Pat<(alignedloadv2i64 addr:$src),
            (VMOVAPSrm addr:$src)>;
  def : Pat<(loadv2i64 addr:$src),
            (VMOVUPSrm addr:$src)>;

  def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
            (VMOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
            (VMOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
            (VMOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
            (VMOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v2i64 VR128:$src), addr:$dst),
            (VMOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v4i32 VR128:$src), addr:$dst),
            (VMOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v8i16 VR128:$src), addr:$dst),
            (VMOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v16i8 VR128:$src), addr:$dst),
            (VMOVUPSmr addr:$dst, VR128:$src)>;

  // 256-bit load/store
  def : Pat<(alignedloadv4i64 addr:$src),
            (VMOVAPSYrm addr:$src)>;
  def : Pat<(loadv4i64 addr:$src),
            (VMOVUPSYrm addr:$src)>;
  def : Pat<(alignedstore256 (v4i64 VR256:$src), addr:$dst),
            (VMOVAPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(alignedstore256 (v8i32 VR256:$src), addr:$dst),
            (VMOVAPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(alignedstore256 (v16i16 VR256:$src), addr:$dst),
            (VMOVAPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(alignedstore256 (v32i8 VR256:$src), addr:$dst),
            (VMOVAPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(store (v4i64 VR256:$src), addr:$dst),
            (VMOVUPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(store (v8i32 VR256:$src), addr:$dst),
            (VMOVUPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(store (v16i16 VR256:$src), addr:$dst),
            (VMOVUPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(store (v32i8 VR256:$src), addr:$dst),
            (VMOVUPSYmr addr:$dst, VR256:$src)>;

  // Special patterns for storing subvector extracts of lower 128-bits
  // It's cheaper to just use VMOVAPS/VMOVUPS instead of VEXTRACTF128mr
  // Each pattern stores the sub_xmm subregister of the 256-bit source
  // (EXTRACT_SUBREG) with a 128-bit store. The v2f64 and v2i64 cases use the
  // PD store variants; all other types use the PS variants.
  def : Pat<(alignedstore (v2f64 (extract_subvector
                                  (v4f64 VR256:$src), (iPTR 0))), addr:$dst),
            (VMOVAPDmr addr:$dst, (v2f64 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
  def : Pat<(alignedstore (v4f32 (extract_subvector
                                  (v8f32 VR256:$src), (iPTR 0))), addr:$dst),
            (VMOVAPSmr addr:$dst, (v4f32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
  def : Pat<(alignedstore (v2i64 (extract_subvector
                                  (v4i64 VR256:$src), (iPTR 0))), addr:$dst),
            (VMOVAPDmr addr:$dst, (v2i64 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
  def : Pat<(alignedstore (v4i32 (extract_subvector
                                  (v8i32 VR256:$src), (iPTR 0))), addr:$dst),
            (VMOVAPSmr addr:$dst, (v4i32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
  def : Pat<(alignedstore (v8i16 (extract_subvector
                                  (v16i16 VR256:$src), (iPTR 0))), addr:$dst),
            (VMOVAPSmr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
  def : Pat<(alignedstore (v16i8 (extract_subvector
                                  (v32i8 VR256:$src), (iPTR 0))), addr:$dst),
            (VMOVAPSmr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;

  def : Pat<(store (v2f64 (extract_subvector
                           (v4f64 VR256:$src), (iPTR 0))), addr:$dst),
            (VMOVUPDmr addr:$dst, (v2f64 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
  def : Pat<(store (v4f32 (extract_subvector
                           (v8f32 VR256:$src), (iPTR 0))), addr:$dst),
            (VMOVUPSmr addr:$dst, (v4f32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
  def : Pat<(store (v2i64 (extract_subvector
                           (v4i64 VR256:$src), (iPTR 0))), addr:$dst),
            (VMOVUPDmr addr:$dst, (v2i64 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
  def : Pat<(store (v4i32 (extract_subvector
                           (v8i32 VR256:$src), (iPTR 0))), addr:$dst),
            (VMOVUPSmr addr:$dst, (v4i32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
  def : Pat<(store (v8i16 (extract_subvector
                           (v16i16 VR256:$src), (iPTR 0))), addr:$dst),
            (VMOVUPSmr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
  def : Pat<(store (v16i8 (extract_subvector
                           (v32i8 VR256:$src), (iPTR 0))), addr:$dst),
            (VMOVUPSmr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
}
1098f4a2713aSLionel Sambuc
// Use movaps / movups for SSE integer load / store (one byte shorter).
// The instructions selected below are then converted to MOVDQA/MOVDQU
// during the SSE domain pass.
// Aligned accesses select MOVAPSrm/MOVAPSmr; plain loads/stores select
// MOVUPSrm/MOVUPSmr.
let Predicates = [UseSSE1] in {
  def : Pat<(alignedloadv2i64 addr:$src),
            (MOVAPSrm addr:$src)>;
  def : Pat<(loadv2i64 addr:$src),
            (MOVUPSrm addr:$src)>;

  def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v2i64 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v4i32 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v8i16 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v16i8 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
}
1125f4a2713aSLionel Sambuc
// Alias instruction to load FR32 or FR64 from f128mem using movaps. Upper
// bits are disregarded. FIXME: Set encoding to pseudo!
// All four aliases are isCodeGenOnly, foldable as loads, rematerializable and
// scheduled as WriteLoad. They are selected for alignedloadfsf32 (FR32
// results) and alignedloadfsf64 (FR64 results); the FsV* variants carry VEX.
let canFoldAsLoad = 1, isReMaterializable = 1, SchedRW = [WriteLoad] in {
let isCodeGenOnly = 1 in {
  def FsVMOVAPSrm : VPSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
                         "movaps\t{$src, $dst|$dst, $src}",
                         [(set FR32:$dst, (alignedloadfsf32 addr:$src))],
                         IIC_SSE_MOVA_P_RM>, VEX;
  def FsVMOVAPDrm : VPDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
                         "movapd\t{$src, $dst|$dst, $src}",
                         [(set FR64:$dst, (alignedloadfsf64 addr:$src))],
                         IIC_SSE_MOVA_P_RM>, VEX;
  def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
                       "movaps\t{$src, $dst|$dst, $src}",
                       [(set FR32:$dst, (alignedloadfsf32 addr:$src))],
                       IIC_SSE_MOVA_P_RM>;
  def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
                       "movapd\t{$src, $dst|$dst, $src}",
                       [(set FR64:$dst, (alignedloadfsf64 addr:$src))],
                       IIC_SSE_MOVA_P_RM>;
}
}
1148f4a2713aSLionel Sambuc
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Low packed FP Instructions
//===----------------------------------------------------------------------===//
1152f4a2713aSLionel Sambuc
// Defines the load forms (PSrm and PDrm) of a "move low/high packed" pair.
//   opc      - opcode byte shared by both forms.
//   psnode   - SDNode matched by the PS form.
//   pdnode   - SDNode matched by the PD form.
//   base_opc - mnemonic stem; "s" or "d" is appended to it.
//   asm_opr  - operand part of the assembly string, appended after the suffix.
//   itin     - itinerary class for both forms.
// Both forms take a VR128 $src1 and an f64mem $src2 and produce a VR128 $dst.
// The memory operand is matched as a loadf64 wrapped in scalar_to_vector (and,
// for PS, bitcast from v2f64 to v4f32).
multiclass sse12_mov_hilo_packed_base<bits<8>opc, SDNode psnode, SDNode pdnode,
                                      string base_opc, string asm_opr,
                                      InstrItinClass itin> {
  def PSrm : PI<opc, MRMSrcMem,
         (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
         !strconcat(base_opc, "s", asm_opr),
     [(set VR128:$dst,
       (psnode VR128:$src1,
              (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))],
              itin, SSEPackedSingle>, PS,
     Sched<[WriteFShuffleLd, ReadAfterLd]>;

  def PDrm : PI<opc, MRMSrcMem,
         (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
         !strconcat(base_opc, "d", asm_opr),
     [(set VR128:$dst, (v2f64 (pdnode VR128:$src1,
                              (scalar_to_vector (loadf64 addr:$src2)))))],
              itin, SSEPackedDouble>, PD,
     Sched<[WriteFShuffleLd, ReadAfterLd]>;

}
1174f4a2713aSLionel Sambuc
// Instantiates sse12_mov_hilo_packed_base twice for one mnemonic stem:
//   V#NAME - VEX_4V variant with a three-operand assembly string
//            ($src2, $src1, $dst).
//   NAME   - variant with $src1 tied to $dst ("$src1 = $dst") and a
//            two-operand assembly string ($src2, $dst).
multiclass sse12_mov_hilo_packed<bits<8>opc, SDNode psnode, SDNode pdnode,
                                 string base_opc, InstrItinClass itin> {
  defm V#NAME : sse12_mov_hilo_packed_base<opc, psnode, pdnode, base_opc,
                                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                                    itin>, VEX_4V;

let Constraints = "$src1 = $dst" in
  defm NAME : sse12_mov_hilo_packed_base<opc, psnode, pdnode, base_opc,
                                    "\t{$src2, $dst|$dst, $src2}",
                                    itin>;
}
1186f4a2713aSLionel Sambuc
// MOVLPS/MOVLPD load forms (opcode 0x12) and their V-prefixed counterparts,
// matching X86Movlps / X86Movlpd, with AddedComplexity raised to 20.
let AddedComplexity = 20 in {
  defm MOVL : sse12_mov_hilo_packed<0x12, X86Movlps, X86Movlpd, "movlp",
                                    IIC_SSE_MOV_LH>;
}
1191f4a2713aSLionel Sambuc
// MOVLPS/MOVLPD stores (opcode 0x13, MRMDestMem), scheduled as WriteStore.
// Each is selected for a store of element 0 of the source viewed as v2f64
// (the PS forms bitcast the v4f32 source to v2f64 first). The V-prefixed
// definitions add VEX.
let SchedRW = [WriteStore] in {
def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movlps\t{$src, $dst|$dst, $src}",
                   [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
                                 (iPTR 0))), addr:$dst)],
                                 IIC_SSE_MOV_LH>, VEX;
def VMOVLPDmr : VPDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movlpd\t{$src, $dst|$dst, $src}",
                   [(store (f64 (vector_extract (v2f64 VR128:$src),
                                 (iPTR 0))), addr:$dst)],
                                 IIC_SSE_MOV_LH>, VEX;
def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movlps\t{$src, $dst|$dst, $src}",
                   [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
                                 (iPTR 0))), addr:$dst)],
                                 IIC_SSE_MOV_LH>;
def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movlpd\t{$src, $dst|$dst, $src}",
                   [(store (f64 (vector_extract (v2f64 VR128:$src),
                                 (iPTR 0))), addr:$dst)],
                                 IIC_SSE_MOV_LH>;
} // SchedRW
1214f4a2713aSLionel Sambuc
// Additional AVX selection patterns for VMOVLPS/VMOVLPD: folding a load into
// the shuffle node, and folding a load-shuffle-store on the same address into
// a single store.
let Predicates = [HasAVX] in {
  // Shuffle with VMOVLPS
  def : Pat<(v4f32 (X86Movlps VR128:$src1, (load addr:$src2))),
            (VMOVLPSrm VR128:$src1, addr:$src2)>;
  def : Pat<(v4i32 (X86Movlps VR128:$src1, (load addr:$src2))),
            (VMOVLPSrm VR128:$src1, addr:$src2)>;

  // Shuffle with VMOVLPD
  def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))),
            (VMOVLPDrm VR128:$src1, addr:$src2)>;
  def : Pat<(v2i64 (X86Movlpd VR128:$src1, (load addr:$src2))),
            (VMOVLPDrm VR128:$src1, addr:$src2)>;
  // An X86Movsd whose second operand is a scalar f64 load also selects
  // VMOVLPDrm.
  def : Pat<(v2f64 (X86Movsd VR128:$src1,
                             (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
            (VMOVLPDrm VR128:$src1, addr:$src2)>;

  // Store patterns
  // The loaded address and the store address are the same operand ($src1),
  // so only $src2 has to be written back with the MOVLP store.
  def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)),
                   addr:$src1),
            (VMOVLPSmr addr:$src1, VR128:$src2)>;
  def : Pat<(store (v4i32 (X86Movlps
                   (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)), addr:$src1),
            (VMOVLPSmr addr:$src1, VR128:$src2)>;
  def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128:$src2)),
                   addr:$src1),
            (VMOVLPDmr addr:$src1, VR128:$src2)>;
  def : Pat<(store (v2i64 (X86Movlpd (load addr:$src1), VR128:$src2)),
                   addr:$src1),
            (VMOVLPDmr addr:$src1, VR128:$src2)>;
}
1245f4a2713aSLionel Sambuc
// Additional selection patterns for MOVLPS under UseSSE1.
let Predicates = [UseSSE1] in {
  // (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS
  def : Pat<(store (i64 (vector_extract (bc_v2i64 (v4f32 VR128:$src2)),
                                 (iPTR 0))), addr:$src1),
            (MOVLPSmr addr:$src1, VR128:$src2)>;

  // Shuffle with MOVLPS
  def : Pat<(v4f32 (X86Movlps VR128:$src1, (load addr:$src2))),
            (MOVLPSrm VR128:$src1, addr:$src2)>;
  def : Pat<(v4i32 (X86Movlps VR128:$src1, (load addr:$src2))),
            (MOVLPSrm VR128:$src1, addr:$src2)>;
  // Same fold when the memory operand is an i64 load wrapped in
  // scalar_to_vector and bitcast to v4f32.
  def : Pat<(X86Movlps VR128:$src1,
                      (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
            (MOVLPSrm VR128:$src1, addr:$src2)>;

  // Store patterns
  // The loaded address and the store address are the same operand ($src1),
  // so only $src2 has to be written back with MOVLPSmr.
  def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)),
                                      addr:$src1),
            (MOVLPSmr addr:$src1, VR128:$src2)>;
  def : Pat<(store (v4i32 (X86Movlps
                   (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)),
                              addr:$src1),
            (MOVLPSmr addr:$src1, VR128:$src2)>;
}
1270f4a2713aSLionel Sambuc
// Additional selection patterns for MOVLPD under UseSSE2.
let Predicates = [UseSSE2] in {
  // Shuffle with MOVLPD
  def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))),
            (MOVLPDrm VR128:$src1, addr:$src2)>;
  def : Pat<(v2i64 (X86Movlpd VR128:$src1, (load addr:$src2))),
            (MOVLPDrm VR128:$src1, addr:$src2)>;
  // An X86Movsd whose second operand is a scalar f64 load also selects
  // MOVLPDrm.
  def : Pat<(v2f64 (X86Movsd VR128:$src1,
                             (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
            (MOVLPDrm VR128:$src1, addr:$src2)>;

  // Store patterns
  // The loaded address and the store address are the same operand ($src1),
  // so only $src2 has to be written back with MOVLPDmr.
  def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128:$src2)),
                           addr:$src1),
            (MOVLPDmr addr:$src1, VR128:$src2)>;
  def : Pat<(store (v2i64 (X86Movlpd (load addr:$src1), VR128:$src2)),
                           addr:$src1),
            (MOVLPDmr addr:$src1, VR128:$src2)>;
}
1289f4a2713aSLionel Sambuc
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Hi packed FP Instructions
//===----------------------------------------------------------------------===//
1293f4a2713aSLionel Sambuc
// MOVHPS/MOVHPD load forms (opcode 0x16) and their V-prefixed counterparts,
// matching X86Movlhps / X86Movlhpd, with AddedComplexity raised to 20.
let AddedComplexity = 20 in {
  defm MOVH : sse12_mov_hilo_packed<0x16, X86Movlhps, X86Movlhpd, "movhp",
                                    IIC_SSE_MOV_LH>;
}
1298f4a2713aSLionel Sambuc
// MOVHPS/MOVHPD stores (opcode 0x17, MRMDestMem), scheduled as WriteStore.
// Each is selected for a store of element 0 of an X86Unpckh of the source
// with itself, viewed as v2f64. The V-prefixed definitions add VEX.
let SchedRW = [WriteStore] in {
// v2f64 extract element 1 is always custom lowered to unpack high to low
// and extract element 0 so the non-store version isn't too horrible.
def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movhps\t{$src, $dst|$dst, $src}",
                   [(store (f64 (vector_extract
                                 (X86Unpckh (bc_v2f64 (v4f32 VR128:$src)),
                                            (bc_v2f64 (v4f32 VR128:$src))),
                                 (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>, VEX;
def VMOVHPDmr : VPDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movhpd\t{$src, $dst|$dst, $src}",
                   [(store (f64 (vector_extract
                                 (v2f64 (X86Unpckh VR128:$src, VR128:$src)),
                                 (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>, VEX;
def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movhps\t{$src, $dst|$dst, $src}",
                   [(store (f64 (vector_extract
                                 (X86Unpckh (bc_v2f64 (v4f32 VR128:$src)),
                                            (bc_v2f64 (v4f32 VR128:$src))),
                                 (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>;
def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movhpd\t{$src, $dst|$dst, $src}",
                   [(store (f64 (vector_extract
                                 (v2f64 (X86Unpckh VR128:$src, VR128:$src)),
                                 (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>;
} // SchedRW
1325f4a2713aSLionel Sambuc
// Additional AVX selection patterns for VMOVHPS/VMOVHPD.
let Predicates = [HasAVX] in {
  // VMOVHPS patterns
  def : Pat<(X86Movlhps VR128:$src1,
                 (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
            (VMOVHPSrm VR128:$src1, addr:$src2)>;
  // NOTE(review): this pattern bitcasts the X86vzload to v4i32, while the
  // corresponding UseSSE1 MOVHPS pattern in this file uses bc_v4f32 --
  // confirm the integer type is intended here.
  def : Pat<(X86Movlhps VR128:$src1,
                 (bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
            (VMOVHPSrm VR128:$src1, addr:$src2)>;

  // VMOVHPD patterns

  // FIXME: Instead of X86Unpckl, there should be a X86Movlhpd here, the problem
  // is during lowering, where it's not possible to recognize the load fold
  // because it has two uses through a bitcast. One use disappears at isel time
  // and the fold opportunity reappears.
  def : Pat<(v2f64 (X86Unpckl VR128:$src1,
                      (scalar_to_vector (loadf64 addr:$src2)))),
            (VMOVHPDrm VR128:$src1, addr:$src2)>;
  // Also handle an i64 load because that may get selected as a faster way to
  // load the data.
  def : Pat<(v2f64 (X86Unpckl VR128:$src1,
                      (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
            (VMOVHPDrm VR128:$src1, addr:$src2)>;

  // Storing element 0 of a v2f64 X86VPermilpi with immediate 1 also selects
  // the VMOVHPD store.
  def : Pat<(store (f64 (vector_extract
                          (v2f64 (X86VPermilpi VR128:$src, (i8 1))),
                          (iPTR 0))), addr:$dst),
            (VMOVHPDmr addr:$dst, VR128:$src)>;
}
1355f4a2713aSLionel Sambuc
// SSE1 (UseSSE1) counterparts of the VMOVHPS load-folding patterns: an
// X86Movlhps whose second operand is a 64-bit load (either scalar_to_vector of
// loadi64, or X86vzload), bitcast to v4f32, is selected as MOVHPSrm.
1356f4a2713aSLionel Sambuclet Predicates = [UseSSE1] in {
1357f4a2713aSLionel Sambuc  // MOVHPS patterns
1358f4a2713aSLionel Sambuc  def : Pat<(X86Movlhps VR128:$src1,
1359f4a2713aSLionel Sambuc                 (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
1360f4a2713aSLionel Sambuc            (MOVHPSrm VR128:$src1, addr:$src2)>;
1361f4a2713aSLionel Sambuc  def : Pat<(X86Movlhps VR128:$src1,
1362f4a2713aSLionel Sambuc                 (bc_v4f32 (v2i64 (X86vzload addr:$src2)))),
1363f4a2713aSLionel Sambuc            (MOVHPSrm VR128:$src1, addr:$src2)>;
1364f4a2713aSLionel Sambuc}
1365f4a2713aSLionel Sambuc
// SSE2 (UseSSE2) patterns that fold an f64 or i64 load into MOVHPDrm and
// select MOVHPDmr for a store of a shuffled f64 element.
1366f4a2713aSLionel Sambuclet Predicates = [UseSSE2] in {
1367*0a6a1f1dSLionel Sambuc  // MOVHPD patterns
1368*0a6a1f1dSLionel Sambuc
1369f4a2713aSLionel Sambuc  // FIXME: Instead of X86Unpckl, there should be a X86Movlhpd here, the problem
1370f4a2713aSLionel Sambuc  // is during lowering, where it's not possible to recognize the load fold
1371f4a2713aSLionel Sambuc  // because it has two uses through a bitcast. One use disappears at isel time
1372f4a2713aSLionel Sambuc  // and the fold opportunity reappears.
1373f4a2713aSLionel Sambuc  def : Pat<(v2f64 (X86Unpckl VR128:$src1,
1374f4a2713aSLionel Sambuc                      (scalar_to_vector (loadf64 addr:$src2)))),
1375f4a2713aSLionel Sambuc            (MOVHPDrm VR128:$src1, addr:$src2)>;
1376*0a6a1f1dSLionel Sambuc  // Also handle an i64 load because that may get selected as a faster way to
1377*0a6a1f1dSLionel Sambuc  // load the data.
1378*0a6a1f1dSLionel Sambuc  def : Pat<(v2f64 (X86Unpckl VR128:$src1,
1379*0a6a1f1dSLionel Sambuc                      (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
1380*0a6a1f1dSLionel Sambuc            (MOVHPDrm VR128:$src1, addr:$src2)>;
1381*0a6a1f1dSLionel Sambuc
  // Select MOVHPDmr for a store of element 0 of (X86Shufp $src, $src, 1);
  // presumably that shuffle places the upper f64 of $src in element 0 -- confirm.
1382*0a6a1f1dSLionel Sambuc  def : Pat<(store (f64 (vector_extract
1383*0a6a1f1dSLionel Sambuc                          (v2f64 (X86Shufp VR128:$src, VR128:$src, (i8 1))),
1384*0a6a1f1dSLionel Sambuc                          (iPTR 0))), addr:$dst),
1385*0a6a1f1dSLionel Sambuc            (MOVHPDmr addr:$dst, VR128:$src)>;
1386f4a2713aSLionel Sambuc}
1387f4a2713aSLionel Sambuc
1388f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===//
1389f4a2713aSLionel Sambuc// SSE 1 & 2 - Move Low to High and High to Low packed FP Instructions
1390f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===//
1391f4a2713aSLionel Sambuc
// AVX register-register forms (UseAVX, VEX_4V): three-operand encodings with
// separate $src1, $src2 and $dst, matched on v4f32 X86Movlhps / X86Movhlps and
// scheduled as WriteFShuffle. AddedComplexity = 20 raises the priority of
// these patterns during selection.
// NOTE(review): the mnemonic strings omit the "v" prefix; presumably the VPSI
// class prepends it -- confirm in the instruction-format definitions.
1392f4a2713aSLionel Sambuclet AddedComplexity = 20, Predicates = [UseAVX] in {
1393f4a2713aSLionel Sambuc  def VMOVLHPSrr : VPSI<0x16, MRMSrcReg, (outs VR128:$dst),
1394f4a2713aSLionel Sambuc                                       (ins VR128:$src1, VR128:$src2),
1395f4a2713aSLionel Sambuc                      "movlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1396f4a2713aSLionel Sambuc                      [(set VR128:$dst,
1397f4a2713aSLionel Sambuc                        (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))],
1398f4a2713aSLionel Sambuc                        IIC_SSE_MOV_LH>,
1399*0a6a1f1dSLionel Sambuc                      VEX_4V, Sched<[WriteFShuffle]>;
1400f4a2713aSLionel Sambuc  def VMOVHLPSrr : VPSI<0x12, MRMSrcReg, (outs VR128:$dst),
1401f4a2713aSLionel Sambuc                                       (ins VR128:$src1, VR128:$src2),
1402f4a2713aSLionel Sambuc                      "movhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1403f4a2713aSLionel Sambuc                      [(set VR128:$dst,
1404f4a2713aSLionel Sambuc                        (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))],
1405f4a2713aSLionel Sambuc                        IIC_SSE_MOV_LH>,
1406*0a6a1f1dSLionel Sambuc                      VEX_4V, Sched<[WriteFShuffle]>;
1407f4a2713aSLionel Sambuc}
// SSE register-register forms: same opcodes (0x16 / 0x12) and v4f32 patterns
// as the AVX definitions, but two-operand -- $src1 is tied to $dst through the
// Constraints string, so the assembly string only prints $src2 and $dst.
1408f4a2713aSLionel Sambuclet Constraints = "$src1 = $dst", AddedComplexity = 20 in {
1409f4a2713aSLionel Sambuc  def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst),
1410f4a2713aSLionel Sambuc                                       (ins VR128:$src1, VR128:$src2),
1411f4a2713aSLionel Sambuc                      "movlhps\t{$src2, $dst|$dst, $src2}",
1412f4a2713aSLionel Sambuc                      [(set VR128:$dst,
1413f4a2713aSLionel Sambuc                        (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))],
1414*0a6a1f1dSLionel Sambuc                        IIC_SSE_MOV_LH>, Sched<[WriteFShuffle]>;
1415f4a2713aSLionel Sambuc  def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst),
1416f4a2713aSLionel Sambuc                                       (ins VR128:$src1, VR128:$src2),
1417f4a2713aSLionel Sambuc                      "movhlps\t{$src2, $dst|$dst, $src2}",
1418f4a2713aSLionel Sambuc                      [(set VR128:$dst,
1419f4a2713aSLionel Sambuc                        (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))],
1420*0a6a1f1dSLionel Sambuc                        IIC_SSE_MOV_LH>, Sched<[WriteFShuffle]>;
1421f4a2713aSLionel Sambuc}
1422f4a2713aSLionel Sambuc
// The VMOVLHPSrr / VMOVHLPSrr definitions only carry v4f32 patterns; these
// extra UseAVX patterns select the same instructions for integer-typed
// vectors (v4i32 and v2i64 for MOVLHPS, v4i32 for MOVHLPS).
1423f4a2713aSLionel Sambuclet Predicates = [UseAVX] in {
1424f4a2713aSLionel Sambuc  // MOVLHPS patterns
1425f4a2713aSLionel Sambuc  def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)),
1426f4a2713aSLionel Sambuc            (VMOVLHPSrr VR128:$src1, VR128:$src2)>;
1427f4a2713aSLionel Sambuc  def : Pat<(v2i64 (X86Movlhps VR128:$src1, VR128:$src2)),
1428f4a2713aSLionel Sambuc            (VMOVLHPSrr (v2i64 VR128:$src1), VR128:$src2)>;
1429f4a2713aSLionel Sambuc
1430f4a2713aSLionel Sambuc  // MOVHLPS patterns
1431f4a2713aSLionel Sambuc  def : Pat<(v4i32 (X86Movhlps VR128:$src1, VR128:$src2)),
1432f4a2713aSLionel Sambuc            (VMOVHLPSrr VR128:$src1, VR128:$src2)>;
1433f4a2713aSLionel Sambuc}
1434f4a2713aSLionel Sambuc
// UseSSE1 counterparts of the integer-typed patterns: v4i32 / v2i64
// X86Movlhps and v4i32 X86Movhlps are selected as the SSE MOVLHPSrr /
// MOVHLPSrr instructions.
1435f4a2713aSLionel Sambuclet Predicates = [UseSSE1] in {
1436f4a2713aSLionel Sambuc  // MOVLHPS patterns
1437f4a2713aSLionel Sambuc  def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)),
1438f4a2713aSLionel Sambuc            (MOVLHPSrr VR128:$src1, VR128:$src2)>;
1439f4a2713aSLionel Sambuc  def : Pat<(v2i64 (X86Movlhps VR128:$src1, VR128:$src2)),
1440f4a2713aSLionel Sambuc            (MOVLHPSrr (v2i64 VR128:$src1), VR128:$src2)>;
1441f4a2713aSLionel Sambuc
1442f4a2713aSLionel Sambuc  // MOVHLPS patterns
1443f4a2713aSLionel Sambuc  def : Pat<(v4i32 (X86Movhlps VR128:$src1, VR128:$src2)),
1444f4a2713aSLionel Sambuc            (MOVHLPSrr VR128:$src1, VR128:$src2)>;
1445f4a2713aSLionel Sambuc}
1446f4a2713aSLionel Sambuc
1447f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===//
1448f4a2713aSLionel Sambuc// SSE 1 & 2 - Conversion Instructions
1449f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===//
1450f4a2713aSLionel Sambuc
// Itinerary/scheduling bundles for the conversion instructions. Each record
// is an OpndItins<rr, rm> pair; a preceding `let Sched = ...` overrides the
// scheduling class that OpndItins otherwise defaults to (WriteFAdd).

// SSE_CVT_PD has no `let Sched` override, so it keeps the OpndItins default.
// NOTE(review): confirm WriteFAdd is the intended class for packed-double
// conversions.
1451f4a2713aSLionel Sambucdef SSE_CVT_PD : OpndItins<
1452f4a2713aSLionel Sambuc  IIC_SSE_CVT_PD_RR, IIC_SSE_CVT_PD_RM
1453f4a2713aSLionel Sambuc>;
1454f4a2713aSLionel Sambuc
1455f4a2713aSLionel Sambuclet Sched = WriteCvtI2F in
1456f4a2713aSLionel Sambucdef SSE_CVT_PS : OpndItins<
1457f4a2713aSLionel Sambuc  IIC_SSE_CVT_PS_RR, IIC_SSE_CVT_PS_RM
1458f4a2713aSLionel Sambuc>;
1459f4a2713aSLionel Sambuc
1460f4a2713aSLionel Sambuclet Sched = WriteCvtI2F in
1461f4a2713aSLionel Sambucdef SSE_CVT_Scalar : OpndItins<
1462f4a2713aSLionel Sambuc  IIC_SSE_CVT_Scalar_RR, IIC_SSE_CVT_Scalar_RM
1463f4a2713aSLionel Sambuc>;
1464f4a2713aSLionel Sambuc
1465f4a2713aSLionel Sambuclet Sched = WriteCvtF2I in
1466f4a2713aSLionel Sambucdef SSE_CVT_SS2SI_32 : OpndItins<
1467f4a2713aSLionel Sambuc  IIC_SSE_CVT_SS2SI32_RR, IIC_SSE_CVT_SS2SI32_RM
1468f4a2713aSLionel Sambuc>;
1469f4a2713aSLionel Sambuc
1470f4a2713aSLionel Sambuclet Sched = WriteCvtF2I in
1471f4a2713aSLionel Sambucdef SSE_CVT_SS2SI_64 : OpndItins<
1472f4a2713aSLionel Sambuc  IIC_SSE_CVT_SS2SI64_RR, IIC_SSE_CVT_SS2SI64_RM
1473f4a2713aSLionel Sambuc>;
1474f4a2713aSLionel Sambuc
1475f4a2713aSLionel Sambuclet Sched = WriteCvtF2I in
1476f4a2713aSLionel Sambucdef SSE_CVT_SD2SI : OpndItins<
1477f4a2713aSLionel Sambuc  IIC_SSE_CVT_SD2SI_RR, IIC_SSE_CVT_SD2SI_RM
1478f4a2713aSLionel Sambuc>;
1479f4a2713aSLionel Sambuc
// Scalar conversion multiclass: instantiates a register form (rr) and a
// memory form (rm) of opcode `opc` that read SrcRC (or an x86memop loaded via
// ld_frag) and write DstRC.
//   OpNode - DAG node matched by the patterns of both forms.
//   asm    - complete assembly string, used unchanged by both forms.
//   itins  - supplies the rr/rm itineraries and the scheduling class
//            (itins.Sched for rr, itins.Sched.Folded for rm).
1480f4a2713aSLionel Sambucmulticlass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
1481f4a2713aSLionel Sambuc                     SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
1482f4a2713aSLionel Sambuc                     string asm, OpndItins itins> {
1483f4a2713aSLionel Sambuc  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
1484f4a2713aSLionel Sambuc                        [(set DstRC:$dst, (OpNode SrcRC:$src))],
1485f4a2713aSLionel Sambuc                        itins.rr>, Sched<[itins.Sched]>;
1486f4a2713aSLionel Sambuc  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
1487f4a2713aSLionel Sambuc                        [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))],
1488f4a2713aSLionel Sambuc                        itins.rm>, Sched<[itins.Sched.Folded]>;
1489f4a2713aSLionel Sambuc}
1490f4a2713aSLionel Sambuc
// Packed conversion multiclass: rr and rm forms of opcode `opc` with empty
// pattern lists (no selection patterns are attached here). Both forms are
// marked hasSideEffects = 0 and the memory form additionally mayLoad = 1,
// since there is no pattern from which those flags could be inferred.
//   d     - execution Domain passed through to the instruction class.
//   itins - supplies the rr/rm itineraries and the scheduling class.
1491f4a2713aSLionel Sambucmulticlass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
1492f4a2713aSLionel Sambuc                       X86MemOperand x86memop, string asm, Domain d,
1493f4a2713aSLionel Sambuc                       OpndItins itins> {
1494*0a6a1f1dSLionel Sambuclet hasSideEffects = 0 in {
1495f4a2713aSLionel Sambuc  def rr : I<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
1496f4a2713aSLionel Sambuc             [], itins.rr, d>, Sched<[itins.Sched]>;
1497f4a2713aSLionel Sambuc  let mayLoad = 1 in
1498f4a2713aSLionel Sambuc  def rm : I<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
1499f4a2713aSLionel Sambuc             [], itins.rm, d>, Sched<[itins.Sched.Folded]>;
1500f4a2713aSLionel Sambuc}
1501f4a2713aSLionel Sambuc}
1502f4a2713aSLionel Sambuc
// AVX three-operand conversion multiclass (UseAVX): both forms take an extra
// DstRC:$src1 input ahead of the converted source and append the operand list
// to `asm`. No selection patterns are attached (the pattern lists are empty),
// so hasSideEffects = 0 and, for rm, mayLoad = 1 are set explicitly.
// Scheduling: WriteCvtI2F for rr; WriteCvtI2FLd plus ReadAfterLd for rm.
1503f4a2713aSLionel Sambucmulticlass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
1504f4a2713aSLionel Sambuc                          X86MemOperand x86memop, string asm> {
1505*0a6a1f1dSLionel Sambuclet hasSideEffects = 0, Predicates = [UseAVX] in {
1506f4a2713aSLionel Sambuc  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
1507f4a2713aSLionel Sambuc              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
1508f4a2713aSLionel Sambuc           Sched<[WriteCvtI2F]>;
1509f4a2713aSLionel Sambuc  let mayLoad = 1 in
1510f4a2713aSLionel Sambuc  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
1511f4a2713aSLionel Sambuc              (ins DstRC:$src1, x86memop:$src),
1512f4a2713aSLionel Sambuc              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
1513f4a2713aSLionel Sambuc           Sched<[WriteCvtI2FLd, ReadAfterLd]>;
1514*0a6a1f1dSLionel Sambuc} // hasSideEffects = 0
1515f4a2713aSLionel Sambuc}
1516f4a2713aSLionel Sambuc
// AVX (UseAVX) fp_to_sint conversions built from sse12_cvt_s, opcode 0x2C:
// FR32 or FR64 source, GR32 or GR64 destination. The GR64 variants add VEX_W.
// The InstAlias records below add explicitly suffixed spellings ({l} for the
// GR32 forms, {q} for the GR64 forms) of the same instructions.
// NOTE(review): the trailing 0 on each InstAlias is presumably the "emit"
// flag, i.e. the alias is accepted but not used for printing -- confirm.
1517f4a2713aSLionel Sambuclet Predicates = [UseAVX] in {
1518f4a2713aSLionel Sambucdefm VCVTTSS2SI   : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
1519f4a2713aSLionel Sambuc                                "cvttss2si\t{$src, $dst|$dst, $src}",
1520f4a2713aSLionel Sambuc                                SSE_CVT_SS2SI_32>,
1521f4a2713aSLionel Sambuc                                XS, VEX, VEX_LIG;
1522f4a2713aSLionel Sambucdefm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
1523f4a2713aSLionel Sambuc                                "cvttss2si\t{$src, $dst|$dst, $src}",
1524f4a2713aSLionel Sambuc                                SSE_CVT_SS2SI_64>,
1525f4a2713aSLionel Sambuc                                XS, VEX, VEX_W, VEX_LIG;
1526f4a2713aSLionel Sambucdefm VCVTTSD2SI   : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
1527f4a2713aSLionel Sambuc                                "cvttsd2si\t{$src, $dst|$dst, $src}",
1528f4a2713aSLionel Sambuc                                SSE_CVT_SD2SI>,
1529f4a2713aSLionel Sambuc                                XD, VEX, VEX_LIG;
1530f4a2713aSLionel Sambucdefm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
1531f4a2713aSLionel Sambuc                                "cvttsd2si\t{$src, $dst|$dst, $src}",
1532f4a2713aSLionel Sambuc                                SSE_CVT_SD2SI>,
1533f4a2713aSLionel Sambuc                                XD, VEX, VEX_W, VEX_LIG;
1534f4a2713aSLionel Sambuc
1535f4a2713aSLionel Sambucdef : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}",
1536f4a2713aSLionel Sambuc                (VCVTTSS2SIrr GR32:$dst, FR32:$src), 0>;
1537f4a2713aSLionel Sambucdef : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}",
1538f4a2713aSLionel Sambuc                (VCVTTSS2SIrm GR32:$dst, f32mem:$src), 0>;
1539f4a2713aSLionel Sambucdef : InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}",
1540f4a2713aSLionel Sambuc                (VCVTTSD2SIrr GR32:$dst, FR64:$src), 0>;
1541f4a2713aSLionel Sambucdef : InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}",
1542f4a2713aSLionel Sambuc                (VCVTTSD2SIrm GR32:$dst, f64mem:$src), 0>;
1543f4a2713aSLionel Sambucdef : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}",
1544f4a2713aSLionel Sambuc                (VCVTTSS2SI64rr GR64:$dst, FR32:$src), 0>;
1545f4a2713aSLionel Sambucdef : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}",
1546f4a2713aSLionel Sambuc                (VCVTTSS2SI64rm GR64:$dst, f32mem:$src), 0>;
1547f4a2713aSLionel Sambucdef : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}",
1548f4a2713aSLionel Sambuc                (VCVTTSD2SI64rr GR64:$dst, FR64:$src), 0>;
1549f4a2713aSLionel Sambucdef : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}",
1550f4a2713aSLionel Sambuc                (VCVTTSD2SI64rm GR64:$dst, f64mem:$src), 0>;
1551f4a2713aSLionel Sambuc}
1552f4a2713aSLionel Sambuc// The assembler can recognize rr 64-bit instructions by seeing a rxx
1553f4a2713aSLionel Sambuc// register, but the same isn't true when only using memory operands,
1554f4a2713aSLionel Sambuc// provide other assembly "l" and "q" forms to address this explicitly
1555f4a2713aSLionel Sambuc// where appropriate to do so.
// These four instantiations of sse12_vcvt_avx (opcode 0x2A) cover a GR32 or
// GR64 source and an FR32 or FR64 destination; the GR64 variants add VEX_W and
// use the {q} suffix, the GR32 variants the {l} suffix. The multiclass itself
// applies the UseAVX predicate and attaches no selection patterns.
1556f4a2713aSLionel Sambucdefm VCVTSI2SS   : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss{l}">,
1557f4a2713aSLionel Sambuc                                  XS, VEX_4V, VEX_LIG;
1558f4a2713aSLionel Sambucdefm VCVTSI2SS64 : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss{q}">,
1559f4a2713aSLionel Sambuc                                  XS, VEX_4V, VEX_W, VEX_LIG;
1560f4a2713aSLionel Sambucdefm VCVTSI2SD   : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">,
1561f4a2713aSLionel Sambuc                                  XD, VEX_4V, VEX_LIG;
1562f4a2713aSLionel Sambucdefm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">,
1563f4a2713aSLionel Sambuc                                  XD, VEX_4V, VEX_W, VEX_LIG;
1564f4a2713aSLionel Sambuc
// UseAVX aliases and selection patterns for the VCVTSI2SS/VCVTSI2SD family.
1565f4a2713aSLionel Sambuclet Predicates = [UseAVX] in {
  // Unsuffixed memory-operand spellings map to the 32-bit (i32mem) forms.
  // NOTE(review): the VCVTSI2SSrm alias names FR64 operands although VCVTSI2SS
  // is instantiated with FR32 as its destination class -- confirm intended.
1566f4a2713aSLionel Sambuc  def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
1567*0a6a1f1dSLionel Sambuc                (VCVTSI2SSrm FR64:$dst, FR64:$src1, i32mem:$src), 0>;
1568f4a2713aSLionel Sambuc  def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
1569*0a6a1f1dSLionel Sambuc                (VCVTSI2SDrm FR64:$dst, FR64:$src1, i32mem:$src), 0>;
1570f4a2713aSLionel Sambuc
  // sint_to_fp of a loaded integer: select the rm forms. The instructions'
  // extra $src1 input is not supplied by the DAG, so IMPLICIT_DEF is used.
1571f4a2713aSLionel Sambuc  def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
1572f4a2713aSLionel Sambuc            (VCVTSI2SSrm (f32 (IMPLICIT_DEF)), addr:$src)>;
1573f4a2713aSLionel Sambuc  def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
1574f4a2713aSLionel Sambuc            (VCVTSI2SS64rm (f32 (IMPLICIT_DEF)), addr:$src)>;
1575f4a2713aSLionel Sambuc  def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
1576f4a2713aSLionel Sambuc            (VCVTSI2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>;
1577f4a2713aSLionel Sambuc  def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
1578f4a2713aSLionel Sambuc            (VCVTSI2SD64rm (f64 (IMPLICIT_DEF)), addr:$src)>;
1579f4a2713aSLionel Sambuc
  // sint_to_fp of a general-purpose register: select the rr forms, again
  // with IMPLICIT_DEF for $src1.
1580f4a2713aSLionel Sambuc  def : Pat<(f32 (sint_to_fp GR32:$src)),
1581f4a2713aSLionel Sambuc            (VCVTSI2SSrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
1582f4a2713aSLionel Sambuc  def : Pat<(f32 (sint_to_fp GR64:$src)),
1583f4a2713aSLionel Sambuc            (VCVTSI2SS64rr (f32 (IMPLICIT_DEF)), GR64:$src)>;
1584f4a2713aSLionel Sambuc  def : Pat<(f64 (sint_to_fp GR32:$src)),
1585f4a2713aSLionel Sambuc            (VCVTSI2SDrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
1586f4a2713aSLionel Sambuc  def : Pat<(f64 (sint_to_fp GR64:$src)),
1587f4a2713aSLionel Sambuc            (VCVTSI2SD64rr (f64 (IMPLICIT_DEF)), GR64:$src)>;
1588f4a2713aSLionel Sambuc}
1589f4a2713aSLionel Sambuc
// Non-VEX scalar conversions built from sse12_cvt_s.
//   CVTT*   (opcode 0x2C): fp_to_sint from FR32/FR64 to GR32/GR64.
//   CVTSI2* (opcode 0x2A): sint_to_fp from GR32/GR64 to FR32/FR64, with an
//                          {l} or {q} suffix in the assembly string.
// The variants involving a 64-bit general-purpose register add REX_W.
1590f4a2713aSLionel Sambucdefm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
1591f4a2713aSLionel Sambuc                      "cvttss2si\t{$src, $dst|$dst, $src}",
1592f4a2713aSLionel Sambuc                      SSE_CVT_SS2SI_32>, XS;
1593f4a2713aSLionel Sambucdefm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
1594f4a2713aSLionel Sambuc                      "cvttss2si\t{$src, $dst|$dst, $src}",
1595f4a2713aSLionel Sambuc                      SSE_CVT_SS2SI_64>, XS, REX_W;
1596f4a2713aSLionel Sambucdefm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
1597f4a2713aSLionel Sambuc                      "cvttsd2si\t{$src, $dst|$dst, $src}",
1598f4a2713aSLionel Sambuc                      SSE_CVT_SD2SI>, XD;
1599f4a2713aSLionel Sambucdefm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
1600f4a2713aSLionel Sambuc                      "cvttsd2si\t{$src, $dst|$dst, $src}",
1601f4a2713aSLionel Sambuc                      SSE_CVT_SD2SI>, XD, REX_W;
1602f4a2713aSLionel Sambucdefm CVTSI2SS  : sse12_cvt_s<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32,
1603f4a2713aSLionel Sambuc                      "cvtsi2ss{l}\t{$src, $dst|$dst, $src}",
1604f4a2713aSLionel Sambuc                      SSE_CVT_Scalar>, XS;
1605f4a2713aSLionel Sambucdefm CVTSI2SS64 : sse12_cvt_s<0x2A, GR64, FR32, sint_to_fp, i64mem, loadi64,
1606f4a2713aSLionel Sambuc                      "cvtsi2ss{q}\t{$src, $dst|$dst, $src}",
1607f4a2713aSLionel Sambuc                      SSE_CVT_Scalar>, XS, REX_W;
1608f4a2713aSLionel Sambucdefm CVTSI2SD  : sse12_cvt_s<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32,
1609f4a2713aSLionel Sambuc                      "cvtsi2sd{l}\t{$src, $dst|$dst, $src}",
1610f4a2713aSLionel Sambuc                      SSE_CVT_Scalar>, XD;
1611f4a2713aSLionel Sambucdefm CVTSI2SD64 : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64,
1612f4a2713aSLionel Sambuc                      "cvtsi2sd{q}\t{$src, $dst|$dst, $src}",
1613f4a2713aSLionel Sambuc                      SSE_CVT_Scalar>, XD, REX_W;
1614f4a2713aSLionel Sambuc
// Explicitly suffixed spellings of the non-VEX truncating conversions:
// {l} selects the GR32-destination forms, {q} the GR64-destination forms.
1615f4a2713aSLionel Sambucdef : InstAlias<"cvttss2si{l}\t{$src, $dst|$dst, $src}",
1616f4a2713aSLionel Sambuc                (CVTTSS2SIrr GR32:$dst, FR32:$src), 0>;
1617f4a2713aSLionel Sambucdef : InstAlias<"cvttss2si{l}\t{$src, $dst|$dst, $src}",
1618f4a2713aSLionel Sambuc                (CVTTSS2SIrm GR32:$dst, f32mem:$src), 0>;
1619f4a2713aSLionel Sambucdef : InstAlias<"cvttsd2si{l}\t{$src, $dst|$dst, $src}",
1620f4a2713aSLionel Sambuc                (CVTTSD2SIrr GR32:$dst, FR64:$src), 0>;
1621f4a2713aSLionel Sambucdef : InstAlias<"cvttsd2si{l}\t{$src, $dst|$dst, $src}",
1622f4a2713aSLionel Sambuc                (CVTTSD2SIrm GR32:$dst, f64mem:$src), 0>;
1623f4a2713aSLionel Sambucdef : InstAlias<"cvttss2si{q}\t{$src, $dst|$dst, $src}",
1624f4a2713aSLionel Sambuc                (CVTTSS2SI64rr GR64:$dst, FR32:$src), 0>;
1625f4a2713aSLionel Sambucdef : InstAlias<"cvttss2si{q}\t{$src, $dst|$dst, $src}",
1626f4a2713aSLionel Sambuc                (CVTTSS2SI64rm GR64:$dst, f32mem:$src), 0>;
1627f4a2713aSLionel Sambucdef : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}",
1628f4a2713aSLionel Sambuc                (CVTTSD2SI64rr GR64:$dst, FR64:$src), 0>;
1629f4a2713aSLionel Sambucdef : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}",
1630f4a2713aSLionel Sambuc                (CVTTSD2SI64rm GR64:$dst, f64mem:$src), 0>;
1631f4a2713aSLionel Sambuc
// Unsuffixed cvtsi2ss / cvtsi2sd with a memory operand map to the 32-bit
// (i32mem) forms.
// NOTE(review): the CVTSI2SSrm alias names an FR64 destination although
// CVTSI2SS is instantiated with FR32 as its destination class -- confirm.
1632f4a2713aSLionel Sambucdef : InstAlias<"cvtsi2ss\t{$src, $dst|$dst, $src}",
1633*0a6a1f1dSLionel Sambuc                (CVTSI2SSrm FR64:$dst, i32mem:$src), 0>;
1634f4a2713aSLionel Sambucdef : InstAlias<"cvtsi2sd\t{$src, $dst|$dst, $src}",
1635*0a6a1f1dSLionel Sambuc                (CVTSI2SDrm FR64:$dst, i32mem:$src), 0>;
1636f4a2713aSLionel Sambuc
1637f4a2713aSLionel Sambuc// Conversion Instructions Intrinsics - Match intrinsics which expect MM
1638f4a2713aSLionel Sambuc// and/or XMM operand(s).
1639f4a2713aSLionel Sambuc
// Intrinsic-matching conversion multiclass: rr and rm forms of opcode `opc`
// whose patterns apply the intrinsic `Int` to the source operand.
//   memop    - operand type used for the memory form's $src.
//   mem_cpat - ComplexPattern that matches the memory source in the rm pattern.
//   asm      - mnemonic only; the "$src, $dst" operand list is appended here.
//   itins    - supplies the rr/rm itineraries and the scheduling class
//              (itins.Sched for rr, itins.Sched.Folded for rm).
1640f4a2713aSLionel Sambucmulticlass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
1641f4a2713aSLionel Sambuc                         Intrinsic Int, Operand memop, ComplexPattern mem_cpat,
1642f4a2713aSLionel Sambuc                         string asm, OpndItins itins> {
1643f4a2713aSLionel Sambuc  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
1644f4a2713aSLionel Sambuc              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
1645f4a2713aSLionel Sambuc              [(set DstRC:$dst, (Int SrcRC:$src))], itins.rr>,
1646f4a2713aSLionel Sambuc           Sched<[itins.Sched]>;
1647f4a2713aSLionel Sambuc  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src),
1648f4a2713aSLionel Sambuc              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
1649f4a2713aSLionel Sambuc              [(set DstRC:$dst, (Int mem_cpat:$src))], itins.rm>,
1650f4a2713aSLionel Sambuc           Sched<[itins.Sched.Folded]>;
1651f4a2713aSLionel Sambuc}
1652f4a2713aSLionel Sambuc
// Intrinsic-matching conversion multiclass for two-input intrinsics: the
// pattern applies `Int` to DstRC:$src1 and the converted source $src2 (a
// SrcRC register for rr, a value loaded through ld_frag for rm).
//   Is2Addr - selects the assembly string: 1 (default) prints only
//             "$src2, $dst"; 0 prints the three-operand
//             "$src2, $src1, $dst" form.
//   itins   - supplies the rr/rm itineraries and the scheduling class; the
//             rm form also lists ReadAfterLd.
// This multiclass does not tie $src1 to $dst itself; instantiations that need
// the tie add the constraint at the use site.
1653f4a2713aSLionel Sambucmulticlass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
1654f4a2713aSLionel Sambuc                    RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop,
1655f4a2713aSLionel Sambuc                    PatFrag ld_frag, string asm, OpndItins itins,
1656f4a2713aSLionel Sambuc                    bit Is2Addr = 1> {
1657f4a2713aSLionel Sambuc  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src2),
1658f4a2713aSLionel Sambuc              !if(Is2Addr,
1659f4a2713aSLionel Sambuc                  !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
1660f4a2713aSLionel Sambuc                  !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
1661f4a2713aSLionel Sambuc              [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))],
1662f4a2713aSLionel Sambuc              itins.rr>, Sched<[itins.Sched]>;
1663f4a2713aSLionel Sambuc  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
1664f4a2713aSLionel Sambuc              (ins DstRC:$src1, x86memop:$src2),
1665f4a2713aSLionel Sambuc              !if(Is2Addr,
1666f4a2713aSLionel Sambuc                  !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
1667f4a2713aSLionel Sambuc                  !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
1668f4a2713aSLionel Sambuc              [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))],
1669f4a2713aSLionel Sambuc              itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
1670f4a2713aSLionel Sambuc}
1671f4a2713aSLionel Sambuc
// cvtsd2si (opcode 0x2D) matched from the int_x86_sse2_cvtsd2si /
// int_x86_sse2_cvtsd2si64 intrinsics, with a VR128 source and a GR32 or GR64
// destination. The AVX forms are guarded by UseAVX and VEX-encoded (VEX_W for
// the GR64 form); the SSE forms follow, using REX_W for the GR64 form.
1672f4a2713aSLionel Sambuclet Predicates = [UseAVX] in {
1673f4a2713aSLionel Sambucdefm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32,
1674f4a2713aSLionel Sambuc                  int_x86_sse2_cvtsd2si, sdmem, sse_load_f64, "cvtsd2si",
1675f4a2713aSLionel Sambuc                  SSE_CVT_SD2SI>, XD, VEX, VEX_LIG;
1676f4a2713aSLionel Sambucdefm VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
1677f4a2713aSLionel Sambuc                    int_x86_sse2_cvtsd2si64, sdmem, sse_load_f64, "cvtsd2si",
1678f4a2713aSLionel Sambuc                    SSE_CVT_SD2SI>, XD, VEX, VEX_W, VEX_LIG;
1679f4a2713aSLionel Sambuc}
1680f4a2713aSLionel Sambucdefm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
1681f4a2713aSLionel Sambuc                 sdmem, sse_load_f64, "cvtsd2si", SSE_CVT_SD2SI>, XD;
1682f4a2713aSLionel Sambucdefm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64,
1683f4a2713aSLionel Sambuc                   sdmem, sse_load_f64, "cvtsd2si", SSE_CVT_SD2SI>, XD, REX_W;
1684f4a2713aSLionel Sambuc
1685f4a2713aSLionel Sambuc
// Intrinsic (VR128-operand) forms of cvtsi2ss / cvtsi2sd, opcode 0x2A, marked
// isCodeGenOnly. The AVX variants pass Is2Addr = 0 to get the three-operand
// assembly string; the SSE variants keep the default Is2Addr = 1 and tie
// $src1 to $dst.
1686*0a6a1f1dSLionel Sambuclet isCodeGenOnly = 1 in {
1687f4a2713aSLionel Sambuc  let Predicates = [UseAVX] in {
1688f4a2713aSLionel Sambuc  defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
1689f4a2713aSLionel Sambuc            int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss{l}",
1690f4a2713aSLionel Sambuc            SSE_CVT_Scalar, 0>, XS, VEX_4V;
1691f4a2713aSLionel Sambuc  defm Int_VCVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
1692f4a2713aSLionel Sambuc            int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss{q}",
1693f4a2713aSLionel Sambuc            SSE_CVT_Scalar, 0>, XS, VEX_4V,
1694f4a2713aSLionel Sambuc            VEX_W;
1695f4a2713aSLionel Sambuc  defm Int_VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
1696f4a2713aSLionel Sambuc            int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd{l}",
1697f4a2713aSLionel Sambuc            SSE_CVT_Scalar, 0>, XD, VEX_4V;
1698f4a2713aSLionel Sambuc  defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
1699f4a2713aSLionel Sambuc            int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd{q}",
1700f4a2713aSLionel Sambuc            SSE_CVT_Scalar, 0>, XD,
1701f4a2713aSLionel Sambuc            VEX_4V, VEX_W;
1702f4a2713aSLionel Sambuc  }
1703f4a2713aSLionel Sambuc  let Constraints = "$src1 = $dst" in {
1704f4a2713aSLionel Sambuc    defm Int_CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
1705f4a2713aSLionel Sambuc                          int_x86_sse_cvtsi2ss, i32mem, loadi32,
1706f4a2713aSLionel Sambuc                          "cvtsi2ss{l}", SSE_CVT_Scalar>, XS;
1707f4a2713aSLionel Sambuc    defm Int_CVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
1708f4a2713aSLionel Sambuc                          int_x86_sse_cvtsi642ss, i64mem, loadi64,
1709f4a2713aSLionel Sambuc                          "cvtsi2ss{q}", SSE_CVT_Scalar>, XS, REX_W;
1710f4a2713aSLionel Sambuc    defm Int_CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
1711f4a2713aSLionel Sambuc                          int_x86_sse2_cvtsi2sd, i32mem, loadi32,
1712f4a2713aSLionel Sambuc                          "cvtsi2sd{l}", SSE_CVT_Scalar>, XD;
1713f4a2713aSLionel Sambuc    defm Int_CVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
1714f4a2713aSLionel Sambuc                          int_x86_sse2_cvtsi642sd, i64mem, loadi64,
1715f4a2713aSLionel Sambuc                          "cvtsi2sd{q}", SSE_CVT_Scalar>, XD, REX_W;
1716f4a2713aSLionel Sambuc  }
1717*0a6a1f1dSLionel Sambuc} // isCodeGenOnly = 1
1718f4a2713aSLionel Sambuc
1719f4a2713aSLionel Sambuc/// SSE 1 Only
1720f4a2713aSLionel Sambuc
1721f4a2713aSLionel Sambuc// Aliases for intrinsics
// Intrinsic (VR128-source) forms of cvttss2si / cvttsd2si, opcode 0x2C,
// marked isCodeGenOnly. Each is matched from the corresponding
// int_x86_sse*_cvtt* intrinsic; the AVX variants are guarded by UseAVX and
// VEX-encoded, and the GR64-destination variants add VEX_W or REX_W.
// NOTE(review): the "SSE 1 Only" header preceding this scope does not match
// its contents, which also instantiate int_x86_sse2_* intrinsics.
1722*0a6a1f1dSLionel Sambuclet isCodeGenOnly = 1 in {
1723f4a2713aSLionel Sambuclet Predicates = [UseAVX] in {
1724f4a2713aSLionel Sambucdefm Int_VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
1725f4a2713aSLionel Sambuc                                    ssmem, sse_load_f32, "cvttss2si",
1726f4a2713aSLionel Sambuc                                    SSE_CVT_SS2SI_32>, XS, VEX;
1727f4a2713aSLionel Sambucdefm Int_VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
1728f4a2713aSLionel Sambuc                                   int_x86_sse_cvttss2si64, ssmem, sse_load_f32,
1729f4a2713aSLionel Sambuc                                   "cvttss2si", SSE_CVT_SS2SI_64>,
1730f4a2713aSLionel Sambuc                                   XS, VEX, VEX_W;
1731f4a2713aSLionel Sambucdefm Int_VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
1732f4a2713aSLionel Sambuc                                    sdmem, sse_load_f64, "cvttsd2si",
1733f4a2713aSLionel Sambuc                                    SSE_CVT_SD2SI>, XD, VEX;
1734f4a2713aSLionel Sambucdefm Int_VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
1735f4a2713aSLionel Sambuc                                  int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64,
1736f4a2713aSLionel Sambuc                                  "cvttsd2si", SSE_CVT_SD2SI>,
1737f4a2713aSLionel Sambuc                                  XD, VEX, VEX_W;
1738f4a2713aSLionel Sambuc}
1739f4a2713aSLionel Sambucdefm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
1740f4a2713aSLionel Sambuc                                    ssmem, sse_load_f32, "cvttss2si",
1741f4a2713aSLionel Sambuc                                    SSE_CVT_SS2SI_32>, XS;
1742f4a2713aSLionel Sambucdefm Int_CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
1743f4a2713aSLionel Sambuc                                   int_x86_sse_cvttss2si64, ssmem, sse_load_f32,
1744f4a2713aSLionel Sambuc                                   "cvttss2si", SSE_CVT_SS2SI_64>, XS, REX_W;
1745f4a2713aSLionel Sambucdefm Int_CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
1746f4a2713aSLionel Sambuc                                    sdmem, sse_load_f64, "cvttsd2si",
1747f4a2713aSLionel Sambuc                                    SSE_CVT_SD2SI>, XD;
1748f4a2713aSLionel Sambucdefm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
1749f4a2713aSLionel Sambuc                                  int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64,
1750f4a2713aSLionel Sambuc                                  "cvttsd2si", SSE_CVT_SD2SI>, XD, REX_W;
1751*0a6a1f1dSLionel Sambuc} // isCodeGenOnly = 1
1752f4a2713aSLionel Sambuc
// cvtss2si (opcode 0x2D) matched from the int_x86_sse_cvtss2si /
// int_x86_sse_cvtss2si64 intrinsics, with a VR128 source and a GR32 or GR64
// destination. The AVX forms are guarded by UseAVX and VEX-encoded (VEX_W for
// the GR64 form); the SSE forms follow, using REX_W for the GR64 form.
1753f4a2713aSLionel Sambuclet Predicates = [UseAVX] in {
1754f4a2713aSLionel Sambucdefm VCVTSS2SI   : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
1755f4a2713aSLionel Sambuc                                  ssmem, sse_load_f32, "cvtss2si",
1756f4a2713aSLionel Sambuc                                  SSE_CVT_SS2SI_32>, XS, VEX, VEX_LIG;
1757f4a2713aSLionel Sambucdefm VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64,
1758f4a2713aSLionel Sambuc                                  ssmem, sse_load_f32, "cvtss2si",
1759f4a2713aSLionel Sambuc                                  SSE_CVT_SS2SI_64>, XS, VEX, VEX_W, VEX_LIG;
1760f4a2713aSLionel Sambuc}
1761f4a2713aSLionel Sambucdefm CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
1762f4a2713aSLionel Sambuc                               ssmem, sse_load_f32, "cvtss2si",
1763f4a2713aSLionel Sambuc                               SSE_CVT_SS2SI_32>, XS;
1764f4a2713aSLionel Sambucdefm CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64,
1765f4a2713aSLionel Sambuc                                 ssmem, sse_load_f32, "cvtss2si",
1766f4a2713aSLionel Sambuc                                 SSE_CVT_SS2SI_64>, XS, REX_W;
1767f4a2713aSLionel Sambuc
// cvtdq2ps (opcode 0x5B, SSEPackedSingle domain) built from sse12_cvt_p, which
// attaches no selection patterns. Three instantiations: 128-bit AVX (HasAVX),
// 256-bit AVX (VR256 / i256mem, VEX_L, HasAVX) and 128-bit SSE (UseSSE2).
// The AVX assembly strings spell the "v" prefix out explicitly.
1768f4a2713aSLionel Sambucdefm VCVTDQ2PS   : sse12_cvt_p<0x5B, VR128, VR128, i128mem,
1769f4a2713aSLionel Sambuc                               "vcvtdq2ps\t{$src, $dst|$dst, $src}",
1770f4a2713aSLionel Sambuc                               SSEPackedSingle, SSE_CVT_PS>,
1771*0a6a1f1dSLionel Sambuc                               PS, VEX, Requires<[HasAVX]>;
1772f4a2713aSLionel Sambucdefm VCVTDQ2PSY  : sse12_cvt_p<0x5B, VR256, VR256, i256mem,
1773f4a2713aSLionel Sambuc                               "vcvtdq2ps\t{$src, $dst|$dst, $src}",
1774f4a2713aSLionel Sambuc                               SSEPackedSingle, SSE_CVT_PS>,
1775*0a6a1f1dSLionel Sambuc                               PS, VEX, VEX_L, Requires<[HasAVX]>;
1776f4a2713aSLionel Sambuc
1777f4a2713aSLionel Sambucdefm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, i128mem,
1778f4a2713aSLionel Sambuc                            "cvtdq2ps\t{$src, $dst|$dst, $src}",
1779f4a2713aSLionel Sambuc                            SSEPackedSingle, SSE_CVT_PS>,
1780*0a6a1f1dSLionel Sambuc                            PS, Requires<[UseSSE2]>;
1781f4a2713aSLionel Sambuc
// Assembler aliases for the VEX scalar float->int conversions, gated on
// UseAVX. Each alias spells the mnemonic with an optional {l} (GR32 result) or
// {q} (GR64 result) suffix and maps it onto the matching rr/rm record.
// Every alias passes 0 as the third InstAlias argument.
// NOTE(review): that argument is presumably the Emit flag (alias accepted on
// input, not used for printing) - confirm against Target.td.
1782f4a2713aSLionel Sambuclet Predicates = [UseAVX] in {
1783f4a2713aSLionel Sambucdef : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}",
1784f4a2713aSLionel Sambuc                (VCVTSS2SIrr GR32:$dst, VR128:$src), 0>;
1785f4a2713aSLionel Sambucdef : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}",
1786f4a2713aSLionel Sambuc                (VCVTSS2SIrm GR32:$dst, ssmem:$src), 0>;
1787f4a2713aSLionel Sambucdef : InstAlias<"vcvtsd2si{l}\t{$src, $dst|$dst, $src}",
1788f4a2713aSLionel Sambuc                (VCVTSD2SIrr GR32:$dst, VR128:$src), 0>;
1789f4a2713aSLionel Sambucdef : InstAlias<"vcvtsd2si{l}\t{$src, $dst|$dst, $src}",
1790f4a2713aSLionel Sambuc                (VCVTSD2SIrm GR32:$dst, sdmem:$src), 0>;
1791f4a2713aSLionel Sambucdef : InstAlias<"vcvtss2si{q}\t{$src, $dst|$dst, $src}",
1792f4a2713aSLionel Sambuc                (VCVTSS2SI64rr GR64:$dst, VR128:$src), 0>;
1793f4a2713aSLionel Sambucdef : InstAlias<"vcvtss2si{q}\t{$src, $dst|$dst, $src}",
1794f4a2713aSLionel Sambuc                (VCVTSS2SI64rm GR64:$dst, ssmem:$src), 0>;
1795f4a2713aSLionel Sambucdef : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}",
1796f4a2713aSLionel Sambuc                (VCVTSD2SI64rr GR64:$dst, VR128:$src), 0>;
1797f4a2713aSLionel Sambucdef : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}",
1798f4a2713aSLionel Sambuc                (VCVTSD2SI64rm GR64:$dst, sdmem:$src), 0>;
1799f4a2713aSLionel Sambuc}
1800f4a2713aSLionel Sambuc
// Assembler aliases for the non-VEX scalar float->int conversions. Same layout
// as the VEX group above but with no predicate wrapper: {l} maps to the GR32
// records, {q} to the GR64 records, each in rr and rm form, and each alias
// passes 0 as the third InstAlias argument.
1801f4a2713aSLionel Sambucdef : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}",
1802f4a2713aSLionel Sambuc                (CVTSS2SIrr GR32:$dst, VR128:$src), 0>;
1803f4a2713aSLionel Sambucdef : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}",
1804f4a2713aSLionel Sambuc                (CVTSS2SIrm GR32:$dst, ssmem:$src), 0>;
1805f4a2713aSLionel Sambucdef : InstAlias<"cvtsd2si{l}\t{$src, $dst|$dst, $src}",
1806f4a2713aSLionel Sambuc                (CVTSD2SIrr GR32:$dst, VR128:$src), 0>;
1807f4a2713aSLionel Sambucdef : InstAlias<"cvtsd2si{l}\t{$src, $dst|$dst, $src}",
1808f4a2713aSLionel Sambuc                (CVTSD2SIrm GR32:$dst, sdmem:$src), 0>;
1809f4a2713aSLionel Sambucdef : InstAlias<"cvtss2si{q}\t{$src, $dst|$dst, $src}",
1810f4a2713aSLionel Sambuc                (CVTSS2SI64rr GR64:$dst, VR128:$src), 0>;
1811f4a2713aSLionel Sambucdef : InstAlias<"cvtss2si{q}\t{$src, $dst|$dst, $src}",
1812f4a2713aSLionel Sambuc                (CVTSS2SI64rm GR64:$dst, ssmem:$src), 0>;
1813f4a2713aSLionel Sambucdef : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
1814f4a2713aSLionel Sambuc                (CVTSD2SI64rr GR64:$dst, VR128:$src), 0>;
// {q}-suffixed spelling of the 64-bit memory form of cvtsd2si. The third
// InstAlias argument (Emit) is 0, matching every other suffixed
// cvtss2si/cvtsd2si alias in this file: the alias is accepted by the assembler
// but is not selected when printing, so CVTSD2SI64rm keeps printing with its
// own unsuffixed mnemonic like its rr sibling does.
def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
                (CVTSD2SI64rm GR64:$dst, sdmem:$src), 0>;
1817f4a2713aSLionel Sambuc
1818f4a2713aSLionel Sambuc/// SSE 2 Only
1819f4a2713aSLionel Sambuc
1820f4a2713aSLionel Sambuc// Convert scalar double to scalar single
// VEX scalar double->single conversion (opcode 0x5A), FR64 inputs, FR32 result.
// Both records have empty pattern lists; the register form is selected through
// the explicit `fround` Pat below, which passes $src for both source operands.
// hasSideEffects is cleared explicitly because there is no pattern to infer it
// from.
1821*0a6a1f1dSLionel Sambuclet hasSideEffects = 0, Predicates = [UseAVX] in {
// NOTE(review): the asm string here has no leading 'v'; presumably the VSDI
// class prepends it and supplies the XD prefix - confirm in the format file.
1822f4a2713aSLionel Sambucdef VCVTSD2SSrr  : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
1823f4a2713aSLionel Sambuc                       (ins FR64:$src1, FR64:$src2),
1824f4a2713aSLionel Sambuc                      "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
1825f4a2713aSLionel Sambuc                      IIC_SSE_CVT_Scalar_RR>, VEX_4V, VEX_LIG,
1826f4a2713aSLionel Sambuc                      Sched<[WriteCvtF2F]>;
// Memory form: built on plain `I` with an explicit XD prefix and its own
// Requires<[HasAVX, OptForSize]> list; mayLoad is set by hand (no pattern).
1827f4a2713aSLionel Sambuclet mayLoad = 1 in
1828f4a2713aSLionel Sambucdef VCVTSD2SSrm  : I<0x5A, MRMSrcMem, (outs FR32:$dst),
1829f4a2713aSLionel Sambuc                       (ins FR64:$src1, f64mem:$src2),
1830f4a2713aSLionel Sambuc                      "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1831f4a2713aSLionel Sambuc                      [], IIC_SSE_CVT_Scalar_RM>,
1832f4a2713aSLionel Sambuc                      XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG,
1833f4a2713aSLionel Sambuc                      Sched<[WriteCvtF2FLd, ReadAfterLd]>;
1834f4a2713aSLionel Sambuc}
1835f4a2713aSLionel Sambuc
// f64 -> f32 rounding under AVX: the same register feeds both $src1 and $src2.
1836f4a2713aSLionel Sambucdef : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>,
1837f4a2713aSLionel Sambuc          Requires<[UseAVX]>;
1838f4a2713aSLionel Sambuc
// Non-VEX scalar double->single conversion (opcode 0x5A), FR64 -> FR32.
// The register form matches `fround` directly. The memory form matches
// `fround` of a folded f64 load and is restricted to UseSSE2 + OptForSize.
1839f4a2713aSLionel Sambucdef CVTSD2SSrr  : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
1840f4a2713aSLionel Sambuc                      "cvtsd2ss\t{$src, $dst|$dst, $src}",
1841f4a2713aSLionel Sambuc                      [(set FR32:$dst, (fround FR64:$src))],
1842f4a2713aSLionel Sambuc                      IIC_SSE_CVT_Scalar_RR>, Sched<[WriteCvtF2F]>;
1843f4a2713aSLionel Sambucdef CVTSD2SSrm  : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
1844f4a2713aSLionel Sambuc                      "cvtsd2ss\t{$src, $dst|$dst, $src}",
1845f4a2713aSLionel Sambuc                      [(set FR32:$dst, (fround (loadf64 addr:$src)))],
1846f4a2713aSLionel Sambuc                      IIC_SSE_CVT_Scalar_RM>,
1847f4a2713aSLionel Sambuc                      XD,
1848f4a2713aSLionel Sambuc                  Requires<[UseSSE2, OptForSize]>, Sched<[WriteCvtF2FLd]>;
1849f4a2713aSLionel Sambuc
// Intrinsic forms of cvtsd2ss (int_x86_sse2_cvtsd2ss) operating on whole
// VR128 values, opcode 0x5A with the XD prefix. All four records are marked
// isCodeGenOnly. The rr forms take $src2 in a register (MRMSrcReg); the rm
// forms take $src2 as an sdmem memory operand and therefore use MRMSrcMem,
// matching the parallel Int_VCVTSS2SDrm / Int_CVTSS2SDrm definitions.
let isCodeGenOnly = 1 in {
// VEX three-operand forms, gated on UseAVX.
def Int_VCVTSD2SSrr: I<0x5A, MRMSrcReg,
                       (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                       "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set VR128:$dst,
                         (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))],
                       IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[UseAVX]>,
                       Sched<[WriteCvtF2F]>;
// $src2 is memory here, so the ModRM r/m field addresses memory: MRMSrcMem.
def Int_VCVTSD2SSrm: I<0x5A, MRMSrcMem,
                       (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
                       "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set VR128:$dst, (int_x86_sse2_cvtsd2ss
                                          VR128:$src1, sse_load_f64:$src2))],
                       IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, Requires<[UseAVX]>,
                       Sched<[WriteCvtF2FLd, ReadAfterLd]>;

// Non-VEX two-operand forms, gated on UseSSE2: $src1 is tied to $dst and is
// not printed in the asm string.
let Constraints = "$src1 = $dst" in {
def Int_CVTSD2SSrr: I<0x5A, MRMSrcReg,
                       (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                       "cvtsd2ss\t{$src2, $dst|$dst, $src2}",
                       [(set VR128:$dst,
                         (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))],
                       IIC_SSE_CVT_Scalar_RR>, XD, Requires<[UseSSE2]>,
                       Sched<[WriteCvtF2F]>;
// $src2 is memory here, so the ModRM r/m field addresses memory: MRMSrcMem.
def Int_CVTSD2SSrm: I<0x5A, MRMSrcMem,
                       (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
                       "cvtsd2ss\t{$src2, $dst|$dst, $src2}",
                       [(set VR128:$dst, (int_x86_sse2_cvtsd2ss
                                          VR128:$src1, sse_load_f64:$src2))],
                       IIC_SSE_CVT_Scalar_RM>, XD, Requires<[UseSSE2]>,
                       Sched<[WriteCvtF2FLd, ReadAfterLd]>;
}
} // isCodeGenOnly = 1
1883f4a2713aSLionel Sambuc
1884f4a2713aSLionel Sambuc// Convert scalar single to scalar double
1885f4a2713aSLionel Sambuc// SSE2 instructions with XS prefix
// VEX scalar single->double conversion (opcode 0x5A, XS prefix), FR32 inputs,
// FR64 result. Both records have empty pattern lists, so hasSideEffects and
// mayLoad are set by hand and selection is done by the Pats that follow.
1886*0a6a1f1dSLionel Sambuclet hasSideEffects = 0, Predicates = [UseAVX] in {
1887f4a2713aSLionel Sambucdef VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
1888f4a2713aSLionel Sambuc                    (ins FR32:$src1, FR32:$src2),
1889f4a2713aSLionel Sambuc                    "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1890f4a2713aSLionel Sambuc                    [], IIC_SSE_CVT_Scalar_RR>,
1891f4a2713aSLionel Sambuc                    XS, Requires<[HasAVX]>, VEX_4V, VEX_LIG,
1892f4a2713aSLionel Sambuc                    Sched<[WriteCvtF2F]>;
1893f4a2713aSLionel Sambuclet mayLoad = 1 in
1894f4a2713aSLionel Sambucdef VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
1895f4a2713aSLionel Sambuc                    (ins FR32:$src1, f32mem:$src2),
1896f4a2713aSLionel Sambuc                    "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1897f4a2713aSLionel Sambuc                    [], IIC_SSE_CVT_Scalar_RM>,
1898f4a2713aSLionel Sambuc                    XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>,
1899f4a2713aSLionel Sambuc                    Sched<[WriteCvtF2FLd, ReadAfterLd]>;
1900f4a2713aSLionel Sambuc}
1901f4a2713aSLionel Sambuc
// f32 -> f64 extension of a register: $src feeds both source operands.
1902f4a2713aSLionel Sambucdef : Pat<(f64 (fextend FR32:$src)),
1903f4a2713aSLionel Sambuc    (VCVTSS2SDrr FR32:$src, FR32:$src)>, Requires<[UseAVX]>;
// fextend of an explicit f32 load: fold the load, $src1 is IMPLICIT_DEF.
1904f4a2713aSLionel Sambucdef : Pat<(fextend (loadf32 addr:$src)),
1905f4a2713aSLionel Sambuc    (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[UseAVX]>;
1906f4a2713aSLionel Sambuc
// extloadf32 is matched two ways depending on the optimization goal:
// OptForSize folds the load into VCVTSS2SDrm; OptForSpeed loads with VMOVSSrm
// first and converts register-to-register.
1907f4a2713aSLionel Sambucdef : Pat<(extloadf32 addr:$src),
1908f4a2713aSLionel Sambuc    (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>,
1909f4a2713aSLionel Sambuc    Requires<[UseAVX, OptForSize]>;
1910f4a2713aSLionel Sambucdef : Pat<(extloadf32 addr:$src),
1911f4a2713aSLionel Sambuc    (VCVTSS2SDrr (f32 (IMPLICIT_DEF)), (VMOVSSrm addr:$src))>,
1912f4a2713aSLionel Sambuc    Requires<[UseAVX, OptForSpeed]>;
1913f4a2713aSLionel Sambuc
// Non-VEX scalar single->double conversion (opcode 0x5A, XS prefix),
// FR32 -> FR64. The register form matches `fextend` under UseSSE2; the memory
// form matches `extloadf32` and is restricted to UseSSE2 + OptForSize.
1914f4a2713aSLionel Sambucdef CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
1915f4a2713aSLionel Sambuc                   "cvtss2sd\t{$src, $dst|$dst, $src}",
1916f4a2713aSLionel Sambuc                   [(set FR64:$dst, (fextend FR32:$src))],
1917f4a2713aSLionel Sambuc                   IIC_SSE_CVT_Scalar_RR>, XS,
1918f4a2713aSLionel Sambuc                 Requires<[UseSSE2]>, Sched<[WriteCvtF2F]>;
1919f4a2713aSLionel Sambucdef CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
1920f4a2713aSLionel Sambuc                   "cvtss2sd\t{$src, $dst|$dst, $src}",
1921f4a2713aSLionel Sambuc                   [(set FR64:$dst, (extloadf32 addr:$src))],
1922f4a2713aSLionel Sambuc                   IIC_SSE_CVT_Scalar_RM>, XS,
1923f4a2713aSLionel Sambuc                 Requires<[UseSSE2, OptForSize]>, Sched<[WriteCvtF2FLd]>;
1924f4a2713aSLionel Sambuc
1925f4a2713aSLionel Sambuc// extload f32 -> f64.  This matches load+fextend because we have a hack in
1926f4a2713aSLionel Sambuc// the isel (PreprocessForFPConvert) that can introduce loads after dag
1927f4a2713aSLionel Sambuc// combine.
1928f4a2713aSLionel Sambuc// Since these loads aren't folded into the fextend, we have to match it
1929f4a2713aSLionel Sambuc// explicitly here.
1930f4a2713aSLionel Sambucdef : Pat<(fextend (loadf32 addr:$src)),
1931f4a2713aSLionel Sambuc          (CVTSS2SDrm addr:$src)>, Requires<[UseSSE2]>;
// When optimizing for speed, extloadf32 is split into MOVSSrm followed by the
// register-to-register conversion instead of using the folded-load form.
1932f4a2713aSLionel Sambucdef : Pat<(extloadf32 addr:$src),
1933f4a2713aSLionel Sambuc          (CVTSS2SDrr (MOVSSrm addr:$src))>, Requires<[UseSSE2, OptForSpeed]>;
1934f4a2713aSLionel Sambuc
// Intrinsic forms of cvtss2sd (int_x86_sse2_cvtss2sd) operating on whole VR128
// values, opcode 0x5A with the XS prefix; all four are isCodeGenOnly.
// rr forms use MRMSrcReg with a VR128 $src2; rm forms use MRMSrcMem with an
// ssmem $src2 matched through sse_load_f32.
1935*0a6a1f1dSLionel Sambuclet isCodeGenOnly = 1 in {
// VEX three-operand forms, gated on UseAVX.
1936f4a2713aSLionel Sambucdef Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg,
1937f4a2713aSLionel Sambuc                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
1938f4a2713aSLionel Sambuc                    "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1939f4a2713aSLionel Sambuc                    [(set VR128:$dst,
1940f4a2713aSLionel Sambuc                      (int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))],
1941f4a2713aSLionel Sambuc                    IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[UseAVX]>,
1942f4a2713aSLionel Sambuc                    Sched<[WriteCvtF2F]>;
1943f4a2713aSLionel Sambucdef Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem,
1944f4a2713aSLionel Sambuc                      (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
1945f4a2713aSLionel Sambuc                    "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1946f4a2713aSLionel Sambuc                    [(set VR128:$dst,
1947f4a2713aSLionel Sambuc                      (int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))],
1948f4a2713aSLionel Sambuc                    IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[UseAVX]>,
1949f4a2713aSLionel Sambuc                    Sched<[WriteCvtF2FLd, ReadAfterLd]>;
// Non-VEX two-operand forms, gated on UseSSE2: $src1 is tied to $dst and is
// omitted from the printed operand list.
1950f4a2713aSLionel Sambuclet Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix
1951f4a2713aSLionel Sambucdef Int_CVTSS2SDrr: I<0x5A, MRMSrcReg,
1952f4a2713aSLionel Sambuc                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
1953f4a2713aSLionel Sambuc                    "cvtss2sd\t{$src2, $dst|$dst, $src2}",
1954f4a2713aSLionel Sambuc                    [(set VR128:$dst,
1955f4a2713aSLionel Sambuc                      (int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))],
1956f4a2713aSLionel Sambuc                    IIC_SSE_CVT_Scalar_RR>, XS, Requires<[UseSSE2]>,
1957f4a2713aSLionel Sambuc                    Sched<[WriteCvtF2F]>;
1958f4a2713aSLionel Sambucdef Int_CVTSS2SDrm: I<0x5A, MRMSrcMem,
1959f4a2713aSLionel Sambuc                      (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
1960f4a2713aSLionel Sambuc                    "cvtss2sd\t{$src2, $dst|$dst, $src2}",
1961f4a2713aSLionel Sambuc                    [(set VR128:$dst,
1962f4a2713aSLionel Sambuc                      (int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))],
1963f4a2713aSLionel Sambuc                    IIC_SSE_CVT_Scalar_RM>, XS, Requires<[UseSSE2]>,
1964f4a2713aSLionel Sambuc                    Sched<[WriteCvtF2FLd, ReadAfterLd]>;
1965f4a2713aSLionel Sambuc}
1966*0a6a1f1dSLionel Sambuc} // isCodeGenOnly = 1
1967f4a2713aSLionel Sambuc
1968f4a2713aSLionel Sambuc// Convert packed single/double fp to doubleword
// Packed single -> packed doubleword conversion (opcode 0x5B), selected from
// intrinsics:
//   VCVTPS2DQrr/rm   - VEX, VR128, int_x86_sse2_cvtps2dq, loadv4f32 for rm.
//   VCVTPS2DQYrr/rm  - VEX + VEX_L, VR256, int_x86_avx_cvt_ps2dq_256,
//                      loadv8f32 for rm.
//   CVTPS2DQrr/rm    - non-VEX, VR128, int_x86_sse2_cvtps2dq, memopv4f32 for rm.
// NOTE(review): the VPDI records spell the mnemonic without a leading 'v';
// presumably the VPDI class prepends it - confirm in the format file.
1969f4a2713aSLionel Sambucdef VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
1970f4a2713aSLionel Sambuc                       "cvtps2dq\t{$src, $dst|$dst, $src}",
1971f4a2713aSLionel Sambuc                       [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))],
1972f4a2713aSLionel Sambuc                       IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>;
1973f4a2713aSLionel Sambucdef VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
1974f4a2713aSLionel Sambuc                       "cvtps2dq\t{$src, $dst|$dst, $src}",
1975f4a2713aSLionel Sambuc                       [(set VR128:$dst,
1976f4a2713aSLionel Sambuc                         (int_x86_sse2_cvtps2dq (loadv4f32 addr:$src)))],
1977f4a2713aSLionel Sambuc                       IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>;
1978f4a2713aSLionel Sambucdef VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
1979f4a2713aSLionel Sambuc                        "cvtps2dq\t{$src, $dst|$dst, $src}",
1980f4a2713aSLionel Sambuc                        [(set VR256:$dst,
1981f4a2713aSLionel Sambuc                          (int_x86_avx_cvt_ps2dq_256 VR256:$src))],
1982f4a2713aSLionel Sambuc                        IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>;
1983f4a2713aSLionel Sambucdef VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
1984f4a2713aSLionel Sambuc                        "cvtps2dq\t{$src, $dst|$dst, $src}",
1985f4a2713aSLionel Sambuc                        [(set VR256:$dst,
1986f4a2713aSLionel Sambuc                          (int_x86_avx_cvt_ps2dq_256 (loadv8f32 addr:$src)))],
1987f4a2713aSLionel Sambuc                        IIC_SSE_CVT_PS_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>;
1988f4a2713aSLionel Sambucdef CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
1989f4a2713aSLionel Sambuc                     "cvtps2dq\t{$src, $dst|$dst, $src}",
1990f4a2713aSLionel Sambuc                     [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))],
1991f4a2713aSLionel Sambuc                     IIC_SSE_CVT_PS_RR>, Sched<[WriteCvtF2I]>;
1992f4a2713aSLionel Sambucdef CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
1993f4a2713aSLionel Sambuc                     "cvtps2dq\t{$src, $dst|$dst, $src}",
1994f4a2713aSLionel Sambuc                     [(set VR128:$dst,
1995f4a2713aSLionel Sambuc                       (int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)))],
1996f4a2713aSLionel Sambuc                     IIC_SSE_CVT_PS_RM>, Sched<[WriteCvtF2ILd]>;
1997f4a2713aSLionel Sambuc
1998f4a2713aSLionel Sambuc
1999f4a2713aSLionel Sambuc// Convert Packed Double FP to Packed DW Integers
// VEX packed double -> packed doubleword conversion (opcode 0xE6), gated on
// HasAVX. The result is always VR128; the source is VR128/f128mem for the XMM
// forms and VR256/f256mem (with VEX_L) for the YMM forms. Because a memory
// operand does not reveal the source width, the memory forms carry an explicit
// x / y mnemonic suffix, and aliases make the suffixed/unsuffixed register
// spellings interchangeable. None of these records is given an itinerary.
2000f4a2713aSLionel Sambuclet Predicates = [HasAVX] in {
2001f4a2713aSLionel Sambuc// The assembler can recognize rr 256-bit instructions by seeing a ymm
2002f4a2713aSLionel Sambuc// register, but the same isn't true when using memory operands instead.
2003f4a2713aSLionel Sambuc// Provide other assembly rr and rm forms to address this explicitly.
2004f4a2713aSLionel Sambucdef VCVTPD2DQrr  : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
2005f4a2713aSLionel Sambuc                       "vcvtpd2dq\t{$src, $dst|$dst, $src}",
2006f4a2713aSLionel Sambuc                       [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>,
2007f4a2713aSLionel Sambuc                       VEX, Sched<[WriteCvtF2I]>;
2008f4a2713aSLionel Sambuc
2009f4a2713aSLionel Sambuc// XMM only
// "vcvtpd2dqx" with register operands is accepted as VCVTPD2DQrr (input only).
2010f4a2713aSLionel Sambucdef : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}",
2011*0a6a1f1dSLionel Sambuc                (VCVTPD2DQrr VR128:$dst, VR128:$src), 0>;
2012f4a2713aSLionel Sambucdef VCVTPD2DQXrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
2013f4a2713aSLionel Sambuc                       "vcvtpd2dqx\t{$src, $dst|$dst, $src}",
2014f4a2713aSLionel Sambuc                       [(set VR128:$dst,
2015f4a2713aSLionel Sambuc                         (int_x86_sse2_cvtpd2dq (loadv2f64 addr:$src)))]>, VEX,
2016f4a2713aSLionel Sambuc                       Sched<[WriteCvtF2ILd]>;
2017f4a2713aSLionel Sambuc
2018f4a2713aSLionel Sambuc// YMM only
2019f4a2713aSLionel Sambucdef VCVTPD2DQYrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
2020f4a2713aSLionel Sambuc                       "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}",
2021f4a2713aSLionel Sambuc                       [(set VR128:$dst,
2022f4a2713aSLionel Sambuc                         (int_x86_avx_cvt_pd2dq_256 VR256:$src))]>, VEX, VEX_L,
2023f4a2713aSLionel Sambuc                       Sched<[WriteCvtF2I]>;
2024f4a2713aSLionel Sambucdef VCVTPD2DQYrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
2025f4a2713aSLionel Sambuc                       "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}",
2026f4a2713aSLionel Sambuc                       [(set VR128:$dst,
2027f4a2713aSLionel Sambuc                         (int_x86_avx_cvt_pd2dq_256 (loadv4f64 addr:$src)))]>,
2028f4a2713aSLionel Sambuc                       VEX, VEX_L, Sched<[WriteCvtF2ILd]>;
// Unsuffixed "vcvtpd2dq" with a VR256 source is accepted as VCVTPD2DQYrr.
2029f4a2713aSLionel Sambucdef : InstAlias<"vcvtpd2dq\t{$src, $dst|$dst, $src}",
2030*0a6a1f1dSLionel Sambuc                (VCVTPD2DQYrr VR128:$dst, VR256:$src), 0>;
2031f4a2713aSLionel Sambuc}
2032f4a2713aSLionel Sambuc
// Non-VEX packed double -> packed doubleword conversion (opcode 0xE6),
// VR128 -> VR128, selected from int_x86_sse2_cvtpd2dq. The memory form matches
// a memopv2f64 load. The rm record is listed before the rr record here.
2033f4a2713aSLionel Sambucdef CVTPD2DQrm  : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
2034f4a2713aSLionel Sambuc                      "cvtpd2dq\t{$src, $dst|$dst, $src}",
2035f4a2713aSLionel Sambuc                      [(set VR128:$dst,
2036f4a2713aSLionel Sambuc                        (int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))],
2037f4a2713aSLionel Sambuc                      IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtF2ILd]>;
2038f4a2713aSLionel Sambucdef CVTPD2DQrr  : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
2039f4a2713aSLionel Sambuc                      "cvtpd2dq\t{$src, $dst|$dst, $src}",
2040f4a2713aSLionel Sambuc                      [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))],
2041f4a2713aSLionel Sambuc                      IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2I]>;
2042f4a2713aSLionel Sambuc
2043f4a2713aSLionel Sambuc// Convert with truncation packed single/double fp to doubleword
2044f4a2713aSLionel Sambuc// SSE2 packed instructions with XS prefix
// Truncating packed single -> packed doubleword conversion (opcode 0x5B),
// selected from intrinsics here; the generic fp_to_sint patterns for the same
// records are attached separately below.
//   VCVTTPS2DQrr/rm   - VEX, VR128, int_x86_sse2_cvttps2dq.
//   VCVTTPS2DQYrr/rm  - VEX + VEX_L, VR256, int_x86_avx_cvtt_ps2dq_256.
//   CVTTPS2DQrr/rm    - non-VEX, VR128, int_x86_sse2_cvttps2dq (memop load).
// NOTE(review): the VS2SI records spell the mnemonic without a leading 'v';
// presumably the VS2SI class prepends it - confirm in the format file.
2045f4a2713aSLionel Sambucdef VCVTTPS2DQrr : VS2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
2046f4a2713aSLionel Sambuc                         "cvttps2dq\t{$src, $dst|$dst, $src}",
2047f4a2713aSLionel Sambuc                         [(set VR128:$dst,
2048f4a2713aSLionel Sambuc                           (int_x86_sse2_cvttps2dq VR128:$src))],
2049f4a2713aSLionel Sambuc                         IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>;
2050f4a2713aSLionel Sambucdef VCVTTPS2DQrm : VS2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
2051f4a2713aSLionel Sambuc                         "cvttps2dq\t{$src, $dst|$dst, $src}",
2052f4a2713aSLionel Sambuc                         [(set VR128:$dst, (int_x86_sse2_cvttps2dq
2053f4a2713aSLionel Sambuc                                            (loadv4f32 addr:$src)))],
2054f4a2713aSLionel Sambuc                         IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>;
2055f4a2713aSLionel Sambucdef VCVTTPS2DQYrr : VS2SI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
2056f4a2713aSLionel Sambuc                          "cvttps2dq\t{$src, $dst|$dst, $src}",
2057f4a2713aSLionel Sambuc                          [(set VR256:$dst,
2058f4a2713aSLionel Sambuc                            (int_x86_avx_cvtt_ps2dq_256 VR256:$src))],
2059f4a2713aSLionel Sambuc                          IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>;
2060f4a2713aSLionel Sambucdef VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
2061f4a2713aSLionel Sambuc                          "cvttps2dq\t{$src, $dst|$dst, $src}",
2062f4a2713aSLionel Sambuc                          [(set VR256:$dst, (int_x86_avx_cvtt_ps2dq_256
2063f4a2713aSLionel Sambuc                                             (loadv8f32 addr:$src)))],
2064f4a2713aSLionel Sambuc                          IIC_SSE_CVT_PS_RM>, VEX, VEX_L,
2065f4a2713aSLionel Sambuc                          Sched<[WriteCvtF2ILd]>;
2066f4a2713aSLionel Sambuc
2067f4a2713aSLionel Sambucdef CVTTPS2DQrr : S2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
2068f4a2713aSLionel Sambuc                       "cvttps2dq\t{$src, $dst|$dst, $src}",
2069f4a2713aSLionel Sambuc                       [(set VR128:$dst, (int_x86_sse2_cvttps2dq VR128:$src))],
2070f4a2713aSLionel Sambuc                       IIC_SSE_CVT_PS_RR>, Sched<[WriteCvtF2I]>;
2071f4a2713aSLionel Sambucdef CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
2072f4a2713aSLionel Sambuc                       "cvttps2dq\t{$src, $dst|$dst, $src}",
2073f4a2713aSLionel Sambuc                       [(set VR128:$dst,
2074f4a2713aSLionel Sambuc                         (int_x86_sse2_cvttps2dq (memopv4f32 addr:$src)))],
2075f4a2713aSLionel Sambuc                       IIC_SSE_CVT_PS_RM>, Sched<[WriteCvtF2ILd]>;
2076f4a2713aSLionel Sambuc
// AVX selection patterns that map generic conversion nodes (and the
// int_x86_sse2_cvtdq2ps intrinsic) onto the VEX records. Each case comes as a
// register pattern plus a folded-load pattern. Integer-vector loads are
// written as a v2i64/v4i64 load bitcast to the i32 element type (bc_v4i32 /
// bc_v8i32).
2077f4a2713aSLionel Sambuclet Predicates = [HasAVX] in {
  // 128-bit sint_to_fp -> VCVTDQ2PS.
2078f4a2713aSLionel Sambuc  def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
2079f4a2713aSLionel Sambuc            (VCVTDQ2PSrr VR128:$src)>;
2080f4a2713aSLionel Sambuc  def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (loadv2i64 addr:$src)))),
2081f4a2713aSLionel Sambuc            (VCVTDQ2PSrm addr:$src)>;
2082f4a2713aSLionel Sambuc
  // The cvtdq2ps intrinsic selects the same records.
2083f4a2713aSLionel Sambuc  def : Pat<(int_x86_sse2_cvtdq2ps VR128:$src),
2084f4a2713aSLionel Sambuc            (VCVTDQ2PSrr VR128:$src)>;
2085f4a2713aSLionel Sambuc  def : Pat<(int_x86_sse2_cvtdq2ps (bc_v4i32 (loadv2i64 addr:$src))),
2086f4a2713aSLionel Sambuc            (VCVTDQ2PSrm addr:$src)>;
2087f4a2713aSLionel Sambuc
  // 128-bit fp_to_sint -> truncating VCVTTPS2DQ.
2088f4a2713aSLionel Sambuc  def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
2089f4a2713aSLionel Sambuc            (VCVTTPS2DQrr VR128:$src)>;
2090f4a2713aSLionel Sambuc  def : Pat<(v4i32 (fp_to_sint (loadv4f32 addr:$src))),
2091f4a2713aSLionel Sambuc            (VCVTTPS2DQrm addr:$src)>;
2092f4a2713aSLionel Sambuc
  // 256-bit sint_to_fp -> VCVTDQ2PSY.
2093f4a2713aSLionel Sambuc  def : Pat<(v8f32 (sint_to_fp (v8i32 VR256:$src))),
2094f4a2713aSLionel Sambuc            (VCVTDQ2PSYrr VR256:$src)>;
2095f4a2713aSLionel Sambuc  def : Pat<(v8f32 (sint_to_fp (bc_v8i32 (loadv4i64 addr:$src)))),
2096f4a2713aSLionel Sambuc            (VCVTDQ2PSYrm addr:$src)>;
2097f4a2713aSLionel Sambuc
  // 256-bit fp_to_sint -> truncating VCVTTPS2DQY.
2098f4a2713aSLionel Sambuc  def : Pat<(v8i32 (fp_to_sint (v8f32 VR256:$src))),
2099f4a2713aSLionel Sambuc            (VCVTTPS2DQYrr VR256:$src)>;
2100f4a2713aSLionel Sambuc  def : Pat<(v8i32 (fp_to_sint (loadv8f32 addr:$src))),
2101f4a2713aSLionel Sambuc            (VCVTTPS2DQYrm addr:$src)>;
2102f4a2713aSLionel Sambuc}
2103f4a2713aSLionel Sambuc
// SSE2 counterparts of the AVX conversion patterns above, mapping onto the
// non-VEX records. Folded loads use the memop* fragments here (memopv2i64 /
// memopv4f32) where the AVX patterns use load*.
2104f4a2713aSLionel Sambuclet Predicates = [UseSSE2] in {
  // sint_to_fp -> CVTDQ2PS.
2105f4a2713aSLionel Sambuc  def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
2106f4a2713aSLionel Sambuc            (CVTDQ2PSrr VR128:$src)>;
2107f4a2713aSLionel Sambuc  def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))),
2108f4a2713aSLionel Sambuc            (CVTDQ2PSrm addr:$src)>;
2109f4a2713aSLionel Sambuc
  // The cvtdq2ps intrinsic selects the same records.
2110f4a2713aSLionel Sambuc  def : Pat<(int_x86_sse2_cvtdq2ps VR128:$src),
2111f4a2713aSLionel Sambuc            (CVTDQ2PSrr VR128:$src)>;
2112f4a2713aSLionel Sambuc  def : Pat<(int_x86_sse2_cvtdq2ps (bc_v4i32 (memopv2i64 addr:$src))),
2113f4a2713aSLionel Sambuc            (CVTDQ2PSrm addr:$src)>;
2114f4a2713aSLionel Sambuc
  // fp_to_sint -> truncating CVTTPS2DQ.
2115f4a2713aSLionel Sambuc  def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
2116f4a2713aSLionel Sambuc            (CVTTPS2DQrr VR128:$src)>;
2117f4a2713aSLionel Sambuc  def : Pat<(v4i32 (fp_to_sint (memopv4f32 addr:$src))),
2118f4a2713aSLionel Sambuc            (CVTTPS2DQrm addr:$src)>;
2119f4a2713aSLionel Sambuc}
2120f4a2713aSLionel Sambuc
// VEX truncating packed double -> packed doubleword conversion (opcode 0xE6).
// The result is always VR128; the source is VR128/f128mem for the XMM forms
// and VR256/f256mem (with VEX_L) for the YMM forms. As with vcvtpd2dq, the
// memory forms carry an explicit x / y suffix and aliases cover the alternate
// register spellings.
// NOTE(review): these VPDI records spell the mnemonic without a leading 'v',
// while the alias strings include it; presumably VPDI prepends the 'v' -
// confirm in the format file.
2121f4a2713aSLionel Sambucdef VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
2122f4a2713aSLionel Sambuc                        "cvttpd2dq\t{$src, $dst|$dst, $src}",
2123f4a2713aSLionel Sambuc                        [(set VR128:$dst,
2124f4a2713aSLionel Sambuc                              (int_x86_sse2_cvttpd2dq VR128:$src))],
2125f4a2713aSLionel Sambuc                              IIC_SSE_CVT_PD_RR>, VEX, Sched<[WriteCvtF2I]>;
2126f4a2713aSLionel Sambuc
2127f4a2713aSLionel Sambuc// The assembler can recognize rr 256-bit instructions by seeing a ymm
2128f4a2713aSLionel Sambuc// register, but the same isn't true when using memory operands instead.
2129f4a2713aSLionel Sambuc// Provide other assembly rr and rm forms to address this explicitly.
2130f4a2713aSLionel Sambuc
2131f4a2713aSLionel Sambuc// XMM only
2132f4a2713aSLionel Sambucdef : InstAlias<"vcvttpd2dqx\t{$src, $dst|$dst, $src}",
2133*0a6a1f1dSLionel Sambuc                (VCVTTPD2DQrr VR128:$dst, VR128:$src), 0>;
2134f4a2713aSLionel Sambucdef VCVTTPD2DQXrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
2135f4a2713aSLionel Sambuc                         "cvttpd2dqx\t{$src, $dst|$dst, $src}",
2136f4a2713aSLionel Sambuc                         [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
2137f4a2713aSLionel Sambuc                                            (loadv2f64 addr:$src)))],
2138f4a2713aSLionel Sambuc                         IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2ILd]>;
2139f4a2713aSLionel Sambuc
2140f4a2713aSLionel Sambuc// YMM only
2141f4a2713aSLionel Sambucdef VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
2142f4a2713aSLionel Sambuc                         "cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
2143f4a2713aSLionel Sambuc                         [(set VR128:$dst,
2144f4a2713aSLionel Sambuc                           (int_x86_avx_cvtt_pd2dq_256 VR256:$src))],
2145f4a2713aSLionel Sambuc                         IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>;
2146f4a2713aSLionel Sambucdef VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
2147f4a2713aSLionel Sambuc                         "cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
2148f4a2713aSLionel Sambuc                         [(set VR128:$dst,
2149f4a2713aSLionel Sambuc                          (int_x86_avx_cvtt_pd2dq_256 (loadv4f64 addr:$src)))],
2150f4a2713aSLionel Sambuc                         IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>;
// Unsuffixed "vcvttpd2dq" with a VR256 source is accepted as VCVTTPD2DQYrr.
2151f4a2713aSLionel Sambucdef : InstAlias<"vcvttpd2dq\t{$src, $dst|$dst, $src}",
2152*0a6a1f1dSLionel Sambuc                (VCVTTPD2DQYrr VR128:$dst, VR256:$src), 0>;
2153f4a2713aSLionel Sambuc
// Generic v4f64 -> v4i32 fp_to_sint selects the truncating 256-bit-source
// VCVTTPD2DQY records when AVX is available: one pattern for a register
// source and one that folds a v4f64 load.
def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))),
          (VCVTTPD2DQYrr VR256:$src)>,
      Requires<[HasAVX]>;
def : Pat<(v4i32 (fp_to_sint (loadv4f64 addr:$src))),
          (VCVTTPD2DQYrm addr:$src)>,
      Requires<[HasAVX]>;
2160f4a2713aSLionel Sambuc
// Non-VEX truncating packed double -> packed doubleword conversion
// (opcode 0xE6), VR128 -> VR128, selected from int_x86_sse2_cvttpd2dq.
// The memory form matches a memopv2f64 load.
2161f4a2713aSLionel Sambucdef CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
2162f4a2713aSLionel Sambuc                      "cvttpd2dq\t{$src, $dst|$dst, $src}",
2163f4a2713aSLionel Sambuc                      [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))],
2164f4a2713aSLionel Sambuc                      IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2I]>;
2165f4a2713aSLionel Sambucdef CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
2166f4a2713aSLionel Sambuc                      "cvttpd2dq\t{$src, $dst|$dst, $src}",
2167f4a2713aSLionel Sambuc                      [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
2168f4a2713aSLionel Sambuc                                        (memopv2f64 addr:$src)))],
2169f4a2713aSLionel Sambuc                                        IIC_SSE_CVT_PD_RM>,
2170f4a2713aSLionel Sambuc                      Sched<[WriteCvtF2ILd]>;
2171f4a2713aSLionel Sambuc
2172f4a2713aSLionel Sambuc// Convert packed single to packed double
// VEX packed single -> packed double conversion (opcode 0x5A, PS prefix
// class), gated on HasAVX.
//   VCVTPS2PDrr/rm  - VR128 result. rr selects int_x86_sse2_cvtps2pd; rm takes
//                     an f64mem operand and matches extloadv2f32 as v2f64.
//   VCVTPS2PDYrr/rm - VR256 result from a VR128 / f128mem source, VEX_L,
//                     selected from int_x86_avx_cvt_ps2_pd_256.
2173f4a2713aSLionel Sambuclet Predicates = [HasAVX] in {
2174f4a2713aSLionel Sambuc                  // SSE2 instructions without OpSize prefix
2175f4a2713aSLionel Sambucdef VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
2176f4a2713aSLionel Sambuc                     "vcvtps2pd\t{$src, $dst|$dst, $src}",
2177f4a2713aSLionel Sambuc                     [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
2178*0a6a1f1dSLionel Sambuc                     IIC_SSE_CVT_PD_RR>, PS, VEX, Sched<[WriteCvtF2F]>;
2179f4a2713aSLionel Sambucdef VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
2180f4a2713aSLionel Sambuc                    "vcvtps2pd\t{$src, $dst|$dst, $src}",
2181f4a2713aSLionel Sambuc                    [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))],
2182*0a6a1f1dSLionel Sambuc                    IIC_SSE_CVT_PD_RM>, PS, VEX, Sched<[WriteCvtF2FLd]>;
2183f4a2713aSLionel Sambucdef VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
2184f4a2713aSLionel Sambuc                     "vcvtps2pd\t{$src, $dst|$dst, $src}",
2185f4a2713aSLionel Sambuc                     [(set VR256:$dst,
2186f4a2713aSLionel Sambuc                       (int_x86_avx_cvt_ps2_pd_256 VR128:$src))],
2187*0a6a1f1dSLionel Sambuc                     IIC_SSE_CVT_PD_RR>, PS, VEX, VEX_L, Sched<[WriteCvtF2F]>;
2188f4a2713aSLionel Sambucdef VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
2189f4a2713aSLionel Sambuc                     "vcvtps2pd\t{$src, $dst|$dst, $src}",
2190f4a2713aSLionel Sambuc                     [(set VR256:$dst,
2191f4a2713aSLionel Sambuc                       (int_x86_avx_cvt_ps2_pd_256 (loadv4f32 addr:$src)))],
2192*0a6a1f1dSLionel Sambuc                     IIC_SSE_CVT_PD_RM>, PS, VEX, VEX_L, Sched<[WriteCvtF2FLd]>;
2193f4a2713aSLionel Sambuc}
2194f4a2713aSLionel Sambuc
// Legacy-encoded cvtps2pd; the rm form reads 64 bits and is matched as an
// extending v2f32 load.
let Predicates = [UseSSE2] in {
  def CVTPS2PDrr : I<0x5A, MRMSrcReg,
                     (outs VR128:$dst), (ins VR128:$src),
                     "cvtps2pd\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
                     IIC_SSE_CVT_PD_RR>,
                   PS, Sched<[WriteCvtF2F]>;

  def CVTPS2PDrm : I<0x5A, MRMSrcMem,
                     (outs VR128:$dst), (ins f64mem:$src),
                     "cvtps2pd\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))],
                     IIC_SSE_CVT_PD_RM>,
                   PS, Sched<[WriteCvtF2FLd]>;
}
2205f4a2713aSLionel Sambuc
// Convert Packed DW Integers to Packed Double FP (VEX-encoded forms)
let Predicates = [HasAVX] in {
  // The 128-bit memory form has no selection pattern, so its load behaviour
  // and lack of side effects are stated explicitly.
  let hasSideEffects = 0, mayLoad = 1 in
  def VCVTDQ2PDrm : S2SI<0xE6, MRMSrcMem,
                         (outs VR128:$dst), (ins i64mem:$src),
                         "vcvtdq2pd\t{$src, $dst|$dst, $src}",
                         []>,
                    VEX, Sched<[WriteCvtI2FLd]>;

  def VCVTDQ2PDrr : S2SI<0xE6, MRMSrcReg,
                         (outs VR128:$dst), (ins VR128:$src),
                         "vcvtdq2pd\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst,
                           (int_x86_sse2_cvtdq2pd VR128:$src))]>,
                    VEX, Sched<[WriteCvtI2F]>;

  // 256-bit destination forms.
  def VCVTDQ2PDYrm : S2SI<0xE6, MRMSrcMem,
                          (outs VR256:$dst), (ins i128mem:$src),
                          "vcvtdq2pd\t{$src, $dst|$dst, $src}",
                          [(set VR256:$dst,
                            (int_x86_avx_cvtdq2_pd_256
                              (bitconvert (loadv2i64 addr:$src))))]>,
                     VEX, VEX_L, Sched<[WriteCvtI2FLd]>;

  def VCVTDQ2PDYrr : S2SI<0xE6, MRMSrcReg,
                          (outs VR256:$dst), (ins VR128:$src),
                          "vcvtdq2pd\t{$src, $dst|$dst, $src}",
                          [(set VR256:$dst,
                            (int_x86_avx_cvtdq2_pd_256 VR128:$src))]>,
                     VEX, VEX_L, Sched<[WriteCvtI2F]>;
}
2229f4a2713aSLionel Sambuc
// Legacy-encoded cvtdq2pd.
//
// The memory form has no selection pattern, so mayLoad/hasSideEffects are
// given explicitly. Each form uses the itinerary class matching its operand
// kind: *_RM for the memory source, *_RR for the register source (these were
// previously swapped).
let hasSideEffects = 0, mayLoad = 1 in
def CVTDQ2PDrm  : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                       "cvtdq2pd\t{$src, $dst|$dst, $src}", [],
                       IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtI2FLd]>;
def CVTDQ2PDrr  : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "cvtdq2pd\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))],
                       IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtI2F]>;
2238f4a2713aSLionel Sambuc
// AVX 256-bit register conversion intrinsics:
// map v4i32 -> v4f64 sint_to_fp onto the 256-bit vcvtdq2pd forms.
let Predicates = [HasAVX] in {
  // Register source.
  def : Pat<(v4f64 (sint_to_fp (v4i32 VR128:$src))),
            (VCVTDQ2PDYrr VR128:$src)>;

  // Folded load; the v2i64 load is reinterpreted as v4i32.
  def : Pat<(v4f64 (sint_to_fp (bc_v4i32 (loadv2i64 addr:$src)))),
            (VCVTDQ2PDYrm addr:$src)>;
} // Predicates = [HasAVX]
2246f4a2713aSLionel Sambuc
// Convert packed double to packed single
//
// With a register source the assembler can tell the 256-bit form apart by
// the ymm operand; with a memory source it cannot. Explicit rr and rm
// spellings are therefore provided for each width.
def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg,
                       (outs VR128:$dst), (ins VR128:$src),
                       "cvtpd2ps\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))],
                       IIC_SSE_CVT_PD_RR>,
                  VEX, Sched<[WriteCvtF2F]>;

// XMM only
def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}",
                (VCVTPD2PSrr VR128:$dst, VR128:$src), 0>;

def VCVTPD2PSXrm : VPDI<0x5A, MRMSrcMem,
                        (outs VR128:$dst), (ins f128mem:$src),
                        "cvtpd2psx\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (int_x86_sse2_cvtpd2ps (loadv2f64 addr:$src)))],
                        IIC_SSE_CVT_PD_RM>,
                   VEX, Sched<[WriteCvtF2FLd]>;

// YMM only
def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg,
                        (outs VR128:$dst), (ins VR256:$src),
                        "cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (int_x86_avx_cvt_pd2_ps_256 VR256:$src))],
                        IIC_SSE_CVT_PD_RR>,
                   VEX, VEX_L, Sched<[WriteCvtF2F]>;

def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem,
                        (outs VR128:$dst), (ins f256mem:$src),
                        "cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (int_x86_avx_cvt_pd2_ps_256
                            (loadv4f64 addr:$src)))],
                        IIC_SSE_CVT_PD_RM>,
                   VEX, VEX_L, Sched<[WriteCvtF2FLd]>;

// Plain "vcvtpd2ps" with a ymm source resolves to the 256-bit register form.
def : InstAlias<"vcvtpd2ps\t{$src, $dst|$dst, $src}",
                (VCVTPD2PSYrr VR128:$dst, VR256:$src), 0>;
2278f4a2713aSLionel Sambuc
// Legacy-encoded cvtpd2ps, register and memory source forms; both are
// selected through the SSE2 cvtpd2ps intrinsic.
def CVTPD2PSrr : PDI<0x5A, MRMSrcReg,
                     (outs VR128:$dst), (ins VR128:$src),
                     "cvtpd2ps\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))],
                     IIC_SSE_CVT_PD_RR>,
                 Sched<[WriteCvtF2F]>;

def CVTPD2PSrm : PDI<0x5A, MRMSrcMem,
                     (outs VR128:$dst), (ins f128mem:$src),
                     "cvtpd2ps\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
                       (int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)))],
                     IIC_SSE_CVT_PD_RM>,
                 Sched<[WriteCvtF2FLd]>;
2288f4a2713aSLionel Sambuc
2289f4a2713aSLionel Sambuc
// AVX 256-bit register conversion intrinsics
// FIXME: Move the SSE conversion intrinsics over to standalone patterns like
// the ones below wherever possible, so that each instruction does not need
// to be declared twice.
let Predicates = [HasAVX] in {
  // 256-bit dword -> single conversion intrinsic.
  def : Pat<(int_x86_avx_cvtdq2_ps_256 VR256:$src),
            (VCVTDQ2PSYrr VR256:$src)>;
  def : Pat<(int_x86_avx_cvtdq2_ps_256 (bitconvert (loadv4i64 addr:$src))),
            (VCVTDQ2PSYrm addr:$src)>;

  // Match fround and fextend for 128/256-bit conversions

  // double -> single
  def : Pat<(v4f32 (X86vfpround (v2f64 VR128:$src))),
            (VCVTPD2PSrr VR128:$src)>;
  def : Pat<(v4f32 (X86vfpround (loadv2f64 addr:$src))),
            (VCVTPD2PSXrm addr:$src)>;
  def : Pat<(v4f32 (fround (v4f64 VR256:$src))),
            (VCVTPD2PSYrr VR256:$src)>;
  def : Pat<(v4f32 (fround (loadv4f64 addr:$src))),
            (VCVTPD2PSYrm addr:$src)>;

  // single -> double
  def : Pat<(v2f64 (X86vfpext (v4f32 VR128:$src))),
            (VCVTPS2PDrr VR128:$src)>;
  def : Pat<(v4f64 (fextend (v4f32 VR128:$src))),
            (VCVTPS2PDYrr VR128:$src)>;
  def : Pat<(v4f64 (extloadv4f32 addr:$src)),
            (VCVTPS2PDYrm addr:$src)>;
}
2316f4a2713aSLionel Sambuc
let Predicates = [UseSSE2] in {
  // Match fround and fextend for 128 conversions

  // double -> single
  def : Pat<(v4f32 (X86vfpround (v2f64 VR128:$src))),
            (CVTPD2PSrr VR128:$src)>;
  def : Pat<(v4f32 (X86vfpround (memopv2f64 addr:$src))),
            (CVTPD2PSrm addr:$src)>;

  // single -> double
  def : Pat<(v2f64 (X86vfpext (v4f32 VR128:$src))),
            (CVTPS2PDrr VR128:$src)>;
}
2327f4a2713aSLionel Sambuc
2328f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===//
2329f4a2713aSLionel Sambuc// SSE 1 & 2 - Compare Instructions
2330f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===//
2331f4a2713aSLionel Sambuc
// sse12_cmp_scalar - sse 1 & 2 compare scalar instructions
//
// Parameters:
//   RC        - register class of the destination and register sources.
//   x86memop  - memory operand type used by the rm forms.
//   CC        - operand type of the condition code in the pattern forms.
//   OpNode    - SelectionDAG node matched by rr/rm.
//   VT        - value type applied to $src1 in the patterns.
//   ld_frag   - load fragment folded by the rm form.
//   asm       - assembly string using the ${cc} mnemonic suffix.
//   asm_alt   - assembly string taking the condition as an explicit
//               immediate.
//   itins     - itineraries and scheduling class for all four forms.
//   immLeaf   - immediate predicate applied to $cc in the patterns.
//
// Defines rr/rm (with selection patterns) and rr_alt/rm_alt (assembler-only,
// no patterns).
multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
                            Operand CC, SDNode OpNode, ValueType VT,
                            PatFrag ld_frag, string asm, string asm_alt,
                            OpndItins itins, ImmLeaf immLeaf> {
  def rr : SIi8<0xC2, MRMSrcReg,
                (outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
                [(set RC:$dst, (OpNode (VT RC:$src1), RC:$src2, immLeaf:$cc))],
                itins.rr>, Sched<[itins.Sched]>;
  def rm : SIi8<0xC2, MRMSrcMem,
                (outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
                [(set RC:$dst, (OpNode (VT RC:$src1),
                                         (ld_frag addr:$src2), immLeaf:$cc))],
                                         itins.rm>,
           Sched<[itins.Sched.Folded, ReadAfterLd]>;

  // Accept explicit immediate argument form instead of comparison code.
  // These forms take their itineraries from `itins`, like rr/rm above and
  // like the alt forms in sse12_cmp_packed, so an instantiation never mixes
  // itinerary classes between its pattern and assembler-only variants.
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    def rr_alt : SIi8<0xC2, MRMSrcReg, (outs RC:$dst),
                      (ins RC:$src1, RC:$src2, i8imm:$cc), asm_alt, [],
                      itins.rr>, Sched<[itins.Sched]>;
    // No pattern to infer from, so the load is declared explicitly.
    let mayLoad = 1 in
    def rm_alt : SIi8<0xC2, MRMSrcMem, (outs RC:$dst),
                      (ins RC:$src1, x86memop:$src2, i8imm:$cc), asm_alt, [],
                      itins.rm>,
                      Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
2360f4a2713aSLionel Sambuc
// AVX scalar compares: three-operand syntax, AVXCC condition operand,
// i8immZExt5 immediate predicate.
defm VCMPSS : sse12_cmp_scalar<
                  FR32, f32mem, AVXCC, X86cmps, f32, loadf32,
                  "cmp${cc}ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  "cmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
                  SSE_ALU_F32S, i8immZExt5>,
              XS, VEX_4V, VEX_LIG;

// The f64 variant deliberately reuses the f32 itineraries
// (same latency as 32 bit compare).
defm VCMPSD : sse12_cmp_scalar<
                  FR64, f64mem, AVXCC, X86cmps, f64, loadf64,
                  "cmp${cc}sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  "cmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
                  SSE_ALU_F32S, i8immZExt5>,
              XD, VEX_4V, VEX_LIG;
2370f4a2713aSLionel Sambuc
// Legacy-encoded scalar compares: destination is tied to the first source,
// SSECC condition operand, i8immZExt3 immediate predicate.
let Constraints = "$src1 = $dst" in {
  defm CMPSS : sse12_cmp_scalar<
                   FR32, f32mem, SSECC, X86cmps, f32, loadf32,
                   "cmp${cc}ss\t{$src2, $dst|$dst, $src2}",
                   "cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}",
                   SSE_ALU_F32S, i8immZExt3>,
               XS;

  defm CMPSD : sse12_cmp_scalar<
                   FR64, f64mem, SSECC, X86cmps, f64, loadf64,
                   "cmp${cc}sd\t{$src2, $dst|$dst, $src2}",
                   "cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
                   SSE_ALU_F64S, i8immZExt3>,
               XD;
}
2381f4a2713aSLionel Sambuc
// sse12_cmp_scalar_int - scalar compares selected through an intrinsic whose
// operands are full VR128 registers.
//
//   x86memop - memory operand type of the rm form.
//   CC       - operand type of the condition code.
//   Int      - intrinsic matched by both forms.
//   asm      - assembly string.
//   itins    - itineraries and scheduling class.
//   immLeaf  - immediate predicate applied to $cc.
multiclass sse12_cmp_scalar_int<X86MemOperand x86memop, Operand CC,
                         Intrinsic Int, string asm, OpndItins itins,
                         ImmLeaf immLeaf> {
  // Register source.
  def rr : SIi8<0xC2, MRMSrcReg,
                (outs VR128:$dst),
                (ins VR128:$src1, VR128:$src, CC:$cc),
                asm,
                [(set VR128:$dst,
                  (Int VR128:$src1, VR128:$src, immLeaf:$cc))],
                itins.rr>,
           Sched<[itins.Sched]>;

  // Memory source, matched with a plain `load`.
  def rm : SIi8<0xC2, MRMSrcMem,
                (outs VR128:$dst),
                (ins VR128:$src1, x86memop:$src, CC:$cc),
                asm,
                [(set VR128:$dst,
                  (Int VR128:$src1, (load addr:$src), immLeaf:$cc))],
                itins.rm>,
           Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
2398f4a2713aSLionel Sambuc
let isCodeGenOnly = 1 in {
  // Aliases to match intrinsics which expect XMM operand(s).

  // AVX forms.
  defm Int_VCMPSS : sse12_cmp_scalar_int<
                        f32mem, AVXCC, int_x86_sse_cmp_ss,
                        "cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}",
                        SSE_ALU_F32S, i8immZExt5>,
                    XS, VEX_4V;
  defm Int_VCMPSD : sse12_cmp_scalar_int<
                        f64mem, AVXCC, int_x86_sse2_cmp_sd,
                        "cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}",
                        SSE_ALU_F32S, i8immZExt5>, // same latency as f32
                    XD, VEX_4V;

  // Legacy forms, destination tied to the first source.
  let Constraints = "$src1 = $dst" in {
    defm Int_CMPSS : sse12_cmp_scalar_int<
                         f32mem, SSECC, int_x86_sse_cmp_ss,
                         "cmp${cc}ss\t{$src, $dst|$dst, $src}",
                         SSE_ALU_F32S, i8immZExt3>,
                     XS;
    defm Int_CMPSD : sse12_cmp_scalar_int<
                         f64mem, SSECC, int_x86_sse2_cmp_sd,
                         "cmp${cc}sd\t{$src, $dst|$dst, $src}",
                         SSE_ALU_F64S, i8immZExt3>,
                     XD;
  } // Constraints = "$src1 = $dst"
} // isCodeGenOnly = 1
2419f4a2713aSLionel Sambuc
2420f4a2713aSLionel Sambuc
// sse12_ord_cmp - Unordered/Ordered scalar fp compare and set EFLAGS
//
//   opc       - opcode byte.
//   RC        - register class of the register operands.
//   OpNode    - node whose result is written to EFLAGS.
//   vt        - value type applied to $src1.
//   x86memop  - memory operand type of the rm form.
//   ld_frag   - load fragment folded by the rm form.
//   OpcodeStr - mnemonic; the operand list is appended here.
multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode,
                            ValueType vt, X86MemOperand x86memop,
                            PatFrag ld_frag, string OpcodeStr> {
  // Register/register compare.
  def rr: SI<opc, MRMSrcReg,
             (outs), (ins RC:$src1, RC:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
             [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))],
             IIC_SSE_COMIS_RR>,
          Sched<[WriteFAdd]>;

  // Register/memory compare.
  def rm: SI<opc, MRMSrcMem,
             (outs), (ins RC:$src1, x86memop:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
             [(set EFLAGS,
               (OpNode (vt RC:$src1), (ld_frag addr:$src2)))],
             IIC_SSE_COMIS_RM>,
          Sched<[WriteFAddLd, ReadAfterLd]>;
}
2437f4a2713aSLionel Sambuc
// Every instruction in this group defines EFLAGS.
let Defs = [EFLAGS] in {
  // --- VEX-encoded forms ---------------------------------------------------

  // Scalar-register ucomis*, selected for X86cmp.
  defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
                                "ucomiss">,
                  PS, VEX, VEX_LIG;
  defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
                                "ucomisd">,
                  PD, VEX, VEX_LIG;

  // comis* on VR128 with the selection pattern cleared.
  let Pattern = []<dag> in {
    defm VCOMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load,
                                 "comiss">,
                   PS, VEX, VEX_LIG;
    defm VCOMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load,
                                 "comisd">,
                   PD, VEX, VEX_LIG;
  }

  // Codegen-only variants on VR128, selected for X86ucomi / X86comi.
  let isCodeGenOnly = 1 in {
    defm Int_VUCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem,
                                      load, "ucomiss">,
                        PS, VEX;
    defm Int_VUCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem,
                                      load, "ucomisd">,
                        PD, VEX;

    defm Int_VCOMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem,
                                     load, "comiss">,
                       PS, VEX;
    defm Int_VCOMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem,
                                     load, "comisd">,
                       PD, VEX;
  }

  // --- Legacy-encoded forms ------------------------------------------------

  // Scalar-register ucomis*, selected for X86cmp.
  defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
                               "ucomiss">,
                 PS;
  defm UCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
                               "ucomisd">,
                 PD;

  // comis* on VR128 with the selection pattern cleared.
  let Pattern = []<dag> in {
    defm COMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load,
                                "comiss">,
                  PS;
    defm COMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load,
                                "comisd">,
                  PD;
  }

  // Codegen-only variants on VR128, selected for X86ucomi / X86comi.
  let isCodeGenOnly = 1 in {
    defm Int_UCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem,
                                     load, "ucomiss">,
                       PS;
    defm Int_UCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem,
                                     load, "ucomisd">,
                       PD;

    defm Int_COMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem,
                                    load, "comiss">,
                      PS;
    defm Int_COMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem,
                                    load, "comisd">,
                      PD;
  }
} // Defs = [EFLAGS]
2485f4a2713aSLionel Sambuc
// sse12_cmp_packed - sse 1 & 2 compare packed instructions
//
// Parameters:
//   RC        - register class of the destination and register sources.
//   x86memop  - memory operand type used by the memory forms.
//   CC        - operand type of the condition code in the pattern forms.
//   Int       - intrinsic matched by rri/rmi.
//   asm       - assembly string using the ${cc} mnemonic suffix.
//   asm_alt   - assembly string taking the condition as an explicit
//               immediate.
//   d         - execution domain passed to PIi8.
//   immLeaf   - immediate predicate applied to $cc in the patterns.
//   itins     - itineraries (defaults to SSE_ALU_F32P).
//
// Defines rri/rmi (with selection patterns) and rri_alt/rmi_alt
// (assembler-only, no patterns).
multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
                            Operand CC, Intrinsic Int, string asm,
                            string asm_alt, Domain d, ImmLeaf immLeaf,
                            OpndItins itins = SSE_ALU_F32P> {
  def rri : PIi8<0xC2, MRMSrcReg,
             (outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
             [(set RC:$dst, (Int RC:$src1, RC:$src2, immLeaf:$cc))],
             itins.rr, d>,
            Sched<[WriteFAdd]>;
  def rmi : PIi8<0xC2, MRMSrcMem,
             (outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
             [(set RC:$dst, (Int RC:$src1, (memop addr:$src2), immLeaf:$cc))],
             itins.rm, d>,
            Sched<[WriteFAddLd, ReadAfterLd]>;

  // Accept explicit immediate argument form instead of comparison code.
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    def rri_alt : PIi8<0xC2, MRMSrcReg,
               (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
               asm_alt, [], itins.rr, d>, Sched<[WriteFAdd]>;
    // With an empty pattern nothing can infer that this form reads memory,
    // so declare it explicitly (as sse12_cmp_scalar does for rm_alt).
    let mayLoad = 1 in
    def rmi_alt : PIi8<0xC2, MRMSrcMem,
               (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
               asm_alt, [], itins.rm, d>,
               Sched<[WriteFAddLd, ReadAfterLd]>;
  }
}
2513f4a2713aSLionel Sambuc
// AVX packed compares, 128-bit. All four instantiations rely on the default
// itineraries of sse12_cmp_packed.
defm VCMPPS : sse12_cmp_packed<
                  VR128, f128mem, AVXCC, int_x86_sse_cmp_ps,
                  "cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
                  SSEPackedSingle, i8immZExt5>,
              PS, VEX_4V;
defm VCMPPD : sse12_cmp_packed<
                  VR128, f128mem, AVXCC, int_x86_sse2_cmp_pd,
                  "cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
                  SSEPackedDouble, i8immZExt5>,
              PD, VEX_4V;

// AVX packed compares, 256-bit.
defm VCMPPSY : sse12_cmp_packed<
                   VR256, f256mem, AVXCC, int_x86_avx_cmp_ps_256,
                   "cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                   "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
                   SSEPackedSingle, i8immZExt5>,
               PS, VEX_4V, VEX_L;
defm VCMPPDY : sse12_cmp_packed<
                   VR256, f256mem, AVXCC, int_x86_avx_cmp_pd_256,
                   "cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                   "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
                   SSEPackedDouble, i8immZExt5>,
               PD, VEX_4V, VEX_L;
// Legacy-encoded packed compares; destination is tied to the first source.
// NOTE(review): these pass i8immZExt5 together with SSECC, whereas the
// legacy scalar compares use i8immZExt3 - confirm this is intended.
let Constraints = "$src1 = $dst" in {
  defm CMPPS : sse12_cmp_packed<
                   VR128, f128mem, SSECC, int_x86_sse_cmp_ps,
                   "cmp${cc}ps\t{$src2, $dst|$dst, $src2}",
                   "cmpps\t{$cc, $src2, $dst|$dst, $src2, $cc}",
                   SSEPackedSingle, i8immZExt5, SSE_ALU_F32P>,
               PS;

  defm CMPPD : sse12_cmp_packed<
                   VR128, f128mem, SSECC, int_x86_sse2_cmp_pd,
                   "cmp${cc}pd\t{$src2, $dst|$dst, $src2}",
                   "cmppd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
                   SSEPackedDouble, i8immZExt5, SSE_ALU_F64P>,
               PD;
}
2540f4a2713aSLionel Sambuc
// Select X86cmpp (integer-vector result) onto the AVX packed compares.
let Predicates = [HasAVX] in {
  // 128-bit, single precision.
  def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
            (VCMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>;
  def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), (memop addr:$src2),
                            imm:$cc)),
            (VCMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>;

  // 128-bit, double precision.
  def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
            (VCMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
  def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), (memop addr:$src2),
                            imm:$cc)),
            (VCMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;

  // 256-bit, single precision.
  def : Pat<(v8i32 (X86cmpp (v8f32 VR256:$src1), VR256:$src2, imm:$cc)),
            (VCMPPSYrri (v8f32 VR256:$src1), (v8f32 VR256:$src2), imm:$cc)>;
  def : Pat<(v8i32 (X86cmpp (v8f32 VR256:$src1), (memop addr:$src2),
                            imm:$cc)),
            (VCMPPSYrmi (v8f32 VR256:$src1), addr:$src2, imm:$cc)>;

  // 256-bit, double precision.
  def : Pat<(v4i64 (X86cmpp (v4f64 VR256:$src1), VR256:$src2, imm:$cc)),
            (VCMPPDYrri VR256:$src1, VR256:$src2, imm:$cc)>;
  def : Pat<(v4i64 (X86cmpp (v4f64 VR256:$src1), (memop addr:$src2),
                            imm:$cc)),
            (VCMPPDYrmi VR256:$src1, addr:$src2, imm:$cc)>;
}
2560f4a2713aSLionel Sambuc
// Select single-precision X86cmpp onto the legacy cmpps forms.
let Predicates = [UseSSE1] in {
  def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
            (CMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>;
  def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), (memop addr:$src2),
                            imm:$cc)),
            (CMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>;
}
2567f4a2713aSLionel Sambuc
// Select double-precision X86cmpp onto the legacy cmppd forms.
let Predicates = [UseSSE2] in {
  def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
            (CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
  def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), (memop addr:$src2),
                            imm:$cc)),
            (CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
}
2574f4a2713aSLionel Sambuc
2575f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===//
2576f4a2713aSLionel Sambuc// SSE 1 & 2 - Shuffle Instructions
2577f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===//
2578f4a2713aSLionel Sambuc
/// sse12_shuffle - sse 1 & 2 fp shuffle instructions
///
///   RC        - register class of the destination and register sources.
///   x86memop  - memory operand type of the rmi form.
///   vt        - vector type of the X86Shufp result.
///   asm       - assembly string.
///   mem_frag  - load fragment folded by the rmi form.
///   d         - execution domain passed to PIi8.
multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
                         ValueType vt, string asm, PatFrag mem_frag,
                         Domain d> {
  // Second source folded from memory.
  def rmi : PIi8<0xC6, MRMSrcMem,
                 (outs RC:$dst),
                 (ins RC:$src1, x86memop:$src2, i8imm:$src3),
                 asm,
                 [(set RC:$dst,
                   (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
                                 (i8 imm:$src3))))],
                 IIC_SSE_SHUFP, d>,
            Sched<[WriteFShuffleLd, ReadAfterLd]>;

  // Both sources in registers.
  def rri : PIi8<0xC6, MRMSrcReg,
                 (outs RC:$dst),
                 (ins RC:$src1, RC:$src2, i8imm:$src3),
                 asm,
                 [(set RC:$dst,
                   (vt (X86Shufp RC:$src1, RC:$src2,
                                 (i8 imm:$src3))))],
                 IIC_SSE_SHUFP, d>,
            Sched<[WriteFShuffle]>;
}
2594f4a2713aSLionel Sambuc
// AVX shufps/shufpd, 128- and 256-bit, three-operand syntax.
defm VSHUFPS  : sse12_shuffle<
                    VR128, f128mem, v4f32,
                    "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                    loadv4f32, SSEPackedSingle>,
                PS, VEX_4V;
defm VSHUFPSY : sse12_shuffle<
                    VR256, f256mem, v8f32,
                    "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                    loadv8f32, SSEPackedSingle>,
                PS, VEX_4V, VEX_L;
defm VSHUFPD  : sse12_shuffle<
                    VR128, f128mem, v2f64,
                    "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                    loadv2f64, SSEPackedDouble>,
                PD, VEX_4V;
defm VSHUFPDY : sse12_shuffle<
                    VR256, f256mem, v4f64,
                    "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                    loadv4f64, SSEPackedDouble>,
                PD, VEX_4V, VEX_L;
2607f4a2713aSLionel Sambuc
2608f4a2713aSLionel Sambuclet Constraints = "$src1 = $dst" in {
2609f4a2713aSLionel Sambuc  defm SHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
2610f4a2713aSLionel Sambuc                    "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
2611*0a6a1f1dSLionel Sambuc                    memopv4f32, SSEPackedSingle>, PS;
2612f4a2713aSLionel Sambuc  defm SHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
2613f4a2713aSLionel Sambuc                    "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
2614*0a6a1f1dSLionel Sambuc                    memopv2f64, SSEPackedDouble>, PD;
2615f4a2713aSLionel Sambuc}
2616f4a2713aSLionel Sambuc
// Integer-typed X86Shufp nodes are selected to the FP shuffle instructions
// defined above. 32-bit-element vector loads are matched as a bitcast
// (bc_v4i32 / bc_v8i32) of a 64-bit-element vector load.
2617f4a2713aSLionel Sambuclet Predicates = [HasAVX] in {
  // v4i32 -> VSHUFPS
2618f4a2713aSLionel Sambuc  def : Pat<(v4i32 (X86Shufp VR128:$src1,
2619f4a2713aSLionel Sambuc                       (bc_v4i32 (loadv2i64 addr:$src2)), (i8 imm:$imm))),
2620f4a2713aSLionel Sambuc            (VSHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
2621f4a2713aSLionel Sambuc  def : Pat<(v4i32 (X86Shufp VR128:$src1, VR128:$src2, (i8 imm:$imm))),
2622f4a2713aSLionel Sambuc            (VSHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;
2623f4a2713aSLionel Sambuc
  // v2i64 -> VSHUFPD
2624f4a2713aSLionel Sambuc  def : Pat<(v2i64 (X86Shufp VR128:$src1,
2625f4a2713aSLionel Sambuc                       (loadv2i64 addr:$src2), (i8 imm:$imm))),
2626f4a2713aSLionel Sambuc            (VSHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>;
2627f4a2713aSLionel Sambuc  def : Pat<(v2i64 (X86Shufp VR128:$src1, VR128:$src2, (i8 imm:$imm))),
2628f4a2713aSLionel Sambuc            (VSHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>;
2629f4a2713aSLionel Sambuc
2630f4a2713aSLionel Sambuc  // 256-bit patterns
  // v8i32 -> VSHUFPSY, v4i64 -> VSHUFPDY
2631f4a2713aSLionel Sambuc  def : Pat<(v8i32 (X86Shufp VR256:$src1, VR256:$src2, (i8 imm:$imm))),
2632f4a2713aSLionel Sambuc            (VSHUFPSYrri VR256:$src1, VR256:$src2, imm:$imm)>;
2633f4a2713aSLionel Sambuc  def : Pat<(v8i32 (X86Shufp VR256:$src1,
2634f4a2713aSLionel Sambuc                      (bc_v8i32 (loadv4i64 addr:$src2)), (i8 imm:$imm))),
2635f4a2713aSLionel Sambuc            (VSHUFPSYrmi VR256:$src1, addr:$src2, imm:$imm)>;
2636f4a2713aSLionel Sambuc
2637f4a2713aSLionel Sambuc  def : Pat<(v4i64 (X86Shufp VR256:$src1, VR256:$src2, (i8 imm:$imm))),
2638f4a2713aSLionel Sambuc            (VSHUFPDYrri VR256:$src1, VR256:$src2, imm:$imm)>;
2639f4a2713aSLionel Sambuc  def : Pat<(v4i64 (X86Shufp VR256:$src1,
2640f4a2713aSLionel Sambuc                              (loadv4i64 addr:$src2), (i8 imm:$imm))),
2641f4a2713aSLionel Sambuc            (VSHUFPDYrmi VR256:$src1, addr:$src2, imm:$imm)>;
2642f4a2713aSLionel Sambuc}
2643f4a2713aSLionel Sambuc
// Non-VEX equivalents: v4i32 -> SHUFPS under UseSSE1, using the memop fragment
// for the folded load.
2644f4a2713aSLionel Sambuclet Predicates = [UseSSE1] in {
2645f4a2713aSLionel Sambuc  def : Pat<(v4i32 (X86Shufp VR128:$src1,
2646f4a2713aSLionel Sambuc                       (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))),
2647f4a2713aSLionel Sambuc            (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
2648f4a2713aSLionel Sambuc  def : Pat<(v4i32 (X86Shufp VR128:$src1, VR128:$src2, (i8 imm:$imm))),
2649f4a2713aSLionel Sambuc            (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;
2650f4a2713aSLionel Sambuc}
2651f4a2713aSLionel Sambuc
// v2i64 -> SHUFPD under UseSSE2.
2652f4a2713aSLionel Sambuclet Predicates = [UseSSE2] in {
2653f4a2713aSLionel Sambuc  // Generic SHUFPD patterns
2654f4a2713aSLionel Sambuc  def : Pat<(v2i64 (X86Shufp VR128:$src1,
2655f4a2713aSLionel Sambuc                       (memopv2i64 addr:$src2), (i8 imm:$imm))),
2656f4a2713aSLionel Sambuc            (SHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>;
2657f4a2713aSLionel Sambuc  def : Pat<(v2i64 (X86Shufp VR128:$src1, VR128:$src2, (i8 imm:$imm))),
2658f4a2713aSLionel Sambuc            (SHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>;
2659f4a2713aSLionel Sambuc}
2660f4a2713aSLionel Sambuc
2661f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===//
2662*0a6a1f1dSLionel Sambuc// SSE 1 & 2 - Unpack FP Instructions
2663f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===//
2664f4a2713aSLionel Sambuc
2665*0a6a1f1dSLionel Sambuc/// sse12_unpack_interleave - sse 1 & 2 fp unpack and interleave
/// Emits two records for opcode `opc`, both selecting `OpNode` with result
/// type `vt`:
///   rr - both sources are registers of class RC (MRMSrcReg form).
///   rm - $src2 is a memory operand read through mem_frag (MRMSrcMem form).
2666f4a2713aSLionel Sambucmulticlass sse12_unpack_interleave<bits<8> opc, SDNode OpNode, ValueType vt,
2667f4a2713aSLionel Sambuc                                   PatFrag mem_frag, RegisterClass RC,
2668f4a2713aSLionel Sambuc                                   X86MemOperand x86memop, string asm,
2669f4a2713aSLionel Sambuc                                   Domain d> {
2670f4a2713aSLionel Sambuc    def rr : PI<opc, MRMSrcReg,
2671f4a2713aSLionel Sambuc                (outs RC:$dst), (ins RC:$src1, RC:$src2),
2672f4a2713aSLionel Sambuc                asm, [(set RC:$dst,
2673f4a2713aSLionel Sambuc                           (vt (OpNode RC:$src1, RC:$src2)))],
2674*0a6a1f1dSLionel Sambuc                           IIC_SSE_UNPCK, d>, Sched<[WriteFShuffle]>;
    // Folded-load form: scheduled with WriteFShuffleLd plus ReadAfterLd.
2675f4a2713aSLionel Sambuc    def rm : PI<opc, MRMSrcMem,
2676f4a2713aSLionel Sambuc                (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
2677f4a2713aSLionel Sambuc                asm, [(set RC:$dst,
2678f4a2713aSLionel Sambuc                           (vt (OpNode RC:$src1,
2679f4a2713aSLionel Sambuc                                       (mem_frag addr:$src2))))],
2680f4a2713aSLionel Sambuc                                       IIC_SSE_UNPCK, d>,
2681*0a6a1f1dSLionel Sambuc             Sched<[WriteFShuffleLd, ReadAfterLd]>;
2682f4a2713aSLionel Sambuc}
2683f4a2713aSLionel Sambuc
// 128-bit VEX-encoded unpacks. Opcode 0x15 pairs with X86Unpckh and 0x14 with
// X86Unpckl; the PS/PD variants differ in value type, load fragment and domain.
2684f4a2713aSLionel Sambucdefm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, loadv4f32,
2685f4a2713aSLionel Sambuc      VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2686*0a6a1f1dSLionel Sambuc                     SSEPackedSingle>, PS, VEX_4V;
2687f4a2713aSLionel Sambucdefm VUNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, loadv2f64,
2688f4a2713aSLionel Sambuc      VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2689*0a6a1f1dSLionel Sambuc                     SSEPackedDouble>, PD, VEX_4V;
2690f4a2713aSLionel Sambucdefm VUNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, loadv4f32,
2691f4a2713aSLionel Sambuc      VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2692*0a6a1f1dSLionel Sambuc                     SSEPackedSingle>, PS, VEX_4V;
2693f4a2713aSLionel Sambucdefm VUNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, loadv2f64,
2694f4a2713aSLionel Sambuc      VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2695*0a6a1f1dSLionel Sambuc                     SSEPackedDouble>, PD, VEX_4V;
2696f4a2713aSLionel Sambuc
// 256-bit VEX-encoded unpacks: same opcodes on VR256/f256mem, with VEX_L added.
2697f4a2713aSLionel Sambucdefm VUNPCKHPSY: sse12_unpack_interleave<0x15, X86Unpckh, v8f32, loadv8f32,
2698f4a2713aSLionel Sambuc      VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2699*0a6a1f1dSLionel Sambuc                     SSEPackedSingle>, PS, VEX_4V, VEX_L;
2700f4a2713aSLionel Sambucdefm VUNPCKHPDY: sse12_unpack_interleave<0x15, X86Unpckh, v4f64, loadv4f64,
2701f4a2713aSLionel Sambuc      VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2702*0a6a1f1dSLionel Sambuc                     SSEPackedDouble>, PD, VEX_4V, VEX_L;
2703f4a2713aSLionel Sambucdefm VUNPCKLPSY: sse12_unpack_interleave<0x14, X86Unpckl, v8f32, loadv8f32,
2704f4a2713aSLionel Sambuc      VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2705*0a6a1f1dSLionel Sambuc                     SSEPackedSingle>, PS, VEX_4V, VEX_L;
2706f4a2713aSLionel Sambucdefm VUNPCKLPDY: sse12_unpack_interleave<0x14, X86Unpckl, v4f64, loadv4f64,
2707f4a2713aSLionel Sambuc      VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2708*0a6a1f1dSLionel Sambuc                     SSEPackedDouble>, PD, VEX_4V, VEX_L;
2709f4a2713aSLionel Sambuc
// Non-VEX unpacks: $src1 is tied to $dst, so the asm strings are two-operand
// and the memory form uses the memop* fragments.
2710f4a2713aSLionel Sambuclet Constraints = "$src1 = $dst" in {
2711f4a2713aSLionel Sambuc  defm UNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memopv4f32,
2712f4a2713aSLionel Sambuc        VR128, f128mem, "unpckhps\t{$src2, $dst|$dst, $src2}",
2713*0a6a1f1dSLionel Sambuc                       SSEPackedSingle>, PS;
2714f4a2713aSLionel Sambuc  defm UNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, memopv2f64,
2715f4a2713aSLionel Sambuc        VR128, f128mem, "unpckhpd\t{$src2, $dst|$dst, $src2}",
2716*0a6a1f1dSLionel Sambuc                       SSEPackedDouble>, PD;
2717f4a2713aSLionel Sambuc  defm UNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, memopv4f32,
2718f4a2713aSLionel Sambuc        VR128, f128mem, "unpcklps\t{$src2, $dst|$dst, $src2}",
2719*0a6a1f1dSLionel Sambuc                       SSEPackedSingle>, PS;
2720f4a2713aSLionel Sambuc  defm UNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, memopv2f64,
2721f4a2713aSLionel Sambuc        VR128, f128mem, "unpcklpd\t{$src2, $dst|$dst, $src2}",
2722*0a6a1f1dSLionel Sambuc                       SSEPackedDouble>, PD;
2723f4a2713aSLionel Sambuc} // Constraints = "$src1 = $dst"
2724f4a2713aSLionel Sambuc
// With HasAVX1Only, 256-bit integer-typed unpack nodes are selected to the FP
// unpack instructions: v8i32 -> VUNPCK{L,H}PSY and v4i64 -> VUNPCK{L,H}PDY.
2725f4a2713aSLionel Sambuclet Predicates = [HasAVX1Only] in {
2726f4a2713aSLionel Sambuc  def : Pat<(v8i32 (X86Unpckl VR256:$src1, (bc_v8i32 (loadv4i64 addr:$src2)))),
2727f4a2713aSLionel Sambuc            (VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
2728f4a2713aSLionel Sambuc  def : Pat<(v8i32 (X86Unpckl VR256:$src1, VR256:$src2)),
2729f4a2713aSLionel Sambuc            (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
2730f4a2713aSLionel Sambuc  def : Pat<(v8i32 (X86Unpckh VR256:$src1, (bc_v8i32 (loadv4i64 addr:$src2)))),
2731f4a2713aSLionel Sambuc            (VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
2732f4a2713aSLionel Sambuc  def : Pat<(v8i32 (X86Unpckh VR256:$src1, VR256:$src2)),
2733f4a2713aSLionel Sambuc            (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
2734f4a2713aSLionel Sambuc
2735f4a2713aSLionel Sambuc  def : Pat<(v4i64 (X86Unpckl VR256:$src1, (loadv4i64 addr:$src2))),
2736f4a2713aSLionel Sambuc            (VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
2737f4a2713aSLionel Sambuc  def : Pat<(v4i64 (X86Unpckl VR256:$src1, VR256:$src2)),
2738f4a2713aSLionel Sambuc            (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
2739f4a2713aSLionel Sambuc  def : Pat<(v4i64 (X86Unpckh VR256:$src1, (loadv4i64 addr:$src2))),
2740f4a2713aSLionel Sambuc            (VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
2741f4a2713aSLionel Sambuc  def : Pat<(v4i64 (X86Unpckh VR256:$src1, VR256:$src2)),
2742f4a2713aSLionel Sambuc            (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
2743f4a2713aSLionel Sambuc}
2744f4a2713aSLionel Sambuc
// A register-only v2f64 X86Movddup is selected to VUNPCKLPD with the same
// register used for both sources.
2745f4a2713aSLionel Sambuclet Predicates = [HasAVX] in {
2746f4a2713aSLionel Sambuc  // FIXME: Instead of X86Movddup, there should be a X86Unpckl here, the
2747f4a2713aSLionel Sambuc  // problem is during lowering, where it's not possible to recognize the load
2748f4a2713aSLionel Sambuc  // fold because it has two uses through a bitcast. One use disappears at isel
2749f4a2713aSLionel Sambuc  // time and the fold opportunity reappears.
2750f4a2713aSLionel Sambuc  def : Pat<(v2f64 (X86Movddup VR128:$src)),
2751f4a2713aSLionel Sambuc            (VUNPCKLPDrr VR128:$src, VR128:$src)>;
2752f4a2713aSLionel Sambuc}
2753f4a2713aSLionel Sambuc
// Same selection for the non-VEX encoding, under UseSSE2.
2754f4a2713aSLionel Sambuclet Predicates = [UseSSE2] in {
2755f4a2713aSLionel Sambuc  // FIXME: Instead of X86Movddup, there should be a X86Unpckl here, the
2756f4a2713aSLionel Sambuc  // problem is during lowering, where it's not possible to recognize the load
2757f4a2713aSLionel Sambuc  // fold because it has two uses through a bitcast. One use disappears at isel
2758f4a2713aSLionel Sambuc  // time and the fold opportunity reappears.
2759f4a2713aSLionel Sambuc  def : Pat<(v2f64 (X86Movddup VR128:$src)),
2760f4a2713aSLionel Sambuc            (UNPCKLPDrr VR128:$src, VR128:$src)>;
2761f4a2713aSLionel Sambuc}
2762f4a2713aSLionel Sambuc
2763f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===//
2764f4a2713aSLionel Sambuc// SSE 1 & 2 - Extract Floating-Point Sign mask
2765f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===//
2766f4a2713aSLionel Sambuc
2767f4a2713aSLionel Sambuc/// sse12_extr_sign_mask - sse 1 & 2 extract floating-point sign mask
/// Emits a single register-to-register record `rr` (opcode 0x50) that applies
/// the intrinsic `Int` to a vector register of class RC and writes the result
/// to a general-purpose register (GR32orGR64). The operand list is appended to
/// `asm` here, so callers pass only the mnemonic.
2768f4a2713aSLionel Sambucmulticlass sse12_extr_sign_mask<RegisterClass RC, Intrinsic Int, string asm,
2769f4a2713aSLionel Sambuc                                Domain d> {
2770f4a2713aSLionel Sambuc  def rr : PI<0x50, MRMSrcReg, (outs GR32orGR64:$dst), (ins RC:$src),
2771f4a2713aSLionel Sambuc              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
2772f4a2713aSLionel Sambuc              [(set GR32orGR64:$dst, (Int RC:$src))], IIC_SSE_MOVMSK, d>,
2773f4a2713aSLionel Sambuc              Sched<[WriteVecLogic]>;
2774f4a2713aSLionel Sambuc}
2775f4a2713aSLionel Sambuc
// VEX-encoded MOVMSK instructions (128-bit and, with VEX_L, 256-bit), plus the
// patterns that select scalar X86fgetsign to them.
2776f4a2713aSLionel Sambuclet Predicates = [HasAVX] in {
2777f4a2713aSLionel Sambuc  defm VMOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps,
2778*0a6a1f1dSLionel Sambuc                                        "movmskps", SSEPackedSingle>, PS, VEX;
2779f4a2713aSLionel Sambuc  defm VMOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd,
2780*0a6a1f1dSLionel Sambuc                                        "movmskpd", SSEPackedDouble>, PD, VEX;
2781f4a2713aSLionel Sambuc  defm VMOVMSKPSY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_ps_256,
2782*0a6a1f1dSLionel Sambuc                                        "movmskps", SSEPackedSingle>, PS,
2783f4a2713aSLionel Sambuc                                        VEX, VEX_L;
2784f4a2713aSLionel Sambuc  defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_pd_256,
2785*0a6a1f1dSLionel Sambuc                                        "movmskpd", SSEPackedDouble>, PD,
2786*0a6a1f1dSLionel Sambuc                                        VEX, VEX_L;
2787f4a2713aSLionel Sambuc
  // X86fgetsign on a scalar FR32/FR64 value: copy the scalar into VR128 and
  // use the 128-bit MOVMSK. For an i64 result the 32-bit output is wrapped
  // with SUBREG_TO_REG (i64 0), ..., sub_32bit.
2788f4a2713aSLionel Sambuc  def : Pat<(i32 (X86fgetsign FR32:$src)),
2789f4a2713aSLionel Sambuc            (VMOVMSKPSrr (COPY_TO_REGCLASS FR32:$src, VR128))>;
2790f4a2713aSLionel Sambuc  def : Pat<(i64 (X86fgetsign FR32:$src)),
2791f4a2713aSLionel Sambuc            (SUBREG_TO_REG (i64 0),
2792f4a2713aSLionel Sambuc             (VMOVMSKPSrr (COPY_TO_REGCLASS FR32:$src, VR128)), sub_32bit)>;
2793f4a2713aSLionel Sambuc  def : Pat<(i32 (X86fgetsign FR64:$src)),
2794f4a2713aSLionel Sambuc            (VMOVMSKPDrr (COPY_TO_REGCLASS FR64:$src, VR128))>;
2795f4a2713aSLionel Sambuc  def : Pat<(i64 (X86fgetsign FR64:$src)),
2796f4a2713aSLionel Sambuc            (SUBREG_TO_REG (i64 0),
2797f4a2713aSLionel Sambuc             (VMOVMSKPDrr (COPY_TO_REGCLASS FR64:$src, VR128)), sub_32bit)>;
2798f4a2713aSLionel Sambuc}
2799f4a2713aSLionel Sambuc
// Non-VEX MOVMSK instructions.
2800f4a2713aSLionel Sambucdefm MOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, "movmskps",
2801*0a6a1f1dSLionel Sambuc                                     SSEPackedSingle>, PS;
2802f4a2713aSLionel Sambucdefm MOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, "movmskpd",
2803*0a6a1f1dSLionel Sambuc                                     SSEPackedDouble>, PD;
2804f4a2713aSLionel Sambuc
// Non-VEX X86fgetsign patterns, mirroring the HasAVX ones above: the FR32
// forms require UseSSE1 and the FR64 forms require UseSSE2.
2805f4a2713aSLionel Sambucdef : Pat<(i32 (X86fgetsign FR32:$src)),
2806f4a2713aSLionel Sambuc          (MOVMSKPSrr (COPY_TO_REGCLASS FR32:$src, VR128))>,
2807f4a2713aSLionel Sambuc      Requires<[UseSSE1]>;
2808f4a2713aSLionel Sambucdef : Pat<(i64 (X86fgetsign FR32:$src)),
2809f4a2713aSLionel Sambuc          (SUBREG_TO_REG (i64 0),
2810f4a2713aSLionel Sambuc           (MOVMSKPSrr (COPY_TO_REGCLASS FR32:$src, VR128)), sub_32bit)>,
2811f4a2713aSLionel Sambuc      Requires<[UseSSE1]>;
2812f4a2713aSLionel Sambucdef : Pat<(i32 (X86fgetsign FR64:$src)),
2813f4a2713aSLionel Sambuc          (MOVMSKPDrr (COPY_TO_REGCLASS FR64:$src, VR128))>,
2814f4a2713aSLionel Sambuc      Requires<[UseSSE2]>;
2815f4a2713aSLionel Sambucdef : Pat<(i64 (X86fgetsign FR64:$src)),
2816f4a2713aSLionel Sambuc          (SUBREG_TO_REG (i64 0),
2817f4a2713aSLionel Sambuc           (MOVMSKPDrr (COPY_TO_REGCLASS FR64:$src, VR128)), sub_32bit)>,
2818f4a2713aSLionel Sambuc      Requires<[UseSSE2]>;
2819f4a2713aSLionel Sambuc
2820f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===//
2821f4a2713aSLionel Sambuc// SSE2 - Packed Integer Logical Instructions
2822f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===//
2823f4a2713aSLionel Sambuc
2824f4a2713aSLionel Sambuclet ExeDomain = SSEPackedInt in { // SSE integer instructions
2825f4a2713aSLionel Sambuc
2826f4a2713aSLionel Sambuc/// PDI_binop_rm - Simple SSE2 binary operator.
/// Emits an `rr` (register, register) and an `rm` (register, memory) record
/// for opcode `opc`, both selecting `OpNode` with result type `OpVT`.
///   Is2Addr      - selects the two-operand asm string when set, otherwise the
///                  three-operand string that also prints $src1.
///   IsCommutable - applied to the `rr` record only.
///   itins        - supplies the itineraries (rr / rm) and the scheduling
///                  class (Sched for rr, Sched.Folded for rm).
2827f4a2713aSLionel Sambucmulticlass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
2828f4a2713aSLionel Sambuc                        ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
2829f4a2713aSLionel Sambuc                        X86MemOperand x86memop, OpndItins itins,
2830f4a2713aSLionel Sambuc                        bit IsCommutable, bit Is2Addr> {
2831f4a2713aSLionel Sambuc  let isCommutable = IsCommutable in
2832f4a2713aSLionel Sambuc  def rr : PDI<opc, MRMSrcReg, (outs RC:$dst),
2833f4a2713aSLionel Sambuc       (ins RC:$src1, RC:$src2),
2834f4a2713aSLionel Sambuc       !if(Is2Addr,
2835f4a2713aSLionel Sambuc           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
2836f4a2713aSLionel Sambuc           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
2837f4a2713aSLionel Sambuc       [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>,
2838f4a2713aSLionel Sambuc       Sched<[itins.Sched]>;
  // The loaded value is wrapped in a bitconvert before being fed to OpNode.
2839f4a2713aSLionel Sambuc  def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
2840f4a2713aSLionel Sambuc       (ins RC:$src1, x86memop:$src2),
2841f4a2713aSLionel Sambuc       !if(Is2Addr,
2842f4a2713aSLionel Sambuc           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
2843f4a2713aSLionel Sambuc           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
2844f4a2713aSLionel Sambuc       [(set RC:$dst, (OpVT (OpNode RC:$src1,
2845f4a2713aSLionel Sambuc                                     (bitconvert (memop_frag addr:$src2)))))],
2846f4a2713aSLionel Sambuc                                     itins.rm>,
2847f4a2713aSLionel Sambuc       Sched<[itins.Sched.Folded, ReadAfterLd]>;
2848f4a2713aSLionel Sambuc}
2849f4a2713aSLionel Sambuc} // ExeDomain = SSEPackedInt
2850f4a2713aSLionel Sambuc
/// PDI_binop_all - Instantiates PDI_binop_rm three times for one operator:
///   V#NAME   - 128-bit VEX_4V form (HasAVX, NoVLX), three-operand asm.
///   NAME     - 128-bit form with $src1 tied to $dst, two-operand asm.
///   V#NAME#Y - 256-bit VEX_4V + VEX_L form (HasAVX2, NoVLX), three-operand asm.
/// The VEX forms prefix the mnemonic with "v" and use load* fragments; the
/// tied form uses memopv2i64. IsCommutable defaults to 0.
2851f4a2713aSLionel Sambucmulticlass PDI_binop_all<bits<8> opc, string OpcodeStr, SDNode Opcode,
2852f4a2713aSLionel Sambuc                         ValueType OpVT128, ValueType OpVT256,
2853f4a2713aSLionel Sambuc                         OpndItins itins, bit IsCommutable = 0> {
2854*0a6a1f1dSLionel Sambuclet Predicates = [HasAVX, NoVLX] in
2855f4a2713aSLionel Sambuc  defm V#NAME : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode, OpVT128,
2856f4a2713aSLionel Sambuc                    VR128, loadv2i64, i128mem, itins, IsCommutable, 0>, VEX_4V;
2857f4a2713aSLionel Sambuc
2858f4a2713aSLionel Sambuclet Constraints = "$src1 = $dst" in
2859f4a2713aSLionel Sambuc  defm NAME : PDI_binop_rm<opc, OpcodeStr, Opcode, OpVT128, VR128,
2860f4a2713aSLionel Sambuc                           memopv2i64, i128mem, itins, IsCommutable, 1>;
2861f4a2713aSLionel Sambuc
2862*0a6a1f1dSLionel Sambuclet Predicates = [HasAVX2, NoVLX] in
2863f4a2713aSLionel Sambuc  defm V#NAME#Y : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode,
2864f4a2713aSLionel Sambuc                               OpVT256, VR256, loadv4i64, i256mem, itins,
2865f4a2713aSLionel Sambuc                               IsCommutable, 0>, VEX_4V, VEX_L;
2866f4a2713aSLionel Sambuc}
2867f4a2713aSLionel Sambuc
2868f4a2713aSLionel Sambuc// These are ordered here for pattern ordering requirements with the fp versions
2869f4a2713aSLionel Sambuc
// Packed-integer bitwise operators on v2i64 / v4i64. and/or/xor are passed
// IsCommutable = 1; pandn (X86andnp) is passed IsCommutable = 0.
2870*0a6a1f1dSLionel Sambucdefm PAND  : PDI_binop_all<0xDB, "pand", and, v2i64, v4i64,
2871*0a6a1f1dSLionel Sambuc                           SSE_VEC_BIT_ITINS_P, 1>;
2872*0a6a1f1dSLionel Sambucdefm POR   : PDI_binop_all<0xEB, "por", or, v2i64, v4i64,
2873*0a6a1f1dSLionel Sambuc                           SSE_VEC_BIT_ITINS_P, 1>;
2874*0a6a1f1dSLionel Sambucdefm PXOR  : PDI_binop_all<0xEF, "pxor", xor, v2i64, v4i64,
2875*0a6a1f1dSLionel Sambuc                           SSE_VEC_BIT_ITINS_P, 1>;
2876f4a2713aSLionel Sambucdefm PANDN : PDI_binop_all<0xDF, "pandn", X86andnp, v2i64, v4i64,
2877*0a6a1f1dSLionel Sambuc                           SSE_VEC_BIT_ITINS_P, 0>;
2878f4a2713aSLionel Sambuc
2879f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===//
2880f4a2713aSLionel Sambuc// SSE 1 & 2 - Logical Instructions
2881f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===//
2882f4a2713aSLionel Sambuc
2883f4a2713aSLionel Sambuc/// sse12_fp_alias_pack_logical - SSE 1 & 2 aliased packed FP logical ops
2884f4a2713aSLionel Sambuc///
/// Instantiates sse12_fp_packed on the scalar register classes FR32 / FR64
/// (value types f32 / f64) with the packed "ps" / "pd" mnemonics and 128-bit
/// memory operands. Produces V#NAME#PS and V#NAME#PD (VEX_4V) plus PS and PD
/// with $src1 tied to $dst.
/// NOTE(review): the trailing `0` on the VEX forms is presumably the Is2Addr
/// flag of sse12_fp_packed, which is defined outside this section - confirm.
2885f4a2713aSLionel Sambucmulticlass sse12_fp_alias_pack_logical<bits<8> opc, string OpcodeStr,
2886f4a2713aSLionel Sambuc                                       SDNode OpNode, OpndItins itins> {
2887f4a2713aSLionel Sambuc  defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
2888f4a2713aSLionel Sambuc              FR32, f32, f128mem, memopfsf32, SSEPackedSingle, itins, 0>,
2889*0a6a1f1dSLionel Sambuc              PS, VEX_4V;
2890f4a2713aSLionel Sambuc
2891f4a2713aSLionel Sambuc  defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
2892f4a2713aSLionel Sambuc        FR64, f64, f128mem, memopfsf64, SSEPackedDouble, itins, 0>,
2893*0a6a1f1dSLionel Sambuc        PD, VEX_4V;
2894f4a2713aSLionel Sambuc
2895f4a2713aSLionel Sambuc  let Constraints = "$src1 = $dst" in {
2896f4a2713aSLionel Sambuc    defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, FR32,
2897f4a2713aSLionel Sambuc                f32, f128mem, memopfsf32, SSEPackedSingle, itins>,
2898*0a6a1f1dSLionel Sambuc                PS;
2899f4a2713aSLionel Sambuc
2900f4a2713aSLionel Sambuc    defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, FR64,
2901f4a2713aSLionel Sambuc                f64, f128mem, memopfsf64, SSEPackedDouble, itins>,
2902*0a6a1f1dSLionel Sambuc                PD;
2903f4a2713aSLionel Sambuc  }
2904f4a2713aSLionel Sambuc}
2905f4a2713aSLionel Sambuc
2906f4a2713aSLionel Sambuc// Alias bitwise logical operations using SSE logical ops on packed FP values.
// All of these records are marked isCodeGenOnly. They reuse opcodes
// 0x54 - 0x57, the same opcodes given to the AND / ANDN / OR / XOR packed
// logical definitions later in this file.
2907f4a2713aSLionel Sambuclet isCodeGenOnly = 1 in {
2908f4a2713aSLionel Sambuc  defm FsAND  : sse12_fp_alias_pack_logical<0x54, "and", X86fand,
2909f4a2713aSLionel Sambuc                SSE_BIT_ITINS_P>;
2910f4a2713aSLionel Sambuc  defm FsOR   : sse12_fp_alias_pack_logical<0x56, "or", X86for,
2911f4a2713aSLionel Sambuc                SSE_BIT_ITINS_P>;
2912f4a2713aSLionel Sambuc  defm FsXOR  : sse12_fp_alias_pack_logical<0x57, "xor", X86fxor,
2913f4a2713aSLionel Sambuc                SSE_BIT_ITINS_P>;
2914f4a2713aSLionel Sambuc
  // andn is explicitly marked non-commutable.
2915f4a2713aSLionel Sambuc  let isCommutable = 0 in
2916f4a2713aSLionel Sambuc    defm FsANDN : sse12_fp_alias_pack_logical<0x55, "andn", X86fandn,
2917f4a2713aSLionel Sambuc                  SSE_BIT_ITINS_P>;
2918f4a2713aSLionel Sambuc}
2919f4a2713aSLionel Sambuc
2920f4a2713aSLionel Sambuc/// sse12_fp_packed_logical - SSE 1 & 2 packed FP logical ops
2921f4a2713aSLionel Sambuc///
/// Instantiates sse12_fp_packed_logical_rm for the "ps" and "pd" flavours of
/// one logical operator. Each instantiation passes two pattern lists: the
/// first uses two register sources, the second a register and a loaded value.
/// All patterns are written on 64-bit-element integer vectors (v2i64 / v4i64),
/// with bc_v2i64 / bc_v4i64 bitcasts on FP-typed register operands.
///   V#NAME#PSY, V#NAME#PDY - 256-bit VEX forms (HasAVX, NoVLX).
///   V#NAME#PS,  V#NAME#PD  - 128-bit VEX forms (HasAVX, NoVLX).
///   PS, PD                 - 128-bit forms with $src1 tied to $dst.
2922f4a2713aSLionel Sambucmulticlass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
2923f4a2713aSLionel Sambuc                                   SDNode OpNode> {
2924*0a6a1f1dSLionel Sambuc  let Predicates = [HasAVX, NoVLX] in {
2925f4a2713aSLionel Sambuc  defm V#NAME#PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle,
2926f4a2713aSLionel Sambuc        !strconcat(OpcodeStr, "ps"), f256mem,
2927f4a2713aSLionel Sambuc        [(set VR256:$dst, (v4i64 (OpNode VR256:$src1, VR256:$src2)))],
2928f4a2713aSLionel Sambuc        [(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)),
2929*0a6a1f1dSLionel Sambuc                           (loadv4i64 addr:$src2)))], 0>, PS, VEX_4V, VEX_L;
2930f4a2713aSLionel Sambuc
2931f4a2713aSLionel Sambuc  defm V#NAME#PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble,
2932f4a2713aSLionel Sambuc        !strconcat(OpcodeStr, "pd"), f256mem,
2933f4a2713aSLionel Sambuc        [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
2934f4a2713aSLionel Sambuc                                  (bc_v4i64 (v4f64 VR256:$src2))))],
2935f4a2713aSLionel Sambuc        [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
2936f4a2713aSLionel Sambuc                                  (loadv4i64 addr:$src2)))], 0>,
2937*0a6a1f1dSLionel Sambuc                                  PD, VEX_4V, VEX_L;
2938f4a2713aSLionel Sambuc
2939f4a2713aSLionel Sambuc  // In AVX no need to add a pattern for 128-bit logical rr ps, because they
2940f4a2713aSLionel Sambuc  // are all promoted to v2i64, and the patterns are covered by the int
2941f4a2713aSLionel Sambuc  // version. This is needed in SSE only, because v2i64 isn't supported on
2942f4a2713aSLionel Sambuc  // SSE1, but only on SSE2.
  // Accordingly, the register-register pattern list below is empty ([]).
2943f4a2713aSLionel Sambuc  defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
2944f4a2713aSLionel Sambuc       !strconcat(OpcodeStr, "ps"), f128mem, [],
2945f4a2713aSLionel Sambuc       [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
2946*0a6a1f1dSLionel Sambuc                                 (loadv2i64 addr:$src2)))], 0>, PS, VEX_4V;
2947f4a2713aSLionel Sambuc
2948f4a2713aSLionel Sambuc  defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
2949f4a2713aSLionel Sambuc       !strconcat(OpcodeStr, "pd"), f128mem,
2950f4a2713aSLionel Sambuc       [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
2951f4a2713aSLionel Sambuc                                 (bc_v2i64 (v2f64 VR128:$src2))))],
2952f4a2713aSLionel Sambuc       [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
2953f4a2713aSLionel Sambuc                                 (loadv2i64 addr:$src2)))], 0>,
2954*0a6a1f1dSLionel Sambuc                                                 PD, VEX_4V;
2955*0a6a1f1dSLionel Sambuc  }
2956f4a2713aSLionel Sambuc
  // Tied-operand forms: no predicate is attached here, and the folded load
  // uses memopv2i64 rather than loadv2i64.
2957f4a2713aSLionel Sambuc  let Constraints = "$src1 = $dst" in {
2958f4a2713aSLionel Sambuc    defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
2959f4a2713aSLionel Sambuc         !strconcat(OpcodeStr, "ps"), f128mem,
2960f4a2713aSLionel Sambuc         [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))],
2961f4a2713aSLionel Sambuc         [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
2962*0a6a1f1dSLionel Sambuc                                   (memopv2i64 addr:$src2)))]>, PS;
2963f4a2713aSLionel Sambuc
2964f4a2713aSLionel Sambuc    defm PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
2965f4a2713aSLionel Sambuc         !strconcat(OpcodeStr, "pd"), f128mem,
2966f4a2713aSLionel Sambuc         [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
2967f4a2713aSLionel Sambuc                                   (bc_v2i64 (v2f64 VR128:$src2))))],
2968f4a2713aSLionel Sambuc         [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
2969*0a6a1f1dSLionel Sambuc                                   (memopv2i64 addr:$src2)))]>, PD;
2970f4a2713aSLionel Sambuc  }
2971f4a2713aSLionel Sambuc}
2972f4a2713aSLionel Sambuc
// Packed FP logical instructions, built on the generic and / or / xor nodes
// and X86andnp. ANDN is explicitly marked non-commutable.
2973f4a2713aSLionel Sambucdefm AND  : sse12_fp_packed_logical<0x54, "and", and>;
2974f4a2713aSLionel Sambucdefm OR   : sse12_fp_packed_logical<0x56, "or", or>;
2975f4a2713aSLionel Sambucdefm XOR  : sse12_fp_packed_logical<0x57, "xor", xor>;
2976f4a2713aSLionel Sambuclet isCommutable = 0 in
2977f4a2713aSLionel Sambuc  defm ANDN : sse12_fp_packed_logical<0x55, "andn", X86andnp>;
2978f4a2713aSLionel Sambuc
2979*0a6a1f1dSLionel Sambuc// AVX1 requires type coercions in order to fold loads directly into logical
2980*0a6a1f1dSLionel Sambuc// operations.
// Each pattern matches a bc_v8f32 of a logical op whose second operand is a
// loadv4i64 and selects the 256-bit "ps" memory form of that op.
2981*0a6a1f1dSLionel Sambuclet Predicates = [HasAVX1Only] in {
2982*0a6a1f1dSLionel Sambuc  def : Pat<(bc_v8f32 (and VR256:$src1, (loadv4i64 addr:$src2))),
2983*0a6a1f1dSLionel Sambuc            (VANDPSYrm VR256:$src1, addr:$src2)>;
2984*0a6a1f1dSLionel Sambuc  def : Pat<(bc_v8f32 (or VR256:$src1, (loadv4i64 addr:$src2))),
2985*0a6a1f1dSLionel Sambuc            (VORPSYrm VR256:$src1, addr:$src2)>;
2986*0a6a1f1dSLionel Sambuc  def : Pat<(bc_v8f32 (xor VR256:$src1, (loadv4i64 addr:$src2))),
2987*0a6a1f1dSLionel Sambuc            (VXORPSYrm VR256:$src1, addr:$src2)>;
2988*0a6a1f1dSLionel Sambuc  def : Pat<(bc_v8f32 (X86andnp VR256:$src1, (loadv4i64 addr:$src2))),
2989*0a6a1f1dSLionel Sambuc            (VANDNPSYrm VR256:$src1, addr:$src2)>;
2990*0a6a1f1dSLionel Sambuc}
2991*0a6a1f1dSLionel Sambuc
2992f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===//
2993f4a2713aSLionel Sambuc// SSE 1 & 2 - Arithmetic Instructions
2994f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===//
2995f4a2713aSLionel Sambuc
2996f4a2713aSLionel Sambuc/// basic_sse12_fp_binop_xxx - SSE 1 & 2 binops come in both scalar and
2997f4a2713aSLionel Sambuc/// vector forms.
2998f4a2713aSLionel Sambuc///
2999f4a2713aSLionel Sambuc/// In addition, we also have a special variant of the scalar form here to
3000f4a2713aSLionel Sambuc/// represent the associated intrinsic operation.  This form is unlike the
3001f4a2713aSLionel Sambuc/// plain scalar form, in that it takes an entire vector (instead of a scalar)
3002f4a2713aSLionel Sambuc/// and leaves the top elements unmodified (therefore these cannot be commuted).
3003f4a2713aSLionel Sambuc///
3004f4a2713aSLionel Sambuc/// These three forms can each be reg+reg or reg+mem.
3005f4a2713aSLionel Sambuc///
3006f4a2713aSLionel Sambuc
3007f4a2713aSLionel Sambuc/// FIXME: once all 256-bit intrinsics are matched, cleanup and refactor those
3008f4a2713aSLionel Sambuc/// classes below
/// basic_sse12_fp_binop_p - packed forms of an FP binary operator. Instantiates
/// sse12_fp_packed six times:
///   V#NAME#PS / V#NAME#PD   - 128-bit VEX_4V forms (HasAVX, NoVLX).
///   V#NAME#PSY / V#NAME#PDY - 256-bit VEX_4V + VEX_L forms (HasAVX, NoVLX).
///   PS / PD                 - 128-bit forms with $src1 tied to $dst, using
///                             memop* fragments.
/// itins.s is passed to the single-precision forms and itins.d to the
/// double-precision forms.
3009f4a2713aSLionel Sambucmulticlass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr,
3010f4a2713aSLionel Sambuc                                  SDNode OpNode, SizeItins itins> {
3011*0a6a1f1dSLionel Sambuc  let Predicates = [HasAVX, NoVLX] in {
3012f4a2713aSLionel Sambuc  defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
3013f4a2713aSLionel Sambuc                               VR128, v4f32, f128mem, loadv4f32,
3014*0a6a1f1dSLionel Sambuc                               SSEPackedSingle, itins.s, 0>, PS, VEX_4V;
3015f4a2713aSLionel Sambuc  defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
3016f4a2713aSLionel Sambuc                               VR128, v2f64, f128mem, loadv2f64,
3017*0a6a1f1dSLionel Sambuc                               SSEPackedDouble, itins.d, 0>, PD, VEX_4V;
3018f4a2713aSLionel Sambuc
3019f4a2713aSLionel Sambuc  defm V#NAME#PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"),
3020f4a2713aSLionel Sambuc                        OpNode, VR256, v8f32, f256mem, loadv8f32,
3021*0a6a1f1dSLionel Sambuc                        SSEPackedSingle, itins.s, 0>, PS, VEX_4V, VEX_L;
3022f4a2713aSLionel Sambuc  defm V#NAME#PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"),
3023f4a2713aSLionel Sambuc                        OpNode, VR256, v4f64, f256mem, loadv4f64,
3024*0a6a1f1dSLionel Sambuc                        SSEPackedDouble, itins.d, 0>, PD, VEX_4V, VEX_L;
3025*0a6a1f1dSLionel Sambuc  }
3026f4a2713aSLionel Sambuc
3027f4a2713aSLionel Sambuc  let Constraints = "$src1 = $dst" in {
3028f4a2713aSLionel Sambuc    defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128,
3029f4a2713aSLionel Sambuc                              v4f32, f128mem, memopv4f32, SSEPackedSingle,
3030*0a6a1f1dSLionel Sambuc                              itins.s>, PS;
3031f4a2713aSLionel Sambuc    defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR128,
3032f4a2713aSLionel Sambuc                              v2f64, f128mem, memopv2f64, SSEPackedDouble,
3033*0a6a1f1dSLionel Sambuc                              itins.d>, PD;
3034f4a2713aSLionel Sambuc  }
3035f4a2713aSLionel Sambuc}
3036f4a2713aSLionel Sambuc
/// basic_sse12_fp_binop_s - Scalar single- and double-precision forms of an
/// FP binary operation, operating on the FR32/FR64 register classes.
///
///   opc       - 8-bit opcode passed unchanged to every form.
///   OpcodeStr - mnemonic stem; "ss" or "sd" is appended per form.
///   OpNode    - SDNode handed to sse12_fp_scalar for each form.
///   itins     - itineraries; itins.s is used by the SS forms and itins.d by
///               the SD forms.
multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  SizeItins itins> {
  // V-prefixed forms (VEX_4V, VEX_LIG). They pass a trailing 0 to
  // sse12_fp_scalar.
  // NOTE(review): the trailing 0 presumably disables the two-address form --
  // confirm against the sse12_fp_scalar definition.
  defm V#NAME#SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
                         OpNode, FR32, f32mem, itins.s, 0>, XS, VEX_4V, VEX_LIG;
  defm V#NAME#SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"),
                         OpNode, FR64, f64mem, itins.d, 0>, XD, VEX_4V, VEX_LIG;

  // Unprefixed forms: the destination is tied to the first source operand.
  let Constraints = "$src1 = $dst" in {
    defm SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
                              OpNode, FR32, f32mem, itins.s>, XS;
    defm SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"),
                              OpNode, FR64, f64mem, itins.d>, XD;
  }
}
3051f4a2713aSLionel Sambuc
/// basic_sse12_fp_binop_s_int - Scalar single- and double-precision forms of
/// an FP binary operation that operate on whole VR128 registers. Unlike
/// basic_sse12_fp_binop_s, this multiclass takes no SDNode.
///
///   opc       - 8-bit opcode passed unchanged to every form.
///   OpcodeStr - mnemonic stem; passed both bare and with "ss"/"sd" appended.
///   itins     - itineraries; itins.s is used by the SS forms and itins.d by
///               the SD forms.
///
/// NOTE(review): the string arguments ("", "_ss") and ("2", "_sd") are
/// presumably used by sse12_fp_scalar_int to build the matching intrinsic
/// name -- confirm against that multiclass.
multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
                                      SizeItins itins> {
  // V-prefixed forms (VEX_4V, VEX_LIG). They pass a trailing 0 to
  // sse12_fp_scalar_int.
  defm V#NAME#SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
                   !strconcat(OpcodeStr, "ss"), "", "_ss", ssmem, sse_load_f32,
                   itins.s, 0>, XS, VEX_4V, VEX_LIG;
  defm V#NAME#SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
                   !strconcat(OpcodeStr, "sd"), "2", "_sd", sdmem, sse_load_f64,
                   itins.d, 0>, XD, VEX_4V, VEX_LIG;

  // Unprefixed forms: the destination is tied to the first source operand.
  let Constraints = "$src1 = $dst" in {
    defm SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
                   !strconcat(OpcodeStr, "ss"), "", "_ss", ssmem, sse_load_f32,
                   itins.s>, XS;
    defm SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
                   !strconcat(OpcodeStr, "sd"), "2", "_sd", sdmem, sse_load_f64,
                   itins.d>, XD;
  }
}
3070f4a2713aSLionel Sambuc
// Binary Arithmetic instructions
//
// Each defm below instantiates the packed, scalar and scalar-intrinsic
// multiclasses with the same opcode and mnemonic stem. ADD (0x58) and MUL
// (0x59) are defined outside any isCommutable override.
defm ADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SSE_ALU_ITINS_P>,
           basic_sse12_fp_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S>,
           basic_sse12_fp_binop_s_int<0x58, "add", SSE_ALU_ITINS_S>;
defm MUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, SSE_MUL_ITINS_P>,
           basic_sse12_fp_binop_s<0x59, "mul", fmul, SSE_MUL_ITINS_S>,
           basic_sse12_fp_binop_s_int<0x59, "mul", SSE_MUL_ITINS_S>;
// SUB (0x5C), DIV (0x5E), MAX (0x5F) and MIN (0x5D) are explicitly marked
// non-commutable. MAX and MIN are selected from the X86fmax/X86fmin nodes.
let isCommutable = 0 in {
  defm SUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SSE_ALU_ITINS_P>,
             basic_sse12_fp_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S>,
             basic_sse12_fp_binop_s_int<0x5C, "sub", SSE_ALU_ITINS_S>;
  defm DIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, SSE_DIV_ITINS_P>,
             basic_sse12_fp_binop_s<0x5E, "div", fdiv, SSE_DIV_ITINS_S>,
             basic_sse12_fp_binop_s_int<0x5E, "div", SSE_DIV_ITINS_S>;
  defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SSE_ALU_ITINS_P>,
             basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S>,
             basic_sse12_fp_binop_s_int<0x5F, "max", SSE_ALU_ITINS_S>;
  defm MIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SSE_ALU_ITINS_P>,
             basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S>,
             basic_sse12_fp_binop_s_int<0x5D, "min", SSE_ALU_ITINS_S>;
}
3092f4a2713aSLionel Sambuc
// MAXC/MINC reuse the "max"/"min" mnemonics and the 0x5F/0x5D opcodes but are
// selected from the X86fmaxc/X86fminc nodes. They are isCodeGenOnly, are not
// wrapped in an isCommutable = 0 override, and have no scalar-intrinsic
// (basic_sse12_fp_binop_s_int) forms.
// NOTE(review): presumably these are the commutable variants of MAX/MIN --
// confirm against the X86fmaxc/X86fminc node definitions.
let isCodeGenOnly = 1 in {
  defm MAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P>,
             basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_S>;
  defm MINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SSE_ALU_ITINS_P>,
             basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SSE_ALU_ITINS_S>;
}
3099f4a2713aSLionel Sambuc
// Patterns used to select SSE scalar fp arithmetic instructions from
// a scalar fp operation followed by a blend.
//
// These patterns know, for example, how to select an ADDSS from a
// float add plus vector insert.
//
// The effect is that the backend no longer emits unnecessary vector
// insert instructions immediately after SSE scalar fp instructions
// like addss or mulss.
//
// For example, given the following code:
//   __m128 foo(__m128 A, __m128 B) {
//     A[0] += B[0];
//     return A;
//   }
//
// previously we generated:
//   addss %xmm0, %xmm1
//   movss %xmm1, %xmm0
//
// we now generate:
//   addss %xmm1, %xmm0
3122*0a6a1f1dSLionel Sambuc
// SSE1: fold a scalar f32 add/sub/mul/div on element 0 of $dst, re-inserted
// into $dst through X86Movss, into the corresponding *SSrr_Int instruction.
// FR32:$src is moved into VR128 with COPY_TO_REGCLASS because the selected
// instruction takes VR128 operands.
let Predicates = [UseSSE1] in {
  def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fadd
                      (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
                      FR32:$src))))),
            (ADDSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
  def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fsub
                      (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
                      FR32:$src))))),
            (SUBSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
  def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fmul
                      (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
                      FR32:$src))))),
            (MULSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
  def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fdiv
                      (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
                      FR32:$src))))),
            (DIVSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
}
3141*0a6a1f1dSLionel Sambuc
// SSE2: fold a scalar f64 add/sub/mul/div on element 0 of $dst, re-inserted
// into $dst through X86Movsd, into the corresponding *SDrr_Int instruction.
// FR64:$src is moved into VR128 with COPY_TO_REGCLASS because the selected
// instruction takes VR128 operands.
let Predicates = [UseSSE2] in {
  // SSE2 patterns to select scalar double-precision fp arithmetic instructions
  def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fadd
                      (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
                      FR64:$src))))),
            (ADDSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
  def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fsub
                      (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
                      FR64:$src))))),
            (SUBSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
  def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fmul
                      (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
                      FR64:$src))))),
            (MULSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
  def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fdiv
                      (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
                      FR64:$src))))),
            (DIVSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
}
3161*0a6a1f1dSLionel Sambuc
let Predicates = [UseSSE41] in {
  // If the subtarget has SSE4.1 but not AVX, the vector insert instruction is
  // lowered into a X86insertps or a X86Blendi rather than a X86Movss. When
  // selecting SSE scalar single-precision fp arithmetic instructions, make
  // sure that we correctly match them.

  // v4f32: scalar result inserted into $dst through X86insertps with an
  // immediate of 0.
  def : Pat<(v4f32 (X86insertps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
                  (fadd (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
                    FR32:$src))), (iPTR 0))),
            (ADDSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
  def : Pat<(v4f32 (X86insertps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
                  (fsub (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
                    FR32:$src))), (iPTR 0))),
            (SUBSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
  def : Pat<(v4f32 (X86insertps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
                  (fmul (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
                    FR32:$src))), (iPTR 0))),
            (MULSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
  def : Pat<(v4f32 (X86insertps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
                  (fdiv (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
                    FR32:$src))), (iPTR 0))),
            (DIVSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;

  // v4f32: scalar result blended into $dst through X86Blendi with mask 1
  // ($dst is the first blend operand).
  def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fadd
                      (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
                      FR32:$src))), (i8 1))),
            (ADDSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
  def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fsub
                      (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
                      FR32:$src))), (i8 1))),
            (SUBSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
  def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fmul
                      (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
                      FR32:$src))), (i8 1))),
            (MULSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
  def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fdiv
                      (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
                      FR32:$src))), (i8 1))),
            (DIVSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;

  // v2f64: scalar result blended into $dst through X86Blendi with mask 1
  // ($dst is the first blend operand).
  def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fadd
                      (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
                      FR64:$src))), (i8 1))),
            (ADDSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
  def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fsub
                      (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
                      FR64:$src))), (i8 1))),
            (SUBSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
  def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fmul
                      (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
                      FR64:$src))), (i8 1))),
            (MULSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
  def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fdiv
                      (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
                      FR64:$src))), (i8 1))),
            (DIVSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;

  // v2f64: same blend with the operands swapped, i.e. the scalar result is the
  // first blend operand, $dst the second, and the mask is 2.
  def : Pat<(v2f64 (X86Blendi (v2f64 (scalar_to_vector (fadd
                      (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
                      FR64:$src))), (v2f64 VR128:$dst), (i8 2))),
            (ADDSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
  def : Pat<(v2f64 (X86Blendi (v2f64 (scalar_to_vector (fsub
                      (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
                      FR64:$src))), (v2f64 VR128:$dst), (i8 2))),
            (SUBSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
  def : Pat<(v2f64 (X86Blendi (v2f64 (scalar_to_vector (fmul
                      (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
                      FR64:$src))), (v2f64 VR128:$dst), (i8 2))),
            (MULSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
  def : Pat<(v2f64 (X86Blendi (v2f64 (scalar_to_vector (fdiv
                      (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
                      FR64:$src))), (v2f64 VR128:$dst), (i8 2))),
            (DIVSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
}
3236*0a6a1f1dSLionel Sambuc
let Predicates = [HasAVX] in {
  // The following patterns select AVX Scalar single/double precision fp
  // arithmetic instructions.

  // v2f64: scalar result re-inserted into $dst through X86Movsd.
  def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fadd
                      (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
                      FR64:$src))))),
            (VADDSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
  def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fsub
                      (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
                      FR64:$src))))),
            (VSUBSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
  def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fmul
                      (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
                      FR64:$src))))),
            (VMULSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
  def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fdiv
                      (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
                      FR64:$src))))),
            (VDIVSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;

  // v4f32: scalar result inserted into $dst through X86insertps with an
  // immediate of 0.
  def : Pat<(v4f32 (X86insertps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
                 (fadd (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
                       FR32:$src))), (iPTR 0))),
            (VADDSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
  def : Pat<(v4f32 (X86insertps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
                 (fsub (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
                       FR32:$src))), (iPTR 0))),
            (VSUBSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
  def : Pat<(v4f32 (X86insertps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
                 (fmul (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
                       FR32:$src))), (iPTR 0))),
            (VMULSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
  def : Pat<(v4f32 (X86insertps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
                 (fdiv (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
                       FR32:$src))), (iPTR 0))),
            (VDIVSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;

  // v4f32: scalar result blended into $dst through X86Blendi with mask 1
  // ($dst is the first blend operand).
  def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fadd
                      (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
                      FR32:$src))), (i8 1))),
            (VADDSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
  def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fsub
                      (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
                      FR32:$src))), (i8 1))),
            (VSUBSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
  def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fmul
                      (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
                      FR32:$src))), (i8 1))),
            (VMULSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
  def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fdiv
                      (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
                      FR32:$src))), (i8 1))),
            (VDIVSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;

  // v2f64: scalar result blended into $dst through X86Blendi with mask 1
  // ($dst is the first blend operand).
  def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fadd
                      (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
                      FR64:$src))), (i8 1))),
            (VADDSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
  def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fsub
                      (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
                      FR64:$src))), (i8 1))),
            (VSUBSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
  def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fmul
                      (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
                      FR64:$src))), (i8 1))),
            (VMULSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
  def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fdiv
                      (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
                      FR64:$src))), (i8 1))),
            (VDIVSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;

  // v2f64: same blend with the operands swapped, i.e. the scalar result is the
  // first blend operand, $dst the second, and the mask is 2.
  def : Pat<(v2f64 (X86Blendi (v2f64 (scalar_to_vector (fadd
                      (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
                      FR64:$src))), (v2f64 VR128:$dst), (i8 2))),
            (VADDSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
  def : Pat<(v2f64 (X86Blendi (v2f64 (scalar_to_vector (fsub
                      (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
                      FR64:$src))), (v2f64 VR128:$dst), (i8 2))),
            (VSUBSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
  def : Pat<(v2f64 (X86Blendi (v2f64 (scalar_to_vector (fmul
                      (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
                      FR64:$src))), (v2f64 VR128:$dst), (i8 2))),
            (VMULSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
  def : Pat<(v2f64 (X86Blendi (v2f64 (scalar_to_vector (fdiv
                      (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
                      FR64:$src))), (v2f64 VR128:$dst), (i8 2))),
            (VDIVSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
}
3325*0a6a1f1dSLionel Sambuc
// Patterns used to select SSE scalar fp arithmetic instructions from
// a vector packed single/double fp operation followed by a vector insert.
//
// The effect is that the backend converts the packed fp instruction
// followed by a vector insert into a single SSE scalar fp instruction.
//
// For example, given the following code:
//   __m128 foo(__m128 A, __m128 B) {
//     __m128 C = A + B;
//     return (__m128) {C[0], A[1], A[2], A[3]};
//   }
//
// previously we generated:
//   addps %xmm0, %xmm1
//   movss %xmm1, %xmm0
//
// we now generate:
//   addss %xmm1, %xmm0
3344*0a6a1f1dSLionel Sambuc
// SSE1: fold a packed v4f32 add/sub/mul/div of $dst and $src whose result is
// merged back into $dst through X86Movss into the corresponding *SSrr_Int
// instruction. Both operands are already VR128, so no register-class copy is
// needed.
let Predicates = [UseSSE1] in {
  def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
                   (fadd (v4f32 VR128:$dst), (v4f32 VR128:$src)))),
            (ADDSSrr_Int v4f32:$dst, v4f32:$src)>;
  def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
                   (fsub (v4f32 VR128:$dst), (v4f32 VR128:$src)))),
            (SUBSSrr_Int v4f32:$dst, v4f32:$src)>;
  def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
                   (fmul (v4f32 VR128:$dst), (v4f32 VR128:$src)))),
            (MULSSrr_Int v4f32:$dst, v4f32:$src)>;
  def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
                   (fdiv (v4f32 VR128:$dst), (v4f32 VR128:$src)))),
            (DIVSSrr_Int v4f32:$dst, v4f32:$src)>;
}
3359*0a6a1f1dSLionel Sambuc
let Predicates = [UseSSE2] in {
  // SSE2 patterns to select scalar double-precision fp arithmetic instructions
  // from a packed double-precision fp instruction plus movsd.
  //
  // Each pattern matches a packed v2f64 operation on $dst and $src whose
  // result is merged back into $dst through X86Movsd, and selects the
  // corresponding *SDrr_Int instruction on the same two VR128 operands.

  def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst),
                   (fadd (v2f64 VR128:$dst), (v2f64 VR128:$src)))),
            (ADDSDrr_Int v2f64:$dst, v2f64:$src)>;
  def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst),
                   (fsub (v2f64 VR128:$dst), (v2f64 VR128:$src)))),
            (SUBSDrr_Int v2f64:$dst, v2f64:$src)>;
  def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst),
                   (fmul (v2f64 VR128:$dst), (v2f64 VR128:$src)))),
            (MULSDrr_Int v2f64:$dst, v2f64:$src)>;
  def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst),
                   (fdiv (v2f64 VR128:$dst), (v2f64 VR128:$src)))),
            (DIVSDrr_Int v2f64:$dst, v2f64:$src)>;
}
3377*0a6a1f1dSLionel Sambuc
let Predicates = [UseSSE41] in {
  // With SSE4.1 we may see these operations using X86Blendi rather than
  // X86Movs{s,d}.

  // v4f32: packed result blended into $dst with mask 1 ($dst is the first
  // blend operand).
  def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
                   (fadd (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))),
            (ADDSSrr_Int v4f32:$dst, v4f32:$src)>;
  def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
                   (fsub (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))),
            (SUBSSrr_Int v4f32:$dst, v4f32:$src)>;
  def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
                   (fmul (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))),
            (MULSSrr_Int v4f32:$dst, v4f32:$src)>;
  def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
                   (fdiv (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))),
            (DIVSSrr_Int v4f32:$dst, v4f32:$src)>;

  // v2f64: packed result blended into $dst with mask 1 ($dst is the first
  // blend operand).
  def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst),
                   (fadd (v2f64 VR128:$dst), (v2f64 VR128:$src)), (i8 1))),
            (ADDSDrr_Int v2f64:$dst, v2f64:$src)>;
  def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst),
                   (fsub (v2f64 VR128:$dst), (v2f64 VR128:$src)), (i8 1))),
            (SUBSDrr_Int v2f64:$dst, v2f64:$src)>;
  def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst),
                   (fmul (v2f64 VR128:$dst), (v2f64 VR128:$src)), (i8 1))),
            (MULSDrr_Int v2f64:$dst, v2f64:$src)>;
  def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst),
                   (fdiv (v2f64 VR128:$dst), (v2f64 VR128:$src)), (i8 1))),
            (DIVSDrr_Int v2f64:$dst, v2f64:$src)>;

  // v2f64: same blend with the operands swapped, i.e. the packed result is the
  // first blend operand, $dst the second, and the mask is 2.
  def : Pat<(v2f64 (X86Blendi (fadd (v2f64 VR128:$dst), (v2f64 VR128:$src)),
                   (v2f64 VR128:$dst), (i8 2))),
            (ADDSDrr_Int v2f64:$dst, v2f64:$src)>;
  def : Pat<(v2f64 (X86Blendi (fsub (v2f64 VR128:$dst), (v2f64 VR128:$src)),
                   (v2f64 VR128:$dst), (i8 2))),
            (SUBSDrr_Int v2f64:$dst, v2f64:$src)>;
  def : Pat<(v2f64 (X86Blendi (fmul (v2f64 VR128:$dst), (v2f64 VR128:$src)),
                   (v2f64 VR128:$dst), (i8 2))),
            (MULSDrr_Int v2f64:$dst, v2f64:$src)>;
  def : Pat<(v2f64 (X86Blendi (fdiv (v2f64 VR128:$dst), (v2f64 VR128:$src)),
                   (v2f64 VR128:$dst), (i8 2))),
            (DIVSDrr_Int v2f64:$dst, v2f64:$src)>;
}
3420*0a6a1f1dSLionel Sambuc
// AVX (VEX-encoded) selection patterns for scalar arithmetic expressed on
// whole vectors. Each pattern matches a packed fadd/fsub/fmul/fdiv of
// ($dst, $src) whose result is merged back with $dst through X86Movss,
// X86Movsd or X86Blendi, and selects the matching V{ADD,SUB,MUL,DIV}{SS,SD}
// "rr_Int" instruction with operands ($dst, $src).
3421*0a6a1f1dSLionel Sambuclet Predicates = [HasAVX] in {
3422*0a6a1f1dSLionel Sambuc  // The following patterns select AVX Scalar single/double precision fp
3423*0a6a1f1dSLionel Sambuc  // arithmetic instructions from a packed single precision fp instruction
3424*0a6a1f1dSLionel Sambuc  // plus movss/movsd.
3425*0a6a1f1dSLionel Sambuc
3426*0a6a1f1dSLionel Sambuc  def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
3427*0a6a1f1dSLionel Sambuc                   (fadd (v4f32 VR128:$dst), (v4f32 VR128:$src)))),
3428*0a6a1f1dSLionel Sambuc            (VADDSSrr_Int v4f32:$dst, v4f32:$src)>;
3429*0a6a1f1dSLionel Sambuc  def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
3430*0a6a1f1dSLionel Sambuc                   (fsub (v4f32 VR128:$dst), (v4f32 VR128:$src)))),
3431*0a6a1f1dSLionel Sambuc            (VSUBSSrr_Int v4f32:$dst, v4f32:$src)>;
3432*0a6a1f1dSLionel Sambuc  def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
3433*0a6a1f1dSLionel Sambuc                   (fmul (v4f32 VR128:$dst), (v4f32 VR128:$src)))),
3434*0a6a1f1dSLionel Sambuc            (VMULSSrr_Int v4f32:$dst, v4f32:$src)>;
3435*0a6a1f1dSLionel Sambuc  def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
3436*0a6a1f1dSLionel Sambuc                   (fdiv (v4f32 VR128:$dst), (v4f32 VR128:$src)))),
3437*0a6a1f1dSLionel Sambuc            (VDIVSSrr_Int v4f32:$dst, v4f32:$src)>;
3438*0a6a1f1dSLionel Sambuc  def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst),
3439*0a6a1f1dSLionel Sambuc                   (fadd (v2f64 VR128:$dst), (v2f64 VR128:$src)))),
3440*0a6a1f1dSLionel Sambuc            (VADDSDrr_Int v2f64:$dst, v2f64:$src)>;
3441*0a6a1f1dSLionel Sambuc  def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst),
3442*0a6a1f1dSLionel Sambuc                   (fsub (v2f64 VR128:$dst), (v2f64 VR128:$src)))),
3443*0a6a1f1dSLionel Sambuc            (VSUBSDrr_Int v2f64:$dst, v2f64:$src)>;
3444*0a6a1f1dSLionel Sambuc  def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst),
3445*0a6a1f1dSLionel Sambuc                   (fmul (v2f64 VR128:$dst), (v2f64 VR128:$src)))),
3446*0a6a1f1dSLionel Sambuc            (VMULSDrr_Int v2f64:$dst, v2f64:$src)>;
3447*0a6a1f1dSLionel Sambuc  def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst),
3448*0a6a1f1dSLionel Sambuc                   (fdiv (v2f64 VR128:$dst), (v2f64 VR128:$src)))),
3449*0a6a1f1dSLionel Sambuc            (VDIVSDrr_Int v2f64:$dst, v2f64:$src)>;
3450*0a6a1f1dSLionel Sambuc
3451*0a6a1f1dSLionel Sambuc  // Also handle X86Blendi-based patterns.
  // First form: $dst is the first blend operand, the arithmetic result is
  // the second, and the blend immediate is 1.
3452*0a6a1f1dSLionel Sambuc  def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
3453*0a6a1f1dSLionel Sambuc                   (fadd (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))),
3454*0a6a1f1dSLionel Sambuc            (VADDSSrr_Int v4f32:$dst, v4f32:$src)>;
3455*0a6a1f1dSLionel Sambuc  def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
3456*0a6a1f1dSLionel Sambuc                   (fsub (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))),
3457*0a6a1f1dSLionel Sambuc            (VSUBSSrr_Int v4f32:$dst, v4f32:$src)>;
3458*0a6a1f1dSLionel Sambuc  def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
3459*0a6a1f1dSLionel Sambuc                   (fmul (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))),
3460*0a6a1f1dSLionel Sambuc            (VMULSSrr_Int v4f32:$dst, v4f32:$src)>;
3461*0a6a1f1dSLionel Sambuc  def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
3462*0a6a1f1dSLionel Sambuc                   (fdiv (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))),
3463*0a6a1f1dSLionel Sambuc            (VDIVSSrr_Int v4f32:$dst, v4f32:$src)>;
3464*0a6a1f1dSLionel Sambuc
3465*0a6a1f1dSLionel Sambuc  def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst),
3466*0a6a1f1dSLionel Sambuc                   (fadd (v2f64 VR128:$dst), (v2f64 VR128:$src)), (i8 1))),
3467*0a6a1f1dSLionel Sambuc            (VADDSDrr_Int v2f64:$dst, v2f64:$src)>;
3468*0a6a1f1dSLionel Sambuc  def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst),
3469*0a6a1f1dSLionel Sambuc                   (fsub (v2f64 VR128:$dst), (v2f64 VR128:$src)), (i8 1))),
3470*0a6a1f1dSLionel Sambuc            (VSUBSDrr_Int v2f64:$dst, v2f64:$src)>;
3471*0a6a1f1dSLionel Sambuc  def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst),
3472*0a6a1f1dSLionel Sambuc                   (fmul (v2f64 VR128:$dst), (v2f64 VR128:$src)), (i8 1))),
3473*0a6a1f1dSLionel Sambuc            (VMULSDrr_Int v2f64:$dst, v2f64:$src)>;
3474*0a6a1f1dSLionel Sambuc  def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst),
3475*0a6a1f1dSLionel Sambuc                   (fdiv (v2f64 VR128:$dst), (v2f64 VR128:$src)), (i8 1))),
3476*0a6a1f1dSLionel Sambuc            (VDIVSDrr_Int v2f64:$dst, v2f64:$src)>;
3477*0a6a1f1dSLionel Sambuc
  // Second v2f64 form: the blend operands are swapped (arithmetic result
  // first, $dst second) and the immediate is 2. The same instructions are
  // selected as for the immediate-1 form above.
  // NOTE(review): presumably the two encodings describe the same lane
  // selection for a two-element vector -- confirm against the X86Blendi
  // lowering.
3478*0a6a1f1dSLionel Sambuc  def : Pat<(v2f64 (X86Blendi (fadd (v2f64 VR128:$dst), (v2f64 VR128:$src)),
3479*0a6a1f1dSLionel Sambuc                              (v2f64 VR128:$dst), (i8 2))),
3480*0a6a1f1dSLionel Sambuc            (VADDSDrr_Int v2f64:$dst, v2f64:$src)>;
3481*0a6a1f1dSLionel Sambuc  def : Pat<(v2f64 (X86Blendi (fsub (v2f64 VR128:$dst), (v2f64 VR128:$src)),
3482*0a6a1f1dSLionel Sambuc                   (v2f64 VR128:$dst), (i8 2))),
3483*0a6a1f1dSLionel Sambuc            (VSUBSDrr_Int v2f64:$dst, v2f64:$src)>;
3484*0a6a1f1dSLionel Sambuc  def : Pat<(v2f64 (X86Blendi (fmul (v2f64 VR128:$dst), (v2f64 VR128:$src)),
3485*0a6a1f1dSLionel Sambuc                   (v2f64 VR128:$dst), (i8 2))),
3486*0a6a1f1dSLionel Sambuc            (VMULSDrr_Int v2f64:$dst, v2f64:$src)>;
3487*0a6a1f1dSLionel Sambuc  def : Pat<(v2f64 (X86Blendi (fdiv (v2f64 VR128:$dst), (v2f64 VR128:$src)),
3488*0a6a1f1dSLionel Sambuc                   (v2f64 VR128:$dst), (i8 2))),
3489*0a6a1f1dSLionel Sambuc            (VDIVSDrr_Int v2f64:$dst, v2f64:$src)>;
3490*0a6a1f1dSLionel Sambuc}
3491*0a6a1f1dSLionel Sambuc
3492f4a2713aSLionel Sambuc/// Unop Arithmetic
3493f4a2713aSLionel Sambuc/// In addition, we also have a special variant of the scalar form here to
3494f4a2713aSLionel Sambuc/// represent the associated intrinsic operation.  This form is unlike the
3495f4a2713aSLionel Sambuc/// plain scalar form, in that it takes an entire vector (instead of a
3496f4a2713aSLionel Sambuc/// scalar) and leaves the top elements undefined.
3497f4a2713aSLionel Sambuc///
3498f4a2713aSLionel Sambuc/// And, we have a special variant form for a full-vector intrinsic form.
3499f4a2713aSLionel Sambuc
// Itinerary pairs (register-register, register-memory) for the packed and
// scalar square-root instructions. The enclosing 'let' replaces the OpndItins
// default scheduling class (WriteFAdd) with WriteFSqrt for all four records.
3500f4a2713aSLionel Sambuclet Sched = WriteFSqrt in {
3501f4a2713aSLionel Sambucdef SSE_SQRTPS : OpndItins<
3502f4a2713aSLionel Sambuc  IIC_SSE_SQRTPS_RR, IIC_SSE_SQRTPS_RM
3503f4a2713aSLionel Sambuc>;
3504f4a2713aSLionel Sambuc
3505f4a2713aSLionel Sambucdef SSE_SQRTSS : OpndItins<
3506f4a2713aSLionel Sambuc  IIC_SSE_SQRTSS_RR, IIC_SSE_SQRTSS_RM
3507f4a2713aSLionel Sambuc>;
3508f4a2713aSLionel Sambuc
3509f4a2713aSLionel Sambucdef SSE_SQRTPD : OpndItins<
3510f4a2713aSLionel Sambuc  IIC_SSE_SQRTPD_RR, IIC_SSE_SQRTPD_RM
3511f4a2713aSLionel Sambuc>;
3512f4a2713aSLionel Sambuc
3513f4a2713aSLionel Sambucdef SSE_SQRTSD : OpndItins<
3514f4a2713aSLionel Sambuc  IIC_SSE_SQRTSD_RR, IIC_SSE_SQRTSD_RM
3515f4a2713aSLionel Sambuc>;
3516f4a2713aSLionel Sambuc}
3517f4a2713aSLionel Sambuc
// Itinerary pairs (register-register, register-memory) for the packed and
// scalar single-precision reciprocal-square-root instructions. The scheduling
// class is overridden to WriteFRsqrt.
3518*0a6a1f1dSLionel Sambuclet Sched = WriteFRsqrt in {
3519*0a6a1f1dSLionel Sambucdef SSE_RSQRTPS : OpndItins<
3520*0a6a1f1dSLionel Sambuc  IIC_SSE_RSQRTPS_RR, IIC_SSE_RSQRTPS_RM
3521*0a6a1f1dSLionel Sambuc>;
3522*0a6a1f1dSLionel Sambuc
3523*0a6a1f1dSLionel Sambucdef SSE_RSQRTSS : OpndItins<
3524*0a6a1f1dSLionel Sambuc  IIC_SSE_RSQRTSS_RR, IIC_SSE_RSQRTSS_RM
3525*0a6a1f1dSLionel Sambuc>;
3526*0a6a1f1dSLionel Sambuc}
3527*0a6a1f1dSLionel Sambuc
// Itinerary pairs (register-register, register-memory) for the reciprocal
// approximation instructions: SSE_RCPP for the packed form, SSE_RCPS for the
// scalar form. The scheduling class is overridden to WriteFRcp.
3528f4a2713aSLionel Sambuclet Sched = WriteFRcp in {
3529f4a2713aSLionel Sambucdef SSE_RCPP : OpndItins<
3530f4a2713aSLionel Sambuc  IIC_SSE_RCPP_RR, IIC_SSE_RCPP_RM
3531f4a2713aSLionel Sambuc>;
3532f4a2713aSLionel Sambuc
3533f4a2713aSLionel Sambucdef SSE_RCPS : OpndItins<
3534f4a2713aSLionel Sambuc  IIC_SSE_RCPS_RR, IIC_SSE_RCPS_RM
3535f4a2713aSLionel Sambuc>;
3536f4a2713aSLionel Sambuc}
3537f4a2713aSLionel Sambuc
// Defines the scalar single-precision forms of one unary FP operation:
//   V<NAME>SSr, V<NAME>SSm, V<NAME>SSm_Int  - HasAVX, VEX_4V, no patterns
//   <NAME>SSr, <NAME>SSm                    - FR32, selected from OpNode
//   <NAME>SSr_Int, <NAME>SSm_Int            - VR128, $src1 tied to $dst
// Parameters:
//   opc       - opcode byte shared by every form.
//   OpcodeStr - mnemonic stem; "ss" (and a leading "v" for AVX) is added here.
//   OpNode    - DAG node matched by the FR32 forms.
//   itins     - itineraries (rr / rm) and scheduling class for the forms.
3538*0a6a1f1dSLionel Sambuc/// sse1_fp_unop_s - SSE1 unops in scalar form
3539*0a6a1f1dSLionel Sambuc/// For the non-AVX defs, we need $src1 to be tied to $dst because
3540*0a6a1f1dSLionel Sambuc/// the HW instructions are 2 operand / destructive.
3541*0a6a1f1dSLionel Sambucmulticlass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
3542f4a2713aSLionel Sambuc                           OpndItins itins> {
  // AVX forms: three-operand, pattern-less here. The instructions are
  // referenced by name from separate Pat<> definitions.
3543f4a2713aSLionel Sambuclet Predicates = [HasAVX], hasSideEffects = 0 in {
3544f4a2713aSLionel Sambuc  def V#NAME#SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst),
3545f4a2713aSLionel Sambuc                       (ins FR32:$src1, FR32:$src2),
3546f4a2713aSLionel Sambuc                       !strconcat("v", OpcodeStr,
3547f4a2713aSLionel Sambuc                           "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3548f4a2713aSLionel Sambuc                []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>;
3549f4a2713aSLionel Sambuc  let mayLoad = 1 in {
3550f4a2713aSLionel Sambuc  def V#NAME#SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst),
3551f4a2713aSLionel Sambuc                      (ins FR32:$src1,f32mem:$src2),
3552f4a2713aSLionel Sambuc                      !strconcat("v", OpcodeStr,
3553f4a2713aSLionel Sambuc                                 "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3554f4a2713aSLionel Sambuc                      []>, VEX_4V, VEX_LIG,
3555f4a2713aSLionel Sambuc                   Sched<[itins.Sched.Folded, ReadAfterLd]>;
3556*0a6a1f1dSLionel Sambuc  let isCodeGenOnly = 1 in
3557f4a2713aSLionel Sambuc  def V#NAME#SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
3558f4a2713aSLionel Sambuc                      (ins VR128:$src1, ssmem:$src2),
3559f4a2713aSLionel Sambuc                      !strconcat("v", OpcodeStr,
3560f4a2713aSLionel Sambuc                                 "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3561f4a2713aSLionel Sambuc                      []>, VEX_4V, VEX_LIG,
3562f4a2713aSLionel Sambuc                      Sched<[itins.Sched.Folded, ReadAfterLd]>;
3563f4a2713aSLionel Sambuc  }
3564f4a2713aSLionel Sambuc}
3565f4a2713aSLionel Sambuc
  // The register form carries the rr itinerary, matching SSr_Int below and
  // SDr in sse2_fp_unop_s; previously it fell back to the default itinerary.
3566f4a2713aSLionel Sambuc  def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
3567f4a2713aSLionel Sambuc                !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
3568f4a2713aSLionel Sambuc                [(set FR32:$dst, (OpNode FR32:$src))], itins.rr>, Sched<[itins.Sched]>;
3569f4a2713aSLionel Sambuc  // For scalar unary operations, fold a load into the operation
3570f4a2713aSLionel Sambuc  // only in OptForSize mode. It eliminates an instruction, but it also
3571f4a2713aSLionel Sambuc  // eliminates a whole-register clobber (the load), so it introduces a
3572f4a2713aSLionel Sambuc  // partial register update condition.
3573f4a2713aSLionel Sambuc  def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
3574f4a2713aSLionel Sambuc                !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
3575f4a2713aSLionel Sambuc                [(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS,
3576f4a2713aSLionel Sambuc            Requires<[UseSSE1, OptForSize]>, Sched<[itins.Sched.Folded]>;
  // Whole-vector forms: $src1 is tied to $dst and only $src2 is printed.
3577*0a6a1f1dSLionel Sambuc  let isCodeGenOnly = 1, Constraints = "$src1 = $dst" in {
3578f4a2713aSLionel Sambuc    def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst),
3579f4a2713aSLionel Sambuc                      (ins VR128:$src1, VR128:$src2),
3580f4a2713aSLionel Sambuc                      !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
3581f4a2713aSLionel Sambuc                      [], itins.rr>, Sched<[itins.Sched]>;
3582f4a2713aSLionel Sambuc    let mayLoad = 1, hasSideEffects = 0 in
3583f4a2713aSLionel Sambuc    def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
3584f4a2713aSLionel Sambuc                      (ins VR128:$src1, ssmem:$src2),
3585f4a2713aSLionel Sambuc                      !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
3586f4a2713aSLionel Sambuc                      [], itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
3587f4a2713aSLionel Sambuc  }
3588f4a2713aSLionel Sambuc}
3589f4a2713aSLionel Sambuc
// Defines the packed single-precision forms of one unary FP operation, each
// selected directly from OpNode:
//   V<NAME>PSr / V<NAME>PSm   - HasAVX, 128-bit (VR128, v4f32)
//   V<NAME>PSYr / V<NAME>PSYm - HasAVX, 256-bit (VR256, v8f32, VEX_L)
//   <NAME>PSr / <NAME>PSm     - non-VEX 128-bit
// Register forms use itins.rr / itins.Sched; memory forms use itins.rm /
// itins.Sched.Folded.
3590f4a2713aSLionel Sambuc/// sse1_fp_unop_p - SSE1 unops in packed form.
3591f4a2713aSLionel Sambucmulticlass sse1_fp_unop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
3592f4a2713aSLionel Sambuc                          OpndItins itins> {
3593f4a2713aSLionel Sambuclet Predicates = [HasAVX] in {
3594f4a2713aSLionel Sambuc  def V#NAME#PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
3595f4a2713aSLionel Sambuc                       !strconcat("v", OpcodeStr,
3596f4a2713aSLionel Sambuc                                  "ps\t{$src, $dst|$dst, $src}"),
3597f4a2713aSLionel Sambuc                       [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))],
3598f4a2713aSLionel Sambuc                       itins.rr>, VEX, Sched<[itins.Sched]>;
3599f4a2713aSLionel Sambuc  def V#NAME#PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
3600f4a2713aSLionel Sambuc                       !strconcat("v", OpcodeStr,
3601f4a2713aSLionel Sambuc                                  "ps\t{$src, $dst|$dst, $src}"),
3602f4a2713aSLionel Sambuc                       [(set VR128:$dst, (OpNode (loadv4f32 addr:$src)))],
3603f4a2713aSLionel Sambuc                       itins.rm>, VEX, Sched<[itins.Sched.Folded]>;
3604f4a2713aSLionel Sambuc  def V#NAME#PSYr : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
3605f4a2713aSLionel Sambuc                        !strconcat("v", OpcodeStr,
3606f4a2713aSLionel Sambuc                                   "ps\t{$src, $dst|$dst, $src}"),
3607f4a2713aSLionel Sambuc                        [(set VR256:$dst, (v8f32 (OpNode VR256:$src)))],
3608f4a2713aSLionel Sambuc                        itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>;
3609f4a2713aSLionel Sambuc  def V#NAME#PSYm : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
3610f4a2713aSLionel Sambuc                        !strconcat("v", OpcodeStr,
3611f4a2713aSLionel Sambuc                                   "ps\t{$src, $dst|$dst, $src}"),
3612f4a2713aSLionel Sambuc                        [(set VR256:$dst, (OpNode (loadv8f32 addr:$src)))],
3613f4a2713aSLionel Sambuc                        itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>;
3614f4a2713aSLionel Sambuc}
3615f4a2713aSLionel Sambuc
  // Non-VEX forms. The memory form matches memopv4f32 where the AVX forms
  // above match loadv4f32.
  // NOTE(review): presumably memop adds an alignment requirement on the
  // folded load -- confirm against the fragment definitions.
3616f4a2713aSLionel Sambuc  def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
3617f4a2713aSLionel Sambuc                !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
3618f4a2713aSLionel Sambuc                [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))], itins.rr>,
3619f4a2713aSLionel Sambuc            Sched<[itins.Sched]>;
3620f4a2713aSLionel Sambuc  def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
3621f4a2713aSLionel Sambuc                !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
3622f4a2713aSLionel Sambuc                [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))], itins.rm>,
3623f4a2713aSLionel Sambuc            Sched<[itins.Sched.Folded]>;
3624f4a2713aSLionel Sambuc}
3625f4a2713aSLionel Sambuc
// Defines the "_Int" packed single-precision forms, selected from intrinsics
// rather than from a generic DAG node:
//   V4F32Int - intrinsic matched by the 128-bit forms (VEX and non-VEX).
//   V8F32Int - intrinsic matched by the 256-bit VEX forms.
// Every record is marked isCodeGenOnly. The encodings and assembly strings
// are the same as those of the corresponding sse1_fp_unop_p forms.
3626f4a2713aSLionel Sambuc/// sse1_fp_unop_p_int - SSE1 intrinsics unops in packed forms.
3627f4a2713aSLionel Sambucmulticlass sse1_fp_unop_p_int<bits<8> opc, string OpcodeStr,
3628f4a2713aSLionel Sambuc                              Intrinsic V4F32Int, Intrinsic V8F32Int,
3629f4a2713aSLionel Sambuc                              OpndItins itins> {
3630*0a6a1f1dSLionel Sambuclet isCodeGenOnly = 1 in {
3631f4a2713aSLionel Sambuclet Predicates = [HasAVX] in {
3632f4a2713aSLionel Sambuc  def V#NAME#PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
3633f4a2713aSLionel Sambuc                           !strconcat("v", OpcodeStr,
3634f4a2713aSLionel Sambuc                                      "ps\t{$src, $dst|$dst, $src}"),
3635f4a2713aSLionel Sambuc                           [(set VR128:$dst, (V4F32Int VR128:$src))],
3636f4a2713aSLionel Sambuc                           itins.rr>, VEX, Sched<[itins.Sched]>;
3637f4a2713aSLionel Sambuc  def V#NAME#PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
3638f4a2713aSLionel Sambuc                          !strconcat("v", OpcodeStr,
3639f4a2713aSLionel Sambuc                          "ps\t{$src, $dst|$dst, $src}"),
3640f4a2713aSLionel Sambuc                          [(set VR128:$dst, (V4F32Int (loadv4f32 addr:$src)))],
3641f4a2713aSLionel Sambuc                          itins.rm>, VEX, Sched<[itins.Sched.Folded]>;
3642f4a2713aSLionel Sambuc  def V#NAME#PSYr_Int : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
3643f4a2713aSLionel Sambuc                            !strconcat("v", OpcodeStr,
3644f4a2713aSLionel Sambuc                                       "ps\t{$src, $dst|$dst, $src}"),
3645f4a2713aSLionel Sambuc                            [(set VR256:$dst, (V8F32Int VR256:$src))],
3646f4a2713aSLionel Sambuc                            itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>;
3647f4a2713aSLionel Sambuc  def V#NAME#PSYm_Int : PSI<opc, MRMSrcMem, (outs VR256:$dst),
3648f4a2713aSLionel Sambuc                          (ins f256mem:$src),
3649f4a2713aSLionel Sambuc                          !strconcat("v", OpcodeStr,
3650f4a2713aSLionel Sambuc                                    "ps\t{$src, $dst|$dst, $src}"),
3651f4a2713aSLionel Sambuc                          [(set VR256:$dst, (V8F32Int (loadv8f32 addr:$src)))],
3652f4a2713aSLionel Sambuc                          itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>;
3653f4a2713aSLionel Sambuc}
3654f4a2713aSLionel Sambuc
  // Non-VEX 128-bit forms; the memory form matches memopv4f32.
3655f4a2713aSLionel Sambuc  def PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
3656f4a2713aSLionel Sambuc                    !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
3657f4a2713aSLionel Sambuc                    [(set VR128:$dst, (V4F32Int VR128:$src))],
3658f4a2713aSLionel Sambuc                    itins.rr>, Sched<[itins.Sched]>;
3659f4a2713aSLionel Sambuc  def PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
3660f4a2713aSLionel Sambuc                    !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
3661f4a2713aSLionel Sambuc                    [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))],
3662f4a2713aSLionel Sambuc                    itins.rm>, Sched<[itins.Sched.Folded]>;
3663*0a6a1f1dSLionel Sambuc} // isCodeGenOnly = 1
3664f4a2713aSLionel Sambuc}
3665f4a2713aSLionel Sambuc
// Defines the scalar double-precision forms of one unary FP operation:
//   V<NAME>SDr, V<NAME>SDm, V<NAME>SDm_Int - HasAVX, VEX_4V, no patterns
//   <NAME>SDr, <NAME>SDm                   - FR64, selected from OpNode
//   <NAME>SDr_Int, <NAME>SDm_Int           - VR128, selected from F64Int
// Unlike sse1_fp_unop_s, the non-AVX _Int forms here take a single source
// operand, carry their own intrinsic patterns, and have no tied-operand
// constraint.
3666f4a2713aSLionel Sambuc/// sse2_fp_unop_s - SSE2 unops in scalar form.
3667f4a2713aSLionel Sambucmulticlass sse2_fp_unop_s<bits<8> opc, string OpcodeStr,
3668f4a2713aSLionel Sambuc                          SDNode OpNode, Intrinsic F64Int, OpndItins itins> {
3669f4a2713aSLionel Sambuclet Predicates = [HasAVX], hasSideEffects = 0 in {
3670f4a2713aSLionel Sambuc  def V#NAME#SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst),
3671f4a2713aSLionel Sambuc                      (ins FR64:$src1, FR64:$src2),
3672f4a2713aSLionel Sambuc                      !strconcat("v", OpcodeStr,
3673f4a2713aSLionel Sambuc                                 "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3674f4a2713aSLionel Sambuc                      []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>;
3675f4a2713aSLionel Sambuc  let mayLoad = 1 in {
3676f4a2713aSLionel Sambuc  def V#NAME#SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst),
3677f4a2713aSLionel Sambuc                      (ins FR64:$src1,f64mem:$src2),
3678f4a2713aSLionel Sambuc                      !strconcat("v", OpcodeStr,
3679f4a2713aSLionel Sambuc                                 "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3680f4a2713aSLionel Sambuc                      []>, VEX_4V, VEX_LIG,
3681f4a2713aSLionel Sambuc                   Sched<[itins.Sched.Folded, ReadAfterLd]>;
3682*0a6a1f1dSLionel Sambuc  let isCodeGenOnly = 1 in
3683f4a2713aSLionel Sambuc  def V#NAME#SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst),
3684f4a2713aSLionel Sambuc                      (ins VR128:$src1, sdmem:$src2),
3685f4a2713aSLionel Sambuc                      !strconcat("v", OpcodeStr,
3686f4a2713aSLionel Sambuc                                 "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3687f4a2713aSLionel Sambuc                      []>, VEX_4V, VEX_LIG,
3688f4a2713aSLionel Sambuc                      Sched<[itins.Sched.Folded, ReadAfterLd]>;
3689f4a2713aSLionel Sambuc  }
3690f4a2713aSLionel Sambuc}
3691f4a2713aSLionel Sambuc
3692f4a2713aSLionel Sambuc  def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
3693f4a2713aSLionel Sambuc                !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
3694f4a2713aSLionel Sambuc                [(set FR64:$dst, (OpNode FR64:$src))], itins.rr>,
3695f4a2713aSLionel Sambuc            Sched<[itins.Sched]>;
3696f4a2713aSLionel Sambuc  // See the comments in sse1_fp_unop_s for why this is OptForSize.
3697f4a2713aSLionel Sambuc  def SDm : I<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
3698f4a2713aSLionel Sambuc                !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
3699f4a2713aSLionel Sambuc                [(set FR64:$dst, (OpNode (load addr:$src)))], itins.rm>, XD,
3700f4a2713aSLionel Sambuc            Requires<[UseSSE2, OptForSize]>, Sched<[itins.Sched.Folded]>;
  // Intrinsic (whole-vector) forms, selected from F64Int.
3701*0a6a1f1dSLionel Sambuclet isCodeGenOnly = 1 in {
3702f4a2713aSLionel Sambuc  def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
3703f4a2713aSLionel Sambuc                    !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
3704f4a2713aSLionel Sambuc                    [(set VR128:$dst, (F64Int VR128:$src))], itins.rr>,
3705f4a2713aSLionel Sambuc                Sched<[itins.Sched]>;
3706f4a2713aSLionel Sambuc  def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src),
3707f4a2713aSLionel Sambuc                    !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
3708f4a2713aSLionel Sambuc                    [(set VR128:$dst, (F64Int sse_load_f64:$src))], itins.rm>,
3709f4a2713aSLionel Sambuc                Sched<[itins.Sched.Folded]>;
3710f4a2713aSLionel Sambuc}
3711*0a6a1f1dSLionel Sambuc}
3712f4a2713aSLionel Sambuc
// Defines the packed double-precision forms of one unary FP operation, each
// selected directly from OpNode:
//   V<NAME>PDr / V<NAME>PDm   - HasAVX, 128-bit (VR128, v2f64)
//   V<NAME>PDYr / V<NAME>PDYm - HasAVX, 256-bit (VR256, v4f64, VEX_L)
//   <NAME>PDr / <NAME>PDm     - non-VEX 128-bit (memory form uses memopv2f64)
3713f4a2713aSLionel Sambuc/// sse2_fp_unop_p - SSE2 unops in vector forms.
3714f4a2713aSLionel Sambucmulticlass sse2_fp_unop_p<bits<8> opc, string OpcodeStr,
3715f4a2713aSLionel Sambuc                          SDNode OpNode, OpndItins itins> {
3716f4a2713aSLionel Sambuclet Predicates = [HasAVX] in {
3717f4a2713aSLionel Sambuc  def V#NAME#PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
3718f4a2713aSLionel Sambuc                       !strconcat("v", OpcodeStr,
3719f4a2713aSLionel Sambuc                                  "pd\t{$src, $dst|$dst, $src}"),
3720f4a2713aSLionel Sambuc                       [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))],
3721f4a2713aSLionel Sambuc                       itins.rr>, VEX, Sched<[itins.Sched]>;
3722f4a2713aSLionel Sambuc  def V#NAME#PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
3723f4a2713aSLionel Sambuc                       !strconcat("v", OpcodeStr,
3724f4a2713aSLionel Sambuc                                  "pd\t{$src, $dst|$dst, $src}"),
3725f4a2713aSLionel Sambuc                       [(set VR128:$dst, (OpNode (loadv2f64 addr:$src)))],
3726f4a2713aSLionel Sambuc                       itins.rm>, VEX, Sched<[itins.Sched.Folded]>;
3727f4a2713aSLionel Sambuc  def V#NAME#PDYr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
3728f4a2713aSLionel Sambuc                        !strconcat("v", OpcodeStr,
3729f4a2713aSLionel Sambuc                                   "pd\t{$src, $dst|$dst, $src}"),
3730f4a2713aSLionel Sambuc                        [(set VR256:$dst, (v4f64 (OpNode VR256:$src)))],
3731f4a2713aSLionel Sambuc                        itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>;
3732f4a2713aSLionel Sambuc  def V#NAME#PDYm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
3733f4a2713aSLionel Sambuc                        !strconcat("v", OpcodeStr,
3734f4a2713aSLionel Sambuc                                   "pd\t{$src, $dst|$dst, $src}"),
3735f4a2713aSLionel Sambuc                        [(set VR256:$dst, (OpNode (loadv4f64 addr:$src)))],
3736f4a2713aSLionel Sambuc                        itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>;
3737f4a2713aSLionel Sambuc}
3738f4a2713aSLionel Sambuc
3739f4a2713aSLionel Sambuc  def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
3740f4a2713aSLionel Sambuc              !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
3741f4a2713aSLionel Sambuc              [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))], itins.rr>,
3742f4a2713aSLionel Sambuc            Sched<[itins.Sched]>;
3743f4a2713aSLionel Sambuc  def PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
3744f4a2713aSLionel Sambuc                !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
3745f4a2713aSLionel Sambuc                [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))], itins.rm>,
3746f4a2713aSLionel Sambuc            Sched<[itins.Sched.Folded]>;
3747f4a2713aSLionel Sambuc}
3748f4a2713aSLionel Sambuc
// Instantiations of the unary-op multiclasses.
// SQRT (opcode 0x51) gets scalar and packed forms in both single and double
// precision, all matching fsqrt; the SD intrinsic forms match
// int_x86_sse2_sqrt_sd.
3749f4a2713aSLionel Sambuc// Square root.
3750*0a6a1f1dSLionel Sambucdefm SQRT  : sse1_fp_unop_s<0x51, "sqrt", fsqrt, SSE_SQRTSS>,
3751f4a2713aSLionel Sambuc             sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTPS>,
3752f4a2713aSLionel Sambuc             sse2_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd,
3753f4a2713aSLionel Sambuc                            SSE_SQRTSD>,
3754f4a2713aSLionel Sambuc             sse2_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTPD>;
3755f4a2713aSLionel Sambuc
// RSQRT (0x52) and RCP (0x53) are instantiated for single precision only:
// scalar, packed, and packed-intrinsic forms (128-bit SSE and 256-bit AVX
// intrinsics).
3756f4a2713aSLionel Sambuc// Reciprocal approximations. Note that these typically require refinement
3757f4a2713aSLionel Sambuc// in order to obtain suitable precision.
3758*0a6a1f1dSLionel Sambucdefm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, SSE_RSQRTSS>,
3759*0a6a1f1dSLionel Sambuc             sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_RSQRTPS>,
3760f4a2713aSLionel Sambuc             sse1_fp_unop_p_int<0x52, "rsqrt", int_x86_sse_rsqrt_ps,
3761*0a6a1f1dSLionel Sambuc                                int_x86_avx_rsqrt_ps_256, SSE_RSQRTPS>;
3762*0a6a1f1dSLionel Sambucdefm RCP   : sse1_fp_unop_s<0x53, "rcp", X86frcp, SSE_RCPS>,
3763f4a2713aSLionel Sambuc             sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPP>,
3764f4a2713aSLionel Sambuc             sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps,
3765f4a2713aSLionel Sambuc                                int_x86_avx_rcp_ps_256, SSE_RCPP>;
3766f4a2713aSLionel Sambuc
// Select the AVX scalar sqrt/rsqrt/rcp instructions for plain f32/f64 DAG
// nodes. The VEX forms take an extra first source operand ($src1), which is
// fed an IMPLICIT_DEF here.
//
// Predicates are set only through the enclosing 'let'. A file-scope
// 'let Predicates = ...' is applied after a def's parent classes, so a
// Requires<> written on a def inside such a block is overwritten. The
// load-folding patterns therefore live in their own block below, so that
// the OptForSize restriction really takes effect.
3767f4a2713aSLionel Sambuclet Predicates = [UseAVX] in {
3768f4a2713aSLionel Sambuc  def : Pat<(f32 (fsqrt FR32:$src)),
3769f4a2713aSLionel Sambuc            (VSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>;
3773f4a2713aSLionel Sambuc  def : Pat<(f64 (fsqrt FR64:$src)),
3774f4a2713aSLionel Sambuc            (VSQRTSDr (f64 (IMPLICIT_DEF)), FR64:$src)>;
3778f4a2713aSLionel Sambuc
3779f4a2713aSLionel Sambuc  def : Pat<(f32 (X86frsqrt FR32:$src)),
3780f4a2713aSLionel Sambuc            (VRSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>;
3784f4a2713aSLionel Sambuc
3785f4a2713aSLionel Sambuc  def : Pat<(f32 (X86frcp FR32:$src)),
3786f4a2713aSLionel Sambuc            (VRCPSSr (f32 (IMPLICIT_DEF)), FR32:$src)>;
3790f4a2713aSLionel Sambuc}

// Fold a load into the scalar operation only when optimizing for size; see
// the comment on SSm in sse1_fp_unop_s for the partial-register-update
// rationale.
let Predicates = [UseAVX, OptForSize] in {
3770f4a2713aSLionel Sambuc  def : Pat<(f32 (fsqrt (load addr:$src))),
3771f4a2713aSLionel Sambuc            (VSQRTSSm (f32 (IMPLICIT_DEF)), addr:$src)>;
3775f4a2713aSLionel Sambuc  def : Pat<(f64 (fsqrt (load addr:$src))),
3776f4a2713aSLionel Sambuc            (VSQRTSDm (f64 (IMPLICIT_DEF)), addr:$src)>;

3781f4a2713aSLionel Sambuc  def : Pat<(f32 (X86frsqrt (load addr:$src))),
3782f4a2713aSLionel Sambuc            (VRSQRTSSm (f32 (IMPLICIT_DEF)), addr:$src)>;

3787f4a2713aSLionel Sambuc  def : Pat<(f32 (X86frcp (load addr:$src))),
3788f4a2713aSLionel Sambuc            (VRCPSSm (f32 (IMPLICIT_DEF)), addr:$src)>;
}
// AVX selection of the scalar sqrt intrinsics (int_x86_sse_sqrt_ss and
// int_x86_sse2_sqrt_sd). Register inputs are copied from VR128 to FR32/FR64,
// run through the scalar VSQRTSSr/VSQRTSDr with an IMPLICIT_DEF first source,
// and the result is copied back to VR128. Folded scalar loads use the
// VSQRT*m_Int forms with an IMPLICIT_DEF vector as $src1.
3791f4a2713aSLionel Sambuclet Predicates = [UseAVX] in {
3792f4a2713aSLionel Sambuc  def : Pat<(int_x86_sse_sqrt_ss VR128:$src),
3793f4a2713aSLionel Sambuc            (COPY_TO_REGCLASS (VSQRTSSr (f32 (IMPLICIT_DEF)),
3794f4a2713aSLionel Sambuc                                        (COPY_TO_REGCLASS VR128:$src, FR32)),
3795f4a2713aSLionel Sambuc                              VR128)>;
3796f4a2713aSLionel Sambuc  def : Pat<(int_x86_sse_sqrt_ss sse_load_f32:$src),
3797f4a2713aSLionel Sambuc            (VSQRTSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
3798f4a2713aSLionel Sambuc
3799f4a2713aSLionel Sambuc  def : Pat<(int_x86_sse2_sqrt_sd VR128:$src),
3800f4a2713aSLionel Sambuc            (COPY_TO_REGCLASS (VSQRTSDr (f64 (IMPLICIT_DEF)),
3801f4a2713aSLionel Sambuc                                        (COPY_TO_REGCLASS VR128:$src, FR64)),
3802f4a2713aSLionel Sambuc                              VR128)>;
3803f4a2713aSLionel Sambuc  def : Pat<(int_x86_sse2_sqrt_sd sse_load_f64:$src),
3804f4a2713aSLionel Sambuc            (VSQRTSDm_Int (v2f64 (IMPLICIT_DEF)), sse_load_f64:$src)>;
3805f4a2713aSLionel Sambuc}
3806f4a2713aSLionel Sambuc
// AVX selection of the scalar reciprocal-approximation intrinsics
// (int_x86_sse_rsqrt_ss and int_x86_sse_rcp_ss). Register inputs are copied
// VR128 -> FR32, run through VRSQRTSSr/VRCPSSr with an IMPLICIT_DEF first
// source, and copied back to VR128. Folded scalar loads use the *m_Int forms.
// NOTE(review): this block is predicated on HasAVX, while the sqrt intrinsic
// patterns in this file use UseAVX -- confirm the difference is intentional.
3807f4a2713aSLionel Sambuclet Predicates = [HasAVX] in {
3808f4a2713aSLionel Sambuc  def : Pat<(int_x86_sse_rsqrt_ss VR128:$src),
3809f4a2713aSLionel Sambuc            (COPY_TO_REGCLASS (VRSQRTSSr (f32 (IMPLICIT_DEF)),
3810f4a2713aSLionel Sambuc                                         (COPY_TO_REGCLASS VR128:$src, FR32)),
3811f4a2713aSLionel Sambuc                              VR128)>;
3812f4a2713aSLionel Sambuc  def : Pat<(int_x86_sse_rsqrt_ss sse_load_f32:$src),
3813f4a2713aSLionel Sambuc            (VRSQRTSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
3814f4a2713aSLionel Sambuc
3815f4a2713aSLionel Sambuc  def : Pat<(int_x86_sse_rcp_ss VR128:$src),
3816f4a2713aSLionel Sambuc            (COPY_TO_REGCLASS (VRCPSSr (f32 (IMPLICIT_DEF)),
3817f4a2713aSLionel Sambuc                                       (COPY_TO_REGCLASS VR128:$src, FR32)),
3818f4a2713aSLionel Sambuc                              VR128)>;
3819f4a2713aSLionel Sambuc  def : Pat<(int_x86_sse_rcp_ss sse_load_f32:$src),
3820f4a2713aSLionel Sambuc            (VRCPSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
3821f4a2713aSLionel Sambuc}
3822f4a2713aSLionel Sambuc
// Non-AVX selection of the scalar single-precision intrinsics. The *SSr_Int
// instructions take two sources with $src1 tied to $dst, so the single
// intrinsic operand is passed as both $src1 and $src2.
3823*0a6a1f1dSLionel Sambuc// These are unary operations, but they are modeled as having 2 source operands
3824*0a6a1f1dSLionel Sambuc// because the high elements of the destination are unchanged in SSE.
3825f4a2713aSLionel Sambuclet Predicates = [UseSSE1] in {
3826f4a2713aSLionel Sambuc  def : Pat<(int_x86_sse_rsqrt_ss VR128:$src),
3827f4a2713aSLionel Sambuc            (RSQRTSSr_Int VR128:$src, VR128:$src)>;
3828f4a2713aSLionel Sambuc  def : Pat<(int_x86_sse_rcp_ss VR128:$src),
3829f4a2713aSLionel Sambuc            (RCPSSr_Int VR128:$src, VR128:$src)>;
3830*0a6a1f1dSLionel Sambuc  def : Pat<(int_x86_sse_sqrt_ss VR128:$src),
3831*0a6a1f1dSLionel Sambuc            (SQRTSSr_Int VR128:$src, VR128:$src)>;
3832f4a2713aSLionel Sambuc}
3833f4a2713aSLionel Sambuc
3834f4a2713aSLionel Sambuc// There is no f64 version of the reciprocal approximation instructions.
3835f4a2713aSLionel Sambuc
3836f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===//
3837f4a2713aSLionel Sambuc// SSE 1 & 2 - Non-temporal stores
3838f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===//
3839f4a2713aSLionel Sambuc
3840f4a2713aSLionel Sambuclet AddedComplexity = 400 in { // Prefer non-temporal versions
3841f4a2713aSLionel Sambuclet SchedRW = [WriteStore] in {
// VEX-encoded non-temporal vector stores, enabled only when both HasAVX and
// NoVLX hold.  The 128-bit forms store a VR128 through f128mem; the "Y" forms
// store a VR256 through f256mem and additionally carry VEX_L.
let Predicates = [HasAVX, NoVLX] in {
  // 128-bit packed single / packed double.
  def VMOVNTPSmr
      : VPSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
             "movntps\t{$src, $dst|$dst, $src}",
             [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)],
             IIC_SSE_MOVNT>,
        VEX;
  def VMOVNTPDmr
      : VPDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
             "movntpd\t{$src, $dst|$dst, $src}",
             [(alignednontemporalstore (v2f64 VR128:$src), addr:$dst)],
             IIC_SSE_MOVNT>,
        VEX;

  // 128-bit integer form, placed in the packed-integer execution domain.
  let ExeDomain = SSEPackedInt in
  def VMOVNTDQmr
      : VPDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
             "movntdq\t{$src, $dst|$dst, $src}",
             [(alignednontemporalstore (v2i64 VR128:$src), addr:$dst)],
             IIC_SSE_MOVNT>,
        VEX;

  // 256-bit packed single / packed double.
  def VMOVNTPSYmr
      : VPSI<0x2B, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
             "movntps\t{$src, $dst|$dst, $src}",
             [(alignednontemporalstore (v8f32 VR256:$src), addr:$dst)],
             IIC_SSE_MOVNT>,
        VEX, VEX_L;
  def VMOVNTPDYmr
      : VPDI<0x2B, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
             "movntpd\t{$src, $dst|$dst, $src}",
             [(alignednontemporalstore (v4f64 VR256:$src), addr:$dst)],
             IIC_SSE_MOVNT>,
        VEX, VEX_L;

  // 256-bit integer form, placed in the packed-integer execution domain.
  let ExeDomain = SSEPackedInt in
  def VMOVNTDQYmr
      : VPDI<0xE7, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
             "movntdq\t{$src, $dst|$dst, $src}",
             [(alignednontemporalstore (v4i64 VR256:$src), addr:$dst)],
             IIC_SSE_MOVNT>,
        VEX, VEX_L;
}
3884f4a2713aSLionel Sambuc
// SSE (no VEX class applied) 128-bit non-temporal vector stores.
def MOVNTPSmr
    : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
          "movntps\t{$src, $dst|$dst, $src}",
          [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)],
          IIC_SSE_MOVNT>;
def MOVNTPDmr
    : PDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
          "movntpd\t{$src, $dst|$dst, $src}",
          [(alignednontemporalstore (v2f64 VR128:$src), addr:$dst)],
          IIC_SSE_MOVNT>;

// Integer form, placed in the packed-integer execution domain.
let ExeDomain = SSEPackedInt in {
  def MOVNTDQmr
      : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
            "movntdq\t{$src, $dst|$dst, $src}",
            [(alignednontemporalstore (v2i64 VR128:$src), addr:$dst)],
            IIC_SSE_MOVNT>;
}
3899f4a2713aSLionel Sambuc
// There is no AVX form for instructions below this point.
// MOVNTI: non-temporal store of a general-purpose register.  The 32-bit form
// stores GR32 through i32mem; the 64-bit form uses the RI class and stores
// GR64 through i64mem.  Both require HasSSE2.
def MOVNTImr
    : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
        "movnti{l}\t{$src, $dst|$dst, $src}",
        [(nontemporalstore (i32 GR32:$src), addr:$dst)], IIC_SSE_MOVNT>,
      PS, Requires<[HasSSE2]>;
def MOVNTI_64mr
    : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
         "movnti{q}\t{$src, $dst|$dst, $src}",
         [(nontemporalstore (i64 GR64:$src), addr:$dst)], IIC_SSE_MOVNT>,
      PS, Requires<[HasSSE2]>;
3911f4a2713aSLionel Sambuc} // SchedRW = [WriteStore]
3912f4a2713aSLionel Sambuc
// Aligned non-temporal stores of v4i32 are mapped onto the packed-single
// non-temporal store instructions.
let Predicates = [HasAVX, NoVLX] in {
  def : Pat<(alignednontemporalstore (v4i32 VR128:$src), addr:$dst),
            (VMOVNTPSmr addr:$dst, VR128:$src)>;
}

// An anonymous Pat does not pick up the predicates of the instruction it
// selects, so the non-VEX form is guarded explicitly.  Without this predicate
// the pattern stays eligible on AVX subtargets (and on subtargets where the
// NoVLX pattern above is disabled), where a non-VEX instruction is not wanted.
let Predicates = [UseSSE1] in {
  def : Pat<(alignednontemporalstore (v4i32 VR128:$src), addr:$dst),
            (MOVNTPSmr addr:$dst, VR128:$src)>;
}

3920*0a6a1f1dSLionel Sambuc
3921f4a2713aSLionel Sambuc} // AddedComplexity
3922f4a2713aSLionel Sambuc
3923f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===//
3924f4a2713aSLionel Sambuc// SSE 1 & 2 - Prefetch and memory fence
3925f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===//
3926f4a2713aSLionel Sambuc
// Prefetch intrinsic.
// The four hints share opcode 0x18 and differ in their ModRM form
// (MRM0m..MRM3m).  In each pattern the third `prefetch` operand (i32 0..3)
// picks the hint and the fourth operand is fixed at (i32 1).
let Predicates = [HasSSE1] in
let SchedRW = [WriteLoad] in {
  def PREFETCHT0
      : I<0x18, MRM1m, (outs), (ins i8mem:$src), "prefetcht0\t$src",
          [(prefetch addr:$src, imm, (i32 3), (i32 1))], IIC_SSE_PREFETCH>,
        TB;
  def PREFETCHT1
      : I<0x18, MRM2m, (outs), (ins i8mem:$src), "prefetcht1\t$src",
          [(prefetch addr:$src, imm, (i32 2), (i32 1))], IIC_SSE_PREFETCH>,
        TB;
  def PREFETCHT2
      : I<0x18, MRM3m, (outs), (ins i8mem:$src), "prefetcht2\t$src",
          [(prefetch addr:$src, imm, (i32 1), (i32 1))], IIC_SSE_PREFETCH>,
        TB;
  def PREFETCHNTA
      : I<0x18, MRM0m, (outs), (ins i8mem:$src), "prefetchnta\t$src",
          [(prefetch addr:$src, imm, (i32 0), (i32 1))], IIC_SSE_PREFETCH>,
        TB;
}
3942f4a2713aSLionel Sambuc
// FIXME: How should flush instruction be modeled?
// Flush cache.  For now it is given the WriteLoad scheduling class.
let SchedRW = [WriteLoad] in
def CLFLUSH
    : I<0xAE, MRM7m, (outs), (ins i8mem:$src), "clflush\t$src",
        [(int_x86_sse2_clflush addr:$src)], IIC_SSE_PREFETCH>,
      TB, Requires<[HasSSE2]>;
3950f4a2713aSLionel Sambuc
// Pause. This "instruction" is encoded as "rep; nop", so even though it
// was introduced with SSE2, it's backward compatible.
// Scheduled as a nop (WriteNop).
let SchedRW = [WriteNop] in
def PAUSE
    : I<0x90, RawFrm, (outs), (ins), "pause", [(int_x86_sse2_pause)],
        IIC_SSE_PAUSE>,
      OBXS, Requires<[HasSSE2]>;
3958f4a2713aSLionel Sambuc
// Load, store, and memory fence.
// All three share opcode 0xAE and the WriteFence scheduling class; SFENCE
// requires HasSSE1 while LFENCE and MFENCE require HasSSE2.
let SchedRW = [WriteFence] in {
  def SFENCE
      : I<0xAE, MRM_F8, (outs), (ins), "sfence", [(int_x86_sse_sfence)],
          IIC_SSE_SFENCE>,
        TB, Requires<[HasSSE1]>;
  def LFENCE
      : I<0xAE, MRM_E8, (outs), (ins), "lfence", [(int_x86_sse2_lfence)],
          IIC_SSE_LFENCE>,
        TB, Requires<[HasSSE2]>;
  def MFENCE
      : I<0xAE, MRM_F0, (outs), (ins), "mfence", [(int_x86_sse2_mfence)],
          IIC_SSE_MFENCE>,
        TB, Requires<[HasSSE2]>;
} // SchedRW

// Select the X86 fence DAG nodes to the matching fence instruction.
def : Pat<(X86SFence),
          (SFENCE)>;
def : Pat<(X86LFence),
          (LFENCE)>;
def : Pat<(X86MFence),
          (MFENCE)>;
3975f4a2713aSLionel Sambuc
3976f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===//
3977f4a2713aSLionel Sambuc// SSE 1 & 2 - Load/Store XCSR register
3978f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===//
3979f4a2713aSLionel Sambuc
// VEX forms.  LDMXCSR reads its i32mem operand and is scheduled as a load;
// STMXCSR writes its i32mem operand and is scheduled as a store.
def VLDMXCSR
    : VPSI<0xAE, MRM2m, (outs), (ins i32mem:$src), "ldmxcsr\t$src",
           [(int_x86_sse_ldmxcsr addr:$src)], IIC_SSE_LDMXCSR>,
      VEX, Sched<[WriteLoad]>;
def VSTMXCSR
    : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst), "stmxcsr\t$dst",
           [(int_x86_sse_stmxcsr addr:$dst)], IIC_SSE_STMXCSR>,
      VEX, Sched<[WriteStore]>;

// SSE forms, same opcode and ModRM forms, enabled under UseSSE1.
let Predicates = [UseSSE1] in {
  def LDMXCSR
      : I<0xAE, MRM2m, (outs), (ins i32mem:$src), "ldmxcsr\t$src",
          [(int_x86_sse_ldmxcsr addr:$src)], IIC_SSE_LDMXCSR>,
        TB, Sched<[WriteLoad]>;
  def STMXCSR
      : I<0xAE, MRM3m, (outs), (ins i32mem:$dst), "stmxcsr\t$dst",
          [(int_x86_sse_stmxcsr addr:$dst)], IIC_SSE_STMXCSR>,
        TB, Sched<[WriteStore]>;
}
3995f4a2713aSLionel Sambuc
3996f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===//
3997f4a2713aSLionel Sambuc// SSE2 - Move Aligned/Unaligned Packed Integer Instructions
3998f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===//
3999f4a2713aSLionel Sambuc
4000f4a2713aSLionel Sambuclet ExeDomain = SSEPackedInt in { // SSE integer instructions
4001f4a2713aSLionel Sambuc
// VEX register-to-register integer vector moves (opcode 0x6F, MRMSrcReg).
// No selection pattern is attached; hasSideEffects is cleared explicitly.
let SchedRW = [WriteMove], hasSideEffects = 0 in {
  // Aligned forms, 128-bit then 256-bit.
  def VMOVDQArr
      : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
             "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>,
        VEX;
  def VMOVDQAYrr
      : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
             "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>,
        VEX, VEX_L;

  // Unaligned forms, 128-bit then 256-bit.
  def VMOVDQUrr
      : VSSI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
             "movdqu\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVU_P_RR>,
        VEX;
  def VMOVDQUYrr
      : VSSI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
             "movdqu\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVU_P_RR>,
        VEX, VEX_L;
}
4016f4a2713aSLionel Sambuc
// For Disassembler
// Alternate encodings of the register moves using opcode 0x7F with
// MRMDestReg.  They are codegen-only but forced into the disassembler tables.
let isCodeGenOnly = 1, ForceDisassemble = 1 in
let hasSideEffects = 0, SchedRW = [WriteMove] in {
  def VMOVDQArr_REV
      : VPDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
             "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>,
        VEX;
  def VMOVDQAYrr_REV
      : VPDI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
             "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>,
        VEX, VEX_L;
  def VMOVDQUrr_REV
      : VSSI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
             "movdqu\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVU_P_RR>,
        VEX;
  def VMOVDQUYrr_REV
      : VSSI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
             "movdqu\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVU_P_RR>,
        VEX, VEX_L;
}
4035f4a2713aSLionel Sambuc
// VEX integer vector loads (opcode 0x6F, MRMSrcMem).  All are foldable,
// rematerializable loads with no selection pattern attached here.
let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1 in
let hasSideEffects = 0, SchedRW = [WriteLoad] in {
  // Aligned forms.
  def VMOVDQArm
      : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
             "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RM>,
        VEX;
  def VMOVDQAYrm
      : VPDI<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
             "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RM>,
        VEX, VEX_L;

  // Unaligned forms.  These use the plain I class, so the "v" mnemonic
  // prefix, the XS prefix and the HasAVX predicate are spelled out.
  let Predicates = [HasAVX] in {
    def VMOVDQUrm
        : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
            "vmovdqu\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVU_P_RM>,
          XS, VEX;
    def VMOVDQUYrm
        : I<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
            "vmovdqu\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVU_P_RM>,
          XS, VEX, VEX_L;
  }
}
4053f4a2713aSLionel Sambuc
// VEX integer vector stores (opcode 0x7F, MRMDestMem), with no selection
// pattern attached here.
let mayStore = 1 in
let hasSideEffects = 0, SchedRW = [WriteStore] in {
  // Aligned forms.
  def VMOVDQAmr
      : VPDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
             "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_MR>,
        VEX;
  def VMOVDQAYmr
      : VPDI<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src),
             "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_MR>,
        VEX, VEX_L;

  // Unaligned forms.  These use the plain I class, so the "v" mnemonic
  // prefix, the XS prefix and the HasAVX predicate are spelled out.
  let Predicates = [HasAVX] in {
    def VMOVDQUmr
        : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
            "vmovdqu\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVU_P_MR>,
          XS, VEX;
    def VMOVDQUYmr
        : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src),
            "vmovdqu\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVU_P_MR>,
          XS, VEX, VEX_L;
  }
}
4072f4a2713aSLionel Sambuc
// SSE2 register-to-register integer vector moves.
// None of these definitions carries a selection pattern, so hasSideEffects
// cannot be inferred and is cleared explicitly for the whole group.  This
// keeps MOVDQUrr consistent with MOVDQArr, the _REV forms and the VEX
// VMOVDQUrr; an unset flag on a pattern-less instruction is otherwise
// treated conservatively as "has side effects".
let SchedRW = [WriteMove], hasSideEffects = 0 in {
def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                   "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>;

def MOVDQUrr :   I<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                   "movdqu\t{$src, $dst|$dst, $src}",
                   [], IIC_SSE_MOVU_P_RR>, XS, Requires<[UseSSE2]>;

// For Disassembler
// Alternate encodings using opcode 0x7F with MRMDestReg; codegen-only but
// forced into the disassembler tables.
let isCodeGenOnly = 1, ForceDisassemble = 1 in {
def MOVDQArr_REV : PDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                       "movdqa\t{$src, $dst|$dst, $src}", [],
                       IIC_SSE_MOVA_P_RR>;

def MOVDQUrr_REV :   I<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                       "movdqu\t{$src, $dst|$dst, $src}",
                       [], IIC_SSE_MOVU_P_RR>, XS, Requires<[UseSSE2]>;
}
} // SchedRW, hasSideEffects
4093f4a2713aSLionel Sambuc
// SSE2 integer vector loads (opcode 0x6F, MRMSrcMem).  The selection patterns
// are deliberately left commented out inside the (empty) pattern lists.
let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1 in
let hasSideEffects = 0, SchedRW = [WriteLoad] in {
  def MOVDQArm
      : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
            "movdqa\t{$src, $dst|$dst, $src}",
            [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/],
            IIC_SSE_MOVA_P_RM>;
  def MOVDQUrm
      : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
          "movdqu\t{$src, $dst|$dst, $src}",
          [/*(set VR128:$dst, (loadv2i64 addr:$src))*/],
          IIC_SSE_MOVU_P_RM>,
        XS, Requires<[UseSSE2]>;
}
4106f4a2713aSLionel Sambuc
// SSE2 integer vector stores (opcode 0x7F, MRMDestMem).  The selection
// patterns are deliberately left commented out inside the (empty) pattern
// lists.
let mayStore = 1 in
let hasSideEffects = 0, SchedRW = [WriteStore] in {
  def MOVDQAmr
      : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
            "movdqa\t{$src, $dst|$dst, $src}",
            [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/],
            IIC_SSE_MOVA_P_MR>;
  def MOVDQUmr
      : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
          "movdqu\t{$src, $dst|$dst, $src}",
          [/*(store (v2i64 VR128:$src), addr:$dst)*/],
          IIC_SSE_MOVU_P_MR>,
        XS, Requires<[UseSSE2]>;
}
4118f4a2713aSLionel Sambuc
4119f4a2713aSLionel Sambuc} // ExeDomain = SSEPackedInt
4120f4a2713aSLionel Sambuc
// Unaligned integer store intrinsics.  Under HasAVX the 128-bit and 256-bit
// intrinsics select the VEX stores; under UseSSE2 the 128-bit intrinsic
// selects the SSE2 store.
let Predicates = [HasAVX] in {
  def : Pat<
    (int_x86_sse2_storeu_dq addr:$dst, VR128:$src),
    (VMOVDQUmr addr:$dst, VR128:$src)
  >;
  def : Pat<
    (int_x86_avx_storeu_dq_256 addr:$dst, VR256:$src),
    (VMOVDQUYmr addr:$dst, VR256:$src)
  >;
}
let Predicates = [UseSSE2] in {
  def : Pat<
    (int_x86_sse2_storeu_dq addr:$dst, VR128:$src),
    (MOVDQUmr addr:$dst, VR128:$src)
  >;
}
4130f4a2713aSLionel Sambuc
4131f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===//
4132f4a2713aSLionel Sambuc// SSE2 - Packed Integer Arithmetic Instructions
4133f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===//
4134f4a2713aSLionel Sambuc
// Itinerary/scheduling bundle for the multiply-add style operations.  Both
// the rr and rm itinerary classes are IIC_SSE_PMADD, and the OpndItins
// default Sched value is replaced by WriteVecIMul.
def SSE_PMADD : OpndItins<IIC_SSE_PMADD, IIC_SSE_PMADD> {
  let Sched = WriteVecIMul;
}
4139f4a2713aSLionel Sambuc
4140f4a2713aSLionel Sambuclet ExeDomain = SSEPackedInt in { // SSE integer instructions
4141f4a2713aSLionel Sambuc
/// PDI_binop_rm_int - Intrinsic-based packed-integer binary operator.
/// Emits a register-register form (rr) and a register-memory form (rm) whose
/// second operand is loaded through memop_frag and bitconverted.  Is2Addr
/// chooses the two-operand assembly string, otherwise the three-operand one
/// is used.  IsCommutable is applied to the rr form only.
multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
                            RegisterClass RC, PatFrag memop_frag,
                            X86MemOperand x86memop, OpndItins itins,
                            bit IsCommutable = 0, bit Is2Addr = 1> {
  let isCommutable = IsCommutable in
  def rr
      : PDI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
            !if(Is2Addr,
                !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                !strconcat(OpcodeStr,
                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
            [(set RC:$dst, (IntId RC:$src1, RC:$src2))],
            itins.rr>,
        Sched<[itins.Sched]>;

  // Memory form: scheduled with the folded-load variant of the same class.
  def rm
      : PDI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
            !if(Is2Addr,
                !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                !strconcat(OpcodeStr,
                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
            [(set RC:$dst,
                  (IntId RC:$src1, (bitconvert (memop_frag addr:$src2))))],
            itins.rm>,
        Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
4164f4a2713aSLionel Sambuc
/// PDI_binop_all_int - Instantiate PDI_binop_rm_int three times for one
/// intrinsic-based operator:
///   V<NAME>   - "v"-prefixed 128-bit form under HasAVX (three-operand asm),
///   <NAME>    - 128-bit form with $src1 tied to $dst (two-operand asm),
///   V<NAME>Y  - "v"-prefixed 256-bit form under HasAVX2 using IntId256.
multiclass PDI_binop_all_int<bits<8> opc, string OpcodeStr, Intrinsic IntId128,
                             Intrinsic IntId256, OpndItins itins,
                             bit IsCommutable = 0> {
  let Predicates = [HasAVX] in {
    defm V#NAME
        : PDI_binop_rm_int<opc, !strconcat("v", OpcodeStr), IntId128, VR128,
                           loadv2i64, i128mem, itins, IsCommutable, 0>,
          VEX_4V;
  }

  let Constraints = "$src1 = $dst" in {
    defm NAME
        : PDI_binop_rm_int<opc, OpcodeStr, IntId128, VR128, memopv2i64,
                           i128mem, itins, IsCommutable, 1>;
  }

  let Predicates = [HasAVX2] in {
    defm V#NAME#Y
        : PDI_binop_rm_int<opc, !strconcat("v", OpcodeStr), IntId256, VR256,
                           loadv4i64, i256mem, itins, IsCommutable, 0>,
          VEX_4V, VEX_L;
  }
}
4182f4a2713aSLionel Sambuc
/// PDI_binop_rmi - Packed-integer operator with register (rr), memory (rm)
/// and 8-bit immediate (ri) forms for the second operand.  rr and rm use
/// opcode `opc` with OpNode; ri uses opcode `opc2`, format ImmForm and
/// OpNode2.  All forms use the WriteVecShift scheduling classes.
multiclass PDI_binop_rmi<bits<8> opc, bits<8> opc2, Format ImmForm,
                         string OpcodeStr, SDNode OpNode, SDNode OpNode2,
                         RegisterClass RC, ValueType DstVT, ValueType SrcVT,
                         PatFrag bc_frag, ShiftOpndItins itins,
                         bit Is2Addr = 1> {
  // src2 is always 128-bit
  def rr
      : PDI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, VR128:$src2),
            !if(Is2Addr,
                !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                !strconcat(OpcodeStr,
                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
            [(set RC:$dst, (DstVT (OpNode RC:$src1, (SrcVT VR128:$src2))))],
            itins.rr>,
        Sched<[WriteVecShift]>;

  // The memory operand is loaded as v2i64 and reinterpreted via bc_frag.
  def rm
      : PDI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, i128mem:$src2),
            !if(Is2Addr,
                !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                !strconcat(OpcodeStr,
                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
            [(set RC:$dst,
                  (DstVT (OpNode RC:$src1,
                                 (bc_frag (memopv2i64 addr:$src2)))))],
            itins.rm>,
        Sched<[WriteVecShiftLd, ReadAfterLd]>;

  def ri
      : PDIi8<opc2, ImmForm, (outs RC:$dst), (ins RC:$src1, i8imm:$src2),
              !if(Is2Addr,
                  !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                  !strconcat(OpcodeStr,
                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
              [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i8 imm:$src2))))],
              itins.ri>,
        Sched<[WriteVecShift]>;
}
4213f4a2713aSLionel Sambuc
/// PDI_binop_rm2 - Simple SSE2 binary operator with different src and dst types
/// Emits a register-register form (rr) and a register-memory form (rm) whose
/// second operand is loaded through memop_frag and bitconverted.  Is2Addr
/// chooses the two-operand assembly string, otherwise the three-operand one
/// is used.  IsCommutable is applied to the rr form only.  The itinerary
/// classes itins.rr / itins.rm are forwarded to PDI, matching
/// PDI_binop_rm_int, so the `itins` argument is honoured for itineraries as
/// well as for the Sched classes.
multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         ValueType DstVT, ValueType SrcVT, RegisterClass RC,
                         PatFrag memop_frag, X86MemOperand x86memop,
                         OpndItins itins,
                         bit IsCommutable = 0, bit Is2Addr = 1> {
  let isCommutable = IsCommutable in
  def rr : PDI<opc, MRMSrcReg, (outs RC:$dst),
       (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))],
       itins.rr>,
       Sched<[itins.Sched]>;
  def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
       (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1),
                                     (bitconvert (memop_frag addr:$src2)))))],
       itins.rm>,
       Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
4237f4a2713aSLionel Sambuc} // ExeDomain = SSEPackedInt
4238f4a2713aSLionel Sambuc
// SDNode-based packed-integer arithmetic.  Each line supplies the opcode, the
// mnemonic, the DAG node, the 128-bit and 256-bit vector types, the itinerary
// bundle and a trailing 0/1 flag (1 on add/mul/min/max, 0 on the subtracts --
// presumably PDI_binop_all's commutable flag; that multiclass is defined
// outside this section).

// Additions.
defm PADDB
    : PDI_binop_all<0xFC, "paddb", add, v16i8, v32i8, SSE_INTALU_ITINS_P, 1>;
defm PADDW
    : PDI_binop_all<0xFD, "paddw", add, v8i16, v16i16, SSE_INTALU_ITINS_P, 1>;
defm PADDD
    : PDI_binop_all<0xFE, "paddd", add, v4i32, v8i32, SSE_INTALU_ITINS_P, 1>;
defm PADDQ
    : PDI_binop_all<0xD4, "paddq", add, v2i64, v4i64, SSE_INTALUQ_ITINS_P, 1>;

// Multiplications (low half, unsigned high half, signed high half).
defm PMULLW
    : PDI_binop_all<0xD5, "pmullw", mul, v8i16, v16i16,
                    SSE_INTMUL_ITINS_P, 1>;
defm PMULHUW
    : PDI_binop_all<0xE4, "pmulhuw", mulhu, v8i16, v16i16,
                    SSE_INTMUL_ITINS_P, 1>;
defm PMULHW
    : PDI_binop_all<0xE5, "pmulhw", mulhs, v8i16, v16i16,
                    SSE_INTMUL_ITINS_P, 1>;

// Subtractions, including the X86subus forms.
defm PSUBB
    : PDI_binop_all<0xF8, "psubb", sub, v16i8, v32i8, SSE_INTALU_ITINS_P, 0>;
defm PSUBW
    : PDI_binop_all<0xF9, "psubw", sub, v8i16, v16i16, SSE_INTALU_ITINS_P, 0>;
defm PSUBD
    : PDI_binop_all<0xFA, "psubd", sub, v4i32, v8i32, SSE_INTALU_ITINS_P, 0>;
defm PSUBQ
    : PDI_binop_all<0xFB, "psubq", sub, v2i64, v4i64, SSE_INTALUQ_ITINS_P, 0>;
defm PSUBUSB
    : PDI_binop_all<0xD8, "psubusb", X86subus, v16i8, v32i8,
                    SSE_INTALU_ITINS_P, 0>;
defm PSUBUSW
    : PDI_binop_all<0xD9, "psubusw", X86subus, v8i16, v16i16,
                    SSE_INTALU_ITINS_P, 0>;

// Minimum / maximum.
defm PMINUB
    : PDI_binop_all<0xDA, "pminub", X86umin, v16i8, v32i8,
                    SSE_INTALU_ITINS_P, 1>;
defm PMINSW
    : PDI_binop_all<0xEA, "pminsw", X86smin, v8i16, v16i16,
                    SSE_INTALU_ITINS_P, 1>;
defm PMAXUB
    : PDI_binop_all<0xDE, "pmaxub", X86umax, v16i8, v32i8,
                    SSE_INTALU_ITINS_P, 1>;
defm PMAXSW
    : PDI_binop_all<0xEE, "pmaxsw", X86smax, v8i16, v16i16,
                    SSE_INTALU_ITINS_P, 1>;
4273f4a2713aSLionel Sambuc
// Intrinsic forms
// Each line supplies the opcode, the mnemonic, the 128-bit (sse2) and 256-bit
// (avx2) intrinsics, the itinerary bundle and the IsCommutable flag of
// PDI_binop_all_int.

// Saturating subtract (not commutable).
defm PSUBSB
    : PDI_binop_all_int<0xE8, "psubsb", int_x86_sse2_psubs_b,
                        int_x86_avx2_psubs_b, SSE_INTALU_ITINS_P, 0>;
defm PSUBSW
    : PDI_binop_all_int<0xE9, "psubsw", int_x86_sse2_psubs_w,
                        int_x86_avx2_psubs_w, SSE_INTALU_ITINS_P, 0>;

// Saturating add, signed then unsigned.
defm PADDSB
    : PDI_binop_all_int<0xEC, "paddsb", int_x86_sse2_padds_b,
                        int_x86_avx2_padds_b, SSE_INTALU_ITINS_P, 1>;
defm PADDSW
    : PDI_binop_all_int<0xED, "paddsw", int_x86_sse2_padds_w,
                        int_x86_avx2_padds_w, SSE_INTALU_ITINS_P, 1>;
defm PADDUSB
    : PDI_binop_all_int<0xDC, "paddusb", int_x86_sse2_paddus_b,
                        int_x86_avx2_paddus_b, SSE_INTALU_ITINS_P, 1>;
defm PADDUSW
    : PDI_binop_all_int<0xDD, "paddusw", int_x86_sse2_paddus_w,
                        int_x86_avx2_paddus_w, SSE_INTALU_ITINS_P, 1>;

// Multiply-add, averages and sum of absolute differences.
defm PMADDWD
    : PDI_binop_all_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd,
                        int_x86_avx2_pmadd_wd, SSE_PMADD, 1>;
defm PAVGB
    : PDI_binop_all_int<0xE0, "pavgb", int_x86_sse2_pavg_b,
                        int_x86_avx2_pavg_b, SSE_INTALU_ITINS_P, 1>;
defm PAVGW
    : PDI_binop_all_int<0xE3, "pavgw", int_x86_sse2_pavg_w,
                        int_x86_avx2_pavg_w, SSE_INTALU_ITINS_P, 1>;
defm PSADBW
    : PDI_binop_all_int<0xF6, "psadbw", int_x86_sse2_psad_bw,
                        int_x86_avx2_psad_bw, SSE_PMADD, 1>;
4295f4a2713aSLionel Sambuc
// pmuludq (opcode 0xF4, X86pmuludq node) in three flavours, all built from
// PDI_binop_rm2 with two distinct vector types (v2i64/v4i32 or v4i64/v8i32).
// NOTE(review): the two types are presumably the result type followed by the
// source type -- confirm against PDI_binop_rm2.
//
// AVX 128-bit form: VR128, loadv2i64 fragment, VEX_4V encoding.
4296f4a2713aSLionel Sambuclet Predicates = [HasAVX] in
4297f4a2713aSLionel Sambucdefm VPMULUDQ : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v2i64, v4i32, VR128,
4298f4a2713aSLionel Sambuc                              loadv2i64, i128mem, SSE_INTMUL_ITINS_P, 1, 0>,
4299f4a2713aSLionel Sambuc                              VEX_4V;
// AVX2 256-bit form: VR256, loadv4i64 fragment, VEX_4V plus VEX_L.
4300f4a2713aSLionel Sambuclet Predicates = [HasAVX2] in
4301f4a2713aSLionel Sambucdefm VPMULUDQY : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v4i64, v8i32,
4302f4a2713aSLionel Sambuc                               VR256, loadv4i64, i256mem,
4303f4a2713aSLionel Sambuc                               SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L;
// Legacy SSE2 form: $src1 is tied to $dst, the memory operand uses the
// memopv2i64 fragment, and the last template argument is left at its default.
4304f4a2713aSLionel Sambuclet Constraints = "$src1 = $dst" in
4305f4a2713aSLionel Sambucdefm PMULUDQ : PDI_binop_rm2<0xF4, "pmuludq", X86pmuludq, v2i64, v4i32, VR128,
4306f4a2713aSLionel Sambuc                             memopv2i64, i128mem, SSE_INTMUL_ITINS_P, 1>;
4307f4a2713aSLionel Sambuc
4308f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===//
4309f4a2713aSLionel Sambuc// SSE2 - Packed Integer Shift Instructions
4310f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===//
4311f4a2713aSLionel Sambuc
// AVX (VEX_4V) 128-bit packed shifts, all operating on VR128.
// Each PDI_binop_rmi instantiation receives two opcode bytes and a ModRM form
// (MRM6r for shift-left, MRM2r for logical shift-right, MRM4r for arithmetic
// shift-right), the mnemonic, a pair of shift nodes (X86vshl/X86vshli,
// X86vsrl/X86vsrli, X86vsra/X86vsrai), two vector types, a bitcast fragment,
// an itinerary set and a trailing 0.
// NOTE(review): the first opcode is presumably the register/memory-count
// encoding and the second (0x71/0x72/0x73 + MRMnr) the immediate-count
// encoding; the trailing 0 presumably selects the three-operand asm string.
// Confirm against the PDI_binop_rmi definition.
4312f4a2713aSLionel Sambuclet Predicates = [HasAVX] in {
4313f4a2713aSLionel Sambucdefm VPSLLW : PDI_binop_rmi<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli,
4314f4a2713aSLionel Sambuc                            VR128, v8i16, v8i16, bc_v8i16,
4315f4a2713aSLionel Sambuc                            SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
4316f4a2713aSLionel Sambucdefm VPSLLD : PDI_binop_rmi<0xF2, 0x72, MRM6r, "vpslld", X86vshl, X86vshli,
4317f4a2713aSLionel Sambuc                            VR128, v4i32, v4i32, bc_v4i32,
4318f4a2713aSLionel Sambuc                            SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
4319f4a2713aSLionel Sambucdefm VPSLLQ : PDI_binop_rmi<0xF3, 0x73, MRM6r, "vpsllq", X86vshl, X86vshli,
4320f4a2713aSLionel Sambuc                            VR128, v2i64, v2i64, bc_v2i64,
4321f4a2713aSLionel Sambuc                            SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
4322f4a2713aSLionel Sambuc
4323f4a2713aSLionel Sambucdefm VPSRLW : PDI_binop_rmi<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli,
4324f4a2713aSLionel Sambuc                            VR128, v8i16, v8i16, bc_v8i16,
4325f4a2713aSLionel Sambuc                            SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
4326f4a2713aSLionel Sambucdefm VPSRLD : PDI_binop_rmi<0xD2, 0x72, MRM2r, "vpsrld", X86vsrl, X86vsrli,
4327f4a2713aSLionel Sambuc                            VR128, v4i32, v4i32, bc_v4i32,
4328f4a2713aSLionel Sambuc                            SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
4329f4a2713aSLionel Sambucdefm VPSRLQ : PDI_binop_rmi<0xD3, 0x73, MRM2r, "vpsrlq", X86vsrl, X86vsrli,
4330f4a2713aSLionel Sambuc                            VR128, v2i64, v2i64, bc_v2i64,
4331f4a2713aSLionel Sambuc                            SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
4332f4a2713aSLionel Sambuc
// Arithmetic right shifts are only instantiated for 16- and 32-bit elements.
4333f4a2713aSLionel Sambucdefm VPSRAW : PDI_binop_rmi<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai,
4334f4a2713aSLionel Sambuc                            VR128, v8i16, v8i16, bc_v8i16,
4335f4a2713aSLionel Sambuc                            SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
4336f4a2713aSLionel Sambucdefm VPSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai,
4337f4a2713aSLionel Sambuc                            VR128, v4i32, v4i32, bc_v4i32,
4338f4a2713aSLionel Sambuc                            SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
4339f4a2713aSLionel Sambuc
// Whole-register shifts by an immediate. Both share opcode 0x73 and are
// distinguished by the ModRM form (MRM7r = vpslldq, MRM3r = vpsrldq). They are
// selected from the int_x86_sse2_ps{l,r}l_dq_bs intrinsics. No itinerary class
// is passed here; scheduling info comes from SchedRW = [WriteVecShift].
4340f4a2713aSLionel Sambuclet ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in {
4341f4a2713aSLionel Sambuc  // 128-bit logical shifts.
4342f4a2713aSLionel Sambuc  def VPSLLDQri : PDIi8<0x73, MRM7r,
4343f4a2713aSLionel Sambuc                    (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
4344f4a2713aSLionel Sambuc                    "vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4345f4a2713aSLionel Sambuc                    [(set VR128:$dst,
4346f4a2713aSLionel Sambuc                      (int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2))]>,
4347f4a2713aSLionel Sambuc                    VEX_4V;
4348f4a2713aSLionel Sambuc  def VPSRLDQri : PDIi8<0x73, MRM3r,
4349f4a2713aSLionel Sambuc                    (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
4350f4a2713aSLionel Sambuc                    "vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4351f4a2713aSLionel Sambuc                    [(set VR128:$dst,
4352f4a2713aSLionel Sambuc                      (int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2))]>,
4353f4a2713aSLionel Sambuc                    VEX_4V;
4354f4a2713aSLionel Sambuc  // PSRADQri doesn't exist in SSE[1-3].
4355f4a2713aSLionel Sambuc}
4356f4a2713aSLionel Sambuc} // Predicates = [HasAVX]
4357f4a2713aSLionel Sambuc
// AVX2 256-bit packed shifts (VEX_4V, VEX_L) operating on VR256.
// The opcodes, ModRM forms and shift nodes mirror the 128-bit VEX definitions;
// here the two vector types passed to PDI_binop_rmi differ (e.g. v16i16 and
// v8i16) and the bitcast fragment is the 128-bit one (bc_v8i16 / bc_v4i32 /
// bc_v2i64).
// NOTE(review): this suggests the data operand is 256-bit while the shift
// count operand stays 128-bit -- confirm against PDI_binop_rmi.
4358f4a2713aSLionel Sambuclet Predicates = [HasAVX2] in {
4359f4a2713aSLionel Sambucdefm VPSLLWY : PDI_binop_rmi<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli,
4360f4a2713aSLionel Sambuc                             VR256, v16i16, v8i16, bc_v8i16,
4361f4a2713aSLionel Sambuc                             SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
4362f4a2713aSLionel Sambucdefm VPSLLDY : PDI_binop_rmi<0xF2, 0x72, MRM6r, "vpslld", X86vshl, X86vshli,
4363f4a2713aSLionel Sambuc                             VR256, v8i32, v4i32, bc_v4i32,
4364f4a2713aSLionel Sambuc                             SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
4365f4a2713aSLionel Sambucdefm VPSLLQY : PDI_binop_rmi<0xF3, 0x73, MRM6r, "vpsllq", X86vshl, X86vshli,
4366f4a2713aSLionel Sambuc                             VR256, v4i64, v2i64, bc_v2i64,
4367f4a2713aSLionel Sambuc                             SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
4368f4a2713aSLionel Sambuc
4369f4a2713aSLionel Sambucdefm VPSRLWY : PDI_binop_rmi<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli,
4370f4a2713aSLionel Sambuc                             VR256, v16i16, v8i16, bc_v8i16,
4371f4a2713aSLionel Sambuc                             SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
4372f4a2713aSLionel Sambucdefm VPSRLDY : PDI_binop_rmi<0xD2, 0x72, MRM2r, "vpsrld", X86vsrl, X86vsrli,
4373f4a2713aSLionel Sambuc                             VR256, v8i32, v4i32, bc_v4i32,
4374f4a2713aSLionel Sambuc                             SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
4375f4a2713aSLionel Sambucdefm VPSRLQY : PDI_binop_rmi<0xD3, 0x73, MRM2r, "vpsrlq", X86vsrl, X86vsrli,
4376f4a2713aSLionel Sambuc                             VR256, v4i64, v2i64, bc_v2i64,
4377f4a2713aSLionel Sambuc                             SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
4378f4a2713aSLionel Sambuc
4379f4a2713aSLionel Sambucdefm VPSRAWY : PDI_binop_rmi<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai,
4380f4a2713aSLionel Sambuc                             VR256, v16i16, v8i16, bc_v8i16,
4381f4a2713aSLionel Sambuc                             SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
4382f4a2713aSLionel Sambucdefm VPSRADY : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai,
4383f4a2713aSLionel Sambuc                             VR256, v8i32, v4i32, bc_v4i32,
4384f4a2713aSLionel Sambuc                             SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
4385f4a2713aSLionel Sambuc
// 256-bit vpslldq/vpsrldq by immediate: opcode 0x73 with MRM7r / MRM3r,
// selected from the int_x86_avx2_ps{l,r}l_dq_bs intrinsics.
4386f4a2713aSLionel Sambuclet ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in {
4387f4a2713aSLionel Sambuc  // 256-bit logical shifts.
4388f4a2713aSLionel Sambuc  def VPSLLDQYri : PDIi8<0x73, MRM7r,
4389f4a2713aSLionel Sambuc                    (outs VR256:$dst), (ins VR256:$src1, i32i8imm:$src2),
4390f4a2713aSLionel Sambuc                    "vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4391f4a2713aSLionel Sambuc                    [(set VR256:$dst,
4392f4a2713aSLionel Sambuc                      (int_x86_avx2_psll_dq_bs VR256:$src1, imm:$src2))]>,
4393f4a2713aSLionel Sambuc                    VEX_4V, VEX_L;
4394f4a2713aSLionel Sambuc  def VPSRLDQYri : PDIi8<0x73, MRM3r,
4395f4a2713aSLionel Sambuc                    (outs VR256:$dst), (ins VR256:$src1, i32i8imm:$src2),
4396f4a2713aSLionel Sambuc                    "vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4397f4a2713aSLionel Sambuc                    [(set VR256:$dst,
4398f4a2713aSLionel Sambuc                      (int_x86_avx2_psrl_dq_bs VR256:$src1, imm:$src2))]>,
4399f4a2713aSLionel Sambuc                    VEX_4V, VEX_L;
4400f4a2713aSLionel Sambuc  // PSRADQYri doesn't exist in SSE[1-3].
4401f4a2713aSLionel Sambuc}
4402f4a2713aSLionel Sambuc} // Predicates = [HasAVX2]
4403f4a2713aSLionel Sambuc
// Legacy (non-VEX) SSE2 packed shifts. Everything in this block ties $src1 to
// $dst, so the instructions are destructive two-address forms. The opcodes,
// ModRM forms and shift nodes match the VEX definitions; the last
// PDI_binop_rmi template argument is left at its default here.
4404f4a2713aSLionel Sambuclet Constraints = "$src1 = $dst" in {
4405f4a2713aSLionel Sambucdefm PSLLW : PDI_binop_rmi<0xF1, 0x71, MRM6r, "psllw", X86vshl, X86vshli,
4406f4a2713aSLionel Sambuc                           VR128, v8i16, v8i16, bc_v8i16,
4407f4a2713aSLionel Sambuc                           SSE_INTSHIFT_ITINS_P>;
4408f4a2713aSLionel Sambucdefm PSLLD : PDI_binop_rmi<0xF2, 0x72, MRM6r, "pslld", X86vshl, X86vshli,
4409f4a2713aSLionel Sambuc                           VR128, v4i32, v4i32, bc_v4i32,
4410f4a2713aSLionel Sambuc                           SSE_INTSHIFT_ITINS_P>;
4411f4a2713aSLionel Sambucdefm PSLLQ : PDI_binop_rmi<0xF3, 0x73, MRM6r, "psllq", X86vshl, X86vshli,
4412f4a2713aSLionel Sambuc                           VR128, v2i64, v2i64, bc_v2i64,
4413f4a2713aSLionel Sambuc                           SSE_INTSHIFT_ITINS_P>;
4414f4a2713aSLionel Sambuc
4415f4a2713aSLionel Sambucdefm PSRLW : PDI_binop_rmi<0xD1, 0x71, MRM2r, "psrlw", X86vsrl, X86vsrli,
4416f4a2713aSLionel Sambuc                           VR128, v8i16, v8i16, bc_v8i16,
4417f4a2713aSLionel Sambuc                           SSE_INTSHIFT_ITINS_P>;
4418f4a2713aSLionel Sambucdefm PSRLD : PDI_binop_rmi<0xD2, 0x72, MRM2r, "psrld", X86vsrl, X86vsrli,
4419f4a2713aSLionel Sambuc                           VR128, v4i32, v4i32, bc_v4i32,
4420f4a2713aSLionel Sambuc                           SSE_INTSHIFT_ITINS_P>;
4421f4a2713aSLionel Sambucdefm PSRLQ : PDI_binop_rmi<0xD3, 0x73, MRM2r, "psrlq", X86vsrl, X86vsrli,
4422f4a2713aSLionel Sambuc                           VR128, v2i64, v2i64, bc_v2i64,
4423f4a2713aSLionel Sambuc                           SSE_INTSHIFT_ITINS_P>;
4424f4a2713aSLionel Sambuc
4425f4a2713aSLionel Sambucdefm PSRAW : PDI_binop_rmi<0xE1, 0x71, MRM4r, "psraw", X86vsra, X86vsrai,
4426f4a2713aSLionel Sambuc                           VR128, v8i16, v8i16, bc_v8i16,
4427f4a2713aSLionel Sambuc                           SSE_INTSHIFT_ITINS_P>;
4428f4a2713aSLionel Sambucdefm PSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "psrad", X86vsra, X86vsrai,
4429f4a2713aSLionel Sambuc                           VR128, v4i32, v4i32, bc_v4i32,
4430f4a2713aSLionel Sambuc                           SSE_INTSHIFT_ITINS_P>;
4431f4a2713aSLionel Sambuc
// pslldq/psrldq by immediate: opcode 0x73 with MRM7r / MRM3r. The asm string
// only prints $src2 and $dst because $src1 is tied to $dst. These two defs
// carry the IIC_SSE_INTSHDQ_P_RI itinerary class.
4432f4a2713aSLionel Sambuclet ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in {
4433f4a2713aSLionel Sambuc  // 128-bit logical shifts.
4434f4a2713aSLionel Sambuc  def PSLLDQri : PDIi8<0x73, MRM7r,
4435f4a2713aSLionel Sambuc                       (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
4436f4a2713aSLionel Sambuc                       "pslldq\t{$src2, $dst|$dst, $src2}",
4437f4a2713aSLionel Sambuc                       [(set VR128:$dst,
4438f4a2713aSLionel Sambuc                         (int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2))],
4439f4a2713aSLionel Sambuc                         IIC_SSE_INTSHDQ_P_RI>;
4440f4a2713aSLionel Sambuc  def PSRLDQri : PDIi8<0x73, MRM3r,
4441f4a2713aSLionel Sambuc                       (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
4442f4a2713aSLionel Sambuc                       "psrldq\t{$src2, $dst|$dst, $src2}",
4443f4a2713aSLionel Sambuc                       [(set VR128:$dst,
4444f4a2713aSLionel Sambuc                         (int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2))],
4445f4a2713aSLionel Sambuc                         IIC_SSE_INTSHDQ_P_RI>;
4446f4a2713aSLionel Sambuc  // PSRADQri doesn't exist in SSE[1-3].
4447f4a2713aSLionel Sambuc}
4448f4a2713aSLionel Sambuc} // Constraints = "$src1 = $dst"
4449f4a2713aSLionel Sambuc
// AVX selection patterns that map further nodes onto VPSLLDQri / VPSRLDQri.
// Every pattern rewrites its immediate through the BYTE_imm transform before
// handing it to the instruction.
// NOTE(review): BYTE_imm presumably converts a bit count into a byte count --
// confirm against its definition.
4450f4a2713aSLionel Sambuclet Predicates = [HasAVX] in {
4451f4a2713aSLionel Sambuc  def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
4452f4a2713aSLionel Sambuc            (VPSLLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
4453f4a2713aSLionel Sambuc  def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2),
4454f4a2713aSLionel Sambuc            (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
  // v2f64 X86fsrl is also selected to the integer byte-shift instruction.
4455f4a2713aSLionel Sambuc  def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
4456f4a2713aSLionel Sambuc            (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
4457f4a2713aSLionel Sambuc
4458f4a2713aSLionel Sambuc  // Shift up / down and insert zeros.
4459f4a2713aSLionel Sambuc  def : Pat<(v2i64 (X86vshldq VR128:$src, (i8 imm:$amt))),
4460f4a2713aSLionel Sambuc            (VPSLLDQri VR128:$src, (BYTE_imm imm:$amt))>;
4461f4a2713aSLionel Sambuc  def : Pat<(v2i64 (X86vshrdq VR128:$src, (i8 imm:$amt))),
4462f4a2713aSLionel Sambuc            (VPSRLDQri VR128:$src, (BYTE_imm imm:$amt))>;
4463f4a2713aSLionel Sambuc}
4464f4a2713aSLionel Sambuc
// AVX2 selection patterns: the int_x86_avx2_ps{l,r}l_dq intrinsics on VR256
// are mapped to VPSLLDQYri / VPSRLDQYri, with the immediate passed through the
// BYTE_imm transform.
4465f4a2713aSLionel Sambuclet Predicates = [HasAVX2] in {
4466f4a2713aSLionel Sambuc  def : Pat<(int_x86_avx2_psll_dq VR256:$src1, imm:$src2),
4467f4a2713aSLionel Sambuc            (VPSLLDQYri VR256:$src1, (BYTE_imm imm:$src2))>;
4468f4a2713aSLionel Sambuc  def : Pat<(int_x86_avx2_psrl_dq VR256:$src1, imm:$src2),
4469f4a2713aSLionel Sambuc            (VPSRLDQYri VR256:$src1, (BYTE_imm imm:$src2))>;
4470f4a2713aSLionel Sambuc}
4471f4a2713aSLionel Sambuc
// SSE2 (non-VEX) counterparts of the AVX byte-shift patterns: the same source
// nodes are selected to PSLLDQri / PSRLDQri, with the immediate passed through
// the BYTE_imm transform.
4472f4a2713aSLionel Sambuclet Predicates = [UseSSE2] in {
4473f4a2713aSLionel Sambuc  def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
4474f4a2713aSLionel Sambuc            (PSLLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
4475f4a2713aSLionel Sambuc  def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2),
4476f4a2713aSLionel Sambuc            (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
  // v2f64 X86fsrl is also selected to the integer byte-shift instruction.
4477f4a2713aSLionel Sambuc  def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
4478f4a2713aSLionel Sambuc            (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
4479f4a2713aSLionel Sambuc
4480f4a2713aSLionel Sambuc  // Shift up / down and insert zeros.
4481f4a2713aSLionel Sambuc  def : Pat<(v2i64 (X86vshldq VR128:$src, (i8 imm:$amt))),
4482f4a2713aSLionel Sambuc            (PSLLDQri VR128:$src, (BYTE_imm imm:$amt))>;
4483f4a2713aSLionel Sambuc  def : Pat<(v2i64 (X86vshrdq VR128:$src, (i8 imm:$amt))),
4484f4a2713aSLionel Sambuc            (PSRLDQri VR128:$src, (BYTE_imm imm:$amt))>;
4485f4a2713aSLionel Sambuc}
4486f4a2713aSLionel Sambuc
4487f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===//
4488f4a2713aSLionel Sambuc// SSE2 - Packed Integer Comparison Instructions
4489f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===//
4490f4a2713aSLionel Sambuc
// Packed integer compares for byte, word and dword elements, built from
// PDI_binop_all with the X86pcmpeq / X86pcmpgt nodes.
// NOTE(review): the trailing flag is 1 for the equality compares and 0 for
// the greater-than compares, so it is presumably the commutable bit --
// confirm against PDI_binop_all.
4491f4a2713aSLionel Sambucdefm PCMPEQB : PDI_binop_all<0x74, "pcmpeqb", X86pcmpeq, v16i8, v32i8,
4492f4a2713aSLionel Sambuc                             SSE_INTALU_ITINS_P, 1>;
4493f4a2713aSLionel Sambucdefm PCMPEQW : PDI_binop_all<0x75, "pcmpeqw", X86pcmpeq, v8i16, v16i16,
4494f4a2713aSLionel Sambuc                             SSE_INTALU_ITINS_P, 1>;
4495f4a2713aSLionel Sambucdefm PCMPEQD : PDI_binop_all<0x76, "pcmpeqd", X86pcmpeq, v4i32, v8i32,
4496f4a2713aSLionel Sambuc                             SSE_INTALU_ITINS_P, 1>;
4497f4a2713aSLionel Sambucdefm PCMPGTB : PDI_binop_all<0x64, "pcmpgtb", X86pcmpgt, v16i8, v32i8,
4498f4a2713aSLionel Sambuc                             SSE_INTALU_ITINS_P, 0>;
4499f4a2713aSLionel Sambucdefm PCMPGTW : PDI_binop_all<0x65, "pcmpgtw", X86pcmpgt, v8i16, v16i16,
4500f4a2713aSLionel Sambuc                             SSE_INTALU_ITINS_P, 0>;
4501f4a2713aSLionel Sambucdefm PCMPGTD : PDI_binop_all<0x66, "pcmpgtd", X86pcmpgt, v4i32, v8i32,
4502f4a2713aSLionel Sambuc                             SSE_INTALU_ITINS_P, 0>;
4503f4a2713aSLionel Sambuc
4504f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===//
4505f4a2713aSLionel Sambuc// SSE2 - Packed Integer Shuffle Instructions
4506f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===//
4507f4a2713aSLionel Sambuc
// sse2_pshuffle - one-source shuffles controlled by an 8-bit immediate, all
// encoded with opcode 0x70.
//   OpcodeStr - mnemonic without the "v" prefix (the VEX forms prepend "v").
//   vt128     - result type of the 128-bit forms.
//   vt256     - result type of the 256-bit forms.
//   OpNode    - selection node applied to the source and the immediate.
// For every instantiation NAME this emits six records: VNAMEri/VNAMEmi
// (HasAVX), VNAMEYri/VNAMEYmi (HasAVX2) and NAMEri/NAMEmi (UseSSE2).
4508f4a2713aSLionel Sambuclet ExeDomain = SSEPackedInt in {
4509f4a2713aSLionel Sambucmulticlass sse2_pshuffle<string OpcodeStr, ValueType vt128, ValueType vt256,
4510f4a2713aSLionel Sambuc                         SDNode OpNode> {
// 128-bit VEX forms; the memory form loads through loadv2i64.
4511f4a2713aSLionel Sambuclet Predicates = [HasAVX] in {
4512f4a2713aSLionel Sambuc  def V#NAME#ri : Ii8<0x70, MRMSrcReg, (outs VR128:$dst),
4513f4a2713aSLionel Sambuc                      (ins VR128:$src1, i8imm:$src2),
4514f4a2713aSLionel Sambuc                      !strconcat("v", OpcodeStr,
4515f4a2713aSLionel Sambuc                                 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4516f4a2713aSLionel Sambuc                      [(set VR128:$dst,
4517f4a2713aSLionel Sambuc                        (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))],
4518f4a2713aSLionel Sambuc                      IIC_SSE_PSHUF_RI>, VEX, Sched<[WriteShuffle]>;
4519f4a2713aSLionel Sambuc  def V#NAME#mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst),
4520f4a2713aSLionel Sambuc                      (ins i128mem:$src1, i8imm:$src2),
4521f4a2713aSLionel Sambuc                      !strconcat("v", OpcodeStr,
4522f4a2713aSLionel Sambuc                                 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4523f4a2713aSLionel Sambuc                     [(set VR128:$dst,
4524f4a2713aSLionel Sambuc                       (vt128 (OpNode (bitconvert (loadv2i64 addr:$src1)),
4525f4a2713aSLionel Sambuc                        (i8 imm:$src2))))], IIC_SSE_PSHUF_MI>, VEX,
4526f4a2713aSLionel Sambuc                  Sched<[WriteShuffleLd]>;
4527f4a2713aSLionel Sambuc}
4528f4a2713aSLionel Sambuc
// 256-bit VEX forms (VEX_L); the memory form loads through loadv4i64.
4529f4a2713aSLionel Sambuclet Predicates = [HasAVX2] in {
4530f4a2713aSLionel Sambuc  def V#NAME#Yri : Ii8<0x70, MRMSrcReg, (outs VR256:$dst),
4531f4a2713aSLionel Sambuc                       (ins VR256:$src1, i8imm:$src2),
4532f4a2713aSLionel Sambuc                       !strconcat("v", OpcodeStr,
4533f4a2713aSLionel Sambuc                                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4534f4a2713aSLionel Sambuc                       [(set VR256:$dst,
4535f4a2713aSLionel Sambuc                         (vt256 (OpNode VR256:$src1, (i8 imm:$src2))))],
4536f4a2713aSLionel Sambuc                       IIC_SSE_PSHUF_RI>, VEX, VEX_L, Sched<[WriteShuffle]>;
4537f4a2713aSLionel Sambuc  def V#NAME#Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst),
4538f4a2713aSLionel Sambuc                       (ins i256mem:$src1, i8imm:$src2),
4539f4a2713aSLionel Sambuc                       !strconcat("v", OpcodeStr,
4540f4a2713aSLionel Sambuc                                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4541f4a2713aSLionel Sambuc                      [(set VR256:$dst,
4542f4a2713aSLionel Sambuc                        (vt256 (OpNode (bitconvert (loadv4i64 addr:$src1)),
4543f4a2713aSLionel Sambuc                         (i8 imm:$src2))))], IIC_SSE_PSHUF_MI>, VEX, VEX_L,
4544f4a2713aSLionel Sambuc                   Sched<[WriteShuffleLd]>;
4545f4a2713aSLionel Sambuc}
4546f4a2713aSLionel Sambuc
// Legacy SSE2 forms; the memory form loads through memopv2i64.
4547f4a2713aSLionel Sambuclet Predicates = [UseSSE2] in {
4548f4a2713aSLionel Sambuc  def ri : Ii8<0x70, MRMSrcReg,
4549f4a2713aSLionel Sambuc               (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
4550f4a2713aSLionel Sambuc               !strconcat(OpcodeStr,
4551f4a2713aSLionel Sambuc                          "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4552f4a2713aSLionel Sambuc                [(set VR128:$dst,
4553f4a2713aSLionel Sambuc                  (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))],
4554f4a2713aSLionel Sambuc                IIC_SSE_PSHUF_RI>, Sched<[WriteShuffle]>;
4555f4a2713aSLionel Sambuc  def mi : Ii8<0x70, MRMSrcMem,
4556f4a2713aSLionel Sambuc               (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
4557f4a2713aSLionel Sambuc               !strconcat(OpcodeStr,
4558f4a2713aSLionel Sambuc                          "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4559f4a2713aSLionel Sambuc                [(set VR128:$dst,
4560f4a2713aSLionel Sambuc                  (vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)),
4561f4a2713aSLionel Sambuc                          (i8 imm:$src2))))], IIC_SSE_PSHUF_MI>,
           // NOTE(review): unlike the two VEX mi forms above, this one also
           // lists ReadAfterLd even though the only non-immediate input is the
           // memory operand -- confirm the difference is intentional.
4562*0a6a1f1dSLionel Sambuc           Sched<[WriteShuffleLd, ReadAfterLd]>;
4563f4a2713aSLionel Sambuc}
4564f4a2713aSLionel Sambuc}
4565f4a2713aSLionel Sambuc} // ExeDomain = SSEPackedInt
4566f4a2713aSLionel Sambuc
// Instantiate sse2_pshuffle three times. All three share opcode 0x70 inside
// the multiclass; the PD / XS / XD prefix class appended to each defm is what
// differs between them.
4567*0a6a1f1dSLionel Sambucdefm PSHUFD  : sse2_pshuffle<"pshufd", v4i32, v8i32, X86PShufd>, PD;
4568f4a2713aSLionel Sambucdefm PSHUFHW : sse2_pshuffle<"pshufhw", v8i16, v16i16, X86PShufhw>, XS;
4569f4a2713aSLionel Sambucdefm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, v16i16, X86PShuflw>, XD;
4570f4a2713aSLionel Sambuc
// AVX: also select X86PShufd on v4f32 values (from a loadv4f32 or from a
// register) to the integer VPSHUFD instruction.
4571f4a2713aSLionel Sambuclet Predicates = [HasAVX] in {
4572f4a2713aSLionel Sambuc  def : Pat<(v4f32 (X86PShufd (loadv4f32 addr:$src1), (i8 imm:$imm))),
4573f4a2713aSLionel Sambuc            (VPSHUFDmi addr:$src1, imm:$imm)>;
4574f4a2713aSLionel Sambuc  def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
4575f4a2713aSLionel Sambuc            (VPSHUFDri VR128:$src1, imm:$imm)>;
4576f4a2713aSLionel Sambuc}
4577f4a2713aSLionel Sambuc
// SSE2: also select X86PShufd on v4f32 values (from a memopv4f32 or from a
// register) to the integer PSHUFD instruction.
4578f4a2713aSLionel Sambuclet Predicates = [UseSSE2] in {
4579f4a2713aSLionel Sambuc  def : Pat<(v4f32 (X86PShufd (memopv4f32 addr:$src1), (i8 imm:$imm))),
4580f4a2713aSLionel Sambuc            (PSHUFDmi addr:$src1, imm:$imm)>;
4581f4a2713aSLionel Sambuc  def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
4582f4a2713aSLionel Sambuc            (PSHUFDri VR128:$src1, imm:$imm)>;
4583f4a2713aSLionel Sambuc}
4584f4a2713aSLionel Sambuc
4585f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===//
4586*0a6a1f1dSLionel Sambuc// Packed Integer Pack Instructions (SSE & AVX)
4587*0a6a1f1dSLionel Sambuc//===---------------------------------------------------------------------===//
4588*0a6a1f1dSLionel Sambuc
4589*0a6a1f1dSLionel Sambuclet ExeDomain = SSEPackedInt in {
// sse2_pack - 128-bit two-source pack instructions built on the PDI class.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   OutVT     - result vector type.
//   ArgVT     - type applied to the first source operand in the rr pattern.
//   OpNode    - selection node.
//   bc_frag   - bitcast fragment wrapped around the memopv2i64 load in rm.
//   Is2Addr   - when set (the default) the two-operand asm string is used,
//               otherwise the three-operand string.
// Emits NAMErr (register source) and NAMErm (memory source).
4590*0a6a1f1dSLionel Sambucmulticlass sse2_pack<bits<8> opc, string OpcodeStr, ValueType OutVT,
4591*0a6a1f1dSLionel Sambuc                     ValueType ArgVT, SDNode OpNode, PatFrag bc_frag,
4592*0a6a1f1dSLionel Sambuc                     bit Is2Addr = 1> {
4593*0a6a1f1dSLionel Sambuc  def rr : PDI<opc, MRMSrcReg,
4594*0a6a1f1dSLionel Sambuc               (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
4595*0a6a1f1dSLionel Sambuc               !if(Is2Addr,
4596*0a6a1f1dSLionel Sambuc                   !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
4597*0a6a1f1dSLionel Sambuc                   !strconcat(OpcodeStr,
4598*0a6a1f1dSLionel Sambuc                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
4599*0a6a1f1dSLionel Sambuc               [(set VR128:$dst,
4600*0a6a1f1dSLionel Sambuc                     (OutVT (OpNode (ArgVT VR128:$src1), VR128:$src2)))]>,
4601*0a6a1f1dSLionel Sambuc               Sched<[WriteShuffle]>;
4602*0a6a1f1dSLionel Sambuc  def rm : PDI<opc, MRMSrcMem,
4603*0a6a1f1dSLionel Sambuc               (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
4604*0a6a1f1dSLionel Sambuc               !if(Is2Addr,
4605*0a6a1f1dSLionel Sambuc                   !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
4606*0a6a1f1dSLionel Sambuc                   !strconcat(OpcodeStr,
4607*0a6a1f1dSLionel Sambuc                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
4608*0a6a1f1dSLionel Sambuc               [(set VR128:$dst,
4609*0a6a1f1dSLionel Sambuc                     (OutVT (OpNode VR128:$src1,
4610*0a6a1f1dSLionel Sambuc                                    (bc_frag (memopv2i64 addr:$src2)))))]>,
4611*0a6a1f1dSLionel Sambuc               Sched<[WriteShuffleLd, ReadAfterLd]>;
4612*0a6a1f1dSLionel Sambuc}
4613*0a6a1f1dSLionel Sambuc
// sse2_pack_y - 256-bit (VR256) variant of the PDI-based pack multiclass.
// Takes the same opcode, mnemonic, type, node and bitcast-fragment parameters
// but has no Is2Addr switch: the three-operand asm string is always used.
// The memory form loads through memopv4i64.
// Emits NAMEYrr (register source) and NAMEYrm (memory source).
4614*0a6a1f1dSLionel Sambucmulticlass sse2_pack_y<bits<8> opc, string OpcodeStr, ValueType OutVT,
4615*0a6a1f1dSLionel Sambuc                       ValueType ArgVT, SDNode OpNode, PatFrag bc_frag> {
4616*0a6a1f1dSLionel Sambuc  def Yrr : PDI<opc, MRMSrcReg,
4617*0a6a1f1dSLionel Sambuc                (outs VR256:$dst), (ins VR256:$src1, VR256:$src2),
4618*0a6a1f1dSLionel Sambuc                !strconcat(OpcodeStr,
4619*0a6a1f1dSLionel Sambuc                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4620*0a6a1f1dSLionel Sambuc                [(set VR256:$dst,
4621*0a6a1f1dSLionel Sambuc                      (OutVT (OpNode (ArgVT VR256:$src1), VR256:$src2)))]>,
4622*0a6a1f1dSLionel Sambuc                Sched<[WriteShuffle]>;
4623*0a6a1f1dSLionel Sambuc  def Yrm : PDI<opc, MRMSrcMem,
4624*0a6a1f1dSLionel Sambuc                (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2),
4625*0a6a1f1dSLionel Sambuc                !strconcat(OpcodeStr,
4626*0a6a1f1dSLionel Sambuc                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4627*0a6a1f1dSLionel Sambuc                [(set VR256:$dst,
4628*0a6a1f1dSLionel Sambuc                      (OutVT (OpNode VR256:$src1,
4629*0a6a1f1dSLionel Sambuc                                     (bc_frag (memopv4i64 addr:$src2)))))]>,
4630*0a6a1f1dSLionel Sambuc                Sched<[WriteShuffleLd, ReadAfterLd]>;
4631*0a6a1f1dSLionel Sambuc}
4632*0a6a1f1dSLionel Sambuc
// sse4_pack - 128-bit two-source pack instructions built on the SS48I class
// instead of PDI; otherwise the parameters and the emitted rr/rm records have
// the same shape as in sse2_pack.
//   Is2Addr - when set (the default) the two-operand asm string is used,
//             otherwise the three-operand string.
4633*0a6a1f1dSLionel Sambucmulticlass sse4_pack<bits<8> opc, string OpcodeStr, ValueType OutVT,
4634*0a6a1f1dSLionel Sambuc                     ValueType ArgVT, SDNode OpNode, PatFrag bc_frag,
4635*0a6a1f1dSLionel Sambuc                     bit Is2Addr = 1> {
4636*0a6a1f1dSLionel Sambuc  def rr : SS48I<opc, MRMSrcReg,
4637*0a6a1f1dSLionel Sambuc                 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
4638*0a6a1f1dSLionel Sambuc                 !if(Is2Addr,
4639*0a6a1f1dSLionel Sambuc                     !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
4640*0a6a1f1dSLionel Sambuc                     !strconcat(OpcodeStr,
4641*0a6a1f1dSLionel Sambuc                                "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
4642*0a6a1f1dSLionel Sambuc                 [(set VR128:$dst,
4643*0a6a1f1dSLionel Sambuc                       (OutVT (OpNode (ArgVT VR128:$src1), VR128:$src2)))]>,
4644*0a6a1f1dSLionel Sambuc                 Sched<[WriteShuffle]>;
4645*0a6a1f1dSLionel Sambuc  def rm : SS48I<opc, MRMSrcMem,
4646*0a6a1f1dSLionel Sambuc                 (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
4647*0a6a1f1dSLionel Sambuc                 !if(Is2Addr,
4648*0a6a1f1dSLionel Sambuc                     !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
4649*0a6a1f1dSLionel Sambuc                     !strconcat(OpcodeStr,
4650*0a6a1f1dSLionel Sambuc                                "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
4651*0a6a1f1dSLionel Sambuc                 [(set VR128:$dst,
4652*0a6a1f1dSLionel Sambuc                       (OutVT (OpNode VR128:$src1,
4653*0a6a1f1dSLionel Sambuc                                      (bc_frag (memopv2i64 addr:$src2)))))]>,
4654*0a6a1f1dSLionel Sambuc                 Sched<[WriteShuffleLd, ReadAfterLd]>;
4655*0a6a1f1dSLionel Sambuc}
4656*0a6a1f1dSLionel Sambuc
// sse4_pack_y - 256-bit (VR256) variant of the SS48I-based pack multiclass.
// There is no Is2Addr switch: the three-operand asm string is always used.
// The memory form loads through memopv4i64.
// Emits NAMEYrr (register source) and NAMEYrm (memory source).
4657*0a6a1f1dSLionel Sambucmulticlass sse4_pack_y<bits<8> opc, string OpcodeStr, ValueType OutVT,
4658*0a6a1f1dSLionel Sambuc                     ValueType ArgVT, SDNode OpNode, PatFrag bc_frag> {
4659*0a6a1f1dSLionel Sambuc  def Yrr : SS48I<opc, MRMSrcReg,
4660*0a6a1f1dSLionel Sambuc                  (outs VR256:$dst), (ins VR256:$src1, VR256:$src2),
4661*0a6a1f1dSLionel Sambuc                  !strconcat(OpcodeStr,
4662*0a6a1f1dSLionel Sambuc                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4663*0a6a1f1dSLionel Sambuc                  [(set VR256:$dst,
4664*0a6a1f1dSLionel Sambuc                        (OutVT (OpNode (ArgVT VR256:$src1), VR256:$src2)))]>,
4665*0a6a1f1dSLionel Sambuc                  Sched<[WriteShuffle]>;
4666*0a6a1f1dSLionel Sambuc  def Yrm : SS48I<opc, MRMSrcMem,
4667*0a6a1f1dSLionel Sambuc                  (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2),
4668*0a6a1f1dSLionel Sambuc                  !strconcat(OpcodeStr,
4669*0a6a1f1dSLionel Sambuc                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4670*0a6a1f1dSLionel Sambuc                  [(set VR256:$dst,
4671*0a6a1f1dSLionel Sambuc                        (OutVT (OpNode VR256:$src1,
4672*0a6a1f1dSLionel Sambuc                                       (bc_frag (memopv4i64 addr:$src2)))))]>,
4673*0a6a1f1dSLionel Sambuc                  Sched<[WriteShuffleLd, ReadAfterLd]>;
4674*0a6a1f1dSLionel Sambuc}
4675*0a6a1f1dSLionel Sambuc
// AVX 128-bit pack instructions (VEX_4V). Is2Addr is passed as 0 so the
// three-operand asm string is emitted. X86Packss is used for the packss*
// forms and X86Packus for the packus* forms; vpackusdw (opcode 0x2B) is the
// only one built from sse4_pack rather than sse2_pack.
4676*0a6a1f1dSLionel Sambuclet Predicates = [HasAVX] in {
4677*0a6a1f1dSLionel Sambuc  defm VPACKSSWB : sse2_pack<0x63, "vpacksswb", v16i8, v8i16, X86Packss,
4678*0a6a1f1dSLionel Sambuc                             bc_v8i16, 0>, VEX_4V;
4679*0a6a1f1dSLionel Sambuc  defm VPACKSSDW : sse2_pack<0x6B, "vpackssdw", v8i16, v4i32, X86Packss,
4680*0a6a1f1dSLionel Sambuc                             bc_v4i32, 0>, VEX_4V;
4681*0a6a1f1dSLionel Sambuc
4682*0a6a1f1dSLionel Sambuc  defm VPACKUSWB : sse2_pack<0x67, "vpackuswb", v16i8, v8i16, X86Packus,
4683*0a6a1f1dSLionel Sambuc                             bc_v8i16, 0>, VEX_4V;
4684*0a6a1f1dSLionel Sambuc  defm VPACKUSDW : sse4_pack<0x2B, "vpackusdw", v8i16, v4i32, X86Packus,
4685*0a6a1f1dSLionel Sambuc                             bc_v4i32, 0>, VEX_4V;
4686*0a6a1f1dSLionel Sambuc}
4687*0a6a1f1dSLionel Sambuc
// AVX2 256-bit pack instructions (VEX_4V, VEX_L). The defm names repeat the
// 128-bit ones; the *_y multiclasses only define Yrr/Yrm records, so the
// resulting record names carry a Y before the rr/rm suffix.
4688*0a6a1f1dSLionel Sambuclet Predicates = [HasAVX2] in {
4689*0a6a1f1dSLionel Sambuc  defm VPACKSSWB : sse2_pack_y<0x63, "vpacksswb", v32i8, v16i16, X86Packss,
4690*0a6a1f1dSLionel Sambuc                               bc_v16i16>, VEX_4V, VEX_L;
4691*0a6a1f1dSLionel Sambuc  defm VPACKSSDW : sse2_pack_y<0x6B, "vpackssdw", v16i16, v8i32, X86Packss,
4692*0a6a1f1dSLionel Sambuc                               bc_v8i32>, VEX_4V, VEX_L;
4693*0a6a1f1dSLionel Sambuc
4694*0a6a1f1dSLionel Sambuc  defm VPACKUSWB : sse2_pack_y<0x67, "vpackuswb", v32i8, v16i16, X86Packus,
4695*0a6a1f1dSLionel Sambuc                               bc_v16i16>, VEX_4V, VEX_L;
4696*0a6a1f1dSLionel Sambuc  defm VPACKUSDW : sse4_pack_y<0x2B, "vpackusdw", v16i16, v8i32, X86Packus,
4697*0a6a1f1dSLionel Sambuc                               bc_v8i32>, VEX_4V, VEX_L;
4698*0a6a1f1dSLionel Sambuc}
4699*0a6a1f1dSLionel Sambuc
// Legacy (non-VEX) pack instructions.  $src1 is tied to $dst, and the
// multiclasses are instantiated without the trailing argument that the VEX
// forms pass as 0, so its default applies.
// NOTE(review): that default is presumably Is2Addr = 1 (two-operand asm
// string); the multiclass signatures are outside this view -- confirm.
4700*0a6a1f1dSLionel Sambuclet Constraints = "$src1 = $dst" in {
4701*0a6a1f1dSLionel Sambuc  defm PACKSSWB : sse2_pack<0x63, "packsswb", v16i8, v8i16, X86Packss,
4702*0a6a1f1dSLionel Sambuc                            bc_v8i16>;
4703*0a6a1f1dSLionel Sambuc  defm PACKSSDW : sse2_pack<0x6B, "packssdw", v8i16, v4i32, X86Packss,
4704*0a6a1f1dSLionel Sambuc                            bc_v4i32>;
4705*0a6a1f1dSLionel Sambuc
4706*0a6a1f1dSLionel Sambuc  defm PACKUSWB : sse2_pack<0x67, "packuswb", v16i8, v8i16, X86Packus,
4707*0a6a1f1dSLionel Sambuc                            bc_v8i16>;
4708*0a6a1f1dSLionel Sambuc
  // PACKUSDW is the only pack here with an explicit predicate (HasSSE41).
4709*0a6a1f1dSLionel Sambuc  let Predicates = [HasSSE41] in
4710*0a6a1f1dSLionel Sambuc  defm PACKUSDW : sse4_pack<0x2B, "packusdw", v8i16, v4i32, X86Packus,
4711*0a6a1f1dSLionel Sambuc                            bc_v4i32>;
4712*0a6a1f1dSLionel Sambuc}
4713*0a6a1f1dSLionel Sambuc} // ExeDomain = SSEPackedInt
4714*0a6a1f1dSLionel Sambuc
4715*0a6a1f1dSLionel Sambuc//===---------------------------------------------------------------------===//
4716f4a2713aSLionel Sambuc// SSE2 - Packed Integer Unpack Instructions
4717f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===//
4718f4a2713aSLionel Sambuc
4719f4a2713aSLionel Sambuclet ExeDomain = SSEPackedInt in {
// sse2_unpack - 128-bit packed-integer unpack, emitted as a register-register
// (rr) and a register-memory (rm) record.
//   opc       - opcode byte handed to PDI.
//   OpcodeStr - mnemonic; the operand text is concatenated onto it here.
//   vt        - result vector type used in the rr pattern.
//   OpNode    - SelectionDAG node to match (X86Unpckl / X86Unpckh at the
//               instantiation sites in this file).
//   bc_frag   - fragment wrapped around the memopv2i64 load in the rm pattern.
//   Is2Addr   - 1 (default): two-operand asm string, $src1 is not printed;
//               0: three-operand asm string.
4720f4a2713aSLionel Sambucmulticlass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt,
4721f4a2713aSLionel Sambuc                       SDNode OpNode, PatFrag bc_frag, bit Is2Addr = 1> {
4722f4a2713aSLionel Sambuc  def rr : PDI<opc, MRMSrcReg,
4723f4a2713aSLionel Sambuc      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
4724f4a2713aSLionel Sambuc      !if(Is2Addr,
4725f4a2713aSLionel Sambuc          !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"),
4726f4a2713aSLionel Sambuc          !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
4727f4a2713aSLionel Sambuc      [(set VR128:$dst, (vt (OpNode VR128:$src1, VR128:$src2)))],
4728f4a2713aSLionel Sambuc      IIC_SSE_UNPCK>, Sched<[WriteShuffle]>;
  // Memory form: $src2 is a 128-bit memory operand loaded via memopv2i64 and
  // passed through bc_frag.  Unlike rr, the pattern has no explicit (vt ...)
  // cast on the result.
4729f4a2713aSLionel Sambuc  def rm : PDI<opc, MRMSrcMem,
4730f4a2713aSLionel Sambuc      (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
4731f4a2713aSLionel Sambuc      !if(Is2Addr,
4732f4a2713aSLionel Sambuc          !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"),
4733f4a2713aSLionel Sambuc          !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
4734f4a2713aSLionel Sambuc      [(set VR128:$dst, (OpNode VR128:$src1,
4735f4a2713aSLionel Sambuc                                  (bc_frag (memopv2i64
4736f4a2713aSLionel Sambuc                                               addr:$src2))))],
4737f4a2713aSLionel Sambuc                                               IIC_SSE_UNPCK>,
4738f4a2713aSLionel Sambuc      Sched<[WriteShuffleLd, ReadAfterLd]>;
4739f4a2713aSLionel Sambuc}
4740f4a2713aSLionel Sambuc
// sse2_unpack_y - 256-bit (VR256) counterpart of sse2_unpack, emitting Yrr and
// Yrm records.  There is no Is2Addr parameter: the asm string is always the
// three-operand form.  No itinerary argument is passed to PDI here, unlike
// the 128-bit multiclass, which passes IIC_SSE_UNPCK.
//   opc, OpcodeStr, vt, OpNode - as in sse2_unpack.
//   bc_frag - fragment wrapped around the memopv4i64 load in the Yrm pattern.
4741f4a2713aSLionel Sambucmulticlass sse2_unpack_y<bits<8> opc, string OpcodeStr, ValueType vt,
4742f4a2713aSLionel Sambuc                         SDNode OpNode, PatFrag bc_frag> {
4743f4a2713aSLionel Sambuc  def Yrr : PDI<opc, MRMSrcReg,
4744f4a2713aSLionel Sambuc      (outs VR256:$dst), (ins VR256:$src1, VR256:$src2),
4745f4a2713aSLionel Sambuc      !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4746f4a2713aSLionel Sambuc      [(set VR256:$dst, (vt (OpNode VR256:$src1, VR256:$src2)))]>,
4747f4a2713aSLionel Sambuc      Sched<[WriteShuffle]>;
4748f4a2713aSLionel Sambuc  def Yrm : PDI<opc, MRMSrcMem,
4749f4a2713aSLionel Sambuc      (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2),
4750f4a2713aSLionel Sambuc      !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4751f4a2713aSLionel Sambuc      [(set VR256:$dst, (OpNode VR256:$src1,
4752f4a2713aSLionel Sambuc                                  (bc_frag (memopv4i64 addr:$src2))))]>,
4753f4a2713aSLionel Sambuc      Sched<[WriteShuffleLd, ReadAfterLd]>;
4754f4a2713aSLionel Sambuc}
4755f4a2713aSLionel Sambuc
// AVX 128-bit unpack instructions: sse2_unpack instantiated with Is2Addr = 0
// (three-operand asm string) and tagged VEX_4V.  Opcodes 0x60-0x62 and 0x6C
// use X86Unpckl; 0x68-0x6A and 0x6D use X86Unpckh.
4756f4a2713aSLionel Sambuclet Predicates = [HasAVX] in {
4757f4a2713aSLionel Sambuc  defm VPUNPCKLBW  : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Unpckl,
4758f4a2713aSLionel Sambuc                                 bc_v16i8, 0>, VEX_4V;
4759f4a2713aSLionel Sambuc  defm VPUNPCKLWD  : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Unpckl,
4760f4a2713aSLionel Sambuc                                 bc_v8i16, 0>, VEX_4V;
4761f4a2713aSLionel Sambuc  defm VPUNPCKLDQ  : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Unpckl,
4762f4a2713aSLionel Sambuc                                 bc_v4i32, 0>, VEX_4V;
4763f4a2713aSLionel Sambuc  defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Unpckl,
4764f4a2713aSLionel Sambuc                                 bc_v2i64, 0>, VEX_4V;
4765f4a2713aSLionel Sambuc
4766f4a2713aSLionel Sambuc  defm VPUNPCKHBW  : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Unpckh,
4767f4a2713aSLionel Sambuc                                 bc_v16i8, 0>, VEX_4V;
4768f4a2713aSLionel Sambuc  defm VPUNPCKHWD  : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Unpckh,
4769f4a2713aSLionel Sambuc                                 bc_v8i16, 0>, VEX_4V;
4770f4a2713aSLionel Sambuc  defm VPUNPCKHDQ  : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Unpckh,
4771f4a2713aSLionel Sambuc                                 bc_v4i32, 0>, VEX_4V;
4772f4a2713aSLionel Sambuc  defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Unpckh,
4773f4a2713aSLionel Sambuc                                 bc_v2i64, 0>, VEX_4V;
4774f4a2713aSLionel Sambuc}
4775f4a2713aSLionel Sambuc
// AVX2 256-bit unpack instructions: sse2_unpack_y instantiated with the
// 256-bit vector types, tagged VEX_4V + VEX_L.  The defm names repeat the
// 128-bit ones; the multiclass supplies the distinct Yrr / Yrm suffixes.
4776f4a2713aSLionel Sambuclet Predicates = [HasAVX2] in {
4777f4a2713aSLionel Sambuc  defm VPUNPCKLBW  : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Unpckl,
4778f4a2713aSLionel Sambuc                                   bc_v32i8>, VEX_4V, VEX_L;
4779f4a2713aSLionel Sambuc  defm VPUNPCKLWD  : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Unpckl,
4780f4a2713aSLionel Sambuc                                   bc_v16i16>, VEX_4V, VEX_L;
4781f4a2713aSLionel Sambuc  defm VPUNPCKLDQ  : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Unpckl,
4782f4a2713aSLionel Sambuc                                   bc_v8i32>, VEX_4V, VEX_L;
4783f4a2713aSLionel Sambuc  defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Unpckl,
4784f4a2713aSLionel Sambuc                                   bc_v4i64>, VEX_4V, VEX_L;
4785f4a2713aSLionel Sambuc
4786f4a2713aSLionel Sambuc  defm VPUNPCKHBW  : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Unpckh,
4787f4a2713aSLionel Sambuc                                   bc_v32i8>, VEX_4V, VEX_L;
4788f4a2713aSLionel Sambuc  defm VPUNPCKHWD  : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Unpckh,
4789f4a2713aSLionel Sambuc                                   bc_v16i16>, VEX_4V, VEX_L;
4790f4a2713aSLionel Sambuc  defm VPUNPCKHDQ  : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Unpckh,
4791f4a2713aSLionel Sambuc                                   bc_v8i32>, VEX_4V, VEX_L;
4792f4a2713aSLionel Sambuc  defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Unpckh,
4793f4a2713aSLionel Sambuc                                   bc_v4i64>, VEX_4V, VEX_L;
4794f4a2713aSLionel Sambuc}
4795f4a2713aSLionel Sambuc
// Legacy (non-VEX) unpack instructions: sse2_unpack with the default
// Is2Addr = 1, so the asm string prints two operands, and $src1 is tied to
// $dst by the Constraints string.
4796f4a2713aSLionel Sambuclet Constraints = "$src1 = $dst" in {
4797f4a2713aSLionel Sambuc  defm PUNPCKLBW  : sse2_unpack<0x60, "punpcklbw", v16i8, X86Unpckl,
4798f4a2713aSLionel Sambuc                                bc_v16i8>;
4799f4a2713aSLionel Sambuc  defm PUNPCKLWD  : sse2_unpack<0x61, "punpcklwd", v8i16, X86Unpckl,
4800f4a2713aSLionel Sambuc                                bc_v8i16>;
4801f4a2713aSLionel Sambuc  defm PUNPCKLDQ  : sse2_unpack<0x62, "punpckldq", v4i32, X86Unpckl,
4802f4a2713aSLionel Sambuc                                bc_v4i32>;
4803f4a2713aSLionel Sambuc  defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Unpckl,
4804f4a2713aSLionel Sambuc                                bc_v2i64>;
4805f4a2713aSLionel Sambuc
4806f4a2713aSLionel Sambuc  defm PUNPCKHBW  : sse2_unpack<0x68, "punpckhbw", v16i8, X86Unpckh,
4807f4a2713aSLionel Sambuc                                bc_v16i8>;
4808f4a2713aSLionel Sambuc  defm PUNPCKHWD  : sse2_unpack<0x69, "punpckhwd", v8i16, X86Unpckh,
4809f4a2713aSLionel Sambuc                                bc_v8i16>;
4810f4a2713aSLionel Sambuc  defm PUNPCKHDQ  : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Unpckh,
4811f4a2713aSLionel Sambuc                                bc_v4i32>;
4812f4a2713aSLionel Sambuc  defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Unpckh,
4813f4a2713aSLionel Sambuc                                bc_v2i64>;
4814f4a2713aSLionel Sambuc}
4815f4a2713aSLionel Sambuc} // ExeDomain = SSEPackedInt
4816f4a2713aSLionel Sambuc
4817f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===//
4818f4a2713aSLionel Sambuc// SSE2 - Packed Integer Extract and Insert
4819f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===//
4820f4a2713aSLionel Sambuc
4821f4a2713aSLionel Sambuclet ExeDomain = SSEPackedInt in {
// sse2_pinsrw - word insert (opcode 0xC4) in register (rri) and memory (rmi)
// forms.  Both match X86pinsrw on $src1, the inserted value $src2 and the
// immediate $src3.
//   Is2Addr - 1 (default): "pinsrw" with $src1 not printed;
//             0: "vpinsrw" with all three sources printed.
// The mnemonics are hard-coded here, so the multiclass takes no OpcodeStr.
4822f4a2713aSLionel Sambucmulticlass sse2_pinsrw<bit Is2Addr = 1> {
  // Register form: the inserted value comes from a GR32orGR64 operand.
4823f4a2713aSLionel Sambuc  def rri : Ii8<0xC4, MRMSrcReg,
4824f4a2713aSLionel Sambuc       (outs VR128:$dst), (ins VR128:$src1,
4825f4a2713aSLionel Sambuc        GR32orGR64:$src2, i32i8imm:$src3),
4826f4a2713aSLionel Sambuc       !if(Is2Addr,
4827f4a2713aSLionel Sambuc           "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
4828f4a2713aSLionel Sambuc           "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
4829f4a2713aSLionel Sambuc       [(set VR128:$dst,
4830f4a2713aSLionel Sambuc         (X86pinsrw VR128:$src1, GR32orGR64:$src2, imm:$src3))],
4831f4a2713aSLionel Sambuc       IIC_SSE_PINSRW>, Sched<[WriteShuffle]>;
  // Memory form: the inserted value is a 16-bit memory operand matched as
  // extloadi16.
4832f4a2713aSLionel Sambuc  def rmi : Ii8<0xC4, MRMSrcMem,
4833f4a2713aSLionel Sambuc                       (outs VR128:$dst), (ins VR128:$src1,
4834f4a2713aSLionel Sambuc                        i16mem:$src2, i32i8imm:$src3),
4835f4a2713aSLionel Sambuc       !if(Is2Addr,
4836f4a2713aSLionel Sambuc           "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
4837f4a2713aSLionel Sambuc           "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
4838f4a2713aSLionel Sambuc       [(set VR128:$dst,
4839f4a2713aSLionel Sambuc         (X86pinsrw VR128:$src1, (extloadi16 addr:$src2),
4840f4a2713aSLionel Sambuc                    imm:$src3))], IIC_SSE_PINSRW>,
4841f4a2713aSLionel Sambuc       Sched<[WriteShuffleLd, ReadAfterLd]>;
4842f4a2713aSLionel Sambuc}
4843f4a2713aSLionel Sambuc
4844f4a2713aSLionel Sambuc// Extract
// VPEXTRWri - VEX-encoded word extract (opcode 0xC5), register form: matches
// X86pextrw on v8i16 $src1 and the immediate $src2, writing a GR32orGR64.
// Built directly on Ii8 with PD and VEX; no itinerary argument is passed,
// unlike PEXTRWri below, which passes IIC_SSE_PEXTRW.
4845f4a2713aSLionel Sambuclet Predicates = [HasAVX] in
4846f4a2713aSLionel Sambucdef VPEXTRWri : Ii8<0xC5, MRMSrcReg,
4847f4a2713aSLionel Sambuc                    (outs GR32orGR64:$dst), (ins VR128:$src1, i32i8imm:$src2),
4848f4a2713aSLionel Sambuc                    "vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4849f4a2713aSLionel Sambuc                    [(set GR32orGR64:$dst, (X86pextrw (v8i16 VR128:$src1),
4850*0a6a1f1dSLionel Sambuc                                            imm:$src2))]>, PD, VEX,
4851f4a2713aSLionel Sambuc                Sched<[WriteShuffle]>;
// PEXTRWri - legacy-SSE word extract (opcode 0xC5), register form: matches
// X86pextrw on v8i16 $src1 and the immediate $src2, writing a GR32orGR64.
// This is a register-to-register instruction (MRMSrcReg, no memory operand),
// so it is scheduled as WriteShuffle, the same as its VEX twin VPEXTRWri.
// The load-folding pair WriteShuffleLd/ReadAfterLd belongs only on forms
// that read memory.
4852f4a2713aSLionel Sambucdef PEXTRWri : PDIi8<0xC5, MRMSrcReg,
4853f4a2713aSLionel Sambuc                    (outs GR32orGR64:$dst), (ins VR128:$src1, i32i8imm:$src2),
4854f4a2713aSLionel Sambuc                    "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4855f4a2713aSLionel Sambuc                    [(set GR32orGR64:$dst, (X86pextrw (v8i16 VR128:$src1),
4856f4a2713aSLionel Sambuc                                            imm:$src2))], IIC_SSE_PEXTRW>,
4857f4a2713aSLionel Sambuc               Sched<[WriteShuffle]>;
4858f4a2713aSLionel Sambuc
4859f4a2713aSLionel Sambuc// Insert
// Instantiate sse2_pinsrw twice: the VEX form (Is2Addr = 0, VEX_4V, HasAVX)
// and the legacy form (default Is2Addr = 1, UseSSE2, $src1 tied to $dst).
4860f4a2713aSLionel Sambuclet Predicates = [HasAVX] in
4861*0a6a1f1dSLionel Sambucdefm VPINSRW : sse2_pinsrw<0>, PD, VEX_4V;
4862f4a2713aSLionel Sambuc
4863f4a2713aSLionel Sambuclet Predicates = [UseSSE2], Constraints = "$src1 = $dst" in
4864*0a6a1f1dSLionel Sambucdefm PINSRW : sse2_pinsrw, PD;
4865f4a2713aSLionel Sambuc
4866f4a2713aSLionel Sambuc} // ExeDomain = SSEPackedInt
4867f4a2713aSLionel Sambuc
4868f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===//
4869f4a2713aSLionel Sambuc// SSE2 - Packed Mask Creation
4870f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===//
4871f4a2713aSLionel Sambuc
4872f4a2713aSLionel Sambuclet ExeDomain = SSEPackedInt, SchedRW = [WriteVecLogic] in {
4873f4a2713aSLionel Sambuc
// Byte-mask extraction (opcode 0xD7): each record selects a pmovmskb
// intrinsic and writes a GR32orGR64.  All three are inside the enclosing
// `let ExeDomain = SSEPackedInt, SchedRW = [WriteVecLogic]`.
// NOTE(review): the two VPDI records pass the mnemonic "pmovmskb" without a
// leading "v"; presumably the VPDI class (defined elsewhere) prepends it --
// confirm.
4874f4a2713aSLionel Sambucdef VPMOVMSKBrr  : VPDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst),
4875f4a2713aSLionel Sambuc           (ins VR128:$src),
4876f4a2713aSLionel Sambuc           "pmovmskb\t{$src, $dst|$dst, $src}",
4877f4a2713aSLionel Sambuc           [(set GR32orGR64:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))],
4878f4a2713aSLionel Sambuc           IIC_SSE_MOVMSK>, VEX;
4879f4a2713aSLionel Sambuc
// 256-bit source form; selects int_x86_avx2_pmovmskb and adds VEX_L.
4880f4a2713aSLionel Sambuclet Predicates = [HasAVX2] in {
4881f4a2713aSLionel Sambucdef VPMOVMSKBYrr  : VPDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst),
4882f4a2713aSLionel Sambuc           (ins VR256:$src),
4883f4a2713aSLionel Sambuc           "pmovmskb\t{$src, $dst|$dst, $src}",
4884f4a2713aSLionel Sambuc           [(set GR32orGR64:$dst, (int_x86_avx2_pmovmskb VR256:$src))]>,
4885f4a2713aSLionel Sambuc           VEX, VEX_L;
4886f4a2713aSLionel Sambuc}
4887f4a2713aSLionel Sambuc
// Legacy (non-VEX) form; same intrinsic as VPMOVMSKBrr.
4888f4a2713aSLionel Sambucdef PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst), (ins VR128:$src),
4889f4a2713aSLionel Sambuc           "pmovmskb\t{$src, $dst|$dst, $src}",
4890f4a2713aSLionel Sambuc           [(set GR32orGR64:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))],
4891f4a2713aSLionel Sambuc           IIC_SSE_MOVMSK>;
4892f4a2713aSLionel Sambuc
4893f4a2713aSLionel Sambuc} // ExeDomain = SSEPackedInt, SchedRW = [WriteVecLogic]
4894f4a2713aSLionel Sambuc
4895f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===//
4896f4a2713aSLionel Sambuc// SSE2 - Conditional Store
4897f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===//
4898f4a2713aSLionel Sambuc
4899f4a2713aSLionel Sambuclet ExeDomain = SSEPackedInt, SchedRW = [WriteStore] in {
4900f4a2713aSLionel Sambuc
// VEX masked store (opcode 0xF7).  There are no explicit outputs; the
// destination address register is an implicit use, passed as the third
// operand of int_x86_sse2_maskmov_dqu: EDI outside 64-bit mode, RDI in
// 64-bit mode.  The two records differ only in that register and predicate.
// NOTE(review): the mnemonic is passed as "maskmovdqu"; presumably the VPDI
// class (defined elsewhere) prepends the "v" -- confirm.
4901*0a6a1f1dSLionel Sambuclet Uses = [EDI], Predicates = [HasAVX,Not64BitMode] in
4902f4a2713aSLionel Sambucdef VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs),
4903f4a2713aSLionel Sambuc           (ins VR128:$src, VR128:$mask),
4904f4a2713aSLionel Sambuc           "maskmovdqu\t{$mask, $src|$src, $mask}",
4905f4a2713aSLionel Sambuc           [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)],
4906f4a2713aSLionel Sambuc           IIC_SSE_MASKMOV>, VEX;
4907f4a2713aSLionel Sambuclet Uses = [RDI], Predicates = [HasAVX,In64BitMode] in
4908f4a2713aSLionel Sambucdef VMASKMOVDQU64 : VPDI<0xF7, MRMSrcReg, (outs),
4909f4a2713aSLionel Sambuc           (ins VR128:$src, VR128:$mask),
4910f4a2713aSLionel Sambuc           "maskmovdqu\t{$mask, $src|$src, $mask}",
4911f4a2713aSLionel Sambuc           [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)],
4912f4a2713aSLionel Sambuc           IIC_SSE_MASKMOV>, VEX;
4913f4a2713aSLionel Sambuc
// Legacy (non-VEX) masked store (opcode 0xF7), guarded by UseSSE2.  As with
// the VEX records, the address register is an implicit use handed to
// int_x86_sse2_maskmov_dqu: EDI outside 64-bit mode, RDI in 64-bit mode.
4914*0a6a1f1dSLionel Sambuclet Uses = [EDI], Predicates = [UseSSE2,Not64BitMode] in
4915f4a2713aSLionel Sambucdef MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
4916f4a2713aSLionel Sambuc           "maskmovdqu\t{$mask, $src|$src, $mask}",
4917f4a2713aSLionel Sambuc           [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)],
4918f4a2713aSLionel Sambuc           IIC_SSE_MASKMOV>;
4919f4a2713aSLionel Sambuclet Uses = [RDI], Predicates = [UseSSE2,In64BitMode] in
4920f4a2713aSLionel Sambucdef MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
4921f4a2713aSLionel Sambuc           "maskmovdqu\t{$mask, $src|$src, $mask}",
4922f4a2713aSLionel Sambuc           [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)],
4923f4a2713aSLionel Sambuc           IIC_SSE_MASKMOV>;
4924f4a2713aSLionel Sambuc
4925f4a2713aSLionel Sambuc} // ExeDomain = SSEPackedInt, SchedRW = [WriteStore]
4926f4a2713aSLionel Sambuc
4927f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===//
4928f4a2713aSLionel Sambuc// SSE2 - Move Doubleword/Quadword
4929f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===//
4930f4a2713aSLionel Sambuc
4931f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===//
4932f4a2713aSLionel Sambuc// Move Int Doubleword to Packed Double Int
4933f4a2713aSLionel Sambuc//
// VEX moves from a GPR or 32-bit memory into an XMM register (opcode 0x6E).
//   VMOVDI2PDIrr  - GR32    -> VR128, scalar_to_vector as v4i32.
//   VMOVDI2PDIrm  - loadi32 -> VR128, scalar_to_vector as v4i32.
//   VMOV64toPQIrr - GR64    -> VR128, scalar_to_vector as v2i64.
//   VMOV64toSDrr  - GR64    -> FR64 bitconvert; isCodeGenOnly.
// NOTE(review): the mnemonics are passed as "movd"/"movq"; presumably the
// VS2I / VRS2I classes (defined elsewhere) prepend the "v" -- confirm.
4934f4a2713aSLionel Sambucdef VMOVDI2PDIrr : VS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
4935f4a2713aSLionel Sambuc                      "movd\t{$src, $dst|$dst, $src}",
4936f4a2713aSLionel Sambuc                      [(set VR128:$dst,
4937f4a2713aSLionel Sambuc                        (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>,
4938f4a2713aSLionel Sambuc                        VEX, Sched<[WriteMove]>;
4939f4a2713aSLionel Sambucdef VMOVDI2PDIrm : VS2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
4940f4a2713aSLionel Sambuc                      "movd\t{$src, $dst|$dst, $src}",
4941f4a2713aSLionel Sambuc                      [(set VR128:$dst,
4942f4a2713aSLionel Sambuc                        (v4i32 (scalar_to_vector (loadi32 addr:$src))))],
4943f4a2713aSLionel Sambuc                        IIC_SSE_MOVDQ>,
4944f4a2713aSLionel Sambuc                      VEX, Sched<[WriteLoad]>;
4945f4a2713aSLionel Sambucdef VMOV64toPQIrr : VRS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
4946f4a2713aSLionel Sambuc                        "movq\t{$src, $dst|$dst, $src}",
4947f4a2713aSLionel Sambuc                        [(set VR128:$dst,
4948f4a2713aSLionel Sambuc                          (v2i64 (scalar_to_vector GR64:$src)))],
4949f4a2713aSLionel Sambuc                          IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>;
4950f4a2713aSLionel Sambuclet isCodeGenOnly = 1 in
4951f4a2713aSLionel Sambucdef VMOV64toSDrr : VRS2I<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
4952f4a2713aSLionel Sambuc                       "movq\t{$src, $dst|$dst, $src}",
4953f4a2713aSLionel Sambuc                       [(set FR64:$dst, (bitconvert GR64:$src))],
4954f4a2713aSLionel Sambuc                       IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>;
4955f4a2713aSLionel Sambuc
// Legacy-SSE counterparts of the VEX records above (opcode 0x6E), with the
// same patterns and no VEX tag.  The 64-bit register forms spell the mnemonic
// "mov{d|q}", using the same {a|b} alternation as the operand lists, so the
// two asm variants print different mnemonics.
4956f4a2713aSLionel Sambucdef MOVDI2PDIrr : S2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
4957f4a2713aSLionel Sambuc                      "movd\t{$src, $dst|$dst, $src}",
4958f4a2713aSLionel Sambuc                      [(set VR128:$dst,
4959f4a2713aSLionel Sambuc                        (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>,
4960f4a2713aSLionel Sambuc                  Sched<[WriteMove]>;
4961f4a2713aSLionel Sambucdef MOVDI2PDIrm : S2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
4962f4a2713aSLionel Sambuc                      "movd\t{$src, $dst|$dst, $src}",
4963f4a2713aSLionel Sambuc                      [(set VR128:$dst,
4964f4a2713aSLionel Sambuc                        (v4i32 (scalar_to_vector (loadi32 addr:$src))))],
4965f4a2713aSLionel Sambuc                        IIC_SSE_MOVDQ>, Sched<[WriteLoad]>;
4966f4a2713aSLionel Sambucdef MOV64toPQIrr : RS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
4967f4a2713aSLionel Sambuc                        "mov{d|q}\t{$src, $dst|$dst, $src}",
4968f4a2713aSLionel Sambuc                        [(set VR128:$dst,
4969f4a2713aSLionel Sambuc                          (v2i64 (scalar_to_vector GR64:$src)))],
4970f4a2713aSLionel Sambuc                          IIC_SSE_MOVDQ>, Sched<[WriteMove]>;
// GR64 -> FR64 bitconvert; isCodeGenOnly.
4971f4a2713aSLionel Sambuclet isCodeGenOnly = 1 in
4972f4a2713aSLionel Sambucdef MOV64toSDrr : RS2I<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
4973f4a2713aSLionel Sambuc                       "mov{d|q}\t{$src, $dst|$dst, $src}",
4974f4a2713aSLionel Sambuc                       [(set FR64:$dst, (bitconvert GR64:$src))],
4975f4a2713aSLionel Sambuc                       IIC_SSE_MOVDQ>, Sched<[WriteMove]>;
4976f4a2713aSLionel Sambuc
4977f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===//
4978f4a2713aSLionel Sambuc// Move Int Doubleword to Single Scalar
4979f4a2713aSLionel Sambuc//
// Codegen-only records that select a bitconvert from a 32-bit integer
// (GR32, or a loadi32 from memory) to FR32, using opcode 0x6E.  VEX and
// legacy forms are provided for both the register and the memory source.
4980f4a2713aSLionel Sambuclet isCodeGenOnly = 1 in {
4981f4a2713aSLionel Sambuc  def VMOVDI2SSrr  : VS2I<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
4982f4a2713aSLionel Sambuc                        "movd\t{$src, $dst|$dst, $src}",
4983f4a2713aSLionel Sambuc                        [(set FR32:$dst, (bitconvert GR32:$src))],
4984f4a2713aSLionel Sambuc                        IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>;
4985f4a2713aSLionel Sambuc
4986f4a2713aSLionel Sambuc  def VMOVDI2SSrm  : VS2I<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
4987f4a2713aSLionel Sambuc                        "movd\t{$src, $dst|$dst, $src}",
4988f4a2713aSLionel Sambuc                        [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))],
4989f4a2713aSLionel Sambuc                        IIC_SSE_MOVDQ>,
4990f4a2713aSLionel Sambuc                        VEX, Sched<[WriteLoad]>;
4991f4a2713aSLionel Sambuc  def MOVDI2SSrr  : S2I<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
4992f4a2713aSLionel Sambuc                        "movd\t{$src, $dst|$dst, $src}",
4993f4a2713aSLionel Sambuc                        [(set FR32:$dst, (bitconvert GR32:$src))],
4994f4a2713aSLionel Sambuc                        IIC_SSE_MOVDQ>, Sched<[WriteMove]>;
4995f4a2713aSLionel Sambuc
4996f4a2713aSLionel Sambuc  def MOVDI2SSrm  : S2I<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
4997f4a2713aSLionel Sambuc                        "movd\t{$src, $dst|$dst, $src}",
4998f4a2713aSLionel Sambuc                        [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))],
4999f4a2713aSLionel Sambuc                        IIC_SSE_MOVDQ>, Sched<[WriteLoad]>;
5000f4a2713aSLionel Sambuc}
5001f4a2713aSLionel Sambuc
5002f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===//
5003f4a2713aSLionel Sambuc// Move Packed Doubleword Int first element to Doubleword Int (GR32 or memory)
5004f4a2713aSLionel Sambuc//
// VEX moves of element 0 of a v4i32 XMM value (opcode 0x7E):
//   VMOVPDI2DIrr - vector_extract index 0 into GR32 (MRMDestReg).
//   VMOVPDI2DImr - store of the extracted i32 to a 32-bit memory operand
//                  (MRMDestMem, no explicit outputs).
5005f4a2713aSLionel Sambucdef VMOVPDI2DIrr  : VS2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
5006f4a2713aSLionel Sambuc                       "movd\t{$src, $dst|$dst, $src}",
5007f4a2713aSLionel Sambuc                       [(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
5008f4a2713aSLionel Sambuc                                        (iPTR 0)))], IIC_SSE_MOVD_ToGP>, VEX,
5009f4a2713aSLionel Sambuc                    Sched<[WriteMove]>;
5010f4a2713aSLionel Sambucdef VMOVPDI2DImr  : VS2I<0x7E, MRMDestMem, (outs),
5011f4a2713aSLionel Sambuc                       (ins i32mem:$dst, VR128:$src),
5012f4a2713aSLionel Sambuc                       "movd\t{$src, $dst|$dst, $src}",
5013f4a2713aSLionel Sambuc                       [(store (i32 (vector_extract (v4i32 VR128:$src),
5014f4a2713aSLionel Sambuc                                     (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>,
5015*0a6a1f1dSLionel Sambuc                                     VEX, Sched<[WriteStore]>;
// Legacy-SSE moves of element 0 of a v4i32 XMM value (opcode 0x7E): into a
// GR32 (rr) or stored to a 32-bit memory operand (mr).  The patterns are
// identical to the VEX records VMOVPDI2DIrr / VMOVPDI2DImr.
5016f4a2713aSLionel Sambucdef MOVPDI2DIrr  : S2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
5017f4a2713aSLionel Sambuc                       "movd\t{$src, $dst|$dst, $src}",
5018f4a2713aSLionel Sambuc                       [(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
5019f4a2713aSLionel Sambuc                                        (iPTR 0)))], IIC_SSE_MOVD_ToGP>,
5020f4a2713aSLionel Sambuc                   Sched<[WriteMove]>;
5021f4a2713aSLionel Sambucdef MOVPDI2DImr  : S2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
5022f4a2713aSLionel Sambuc                       "movd\t{$src, $dst|$dst, $src}",
5023f4a2713aSLionel Sambuc                       [(store (i32 (vector_extract (v4i32 VR128:$src),
5024f4a2713aSLionel Sambuc                                     (iPTR 0))), addr:$dst)],
5025*0a6a1f1dSLionel Sambuc                                     IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
5026f4a2713aSLionel Sambuc
// 256-bit X86Vinsert of a GPR scalar at index 0, into either an all-zeros or
// an undef vector, is selected as the 128-bit VMOVDI2PDIrr (GR32) or
// VMOV64toPQIrr (GR64), with the result placed in the sub_xmm subregister
// via SUBREG_TO_REG.
// NOTE(review): no Predicates guard is visible around these patterns even
// though they emit VEX instructions -- confirm this is intended.
5027f4a2713aSLionel Sambucdef : Pat<(v8i32 (X86Vinsert (v8i32 immAllZerosV), GR32:$src2, (iPTR 0))),
5028f4a2713aSLionel Sambuc        (SUBREG_TO_REG (i32 0), (VMOVDI2PDIrr GR32:$src2), sub_xmm)>;
5029f4a2713aSLionel Sambuc
5030f4a2713aSLionel Sambucdef : Pat<(v4i64 (X86Vinsert (bc_v4i64 (v8i32 immAllZerosV)), GR64:$src2, (iPTR 0))),
5031f4a2713aSLionel Sambuc        (SUBREG_TO_REG (i32 0), (VMOV64toPQIrr GR64:$src2), sub_xmm)>;
5032f4a2713aSLionel Sambuc
5033f4a2713aSLionel Sambucdef : Pat<(v8i32 (X86Vinsert undef, GR32:$src2, (iPTR 0))),
5034f4a2713aSLionel Sambuc        (SUBREG_TO_REG (i32 0), (VMOVDI2PDIrr GR32:$src2), sub_xmm)>;
5035f4a2713aSLionel Sambuc
5036f4a2713aSLionel Sambucdef : Pat<(v4i64 (X86Vinsert undef, GR64:$src2, (iPTR 0))),
5037f4a2713aSLionel Sambuc        (SUBREG_TO_REG (i32 0), (VMOV64toPQIrr GR64:$src2), sub_xmm)>;
5038f4a2713aSLionel Sambuc
5039f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===//
5040f4a2713aSLionel Sambuc// Move Packed Quadword Int first element to Quadword Int
5041f4a2713aSLionel Sambuc//
// Extract element 0 of a v2i64 XMM value into a GR64 (opcode 0x7E,
// MRMDestReg).  Both the VEX and the legacy record take their scheduling
// class from the enclosing SchedRW = [WriteMove].
5042f4a2713aSLionel Sambuclet SchedRW = [WriteMove] in {
5043f4a2713aSLionel Sambucdef VMOVPQIto64rr : VRS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
5044f4a2713aSLionel Sambuc                          "movq\t{$src, $dst|$dst, $src}",
5045f4a2713aSLionel Sambuc                          [(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
5046f4a2713aSLionel Sambuc                                                           (iPTR 0)))],
5047f4a2713aSLionel Sambuc                                                           IIC_SSE_MOVD_ToGP>,
5048f4a2713aSLionel Sambuc                      VEX;
5049f4a2713aSLionel Sambuc
5050f4a2713aSLionel Sambucdef MOVPQIto64rr : RS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
5051f4a2713aSLionel Sambuc                        "mov{d|q}\t{$src, $dst|$dst, $src}",
5052f4a2713aSLionel Sambuc                        [(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
5053f4a2713aSLionel Sambuc                                                         (iPTR 0)))],
5054f4a2713aSLionel Sambuc                                                         IIC_SSE_MOVD_ToGP>;
5055f4a2713aSLionel Sambuc} //SchedRW
5056f4a2713aSLionel Sambuc
5057f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===//
5058f4a2713aSLionel Sambuc// Bitcast FR64 <-> GR64
5059f4a2713aSLionel Sambuc//
// Codegen-only records for bitconvert between FR64 and 64-bit integers
// (opcode 0x7E), in VEX and legacy forms:
//   *MOV64toSDrm - loadi64 from memory -> FR64.
//   *MOVSDto64rr - FR64 -> GR64.
//   *MOVSDto64mr - FR64 stored to memory as i64.
// Only VMOV64toSDrm carries an explicit predicate (UseAVX) here, and it
// passes no itinerary argument.
// NOTE(review): VMOVSDto64rr uses IIC_SSE_MOVDQ while its legacy twin
// MOVSDto64rr uses IIC_SSE_MOVD_ToGP -- confirm whether that is intentional.
5060f4a2713aSLionel Sambuclet isCodeGenOnly = 1 in {
5061f4a2713aSLionel Sambuc  let Predicates = [UseAVX] in
5062f4a2713aSLionel Sambuc  def VMOV64toSDrm : VS2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
5063f4a2713aSLionel Sambuc                          "movq\t{$src, $dst|$dst, $src}",
5064f4a2713aSLionel Sambuc                          [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>,
5065f4a2713aSLionel Sambuc                          VEX, Sched<[WriteLoad]>;
5066f4a2713aSLionel Sambuc  def VMOVSDto64rr : VRS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
5067f4a2713aSLionel Sambuc                           "movq\t{$src, $dst|$dst, $src}",
5068f4a2713aSLionel Sambuc                           [(set GR64:$dst, (bitconvert FR64:$src))],
5069f4a2713aSLionel Sambuc                           IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>;
5070f4a2713aSLionel Sambuc  def VMOVSDto64mr : VRS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
5071f4a2713aSLionel Sambuc                           "movq\t{$src, $dst|$dst, $src}",
5072f4a2713aSLionel Sambuc                           [(store (i64 (bitconvert FR64:$src)), addr:$dst)],
5073f4a2713aSLionel Sambuc                           IIC_SSE_MOVDQ>, VEX, Sched<[WriteStore]>;
5074f4a2713aSLionel Sambuc
5075f4a2713aSLionel Sambuc  def MOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
5076f4a2713aSLionel Sambuc                         "movq\t{$src, $dst|$dst, $src}",
5077f4a2713aSLionel Sambuc                         [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))],
5078f4a2713aSLionel Sambuc                         IIC_SSE_MOVDQ>, Sched<[WriteLoad]>;
5079f4a2713aSLionel Sambuc  def MOVSDto64rr : RS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
5080f4a2713aSLionel Sambuc                         "mov{d|q}\t{$src, $dst|$dst, $src}",
5081f4a2713aSLionel Sambuc                         [(set GR64:$dst, (bitconvert FR64:$src))],
5082f4a2713aSLionel Sambuc                         IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>;
5083f4a2713aSLionel Sambuc  def MOVSDto64mr : RS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
5084f4a2713aSLionel Sambuc                         "movq\t{$src, $dst|$dst, $src}",
5085f4a2713aSLionel Sambuc                         [(store (i64 (bitconvert FR64:$src)), addr:$dst)],
5086f4a2713aSLionel Sambuc                         IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
5087f4a2713aSLionel Sambuc}
5088f4a2713aSLionel Sambuc
5089f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===//
5090f4a2713aSLionel Sambuc// Move Scalar Single to Double Int
5091f4a2713aSLionel Sambuc//
// Codegen-only records that select a bitconvert from FR32 to a 32-bit
// integer (opcode 0x7E): into a GR32 (rr) or stored to a 32-bit memory
// operand (mr), in VEX and legacy forms.
5092f4a2713aSLionel Sambuclet isCodeGenOnly = 1 in {
5093f4a2713aSLionel Sambuc  def VMOVSS2DIrr  : VS2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
5094f4a2713aSLionel Sambuc                        "movd\t{$src, $dst|$dst, $src}",
5095f4a2713aSLionel Sambuc                        [(set GR32:$dst, (bitconvert FR32:$src))],
5096f4a2713aSLionel Sambuc                        IIC_SSE_MOVD_ToGP>, VEX, Sched<[WriteMove]>;
5097f4a2713aSLionel Sambuc  def VMOVSS2DImr  : VS2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
5098f4a2713aSLionel Sambuc                        "movd\t{$src, $dst|$dst, $src}",
5099f4a2713aSLionel Sambuc                        [(store (i32 (bitconvert FR32:$src)), addr:$dst)],
5100f4a2713aSLionel Sambuc                        IIC_SSE_MOVDQ>, VEX, Sched<[WriteStore]>;
5101f4a2713aSLionel Sambuc  def MOVSS2DIrr  : S2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
5102f4a2713aSLionel Sambuc                        "movd\t{$src, $dst|$dst, $src}",
5103f4a2713aSLionel Sambuc                        [(set GR32:$dst, (bitconvert FR32:$src))],
5104f4a2713aSLionel Sambuc                        IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>;
5105f4a2713aSLionel Sambuc  def MOVSS2DImr  : S2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
5106f4a2713aSLionel Sambuc                        "movd\t{$src, $dst|$dst, $src}",
5107f4a2713aSLionel Sambuc                        [(store (i32 (bitconvert FR32:$src)), addr:$dst)],
5108f4a2713aSLionel Sambuc                        IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
5109f4a2713aSLionel Sambuc}
5110f4a2713aSLionel Sambuc
5111f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===//
5112f4a2713aSLionel Sambuc// Patterns and instructions to describe movd/movq to XMM register zero-extends
5113f4a2713aSLionel Sambuc//
// Codegen-only records matching X86vzmovl of a GR64 scalar_to_vector
// (v2i64), i.e. a 64-bit GPR moved into an XMM register with zero extension
// per the section header.  AddedComplexity = 15 raises the priority of these
// patterns; scheduling comes from the enclosing SchedRW = [WriteMove].
// The VEX record is built on VS2I with an explicit VEX_W, whereas the legacy
// record uses RS2I.
5114f4a2713aSLionel Sambuclet isCodeGenOnly = 1, SchedRW = [WriteMove] in {
5115f4a2713aSLionel Sambuclet AddedComplexity = 15 in {
5116f4a2713aSLionel Sambucdef VMOVZQI2PQIrr : VS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
5117f4a2713aSLionel Sambuc                       "movq\t{$src, $dst|$dst, $src}", // X86-64 only
5118f4a2713aSLionel Sambuc                       [(set VR128:$dst, (v2i64 (X86vzmovl
5119f4a2713aSLionel Sambuc                                      (v2i64 (scalar_to_vector GR64:$src)))))],
5120f4a2713aSLionel Sambuc                                      IIC_SSE_MOVDQ>,
5121f4a2713aSLionel Sambuc                                      VEX, VEX_W;
5122f4a2713aSLionel Sambucdef MOVZQI2PQIrr : RS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
5123f4a2713aSLionel Sambuc                       "mov{d|q}\t{$src, $dst|$dst, $src}", // X86-64 only
5124f4a2713aSLionel Sambuc                       [(set VR128:$dst, (v2i64 (X86vzmovl
5125f4a2713aSLionel Sambuc                                      (v2i64 (scalar_to_vector GR64:$src)))))],
5126f4a2713aSLionel Sambuc                                      IIC_SSE_MOVDQ>;
5127f4a2713aSLionel Sambuc}
5128f4a2713aSLionel Sambuc} // isCodeGenOnly, SchedRW
5129f4a2713aSLionel Sambuc
let Predicates = [UseAVX] in {
  // GR32 source: select the register form of the VEX movd.
  let AddedComplexity = 15 in {
    def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
              (VMOVDI2PDIrr GR32:$src)>;
  }

  // Memory sources.  The AVX 128-bit movd/movq instructions write zeros to
  // the high 128-bit part.
  let AddedComplexity = 20 in {
    def : Pat<(v4i32 (X86vzmovl
                       (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
              (VMOVDI2PDIrm addr:$src)>;
    def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
              (VMOVDI2PDIrm addr:$src)>;
    def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
              (VMOVDI2PDIrm addr:$src)>;
  }

  // A 256-bit scalar_to_vector + zero-extend is matched with the regular
  // 128-bit instructions, wrapped in SUBREG_TO_REG on sub_xmm.
  def : Pat<(v8i32 (X86vzmovl
                     (insert_subvector undef,
                                       (v4i32 (scalar_to_vector GR32:$src)),
                                       (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVDI2PDIrr GR32:$src), sub_xmm)>;
  def : Pat<(v4i64 (X86vzmovl
                     (insert_subvector undef,
                                       (v2i64 (scalar_to_vector GR64:$src)),
                                       (iPTR 0)))),
            (SUBREG_TO_REG (i64 0), (VMOVZQI2PQIrr GR64:$src), sub_xmm)>;
}
5152f4a2713aSLionel Sambuc
let Predicates = [UseSSE2] in {
  // GR32 source: select the register form of movd.
  let AddedComplexity = 15 in {
    def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
              (MOVDI2PDIrr GR32:$src)>;
  }

  // Memory sources all select the load form of movd.
  let AddedComplexity = 20 in {
    def : Pat<(v4i32 (X86vzmovl
                       (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
              (MOVDI2PDIrm addr:$src)>;
    def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
              (MOVDI2PDIrm addr:$src)>;
    def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
              (MOVDI2PDIrm addr:$src)>;
  }
}
5167f4a2713aSLionel Sambuc
// Aliases for the correct "movq" spelling of the GR64 <-> XMM moves, so that
// correct assembly can be read even though we continue to emit the wrong
// mnemonic for compatibility with Darwin's buggy assembler.
def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
                (MOV64toPQIrr VR128:$dst, GR64:$src),
                0>;
def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
                (MOVPQIto64rr GR64:$dst, VR128:$src),
                0>;

// For AVX no such compatibility is needed: accept "vmovd" but print "vmovq".
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
                (VMOV64toPQIrr VR128:$dst, GR64:$src),
                0>;
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
                (VMOVPQIto64rr GR64:$dst, VR128:$src),
                0>;
5180f4a2713aSLionel Sambuc
5181f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===//
5182f4a2713aSLionel Sambuc// SSE2 - Move Quadword
5183f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===//
5184f4a2713aSLionel Sambuc
5185f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===//
5186f4a2713aSLionel Sambuc// Move Quadword Int to Packed Quadword Int
5187f4a2713aSLionel Sambuc//
5188f4a2713aSLionel Sambuc
// Load a 64-bit integer from memory into an XMM register
// (scalar_to_vector of a loadi64), in VEX and legacy encodings.
let SchedRW = [WriteLoad] in {
  def VMOVQI2PQIrm
      : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
          "vmovq\t{$src, $dst|$dst, $src}",
          [(set VR128:$dst, (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
        XS, VEX, Requires<[UseAVX]>;

  // SSE2 instruction with XS prefix.
  def MOVQI2PQIrm
      : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
          "movq\t{$src, $dst|$dst, $src}",
          [(set VR128:$dst, (v2i64 (scalar_to_vector (loadi64 addr:$src))))],
          IIC_SSE_MOVDQ>,
        XS, Requires<[UseSSE2]>;
} // SchedRW
5202f4a2713aSLionel Sambuc
5203f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===//
5204f4a2713aSLionel Sambuc// Move Packed Quadword Int to Quadword Int
5205f4a2713aSLionel Sambuc//
// Store element 0 of a v2i64 XMM register to a 64-bit memory location.
let SchedRW = [WriteStore] in {
  def VMOVPQI2QImr
      : VS2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
             "movq\t{$src, $dst|$dst, $src}",
             [(store (i64 (vector_extract (v2i64 VR128:$src), (iPTR 0))),
                     addr:$dst)],
             IIC_SSE_MOVDQ>,
        VEX;

  def MOVPQI2QImr
      : S2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
            "movq\t{$src, $dst|$dst, $src}",
            [(store (i64 (vector_extract (v2i64 VR128:$src), (iPTR 0))),
                    addr:$dst)],
            IIC_SSE_MOVDQ>;
} // SchedRW
5218f4a2713aSLionel Sambuc
// Register-to-register 0xD6 encodings of movq.  They carry no selection
// pattern and exist for the disassembler only.
let isCodeGenOnly = 1, ForceDisassemble = 1 in {
  let hasSideEffects = 0, SchedRW = [WriteVecLogic] in {
    def VMOVPQI2QIrr
        : VS2I<0xD6, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
               "movq\t{$src, $dst|$dst, $src}",
               [],
               IIC_SSE_MOVQ_RR>,
          VEX;
    def MOVPQI2QIrr
        : S2I<0xD6, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
              "movq\t{$src, $dst|$dst, $src}",
              [],
              IIC_SSE_MOVQ_RR>;
  }
}
5227*0a6a1f1dSLionel Sambuc
5228f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===//
5229f4a2713aSLionel Sambuc// Store / copy lower 64-bits of a XMM register.
5230f4a2713aSLionel Sambuc//
// Select the int_x86_sse2_storel_dq intrinsic to the 64-bit store form of
// movq, VEX-encoded when AVX is in use.
let Predicates = [UseAVX] in {
  def : Pat<(int_x86_sse2_storel_dq addr:$dst, VR128:$src),
            (VMOVPQI2QImr addr:$dst, VR128:$src)>;
}
let Predicates = [UseSSE2] in {
  def : Pat<(int_x86_sse2_storel_dq addr:$dst, VR128:$src),
            (MOVPQI2QImr addr:$dst, VR128:$src)>;
}
5237f4a2713aSLionel Sambuc
// Memory forms of movq selected from X86vzmovl of a scalar_to_vector of a
// 64-bit load.
let isCodeGenOnly = 1, AddedComplexity = 20 in {
  def VMOVZQI2PQIrm
      : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
          "vmovq\t{$src, $dst|$dst, $src}",
          [(set VR128:$dst,
                (v2i64 (X86vzmovl
                         (v2i64 (scalar_to_vector (loadi64 addr:$src))))))],
          IIC_SSE_MOVDQ>,
        XS, VEX, Requires<[UseAVX]>, Sched<[WriteLoad]>;

  def MOVZQI2PQIrm
      : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
          "movq\t{$src, $dst|$dst, $src}",
          [(set VR128:$dst,
                (v2i64 (X86vzmovl
                         (v2i64 (scalar_to_vector (loadi64 addr:$src))))))],
          IIC_SSE_MOVDQ>,
        XS, Requires<[UseSSE2]>, Sched<[WriteLoad]>;
}
5255f4a2713aSLionel Sambuc
// Additional v2i64 zero-extending load patterns that map onto the movq load
// forms defined above; the same pair of patterns exists per encoding.
let AddedComplexity = 20 in {
  let Predicates = [UseAVX] in {
    def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))),
              (VMOVZQI2PQIrm addr:$src)>;
    def : Pat<(v2i64 (X86vzload addr:$src)),
              (VMOVZQI2PQIrm addr:$src)>;
  }

  let Predicates = [UseSSE2] in {
    def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))),
              (MOVZQI2PQIrm addr:$src)>;
    def : Pat<(v2i64 (X86vzload addr:$src)),
              (MOVZQI2PQIrm addr:$src)>;
  }
}
5268f4a2713aSLionel Sambuc
// v4i64 zero-extending loads: use a 128-bit VEX load (aligned or unaligned)
// and place the result in the sub_xmm subregister via SUBREG_TO_REG.
let Predicates = [HasAVX] in {
  def : Pat<(v4i64 (alignedX86vzload addr:$src)),
            (SUBREG_TO_REG (i32 0),
                           (VMOVAPSrm addr:$src),
                           sub_xmm)>;
  def : Pat<(v4i64 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i32 0),
                           (VMOVUPSrm addr:$src),
                           sub_xmm)>;
}
5275f4a2713aSLionel Sambuc
5276f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===//
5277f4a2713aSLionel Sambuc// Moving from XMM to XMM and clear upper 64 bits. Note, there is a bug in
5278f4a2713aSLionel Sambuc// IA32 document. movq xmm1, xmm2 does clear the high bits.
5279f4a2713aSLionel Sambuc//
// XMM -> XMM movq selected from X86vzmovl on a v2i64 register.
let SchedRW = [WriteVecLogic], AddedComplexity = 15 in {
  def VMOVZPQILo2PQIrr
      : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
          "vmovq\t{$src, $dst|$dst, $src}",
          [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))],
          IIC_SSE_MOVQ_RR>,
        XS, VEX, Requires<[UseAVX]>;

  def MOVZPQILo2PQIrr
      : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
          "movq\t{$src, $dst|$dst, $src}",
          [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))],
          IIC_SSE_MOVQ_RR>,
        XS, Requires<[UseSSE2]>;
} // SchedRW, AddedComplexity
5294f4a2713aSLionel Sambuc
// Memory-source counterparts: X86vzmovl applied to a loadv2i64, with the
// operand declared as i128mem.
let isCodeGenOnly = 1, SchedRW = [WriteVecLogicLd], AddedComplexity = 20 in {
  def VMOVZPQILo2PQIrm
      : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
          "vmovq\t{$src, $dst|$dst, $src}",
          [(set VR128:$dst, (v2i64 (X86vzmovl (loadv2i64 addr:$src))))],
          IIC_SSE_MOVDQ>,
        XS, VEX, Requires<[UseAVX]>;

  def MOVZPQILo2PQIrm
      : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
          "movq\t{$src, $dst|$dst, $src}",
          [(set VR128:$dst, (v2i64 (X86vzmovl (loadv2i64 addr:$src))))],
          IIC_SSE_MOVDQ>,
        XS, Requires<[UseSSE2]>;
} // isCodeGenOnly, SchedRW, AddedComplexity
5312f4a2713aSLionel Sambuc
// X86vzmovl on a v2f64 register reuses the integer XMM -> XMM movq.
let Predicates = [UseAVX], AddedComplexity = 20 in
  def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
            (VMOVZPQILo2PQIrr VR128:$src)>;

let Predicates = [UseSSE2], AddedComplexity = 20 in
  def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
            (MOVZPQILo2PQIrr VR128:$src)>;
5323f4a2713aSLionel Sambuc
5324f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===//
5325f4a2713aSLionel Sambuc// SSE3 - Replicate Single FP - MOVSHDUP and MOVSLDUP
5326f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===//
// Defines the register (rr) and memory (rm) forms of a one-source replicate
// instruction.
//   op        - opcode byte
//   OpNode    - SDNode matched by both forms
//   OpcodeStr - mnemonic placed before the operand string
//   vt        - result value type used in the rr pattern
//   RC        - register class of the source and destination
//   mem_frag  - load fragment used in the rm pattern
//   x86memop  - memory operand type of the rm form
multiclass sse3_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr,
                              ValueType vt, RegisterClass RC, PatFrag mem_frag,
                              X86MemOperand x86memop> {
  def rr : S3SI<op, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
                !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                [(set RC:$dst, (vt (OpNode RC:$src)))],
                IIC_SSE_MOV_LH>,
           Sched<[WriteFShuffle]>;

  def rm : S3SI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
                !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                [(set RC:$dst, (OpNode (mem_frag addr:$src)))],
                IIC_SSE_MOV_LH>,
           Sched<[WriteLoad]>;
}
5339f4a2713aSLionel Sambuc
// VEX-encoded 128-bit and 256-bit (Y) variants.
let Predicates = [HasAVX] in {
  defm VMOVSHDUP
      : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup", v4f32, VR128,
                           loadv4f32, f128mem>,
        VEX;
  defm VMOVSLDUP
      : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup", v4f32, VR128,
                           loadv4f32, f128mem>,
        VEX;
  defm VMOVSHDUPY
      : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup", v8f32, VR256,
                           loadv8f32, f256mem>,
        VEX, VEX_L;
  defm VMOVSLDUPY
      : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup", v8f32, VR256,
                           loadv8f32, f256mem>,
        VEX, VEX_L;
}

// Legacy-encoded variants; these use the memopv4f32 fragment for the load.
defm MOVSHDUP
    : sse3_replicate_sfp<0x16, X86Movshdup, "movshdup", v4f32, VR128,
                         memopv4f32, f128mem>;
defm MOVSLDUP
    : sse3_replicate_sfp<0x12, X86Movsldup, "movsldup", v4f32, VR128,
                         memopv4f32, f128mem>;
5354f4a2713aSLionel Sambuc
// Integer-typed (v4i32 / v8i32) uses of X86Movshdup / X86Movsldup select the
// same VEX instructions as the floating-point types.
let Predicates = [HasAVX] in {
  // 128-bit.
  def : Pat<(v4i32 (X86Movshdup VR128:$src)), (VMOVSHDUPrr VR128:$src)>;
  def : Pat<(v4i32 (X86Movshdup (bc_v4i32 (loadv2i64 addr:$src)))),
            (VMOVSHDUPrm addr:$src)>;
  def : Pat<(v4i32 (X86Movsldup VR128:$src)), (VMOVSLDUPrr VR128:$src)>;
  def : Pat<(v4i32 (X86Movsldup (bc_v4i32 (loadv2i64 addr:$src)))),
            (VMOVSLDUPrm addr:$src)>;

  // 256-bit.
  def : Pat<(v8i32 (X86Movshdup VR256:$src)), (VMOVSHDUPYrr VR256:$src)>;
  def : Pat<(v8i32 (X86Movshdup (bc_v8i32 (loadv4i64 addr:$src)))),
            (VMOVSHDUPYrm addr:$src)>;
  def : Pat<(v8i32 (X86Movsldup VR256:$src)), (VMOVSLDUPYrr VR256:$src)>;
  def : Pat<(v8i32 (X86Movsldup (bc_v8i32 (loadv4i64 addr:$src)))),
            (VMOVSLDUPYrm addr:$src)>;
}
5373f4a2713aSLionel Sambuc
// Legacy SSE3 equivalents for v4i32; memory operands go through memopv2i64.
let Predicates = [UseSSE3] in {
  def : Pat<(v4i32 (X86Movshdup VR128:$src)), (MOVSHDUPrr VR128:$src)>;
  def : Pat<(v4i32 (X86Movshdup (bc_v4i32 (memopv2i64 addr:$src)))),
            (MOVSHDUPrm addr:$src)>;
  def : Pat<(v4i32 (X86Movsldup VR128:$src)), (MOVSLDUPrr VR128:$src)>;
  def : Pat<(v4i32 (X86Movsldup (bc_v4i32 (memopv2i64 addr:$src)))),
            (MOVSLDUPrm addr:$src)>;
}
5384f4a2713aSLionel Sambuc
5385f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===//
5386f4a2713aSLionel Sambuc// SSE3 - Replicate Double FP - MOVDDUP
5387f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===//
5388f4a2713aSLionel Sambuc
// 128-bit MOVDDUP forms.  The rr form has no selection pattern (and is marked
// side-effect free); the rm form matches X86Movddup of a scalar f64 load.
multiclass sse3_replicate_dfp<string OpcodeStr> {
  let hasSideEffects = 0 in {
    def rr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                  [],
                  IIC_SSE_MOV_LH>,
             Sched<[WriteFShuffle]>;
  }

  def rm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
                !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                [(set VR128:$dst,
                      (v2f64 (X86Movddup
                               (scalar_to_vector (loadf64 addr:$src)))))],
                IIC_SSE_MOV_LH>,
           Sched<[WriteLoad]>;
}
5401f4a2713aSLionel Sambuc
// FIXME: Merge with the class above once there are patterns for the ymm version
// 256-bit MOVDDUP forms on VR256.  Unlike the 128-bit multiclass, the rr form
// carries a selection pattern and neither form passes an itinerary.
multiclass sse3_replicate_dfp_y<string OpcodeStr> {
  def rr : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                [(set VR256:$dst, (v4f64 (X86Movddup VR256:$src)))]>,
           Sched<[WriteFShuffle]>;

  def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                [(set VR256:$dst,
                      (v4f64 (X86Movddup
                               (scalar_to_vector (loadf64 addr:$src)))))]>,
           Sched<[WriteLoad]>;
}
5415f4a2713aSLionel Sambuc
// VEX-encoded MOVDDUP: 128-bit and 256-bit (Y) instantiations.
let Predicates = [HasAVX] in
  defm VMOVDDUP  : sse3_replicate_dfp<"vmovddup">, VEX;
let Predicates = [HasAVX] in
  defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup">, VEX, VEX_L;

// Legacy-encoded MOVDDUP.
defm MOVDDUP : sse3_replicate_dfp<"movddup">;
5422f4a2713aSLionel Sambuc
// Extra X86Movddup selection patterns for the VEX forms.  The enclosing
// Predicates binding already restricts every pattern here to HasAVX, so no
// per-pattern Requires<> is attached.
let Predicates = [HasAVX] in {
  // 128-bit version: every memory shape folds into the load form.
  def : Pat<(X86Movddup (loadv2f64 addr:$src)),
            (VMOVDDUPrm addr:$src)>;
  def : Pat<(X86Movddup (bc_v2f64 (loadv4f32 addr:$src))),
            (VMOVDDUPrm addr:$src)>;
  def : Pat<(X86Movddup (bc_v2f64 (loadv2i64 addr:$src))),
            (VMOVDDUPrm addr:$src)>;
  def : Pat<(X86Movddup
              (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
            (VMOVDDUPrm addr:$src)>;

  // 256-bit version
  def : Pat<(X86Movddup (loadv4f64 addr:$src)),
            (VMOVDDUPYrm addr:$src)>;
  def : Pat<(X86Movddup (loadv4i64 addr:$src)),
            (VMOVDDUPYrm addr:$src)>;
  def : Pat<(X86Movddup (v4i64 (scalar_to_vector (loadi64 addr:$src)))),
            (VMOVDDUPYrm addr:$src)>;
  def : Pat<(X86Movddup (v4i64 VR256:$src)),
            (VMOVDDUPYrr VR256:$src)>;
}
5444f4a2713aSLionel Sambuc
// When optimizing for size with AVX, a 64-bit scalar broadcast into a
// 128-bit vector is selected as the memory form of vmovddup.
let Predicates = [UseAVX, OptForSize] in {
  def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))),
            (VMOVDDUPrm addr:$src)>;
  def : Pat<(v2i64 (X86VBroadcast (loadi64 addr:$src))),
            (VMOVDDUPrm addr:$src)>;
}
5451*0a6a1f1dSLionel Sambuc
// Legacy SSE3 X86Movddup patterns: every memory shape folds into MOVDDUPrm.
let Predicates = [UseSSE3] in {
  def : Pat<(X86Movddup (memopv2f64 addr:$src)),
            (MOVDDUPrm addr:$src)>;
  def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))),
            (MOVDDUPrm addr:$src)>;
  def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))),
            (MOVDDUPrm addr:$src)>;
  def : Pat<(X86Movddup
              (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
            (MOVDDUPrm addr:$src)>;
}
5463f4a2713aSLionel Sambuc
5464f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===//
5465f4a2713aSLionel Sambuc// SSE3 - Move Unaligned Integer
5466f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===//
5467f4a2713aSLionel Sambuc
// LDDQU loads, selected from the ldu_dq intrinsics.
let SchedRW = [WriteLoad], Predicates = [HasAVX] in {
  def VLDDQUrm
      : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
             "vlddqu\t{$src, $dst|$dst, $src}",
             [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>,
        VEX;
  def VLDDQUYrm
      : S3DI<0xF0, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
             "vlddqu\t{$src, $dst|$dst, $src}",
             [(set VR256:$dst, (int_x86_avx_ldu_dq_256 addr:$src))]>,
        VEX, VEX_L;
}

let SchedRW = [WriteLoad] in {
  def LDDQUrm
      : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
             "lddqu\t{$src, $dst|$dst, $src}",
             [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))],
             IIC_SSE_LDDQU>;
}
5483f4a2713aSLionel Sambuc
5484f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===//
5485f4a2713aSLionel Sambuc// SSE3 - Arithmetic
5486f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===//
5487f4a2713aSLionel Sambuc
// Defines the register (rr) and memory (rm) forms of an ADDSUB instruction
// (opcode 0xD0) selected from an intrinsic.
//   Int       - intrinsic matched by both forms
//   OpcodeStr - mnemonic placed before the operand string
//   RC        - register class of the sources and destination
//   x86memop  - memory operand type of the rm form
//   itins     - itinerary pair (rr / rm) plus the foldable sched write
//   Is2Addr   - 1 selects the two-operand asm string, 0 the three-operand one
multiclass sse3_addsub<Intrinsic Int, string OpcodeStr, RegisterClass RC,
                       X86MemOperand x86memop, OpndItins itins,
                       bit Is2Addr = 1> {
  def rr : I<0xD0, MRMSrcReg,
       (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (Int RC:$src1, RC:$src2))], itins.rr>,
       Sched<[itins.Sched]>;
  // The memory form must use the rm itinerary; it previously reused itins.rr,
  // so the load-folding variant was given the register-register itinerary.
  def rm : I<0xD0, MRMSrcMem,
       (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (Int RC:$src1, (memop addr:$src2)))], itins.rm>,
       Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
5506f4a2713aSLionel Sambuc
// VEX-encoded forms, instantiated with Is2Addr = 0 (three-operand asm).
let Predicates = [HasAVX], ExeDomain = SSEPackedSingle in {
  defm VADDSUBPS
      : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", VR128, f128mem,
                    SSE_ALU_F32P, 0>,
        XD, VEX_4V;
  defm VADDSUBPSY
      : sse3_addsub<int_x86_avx_addsub_ps_256, "vaddsubps", VR256, f256mem,
                    SSE_ALU_F32P, 0>,
        XD, VEX_4V, VEX_L;
}
let Predicates = [HasAVX], ExeDomain = SSEPackedDouble in {
  defm VADDSUBPD
      : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", VR128, f128mem,
                    SSE_ALU_F64P, 0>,
        PD, VEX_4V;
  defm VADDSUBPDY
      : sse3_addsub<int_x86_avx_addsub_pd_256, "vaddsubpd", VR256, f256mem,
                    SSE_ALU_F64P, 0>,
        PD, VEX_4V, VEX_L;
}

// Legacy SSE3 forms: two-operand asm with $src1 tied to $dst.
let Constraints = "$src1 = $dst", Predicates = [UseSSE3] in {
  let ExeDomain = SSEPackedSingle in {
    defm ADDSUBPS
        : sse3_addsub<int_x86_sse3_addsub_ps, "addsubps", VR128, f128mem,
                      SSE_ALU_F32P>,
          XD;
  }
  let ExeDomain = SSEPackedDouble in {
    defm ADDSUBPD
        : sse3_addsub<int_x86_sse3_addsub_pd, "addsubpd", VR128, f128mem,
                      SSE_ALU_F64P>,
          PD;
  }
}
5529*0a6a1f1dSLionel Sambuc
5530*0a6a1f1dSLionel Sambuc// Patterns used to select 'addsub' instructions.
// X86Addsub node -> VEX ADDSUB instructions, register and memory right-hand
// operands, for each supported vector type.
let Predicates = [HasAVX] in {
  // 128-bit.
  def : Pat<(v4f32 (X86Addsub (v4f32 VR128:$lhs), (v4f32 VR128:$rhs))),
            (VADDSUBPSrr VR128:$lhs, VR128:$rhs)>;
  def : Pat<(v4f32 (X86Addsub (v4f32 VR128:$lhs),
                              (v4f32 (memop addr:$rhs)))),
            (VADDSUBPSrm VR128:$lhs, f128mem:$rhs)>;
  def : Pat<(v2f64 (X86Addsub (v2f64 VR128:$lhs), (v2f64 VR128:$rhs))),
            (VADDSUBPDrr VR128:$lhs, VR128:$rhs)>;
  def : Pat<(v2f64 (X86Addsub (v2f64 VR128:$lhs),
                              (v2f64 (memop addr:$rhs)))),
            (VADDSUBPDrm VR128:$lhs, f128mem:$rhs)>;

  // 256-bit.
  def : Pat<(v8f32 (X86Addsub (v8f32 VR256:$lhs), (v8f32 VR256:$rhs))),
            (VADDSUBPSYrr VR256:$lhs, VR256:$rhs)>;
  def : Pat<(v8f32 (X86Addsub (v8f32 VR256:$lhs),
                              (v8f32 (memop addr:$rhs)))),
            (VADDSUBPSYrm VR256:$lhs, f256mem:$rhs)>;
  def : Pat<(v4f64 (X86Addsub (v4f64 VR256:$lhs), (v4f64 VR256:$rhs))),
            (VADDSUBPDYrr VR256:$lhs, VR256:$rhs)>;
  def : Pat<(v4f64 (X86Addsub (v4f64 VR256:$lhs),
                              (v4f64 (memop addr:$rhs)))),
            (VADDSUBPDYrm VR256:$lhs, f256mem:$rhs)>;
}
5550*0a6a1f1dSLionel Sambuc
// X86Addsub node -> legacy SSE3 ADDSUB instructions (128-bit only).
let Predicates = [UseSSE3] in {
  def : Pat<(v4f32 (X86Addsub (v4f32 VR128:$lhs), (v4f32 VR128:$rhs))),
            (ADDSUBPSrr VR128:$lhs, VR128:$rhs)>;
  def : Pat<(v4f32 (X86Addsub (v4f32 VR128:$lhs),
                              (v4f32 (memop addr:$rhs)))),
            (ADDSUBPSrm VR128:$lhs, f128mem:$rhs)>;
  def : Pat<(v2f64 (X86Addsub (v2f64 VR128:$lhs), (v2f64 VR128:$rhs))),
            (ADDSUBPDrr VR128:$lhs, VR128:$rhs)>;
  def : Pat<(v2f64 (X86Addsub (v2f64 VR128:$lhs),
                              (v2f64 (memop addr:$rhs)))),
            (ADDSUBPDrm VR128:$lhs, f128mem:$rhs)>;
}
5561f4a2713aSLionel Sambuc
5562f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===//
5563f4a2713aSLionel Sambuc// SSE3 Instructions
5564f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===//
5565f4a2713aSLionel Sambuc
5566f4a2713aSLionel Sambuc// Horizontal ops
// S3D_Int - Binary packed operation built on the S3DI instruction class.
// Produces two records per instantiation:
//   rr - both sources in registers of class RC
//   rm - second source is a memory operand of class x86memop, loaded via memop
// Parameters:
//   o         - opcode byte
//   OpcodeStr - assembly mnemonic
//   vt        - value type of the result in the selection pattern
//   RC        - register class for $dst/$src1 (and $src2 in the rr form)
//   x86memop  - memory operand class for $src2 in the rm form
//   OpNode    - SDNode matched by the selection pattern
//   Is2Addr   - when 1 the asm string prints only $src2 and $dst; when 0 it
//               prints $src1 as well (three-operand syntax)
multiclass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt,
                   RegisterClass RC, X86MemOperand x86memop, SDNode OpNode,
                   bit Is2Addr = 1> {
  def rr : S3DI<o, MRMSrcReg,
                (outs RC:$dst), (ins RC:$src1, RC:$src2),
                !if(Is2Addr,
                    !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                    !strconcat(OpcodeStr,
                               "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
                [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))],
                IIC_SSE_HADDSUB_RR>,
           Sched<[WriteFAdd]>;

  def rm : S3DI<o, MRMSrcMem,
                (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
                !if(Is2Addr,
                    !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                    !strconcat(OpcodeStr,
                               "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
                [(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))],
                IIC_SSE_HADDSUB_RM>,
           Sched<[WriteFAddLd, ReadAfterLd]>;
}
// S3_Int - Same shape as the S3D_Int multiclass, but the rr/rm records derive
// from the S3I instruction class instead of S3DI.
//   o         - opcode byte
//   OpcodeStr - assembly mnemonic
//   vt        - value type of the result in the selection pattern
//   RC        - register class for $dst/$src1 (and $src2 in the rr form)
//   x86memop  - memory operand class for $src2 in the rm form
//   OpNode    - SDNode matched by the selection pattern
//   Is2Addr   - 1: two-operand asm string; 0: three-operand asm string
multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt,
                  RegisterClass RC, X86MemOperand x86memop, SDNode OpNode,
                  bit Is2Addr = 1> {
  def rr : S3I<o, MRMSrcReg,
               (outs RC:$dst), (ins RC:$src1, RC:$src2),
               !if(Is2Addr,
                   !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
               [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))],
               IIC_SSE_HADDSUB_RR>,
           Sched<[WriteFAdd]>;

  def rm : S3I<o, MRMSrcMem,
               (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
               !if(Is2Addr,
                   !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
               [(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))],
               IIC_SSE_HADDSUB_RM>,
           Sched<[WriteFAddLd, ReadAfterLd]>;
}
5599f4a2713aSLionel Sambuc
// VEX-encoded horizontal FP add/subtract, instantiated for 128-bit (VR128)
// and 256-bit (VR256, VEX_L) vectors. Is2Addr is passed as 0, so the asm
// strings use the three-operand form.
let Predicates = [HasAVX] in {
  // Single precision (opcodes 0x7C/0x7D via S3D_Int).
  let ExeDomain = SSEPackedSingle in {
    defm VHADDPS  : S3D_Int<0x7C, "vhaddps", v4f32, VR128,
                            f128mem, X86fhadd, 0>, VEX_4V;
    defm VHSUBPS  : S3D_Int<0x7D, "vhsubps", v4f32, VR128,
                            f128mem, X86fhsub, 0>, VEX_4V;
    defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256,
                            f256mem, X86fhadd, 0>, VEX_4V, VEX_L;
    defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256,
                            f256mem, X86fhsub, 0>, VEX_4V, VEX_L;
  }

  // Double precision (opcodes 0x7C/0x7D via S3_Int).
  let ExeDomain = SSEPackedDouble in {
    defm VHADDPD  : S3_Int<0x7C, "vhaddpd", v2f64, VR128,
                           f128mem, X86fhadd, 0>, VEX_4V;
    defm VHSUBPD  : S3_Int<0x7D, "vhsubpd", v2f64, VR128,
                           f128mem, X86fhsub, 0>, VEX_4V;
    defm VHADDPDY : S3_Int<0x7C, "vhaddpd", v4f64, VR256,
                           f256mem, X86fhadd, 0>, VEX_4V, VEX_L;
    defm VHSUBPDY : S3_Int<0x7D, "vhsubpd", v4f64, VR256,
                           f256mem, X86fhsub, 0>, VEX_4V, VEX_L;
  }
}
5622f4a2713aSLionel Sambuc
// Legacy-encoded horizontal FP add/subtract on VR128. The destination is tied
// to the first source, and Is2Addr keeps its default of 1 (two-operand asm).
let Constraints = "$src1 = $dst" in {
  let ExeDomain = SSEPackedSingle in {
    defm HADDPS : S3D_Int<0x7C, "haddps", v4f32, VR128,
                          f128mem, X86fhadd>;
    defm HSUBPS : S3D_Int<0x7D, "hsubps", v4f32, VR128,
                          f128mem, X86fhsub>;
  }

  let ExeDomain = SSEPackedDouble in {
    defm HADDPD : S3_Int<0x7C, "haddpd", v2f64, VR128,
                         f128mem, X86fhadd>;
    defm HSUBPD : S3_Int<0x7D, "hsubpd", v2f64, VR128,
                         f128mem, X86fhsub>;
  }
}
5633f4a2713aSLionel Sambuc
5634f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===//
5635f4a2713aSLionel Sambuc// SSSE3 - Packed Absolute Instructions
5636f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===//
5637f4a2713aSLionel Sambuc
5638f4a2713aSLionel Sambuc
/// SS3I_unop_rm_int - 128-bit SSSE3 unary operation selected through the
/// intrinsic IntId128; the element type may be v*{i8,i16,i32}.
///   opc       - opcode byte
///   OpcodeStr - assembly mnemonic
///   IntId128  - intrinsic matched by both selection patterns
/// Defines rr128 (register source) and rm128 (i128mem source, loaded with
/// memopv2i64 and bitconverted before being passed to the intrinsic).
multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr,
                            Intrinsic IntId128> {
  def rr128 : SS38I<opc, MRMSrcReg,
                    (outs VR128:$dst), (ins VR128:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (IntId128 VR128:$src))],
                    IIC_SSE_PABS_RR>,
              Sched<[WriteVecALU]>;

  def rm128 : SS38I<opc, MRMSrcMem,
                    (outs VR128:$dst), (ins i128mem:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst,
                          (IntId128 (bitconvert (memopv2i64 addr:$src))))],
                    IIC_SSE_PABS_RM>,
              Sched<[WriteVecALULd]>;
}
5656f4a2713aSLionel Sambuc
/// SS3I_unop_rm_int_y - 256-bit counterpart of the SSSE3 unary intrinsic
/// multiclass; the element type may be v*{i8,i16,i32}.
///   opc       - opcode byte
///   OpcodeStr - assembly mnemonic
///   IntId256  - intrinsic matched by both selection patterns
/// Defines rr256 (VR256 source) and rm256 (i256mem source, loaded with
/// memopv4i64 and bitconverted). No itinerary class is passed to SS38I here.
multiclass SS3I_unop_rm_int_y<bits<8> opc, string OpcodeStr,
                              Intrinsic IntId256> {
  def rr256 : SS38I<opc, MRMSrcReg,
                    (outs VR256:$dst), (ins VR256:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR256:$dst, (IntId256 VR256:$src))]>,
              Sched<[WriteVecALU]>;

  def rm256 : SS38I<opc, MRMSrcMem,
                    (outs VR256:$dst), (ins i256mem:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR256:$dst,
                          (IntId256 (bitconvert (memopv4i64 addr:$src))))]>,
              Sched<[WriteVecALULd]>;
}
5674f4a2713aSLionel Sambuc
// Helper fragments to match sext vXi1 to vXiY.
// The i8 variants match X86pcmpgt of an all-zeros vector against $src; the
// i16/i32 variants match X86vsrai of $src by (element width - 1) bits.

// 128-bit (VR128) forms.
def v16i1sextv16i8  : PatLeaf<(v16i8 (X86pcmpgt
                                       (bc_v16i8 (v4i32 immAllZerosV)),
                                       VR128:$src))>;
def v8i1sextv8i16   : PatLeaf<(v8i16 (X86vsrai VR128:$src, (i8 15)))>;
def v4i1sextv4i32   : PatLeaf<(v4i32 (X86vsrai VR128:$src, (i8 31)))>;

// 256-bit (VR256) forms.
def v32i1sextv32i8  : PatLeaf<(v32i8 (X86pcmpgt
                                       (bc_v32i8 (v8i32 immAllZerosV)),
                                       VR256:$src))>;
def v16i1sextv16i16 : PatLeaf<(v16i16 (X86vsrai VR256:$src, (i8 15)))>;
def v8i1sextv8i32   : PatLeaf<(v8i32 (X86vsrai VR256:$src, (i8 31)))>;
5684f4a2713aSLionel Sambuc
// VEX-encoded 128-bit packed absolute value, plus patterns that recognise the
// expanded form xor(sext, add(x, sext)) and select the matching VPABS
// register form.
let Predicates = [HasAVX] in {
  defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb", int_x86_ssse3_pabs_b_128>,
                VEX;
  defm VPABSW : SS3I_unop_rm_int<0x1D, "vpabsw", int_x86_ssse3_pabs_w_128>,
                VEX;
  defm VPABSD : SS3I_unop_rm_int<0x1E, "vpabsd", int_x86_ssse3_pabs_d_128>,
                VEX;

  // Bytes.
  def : Pat<(xor (bc_v2i64 (v16i1sextv16i8)),
                 (bc_v2i64 (add (v16i8 VR128:$src), (v16i1sextv16i8)))),
            (VPABSBrr128 VR128:$src)>;
  // Words.
  def : Pat<(xor (bc_v2i64 (v8i1sextv8i16)),
                 (bc_v2i64 (add (v8i16 VR128:$src), (v8i1sextv8i16)))),
            (VPABSWrr128 VR128:$src)>;
  // Doublewords.
  def : Pat<(xor (bc_v2i64 (v4i1sextv4i32)),
                 (bc_v2i64 (add (v4i32 VR128:$src), (v4i1sextv4i32)))),
            (VPABSDrr128 VR128:$src)>;
}
5706f4a2713aSLionel Sambuc
// VEX.L-encoded 256-bit packed absolute value, plus patterns that recognise
// the expanded form xor(sext, add(x, sext)) on VR256 and select the matching
// VPABS register form.
let Predicates = [HasAVX2] in {
  defm VPABSB : SS3I_unop_rm_int_y<0x1C, "vpabsb", int_x86_avx2_pabs_b>,
                VEX, VEX_L;
  defm VPABSW : SS3I_unop_rm_int_y<0x1D, "vpabsw", int_x86_avx2_pabs_w>,
                VEX, VEX_L;
  defm VPABSD : SS3I_unop_rm_int_y<0x1E, "vpabsd", int_x86_avx2_pabs_d>,
                VEX, VEX_L;

  // Bytes.
  def : Pat<(xor (bc_v4i64 (v32i1sextv32i8)),
                 (bc_v4i64 (add (v32i8 VR256:$src), (v32i1sextv32i8)))),
            (VPABSBrr256 VR256:$src)>;
  // Words.
  def : Pat<(xor (bc_v4i64 (v16i1sextv16i16)),
                 (bc_v4i64 (add (v16i16 VR256:$src), (v16i1sextv16i16)))),
            (VPABSWrr256 VR256:$src)>;
  // Doublewords.
  def : Pat<(xor (bc_v4i64 (v8i1sextv8i32)),
                 (bc_v4i64 (add (v8i32 VR256:$src), (v8i1sextv8i32)))),
            (VPABSDrr256 VR256:$src)>;
}
5728f4a2713aSLionel Sambuc
// Legacy-encoded 128-bit packed absolute value (byte, word, doubleword),
// sharing opcodes and intrinsics with the VEX-prefixed VPABS* definitions.
defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb", int_x86_ssse3_pabs_b_128>;
defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw", int_x86_ssse3_pabs_w_128>;
defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", int_x86_ssse3_pabs_d_128>;
5735f4a2713aSLionel Sambuc
// Recognise the expanded absolute-value form xor(sext, add(x, sext)) on VR128
// and select the legacy-encoded PABS register instruction.
//
// Guarded by UseSSSE3 rather than HasSSSE3: identical source patterns exist
// for the VEX-encoded VPABS*rr128 instructions, and the other legacy-encoded
// definitions in this file (e.g. PALIGN, the ADDSUB patterns) use the Use*
// predicates so that the non-VEX form is not a candidate when AVX is enabled.
// NOTE(review): assumes UseSSSE3 is defined as "SSSE3 without AVX" alongside
// the other X86 predicates - confirm against the predicate definitions.
let Predicates = [UseSSSE3] in {
  // Bytes.
  def : Pat<(xor
            (bc_v2i64 (v16i1sextv16i8)),
            (bc_v2i64 (add (v16i8 VR128:$src), (v16i1sextv16i8)))),
            (PABSBrr128 VR128:$src)>;
  // Words.
  def : Pat<(xor
            (bc_v2i64 (v8i1sextv8i16)),
            (bc_v2i64 (add (v8i16 VR128:$src), (v8i1sextv8i16)))),
            (PABSWrr128 VR128:$src)>;
  // Doublewords.
  def : Pat<(xor
            (bc_v2i64 (v4i1sextv4i32)),
            (bc_v2i64 (add (v4i32 VR128:$src), (v4i1sextv4i32)))),
            (PABSDrr128 VR128:$src)>;
}
5750f4a2713aSLionel Sambuc
5751f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===//
5752f4a2713aSLionel Sambuc// SSSE3 - Packed Binary Operator Instructions
5753f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===//
5754f4a2713aSLionel Sambuc
// Itinerary bundles (rr/rm itinerary classes) for the SSSE3 binary operators.
// Each `let Sched = ...` replaces the Sched field that OpndItins would
// otherwise default to.

// Horizontal integer add/subtract.
let Sched = WriteVecALU in {
  def SSE_PHADDSUBD  : OpndItins<IIC_SSE_PHADDSUBD_RR, IIC_SSE_PHADDSUBD_RM>;
  def SSE_PHADDSUBSW : OpndItins<IIC_SSE_PHADDSUBSW_RR, IIC_SSE_PHADDSUBSW_RM>;
  def SSE_PHADDSUBW  : OpndItins<IIC_SSE_PHADDSUBW_RR, IIC_SSE_PHADDSUBW_RM>;
}

// Byte shuffle.
let Sched = WriteShuffle in {
  def SSE_PSHUFB : OpndItins<IIC_SSE_PSHUFB_RR, IIC_SSE_PSHUFB_RM>;
}

// Packed sign.
let Sched = WriteVecALU in {
  def SSE_PSIGN : OpndItins<IIC_SSE_PSIGN_RR, IIC_SSE_PSIGN_RM>;
}

// Packed multiply high with round and scale; the same itinerary class is used
// for both the rr and rm slots.
let Sched = WriteVecIMul in {
  def SSE_PMULHRSW : OpndItins<IIC_SSE_PMULHRSW, IIC_SSE_PMULHRSW>;
}
5778f4a2713aSLionel Sambuc
/// SS3I_binop_rm - SSSE3 binary operation selected through an SDNode.
///   opc        - opcode byte
///   OpcodeStr  - assembly mnemonic
///   OpNode     - SDNode matched by the selection patterns
///   OpVT       - value type of the result
///   RC         - register class of $dst/$src1 (and $src2 in the rr form)
///   memop_frag - load fragment for the rm form; its result is bitconverted
///   x86memop   - memory operand class for $src2 in the rm form
///   itins      - supplies the rr/rm itinerary classes and the Sched write
///   Is2Addr    - 1: two-operand asm string; 0: three-operand asm string
/// The rr record is defined under `let isCommutable = 1`.
multiclass SS3I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         ValueType OpVT, RegisterClass RC,
                         PatFrag memop_frag, X86MemOperand x86memop,
                         OpndItins itins, bit Is2Addr = 1> {
  let isCommutable = 1 in
  def rr : SS38I<opc, MRMSrcReg,
                 (outs RC:$dst), (ins RC:$src1, RC:$src2),
                 !if(Is2Addr,
                     !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                     !strconcat(OpcodeStr,
                                "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
                 [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))],
                 itins.rr>,
           Sched<[itins.Sched]>;

  def rm : SS38I<opc, MRMSrcMem,
                 (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
                 !if(Is2Addr,
                     !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                     !strconcat(OpcodeStr,
                                "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
                 [(set RC:$dst,
                       (OpVT (OpNode RC:$src1,
                                     (bitconvert (memop_frag addr:$src2)))))],
                 itins.rm>,
           Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
5802f4a2713aSLionel Sambuc
/// SS3I_binop_rm_int - 128-bit SSSE3 binary operation selected through the
/// intrinsic IntId128; the element type may be v*{i8,i16,i32}.
///   opc       - opcode byte
///   OpcodeStr - assembly mnemonic
///   IntId128  - intrinsic matched by both selection patterns
///   itins     - only its Sched field is used here; no itinerary class is
///               passed to SS38I
///   Is2Addr   - 1: two-operand asm string; 0: three-operand asm string
/// The rr128 record is defined under `let isCommutable = 1`; rm128 loads
/// $src2 with memopv2i64 and bitconverts it.
multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
                             Intrinsic IntId128, OpndItins itins,
                             bit Is2Addr = 1> {
  let isCommutable = 1 in
  def rr128 : SS38I<opc, MRMSrcReg,
                    (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                    !if(Is2Addr,
                        !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                        !strconcat(OpcodeStr,
                                   "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
                    [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
              Sched<[itins.Sched]>;

  def rm128 : SS38I<opc, MRMSrcMem,
                    (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
                    !if(Is2Addr,
                        !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                        !strconcat(OpcodeStr,
                                   "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
                    [(set VR128:$dst,
                          (IntId128 VR128:$src1,
                                    (bitconvert (memopv2i64 addr:$src2))))]>,
              Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
5825f4a2713aSLionel Sambuc
/// SS3I_binop_rm_int_y - 256-bit binary operation selected through the
/// intrinsic IntId256. The asm string is always the three-operand form.
///   opc       - opcode byte
///   OpcodeStr - assembly mnemonic
///   IntId256  - intrinsic matched by both selection patterns
///   Sched     - foldable scheduling write; rr256 uses it directly and rm256
///               uses its Folded variant together with ReadAfterLd
/// The rr256 record is defined under `let isCommutable = 1`; rm256 loads
/// $src2 with loadv4i64 and bitconverts it.
multiclass SS3I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
                               Intrinsic IntId256,
                               X86FoldableSchedWrite Sched> {
  let isCommutable = 1 in
  def rr256 : SS38I<opc, MRMSrcReg,
                    (outs VR256:$dst), (ins VR256:$src1, VR256:$src2),
                    !strconcat(OpcodeStr,
                               "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set VR256:$dst, (IntId256 VR256:$src1, VR256:$src2))]>,
              Sched<[Sched]>;

  def rm256 : SS38I<opc, MRMSrcMem,
                    (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2),
                    !strconcat(OpcodeStr,
                               "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set VR256:$dst,
                          (IntId256 VR256:$src1,
                                    (bitconvert (loadv4i64 addr:$src2))))]>,
              Sched<[Sched.Folded, ReadAfterLd]>;
}
5842f4a2713aSLionel Sambuc
// VEX-encoded 128-bit SSSE3 binary operators. None carries an immediate
// (ImmT = NoImm) and all use the three-operand asm form (Is2Addr = 0).
// Everything except VPMULHRSW sits inside `let isCommutable = 0`.
let ImmT = NoImm, Predicates = [HasAVX] in {
let isCommutable = 0 in {
  // Horizontal add/subtract (SDNode-selected).
  defm VPHADDW    : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v8i16,
                                  VR128, loadv2i64, i128mem,
                                  SSE_PHADDSUBW, 0>, VEX_4V;
  defm VPHADDD    : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v4i32,
                                  VR128, loadv2i64, i128mem,
                                  SSE_PHADDSUBD, 0>, VEX_4V;
  defm VPHSUBW    : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v8i16,
                                  VR128, loadv2i64, i128mem,
                                  SSE_PHADDSUBW, 0>, VEX_4V;
  defm VPHSUBD    : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v4i32,
                                  VR128, loadv2i64, i128mem,
                                  SSE_PHADDSUBD, 0>, VEX_4V;

  // Packed sign.
  defm VPSIGNB    : SS3I_binop_rm<0x08, "vpsignb", X86psign, v16i8,
                                  VR128, loadv2i64, i128mem,
                                  SSE_PSIGN, 0>, VEX_4V;
  defm VPSIGNW    : SS3I_binop_rm<0x09, "vpsignw", X86psign, v8i16,
                                  VR128, loadv2i64, i128mem,
                                  SSE_PSIGN, 0>, VEX_4V;
  defm VPSIGND    : SS3I_binop_rm<0x0A, "vpsignd", X86psign, v4i32,
                                  VR128, loadv2i64, i128mem,
                                  SSE_PSIGN, 0>, VEX_4V;

  // Byte shuffle.
  defm VPSHUFB    : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v16i8,
                                  VR128, loadv2i64, i128mem,
                                  SSE_PSHUFB, 0>, VEX_4V;

  // Intrinsic-selected operations.
  defm VPHADDSW   : SS3I_binop_rm_int<0x03, "vphaddsw",
                                      int_x86_ssse3_phadd_sw_128,
                                      SSE_PHADDSUBSW, 0>, VEX_4V;
  defm VPHSUBSW   : SS3I_binop_rm_int<0x07, "vphsubsw",
                                      int_x86_ssse3_phsub_sw_128,
                                      SSE_PHADDSUBSW, 0>, VEX_4V;
  defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw",
                                      int_x86_ssse3_pmadd_ub_sw_128,
                                      SSE_PMADD, 0>, VEX_4V;
}

// Outside the isCommutable = 0 region.
defm VPMULHRSW    : SS3I_binop_rm_int<0x0B, "vpmulhrsw",
                                      int_x86_ssse3_pmul_hr_sw_128,
                                      SSE_PMULHRSW, 0>, VEX_4V;
}
5883f4a2713aSLionel Sambuc
// VEX.L-encoded 256-bit forms of the SSSE3 binary operators. None carries an
// immediate (ImmT = NoImm) and all use the three-operand asm form.
// Everything except VPMULHRSW sits inside `let isCommutable = 0`.
//
// Each SDNode-selected instruction takes the itinerary bundle that matches
// its operation, mirroring the 128-bit definitions: SSE_PHADDSUBW for the
// word horizontal ops, SSE_PHADDSUBD for the doubleword horizontal ops and
// SSE_PSIGN for the packed-sign ops. All three bundles carry
// Sched = WriteVecALU, so only the itinerary classes differ.
let ImmT = NoImm, Predicates = [HasAVX2] in {
let isCommutable = 0 in {
  // Horizontal add/subtract (SDNode-selected).
  defm VPHADDWY   : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v16i16, VR256,
                                  loadv4i64, i256mem,
                                  SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
  defm VPHADDDY   : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v8i32, VR256,
                                  loadv4i64, i256mem,
                                  SSE_PHADDSUBD, 0>, VEX_4V, VEX_L;
  defm VPHSUBWY   : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v16i16, VR256,
                                  loadv4i64, i256mem,
                                  SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
  defm VPHSUBDY   : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v8i32, VR256,
                                  loadv4i64, i256mem,
                                  SSE_PHADDSUBD, 0>, VEX_4V, VEX_L;

  // Packed sign.
  defm VPSIGNBY   : SS3I_binop_rm<0x08, "vpsignb", X86psign, v32i8, VR256,
                                  loadv4i64, i256mem,
                                  SSE_PSIGN, 0>, VEX_4V, VEX_L;
  defm VPSIGNWY   : SS3I_binop_rm<0x09, "vpsignw", X86psign, v16i16, VR256,
                                  loadv4i64, i256mem,
                                  SSE_PSIGN, 0>, VEX_4V, VEX_L;
  defm VPSIGNDY   : SS3I_binop_rm<0x0A, "vpsignd", X86psign, v8i32, VR256,
                                  loadv4i64, i256mem,
                                  SSE_PSIGN, 0>, VEX_4V, VEX_L;

  // Byte shuffle.
  defm VPSHUFBY   : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v32i8, VR256,
                                  loadv4i64, i256mem,
                                  SSE_PSHUFB, 0>, VEX_4V, VEX_L;

  // Intrinsic-selected operations; these take a scheduling write directly.
  defm VPHADDSW   : SS3I_binop_rm_int_y<0x03, "vphaddsw",
                                        int_x86_avx2_phadd_sw,
                                        WriteVecALU>, VEX_4V, VEX_L;
  defm VPHSUBSW   : SS3I_binop_rm_int_y<0x07, "vphsubsw",
                                        int_x86_avx2_phsub_sw,
                                        WriteVecALU>, VEX_4V, VEX_L;
  defm VPMADDUBSW : SS3I_binop_rm_int_y<0x04, "vpmaddubsw",
                                        int_x86_avx2_pmadd_ub_sw,
                                        WriteVecIMul>, VEX_4V, VEX_L;
}

// Outside the isCommutable = 0 region.
defm VPMULHRSW    : SS3I_binop_rm_int_y<0x0B, "vpmulhrsw",
                                        int_x86_avx2_pmul_hr_sw,
                                        WriteVecIMul>, VEX_4V, VEX_L;
}
5924f4a2713aSLionel Sambuc
// Legacy-encoded SSSE3 binary operators on VR128.
// None of these have i8 immediate fields. The destination is tied to the
// first source and Is2Addr keeps its default of 1 (two-operand asm form).
// Everything except PMULHRSW sits inside `let isCommutable = 0`.
let ImmT = NoImm, Constraints = "$src1 = $dst" in {
let isCommutable = 0 in {
  // Horizontal add/subtract (SDNode-selected).
  defm PHADDW    : SS3I_binop_rm<0x01, "phaddw", X86hadd, v8i16,
                                 VR128, memopv2i64, i128mem, SSE_PHADDSUBW>;
  defm PHADDD    : SS3I_binop_rm<0x02, "phaddd", X86hadd, v4i32,
                                 VR128, memopv2i64, i128mem, SSE_PHADDSUBD>;
  defm PHSUBW    : SS3I_binop_rm<0x05, "phsubw", X86hsub, v8i16,
                                 VR128, memopv2i64, i128mem, SSE_PHADDSUBW>;
  defm PHSUBD    : SS3I_binop_rm<0x06, "phsubd", X86hsub, v4i32,
                                 VR128, memopv2i64, i128mem, SSE_PHADDSUBD>;

  // Packed sign.
  defm PSIGNB    : SS3I_binop_rm<0x08, "psignb", X86psign, v16i8,
                                 VR128, memopv2i64, i128mem, SSE_PSIGN>;
  defm PSIGNW    : SS3I_binop_rm<0x09, "psignw", X86psign, v8i16,
                                 VR128, memopv2i64, i128mem, SSE_PSIGN>;
  defm PSIGND    : SS3I_binop_rm<0x0A, "psignd", X86psign, v4i32,
                                 VR128, memopv2i64, i128mem, SSE_PSIGN>;

  // Byte shuffle.
  defm PSHUFB    : SS3I_binop_rm<0x00, "pshufb", X86pshufb, v16i8,
                                 VR128, memopv2i64, i128mem, SSE_PSHUFB>;

  // Intrinsic-selected operations.
  defm PHADDSW   : SS3I_binop_rm_int<0x03, "phaddsw",
                                     int_x86_ssse3_phadd_sw_128,
                                     SSE_PHADDSUBSW>;
  defm PHSUBSW   : SS3I_binop_rm_int<0x07, "phsubsw",
                                     int_x86_ssse3_phsub_sw_128,
                                     SSE_PHADDSUBSW>;
  defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw",
                                     int_x86_ssse3_pmadd_ub_sw_128,
                                     SSE_PMADD>;
}

// Outside the isCommutable = 0 region.
defm PMULHRSW    : SS3I_binop_rm_int<0x0B, "pmulhrsw",
                                     int_x86_ssse3_pmul_hr_sw_128,
                                     SSE_PMULHRSW>;
}
5957f4a2713aSLionel Sambuc
5958f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===//
5959f4a2713aSLionel Sambuc// SSSE3 - Packed Align Instruction Patterns
5960f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===//
5961f4a2713aSLionel Sambuc
// ssse3_palignr - 128-bit packed-align instruction pair (opcode 0x0F on the
// SS3AI class) taking two vector sources and an i8 immediate.
//   asm     - assembly mnemonic
//   Is2Addr - 1: asm string omits $src1; 0: asm string prints all operands
// Both records have an empty pattern list and hasSideEffects = 0; the memory
// form is additionally marked mayLoad.
multiclass ssse3_palignr<string asm, bit Is2Addr = 1> {
  let hasSideEffects = 0 in {
    // Register-register form.
    def R128rr : SS3AI<0x0F, MRMSrcReg,
                       (outs VR128:$dst),
                       (ins VR128:$src1, VR128:$src2, i8imm:$src3),
                       !if(Is2Addr,
                           !strconcat(asm,
                             "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                           !strconcat(asm,
                             "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
                       [], IIC_SSE_PALIGNRR>,
                 Sched<[WriteShuffle]>;

    // Register-memory form.
    let mayLoad = 1 in
    def R128rm : SS3AI<0x0F, MRMSrcMem,
                       (outs VR128:$dst),
                       (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
                       !if(Is2Addr,
                           !strconcat(asm,
                             "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                           !strconcat(asm,
                             "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
                       [], IIC_SSE_PALIGNRM>,
                 Sched<[WriteShuffleLd, ReadAfterLd]>;
  }
}
5981f4a2713aSLionel Sambuc
// ssse3_palignr_y - 256-bit packed-align instruction pair (opcode 0x0F on the
// SS3AI class) taking two VR256 sources and an i8 immediate.
//   asm     - assembly mnemonic
//   Is2Addr - accepted for signature parity with the 128-bit multiclass but
//             not referenced: the asm string always prints all operands
// Both records have an empty pattern list, no itinerary class, and
// hasSideEffects = 0; the memory form is additionally marked mayLoad.
multiclass ssse3_palignr_y<string asm, bit Is2Addr = 1> {
  let hasSideEffects = 0 in {
    // Register-register form.
    def R256rr : SS3AI<0x0F, MRMSrcReg,
                       (outs VR256:$dst),
                       (ins VR256:$src1, VR256:$src2, i8imm:$src3),
                       !strconcat(asm,
                         "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                       []>,
                 Sched<[WriteShuffle]>;

    // Register-memory form.
    let mayLoad = 1 in
    def R256rm : SS3AI<0x0F, MRMSrcMem,
                       (outs VR256:$dst),
                       (ins VR256:$src1, i256mem:$src2, i8imm:$src3),
                       !strconcat(asm,
                         "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                       []>,
                 Sched<[WriteShuffleLd, ReadAfterLd]>;
  }
}
5997f4a2713aSLionel Sambuc
// Instantiate the align-right instructions. Record names are the defm prefix
// followed by the def name (e.g. VPALIGN + R256rr = VPALIGNR256rr).
//  - VPALIGN (HasAVX):   128-bit VEX_4V form, Is2Addr = 0.
//  - VPALIGN (HasAVX2):  256-bit VEX_4V + VEX_L form.
//  - PALIGN  (UseSSSE3): two-address form with $src1 tied to $dst.
5998f4a2713aSLionel Sambuclet Predicates = [HasAVX] in
5999f4a2713aSLionel Sambuc  defm VPALIGN : ssse3_palignr<"vpalignr", 0>, VEX_4V;
6000f4a2713aSLionel Sambuclet Predicates = [HasAVX2] in
6001f4a2713aSLionel Sambuc  defm VPALIGN : ssse3_palignr_y<"vpalignr", 0>, VEX_4V, VEX_L;
6002f4a2713aSLionel Sambuclet Constraints = "$src1 = $dst", Predicates = [UseSSSE3] in
6003f4a2713aSLionel Sambuc  defm PALIGN : ssse3_palignr<"palignr">;
6004f4a2713aSLionel Sambuc
// Select X86PAlignr on 256-bit vector types (v8i32, v8f32, v16i16, v32i8) to
// VPALIGNR256rr. The instruction receives the node's two vector operands in
// swapped order ($src2 first, then $src1).
6005f4a2713aSLionel Sambuclet Predicates = [HasAVX2] in {
6006f4a2713aSLionel Sambucdef : Pat<(v8i32 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
6007f4a2713aSLionel Sambuc          (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
6008f4a2713aSLionel Sambucdef : Pat<(v8f32 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
6009f4a2713aSLionel Sambuc          (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
6010f4a2713aSLionel Sambucdef : Pat<(v16i16 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
6011f4a2713aSLionel Sambuc          (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
6012f4a2713aSLionel Sambucdef : Pat<(v32i8 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
6013f4a2713aSLionel Sambuc          (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
6014f4a2713aSLionel Sambuc}
6015f4a2713aSLionel Sambuc
// Select X86PAlignr on 128-bit vector types (v4i32, v4f32, v8i16, v16i8) to
// the VEX-encoded VPALIGNR128rr when AVX is available. The node's vector
// operands are passed to the instruction in swapped order.
6016f4a2713aSLionel Sambuclet Predicates = [HasAVX] in {
6017f4a2713aSLionel Sambucdef : Pat<(v4i32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
6018f4a2713aSLionel Sambuc          (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
6019f4a2713aSLionel Sambucdef : Pat<(v4f32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
6020f4a2713aSLionel Sambuc          (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
6021f4a2713aSLionel Sambucdef : Pat<(v8i16 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
6022f4a2713aSLionel Sambuc          (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
6023f4a2713aSLionel Sambucdef : Pat<(v16i8 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
6024f4a2713aSLionel Sambuc          (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
6025f4a2713aSLionel Sambuc}
6026f4a2713aSLionel Sambuc
// Select X86PAlignr on 128-bit vector types (v4i32, v4f32, v8i16, v16i8) to
// the non-VEX PALIGNR128rr under UseSSSE3. The node's vector operands are
// passed to the instruction in swapped order.
6027f4a2713aSLionel Sambuclet Predicates = [UseSSSE3] in {
6028f4a2713aSLionel Sambucdef : Pat<(v4i32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
6029f4a2713aSLionel Sambuc          (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
6030f4a2713aSLionel Sambucdef : Pat<(v4f32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
6031f4a2713aSLionel Sambuc          (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
6032f4a2713aSLionel Sambucdef : Pat<(v8i16 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
6033f4a2713aSLionel Sambuc          (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
6034f4a2713aSLionel Sambucdef : Pat<(v16i8 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
6035f4a2713aSLionel Sambuc          (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
6036f4a2713aSLionel Sambuc}
6037f4a2713aSLionel Sambuc
6038f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===//
6039f4a2713aSLionel Sambuc// SSE3 - Thread synchronization
6040f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===//
6041f4a2713aSLionel Sambuc
// MONITOR / MWAIT definitions, all scheduled as WriteSystem and all requiring
// HasSSE3.
6042f4a2713aSLionel Sambuclet SchedRW = [WriteSystem] in {
// MONITOR is a pseudo-instruction with explicit operands that matches the
// int_x86_sse3_monitor intrinsic; it is expanded by a custom inserter.
6043f4a2713aSLionel Sambuclet usesCustomInserter = 1 in {
6044f4a2713aSLionel Sambucdef MONITOR : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3),
6045f4a2713aSLionel Sambuc                [(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>,
6046f4a2713aSLionel Sambuc                Requires<[HasSSE3]>;
6047f4a2713aSLionel Sambuc}
6048f4a2713aSLionel Sambuc
// MONITORrrr has no explicit operands and no pattern; its inputs are the
// implicit uses EAX, ECX and EDX.
6049f4a2713aSLionel Sambuclet Uses = [EAX, ECX, EDX] in
6050f4a2713aSLionel Sambucdef MONITORrrr : I<0x01, MRM_C8, (outs), (ins), "monitor", [], IIC_SSE_MONITOR>,
6051f4a2713aSLionel Sambuc                 TB, Requires<[HasSSE3]>;
// MWAITrr matches int_x86_sse3_mwait directly on the physical registers
// ECX and EAX, which are also its implicit uses.
6052f4a2713aSLionel Sambuclet Uses = [ECX, EAX] in
6053f4a2713aSLionel Sambucdef MWAITrr   : I<0x01, MRM_C9, (outs), (ins), "mwait",
6054f4a2713aSLionel Sambuc                [(int_x86_sse3_mwait ECX, EAX)], IIC_SSE_MWAIT>,
6055f4a2713aSLionel Sambuc                TB, Requires<[HasSSE3]>;
6056f4a2713aSLionel Sambuc} // SchedRW
6057f4a2713aSLionel Sambuc
// Assembler aliases that accept mwait/monitor written with explicit register
// operands and map them onto the operand-less MWAITrr / MONITORrrr records.
// The 32-bit register spellings are accepted outside 64-bit mode and the
// 64-bit spellings in 64-bit mode.
6058*0a6a1f1dSLionel Sambucdef : InstAlias<"mwait\t{%eax, %ecx|ecx, eax}", (MWAITrr)>, Requires<[Not64BitMode]>;
6059f4a2713aSLionel Sambucdef : InstAlias<"mwait\t{%rax, %rcx|rcx, rax}", (MWAITrr)>, Requires<[In64BitMode]>;
6060f4a2713aSLionel Sambuc
6061f4a2713aSLionel Sambucdef : InstAlias<"monitor\t{%eax, %ecx, %edx|edx, ecx, eax}", (MONITORrrr)>,
6062*0a6a1f1dSLionel Sambuc      Requires<[Not64BitMode]>;
6063f4a2713aSLionel Sambucdef : InstAlias<"monitor\t{%rax, %rcx, %rdx|rdx, rcx, rax}", (MONITORrrr)>,
6064f4a2713aSLionel Sambuc      Requires<[In64BitMode]>;
6065f4a2713aSLionel Sambuc
6066f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===//
6067f4a2713aSLionel Sambuc// SSE4.1 - Packed Move with Sign/Zero Extend
6068f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===//
6069f4a2713aSLionel Sambuc
// SS41I_pmovx_rrrm - register (rr) and memory (rm) source forms of one packed
// move-with-extend instruction.
//   opc       - opcode byte shared by both forms.
//   OpcodeStr - assembly mnemonic.
//   MemOp     - memory operand class used by the rm form.
//   OutRC     - destination register class.
//   InRC      - source register class of the rr form.
//   itins     - supplies the rr/rm itineraries and the scheduling class
//               (Sched for rr, Sched.Folded for rm).
// Neither def carries a selection pattern.
6070*0a6a1f1dSLionel Sambucmulticlass SS41I_pmovx_rrrm<bits<8> opc, string OpcodeStr, X86MemOperand MemOp,
6071*0a6a1f1dSLionel Sambuc                          RegisterClass OutRC, RegisterClass InRC,
6072*0a6a1f1dSLionel Sambuc                          OpndItins itins> {
6073*0a6a1f1dSLionel Sambuc  def rr : SS48I<opc, MRMSrcReg, (outs OutRC:$dst), (ins InRC:$src),
6074f4a2713aSLionel Sambuc                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
6075*0a6a1f1dSLionel Sambuc                 [], itins.rr>,
6076*0a6a1f1dSLionel Sambuc                 Sched<[itins.Sched]>;
6077f4a2713aSLionel Sambuc
6078*0a6a1f1dSLionel Sambuc  def rm : SS48I<opc, MRMSrcMem, (outs OutRC:$dst), (ins MemOp:$src),
6079f4a2713aSLionel Sambuc                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
6080*0a6a1f1dSLionel Sambuc                 [],
6081*0a6a1f1dSLionel Sambuc                 itins.rm>, Sched<[itins.Sched.Folded]>;
6082f4a2713aSLionel Sambuc}
6083f4a2713aSLionel Sambuc
// SS41I_pmovx_rm_all - emit the three encodings of one move-with-extend
// instruction:
//   NAME      - VR128 <- VR128 / MemOp, using SSEItins, no predicate set here.
//   V#NAME    - VEX form, VR128 <- VR128 / MemOp, AVXItins, HasAVX.
//   V#NAME#Y  - VEX + VEX_L form, VR256 <- VR128 / MemYOp, AVX2Itins, HasAVX2.
// The VEX forms use the mnemonic "v" + OpcodeStr.
6084*0a6a1f1dSLionel Sambucmulticlass SS41I_pmovx_rm_all<bits<8> opc, string OpcodeStr,
6085*0a6a1f1dSLionel Sambuc                          X86MemOperand MemOp, X86MemOperand MemYOp,
6086*0a6a1f1dSLionel Sambuc                          OpndItins SSEItins, OpndItins AVXItins,
6087*0a6a1f1dSLionel Sambuc                          OpndItins AVX2Itins> {
6088*0a6a1f1dSLionel Sambuc  defm NAME : SS41I_pmovx_rrrm<opc, OpcodeStr, MemOp, VR128, VR128, SSEItins>;
6089*0a6a1f1dSLionel Sambuc  let Predicates = [HasAVX] in
6090*0a6a1f1dSLionel Sambuc    defm V#NAME   : SS41I_pmovx_rrrm<opc, !strconcat("v", OpcodeStr), MemOp,
6091*0a6a1f1dSLionel Sambuc                                     VR128, VR128, AVXItins>, VEX;
6092*0a6a1f1dSLionel Sambuc  let Predicates = [HasAVX2] in
6093*0a6a1f1dSLionel Sambuc    defm V#NAME#Y : SS41I_pmovx_rrrm<opc, !strconcat("v", OpcodeStr), MemYOp,
6094*0a6a1f1dSLionel Sambuc                                     VR256, VR128, AVX2Itins>, VEX, VEX_L;
6095f4a2713aSLionel Sambuc}
6096f4a2713aSLionel Sambuc
// SS41I_pmovx_rm - emit both the sign-extending (PMOVSX#NAME) and the
// zero-extending (PMOVZX#NAME) family for one element-size suffix.
//   opc       - opcode of the pmovsx variant; pmovzx uses opc + 0x10.
//   OpcodeStr - element-size suffix appended to "pmovsx" / "pmovzx".
//   MemOp     - memory operand for the 128-bit forms.
//   MemYOp    - memory operand for the 256-bit (Y) forms.
// Both families use SSE_INTALU_ITINS_SHUFF_P for the SSE form and
// DEFAULT_ITINS_SHUFFLESCHED for the AVX and AVX2 forms.
6097*0a6a1f1dSLionel Sambucmulticlass SS41I_pmovx_rm<bits<8> opc, string OpcodeStr,
6098*0a6a1f1dSLionel Sambuc                                X86MemOperand MemOp, X86MemOperand MemYOp> {
6099*0a6a1f1dSLionel Sambuc  defm PMOVSX#NAME : SS41I_pmovx_rm_all<opc, !strconcat("pmovsx", OpcodeStr),
6100*0a6a1f1dSLionel Sambuc                                        MemOp, MemYOp,
6101*0a6a1f1dSLionel Sambuc                                        SSE_INTALU_ITINS_SHUFF_P,
6102*0a6a1f1dSLionel Sambuc                                        DEFAULT_ITINS_SHUFFLESCHED,
6103*0a6a1f1dSLionel Sambuc                                        DEFAULT_ITINS_SHUFFLESCHED>;
6104*0a6a1f1dSLionel Sambuc  defm PMOVZX#NAME : SS41I_pmovx_rm_all<!add(opc, 0x10),
6105*0a6a1f1dSLionel Sambuc                                        !strconcat("pmovzx", OpcodeStr),
6106*0a6a1f1dSLionel Sambuc                                        MemOp, MemYOp,
6107*0a6a1f1dSLionel Sambuc                                        SSE_INTALU_ITINS_SHUFF_P,
6108*0a6a1f1dSLionel Sambuc                                        DEFAULT_ITINS_SHUFFLESCHED,
6109*0a6a1f1dSLionel Sambuc                                        DEFAULT_ITINS_SHUFFLESCHED>;
6110*0a6a1f1dSLionel Sambuc}
6111*0a6a1f1dSLionel Sambuc
// Instantiate every PMOVSX/PMOVZX variant. The groups below differ in the
// memory operand width of the 128-bit / 256-bit forms:
// BW, WD, DQ: 64-bit / 128-bit memory operands.
6112*0a6a1f1dSLionel Sambucdefm BW : SS41I_pmovx_rm<0x20, "bw", i64mem, i128mem>;
6113*0a6a1f1dSLionel Sambucdefm WD : SS41I_pmovx_rm<0x23, "wd", i64mem, i128mem>;
6114*0a6a1f1dSLionel Sambucdefm DQ : SS41I_pmovx_rm<0x25, "dq", i64mem, i128mem>;
6115*0a6a1f1dSLionel Sambuc
// BD, WQ: 32-bit / 64-bit memory operands.
6116*0a6a1f1dSLionel Sambucdefm BD : SS41I_pmovx_rm<0x21, "bd", i32mem, i64mem>;
6117*0a6a1f1dSLionel Sambucdefm WQ : SS41I_pmovx_rm<0x24, "wq", i32mem, i64mem>;
6118*0a6a1f1dSLionel Sambuc
// BQ: 16-bit / 32-bit memory operands.
6119*0a6a1f1dSLionel Sambucdefm BQ : SS41I_pmovx_rm<0x22, "bq", i16mem, i32mem>;
6120*0a6a1f1dSLionel Sambuc
6121*0a6a1f1dSLionel Sambuc// AVX2 Patterns
// SS41I_pmovx_avx2_patterns - selection patterns mapping ExtOp (a vector
// sign- or zero-extend node) onto the 256-bit Y instructions.
//   OpcPrefix - instruction name prefix; the record looked up is
//               OpcPrefix # <suffix> # Yrr / Yrm (e.g. "VPMOVSX" # BWYrr).
//   ExtOp     - the extension SDNode to match.
// Each distinct source form is listed exactly once per instruction.
6122*0a6a1f1dSLionel Sambucmulticlass SS41I_pmovx_avx2_patterns<string OpcPrefix, SDNode ExtOp> {
6123*0a6a1f1dSLionel Sambuc  // Register-Register patterns
6124*0a6a1f1dSLionel Sambuc  def : Pat<(v16i16 (ExtOp (v16i8 VR128:$src))),
6125*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#BWYrr) VR128:$src)>;
6126*0a6a1f1dSLionel Sambuc  def : Pat<(v8i32 (ExtOp (v16i8 VR128:$src))),
6127*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#BDYrr) VR128:$src)>;
6128*0a6a1f1dSLionel Sambuc  def : Pat<(v4i64 (ExtOp (v16i8 VR128:$src))),
6129*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#BQYrr) VR128:$src)>;
6130*0a6a1f1dSLionel Sambuc
6131*0a6a1f1dSLionel Sambuc  def : Pat<(v8i32 (ExtOp (v8i16 VR128:$src))),
6132*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#WDYrr) VR128:$src)>;
6133*0a6a1f1dSLionel Sambuc  def : Pat<(v4i64 (ExtOp (v8i16 VR128:$src))),
6134*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#WQYrr) VR128:$src)>;
6135*0a6a1f1dSLionel Sambuc
6136*0a6a1f1dSLionel Sambuc  def : Pat<(v4i64 (ExtOp (v4i32 VR128:$src))),
6137*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#DQYrr) VR128:$src)>;
6138*0a6a1f1dSLionel Sambuc
6139*0a6a1f1dSLionel Sambuc  // On AVX2, we also support 256bit inputs.
6140*0a6a1f1dSLionel Sambuc  // FIXME: remove these patterns when the old shuffle lowering goes away.
  // The instruction only takes a VR128 source, so the low 128 bits are
  // extracted with EXTRACT_SUBREG ... sub_xmm.
6141*0a6a1f1dSLionel Sambuc  def : Pat<(v16i16 (ExtOp (v32i8 VR256:$src))),
6142*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#BWYrr) (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
6143*0a6a1f1dSLionel Sambuc  def : Pat<(v8i32 (ExtOp (v32i8 VR256:$src))),
6144*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#BDYrr) (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
6145*0a6a1f1dSLionel Sambuc  def : Pat<(v4i64 (ExtOp (v32i8 VR256:$src))),
6146*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#BQYrr) (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
6147*0a6a1f1dSLionel Sambuc
6148*0a6a1f1dSLionel Sambuc  def : Pat<(v8i32 (ExtOp (v16i16 VR256:$src))),
6149*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#WDYrr) (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
6150*0a6a1f1dSLionel Sambuc  def : Pat<(v4i64 (ExtOp (v16i16 VR256:$src))),
6151*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#WQYrr) (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
6152*0a6a1f1dSLionel Sambuc
6153*0a6a1f1dSLionel Sambuc  def : Pat<(v4i64 (ExtOp (v8i32 VR256:$src))),
6154*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#DQYrr) (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
6155*0a6a1f1dSLionel Sambuc
6156*0a6a1f1dSLionel Sambuc  // AVX2 Register-Memory patterns
6157*0a6a1f1dSLionel Sambuc  def : Pat<(v16i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
6158*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
6159*0a6a1f1dSLionel Sambuc  def : Pat<(v16i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
6160*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
6161*0a6a1f1dSLionel Sambuc  def : Pat<(v16i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
6162*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
6165*0a6a1f1dSLionel Sambuc
6166*0a6a1f1dSLionel Sambuc  def : Pat<(v8i32 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
6167*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
6168*0a6a1f1dSLionel Sambuc  def : Pat<(v8i32 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
6169*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
6170*0a6a1f1dSLionel Sambuc  def : Pat<(v8i32 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
6171*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
6172*0a6a1f1dSLionel Sambuc  def : Pat<(v8i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
6173*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
6174*0a6a1f1dSLionel Sambuc
6175*0a6a1f1dSLionel Sambuc  def : Pat<(v4i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
6176*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
6177*0a6a1f1dSLionel Sambuc  def : Pat<(v4i64 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
6178*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
6179*0a6a1f1dSLionel Sambuc  def : Pat<(v4i64 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
6180*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
6181*0a6a1f1dSLionel Sambuc  def : Pat<(v4i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
6182*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
6183*0a6a1f1dSLionel Sambuc
6184*0a6a1f1dSLionel Sambuc  def : Pat<(v8i32 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
6185*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
6186*0a6a1f1dSLionel Sambuc  def : Pat<(v8i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
6187*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
6188*0a6a1f1dSLionel Sambuc  def : Pat<(v8i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
6189*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
6192*0a6a1f1dSLionel Sambuc
6193*0a6a1f1dSLionel Sambuc  def : Pat<(v4i64 (ExtOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
6194*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
6195*0a6a1f1dSLionel Sambuc  def : Pat<(v4i64 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
6196*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
6197*0a6a1f1dSLionel Sambuc  def : Pat<(v4i64 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
6198*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
6199*0a6a1f1dSLionel Sambuc  def : Pat<(v4i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
6200*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
6201*0a6a1f1dSLionel Sambuc
6202*0a6a1f1dSLionel Sambuc  def : Pat<(v4i64 (ExtOp (bc_v4i32 (loadv2i64 addr:$src)))),
6203*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
6204*0a6a1f1dSLionel Sambuc  def : Pat<(v4i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),
6205*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
6206*0a6a1f1dSLionel Sambuc  def : Pat<(v4i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
6207*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
6210f4a2713aSLionel Sambuc}
6211f4a2713aSLionel Sambuc
// Instantiate the AVX2 patterns once for the sign-extending node (X86vsext,
// VPMOVSX* instructions) and once for the zero-extending node (X86vzext,
// VPMOVZX* instructions).
6212f4a2713aSLionel Sambuclet Predicates = [HasAVX2] in {
6213*0a6a1f1dSLionel Sambuc  defm : SS41I_pmovx_avx2_patterns<"VPMOVSX", X86vsext>;
6214*0a6a1f1dSLionel Sambuc  defm : SS41I_pmovx_avx2_patterns<"VPMOVZX", X86vzext>;
6215f4a2713aSLionel Sambuc}
6216f4a2713aSLionel Sambuc
6217*0a6a1f1dSLionel Sambuc// SSE4.1/AVX patterns.
// SS41I_pmovx_patterns - selection patterns mapping ExtOp onto the 128-bit
// move-with-extend instructions.
//   OpcPrefix - instruction name prefix; the record looked up is
//               OpcPrefix # <suffix> # rr / rm (e.g. "PMOVSX" # BWrr).
//   ExtOp     - the extension SDNode to match.
//   ExtLoad16 - load fragment used only in the first BQrm pattern, where the
//               scalar placed into the v4i32 comes from this fragment.
6218*0a6a1f1dSLionel Sambucmulticlass SS41I_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
6219*0a6a1f1dSLionel Sambuc                                PatFrag ExtLoad16> {
  // Register-register patterns.
6220*0a6a1f1dSLionel Sambuc  def : Pat<(v8i16 (ExtOp (v16i8 VR128:$src))),
6221*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#BWrr) VR128:$src)>;
6222*0a6a1f1dSLionel Sambuc  def : Pat<(v4i32 (ExtOp (v16i8 VR128:$src))),
6223*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#BDrr) VR128:$src)>;
6224*0a6a1f1dSLionel Sambuc  def : Pat<(v2i64 (ExtOp (v16i8 VR128:$src))),
6225*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#BQrr) VR128:$src)>;
6226*0a6a1f1dSLionel Sambuc
6227*0a6a1f1dSLionel Sambuc  def : Pat<(v4i32 (ExtOp (v8i16 VR128:$src))),
6228*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#WDrr) VR128:$src)>;
6229*0a6a1f1dSLionel Sambuc  def : Pat<(v2i64 (ExtOp (v8i16 VR128:$src))),
6230*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#WQrr) VR128:$src)>;
6231*0a6a1f1dSLionel Sambuc
6232*0a6a1f1dSLionel Sambuc  def : Pat<(v2i64 (ExtOp (v4i32 VR128:$src))),
6233*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#DQrr) VR128:$src)>;
6234*0a6a1f1dSLionel Sambuc
  // Register-memory patterns. For each instruction several equivalent source
  // forms are folded into the rm form: a scalar load placed in a vector, a
  // vzmovl / vzload of the address, and a full loadv2i64.
6235*0a6a1f1dSLionel Sambuc  def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
6236*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
6237*0a6a1f1dSLionel Sambuc  def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
6238*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
6239*0a6a1f1dSLionel Sambuc  def : Pat<(v8i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
6240*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
6241*0a6a1f1dSLionel Sambuc  def : Pat<(v8i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
6242*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
6243*0a6a1f1dSLionel Sambuc  def : Pat<(v8i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
6244*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
6245*0a6a1f1dSLionel Sambuc
6246*0a6a1f1dSLionel Sambuc  def : Pat<(v4i32 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
6247*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#BDrm) addr:$src)>;
6248*0a6a1f1dSLionel Sambuc  def : Pat<(v4i32 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
6249*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#BDrm) addr:$src)>;
6250*0a6a1f1dSLionel Sambuc  def : Pat<(v4i32 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
6251*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#BDrm) addr:$src)>;
6252*0a6a1f1dSLionel Sambuc  def : Pat<(v4i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
6253*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#BDrm) addr:$src)>;
6254*0a6a1f1dSLionel Sambuc
  // BQ: the scalar form goes through the ExtLoad16 fragment supplied by the
  // instantiation.
6255*0a6a1f1dSLionel Sambuc  def : Pat<(v2i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (ExtLoad16 addr:$src)))))),
6256*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#BQrm) addr:$src)>;
6257*0a6a1f1dSLionel Sambuc  def : Pat<(v2i64 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
6258*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#BQrm) addr:$src)>;
6259*0a6a1f1dSLionel Sambuc  def : Pat<(v2i64 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
6260*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#BQrm) addr:$src)>;
6261*0a6a1f1dSLionel Sambuc  def : Pat<(v2i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
6262*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#BQrm) addr:$src)>;
6263*0a6a1f1dSLionel Sambuc
6264*0a6a1f1dSLionel Sambuc  def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
6265*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
6266*0a6a1f1dSLionel Sambuc  def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
6267*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
6268*0a6a1f1dSLionel Sambuc  def : Pat<(v4i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
6269*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
6270*0a6a1f1dSLionel Sambuc  def : Pat<(v4i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
6271*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
6272*0a6a1f1dSLionel Sambuc  def : Pat<(v4i32 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
6273*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
6274*0a6a1f1dSLionel Sambuc
6275*0a6a1f1dSLionel Sambuc  def : Pat<(v2i64 (ExtOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
6276*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#WQrm) addr:$src)>;
6277*0a6a1f1dSLionel Sambuc  def : Pat<(v2i64 (ExtOp (v8i16 (vzmovl_v4i32 addr:$src)))),
6278*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#WQrm) addr:$src)>;
6279*0a6a1f1dSLionel Sambuc  def : Pat<(v2i64 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
6280*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#WQrm) addr:$src)>;
6281*0a6a1f1dSLionel Sambuc  def : Pat<(v2i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
6282*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#WQrm) addr:$src)>;
6283*0a6a1f1dSLionel Sambuc
6284*0a6a1f1dSLionel Sambuc  def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
6285*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
6286*0a6a1f1dSLionel Sambuc  def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
6287*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
6288*0a6a1f1dSLionel Sambuc  def : Pat<(v2i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),
6289*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
6290*0a6a1f1dSLionel Sambuc  def : Pat<(v2i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
6291*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
6292*0a6a1f1dSLionel Sambuc  def : Pat<(v2i64 (ExtOp (bc_v4i32 (loadv2i64 addr:$src)))),
6293*0a6a1f1dSLionel Sambuc            (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
6294*0a6a1f1dSLionel Sambuc}
6295f4a2713aSLionel Sambuc
// Instantiate the 128-bit patterns for the VEX-encoded instructions (HasAVX)
// and for the legacy-encoded ones (UseSSE41). The sign-extending variants pass
// extloadi32i16 as ExtLoad16; the zero-extending variants pass loadi16_anyext.
6296f4a2713aSLionel Sambuclet Predicates = [HasAVX] in {
6297*0a6a1f1dSLionel Sambuc  defm : SS41I_pmovx_patterns<"VPMOVSX", X86vsext, extloadi32i16>;
6298*0a6a1f1dSLionel Sambuc  defm : SS41I_pmovx_patterns<"VPMOVZX", X86vzext, loadi16_anyext>;
6299f4a2713aSLionel Sambuc}
6300f4a2713aSLionel Sambuc
6301f4a2713aSLionel Sambuclet Predicates = [UseSSE41] in {
6302*0a6a1f1dSLionel Sambuc  defm : SS41I_pmovx_patterns<"PMOVSX", X86vsext, extloadi32i16>;
6303*0a6a1f1dSLionel Sambuc  defm : SS41I_pmovx_patterns<"PMOVZX", X86vzext, loadi16_anyext>;
6304f4a2713aSLionel Sambuc}
6305f4a2713aSLionel Sambuc
6306f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===//
6307f4a2713aSLionel Sambuc// SSE4.1 - Extract Instructions
6308f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===//
6309f4a2713aSLionel Sambuc
6310f4a2713aSLionel Sambuc/// SS41I_extract8 - SSE 4.1 extract 8 bits to a GR32orGR64 reg or 8 bit mem
6311f4a2713aSLionel Sambucmulticlass SS41I_extract8<bits<8> opc, string OpcodeStr> {
  // Register destination: matches X86pextrb on a v16i8 source with an
  // immediate element index.
6312f4a2713aSLionel Sambuc  def rr : SS4AIi8<opc, MRMDestReg, (outs GR32orGR64:$dst),
6313f4a2713aSLionel Sambuc                 (ins VR128:$src1, i32i8imm:$src2),
6314f4a2713aSLionel Sambuc                 !strconcat(OpcodeStr,
6315f4a2713aSLionel Sambuc                            "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6316f4a2713aSLionel Sambuc                 [(set GR32orGR64:$dst, (X86pextrb (v16i8 VR128:$src1),
6317f4a2713aSLionel Sambuc                                         imm:$src2))]>,
6318*0a6a1f1dSLionel Sambuc                  Sched<[WriteShuffle]>;
  // Memory destination: matches an i8 store of the truncated, assertzext'd
  // X86pextrb result.
6319*0a6a1f1dSLionel Sambuc  let hasSideEffects = 0, mayStore = 1,
6320*0a6a1f1dSLionel Sambuc      SchedRW = [WriteShuffleLd, WriteRMW] in
6321f4a2713aSLionel Sambuc  def mr : SS4AIi8<opc, MRMDestMem, (outs),
6322f4a2713aSLionel Sambuc                 (ins i8mem:$dst, VR128:$src1, i32i8imm:$src2),
6323f4a2713aSLionel Sambuc                 !strconcat(OpcodeStr,
6324f4a2713aSLionel Sambuc                            "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6325*0a6a1f1dSLionel Sambuc                 [(store (i8 (trunc (assertzext (X86pextrb (v16i8 VR128:$src1),
6326*0a6a1f1dSLionel Sambuc						 imm:$src2)))), addr:$dst)]>;
6327f4a2713aSLionel Sambuc}
6328f4a2713aSLionel Sambuc
// VEX-encoded form, gated on HasAVX; the legacy form below sets no predicate
// at this point.
6329f4a2713aSLionel Sambuclet Predicates = [HasAVX] in
6330f4a2713aSLionel Sambuc  defm VPEXTRB : SS41I_extract8<0x14, "vpextrb">, VEX;
6331f4a2713aSLionel Sambuc
6332f4a2713aSLionel Sambucdefm PEXTRB      : SS41I_extract8<0x14, "pextrb">;
6333f4a2713aSLionel Sambuc
6334f4a2713aSLionel Sambuc
6335f4a2713aSLionel Sambuc/// SS41I_extract16 - SSE 4.1 extract 16 bits to memory destination
6336f4a2713aSLionel Sambucmulticlass SS41I_extract16<bits<8> opc, string OpcodeStr> {
  // Register-destination encoding. It has no pattern and is marked
  // isCodeGenOnly with ForceDisassemble, so it is never selected from a
  // pattern here but still appears in the disassembler tables.
6337*0a6a1f1dSLionel Sambuc  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
6338f4a2713aSLionel Sambuc  def rr_REV : SS4AIi8<opc, MRMDestReg, (outs GR32orGR64:$dst),
6339f4a2713aSLionel Sambuc                   (ins VR128:$src1, i32i8imm:$src2),
6340f4a2713aSLionel Sambuc                   !strconcat(OpcodeStr,
6341f4a2713aSLionel Sambuc                   "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6342*0a6a1f1dSLionel Sambuc                   []>, Sched<[WriteShuffle]>;
6343f4a2713aSLionel Sambuc
  // Memory destination: matches an i16 store of the truncated, assertzext'd
  // X86pextrw result.
6344*0a6a1f1dSLionel Sambuc  let hasSideEffects = 0, mayStore = 1,
6345*0a6a1f1dSLionel Sambuc      SchedRW = [WriteShuffleLd, WriteRMW] in
6346f4a2713aSLionel Sambuc  def mr : SS4AIi8<opc, MRMDestMem, (outs),
6347f4a2713aSLionel Sambuc                 (ins i16mem:$dst, VR128:$src1, i32i8imm:$src2),
6348f4a2713aSLionel Sambuc                 !strconcat(OpcodeStr,
6349f4a2713aSLionel Sambuc                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6350*0a6a1f1dSLionel Sambuc                 [(store (i16 (trunc (assertzext (X86pextrw (v8i16 VR128:$src1),
6351*0a6a1f1dSLionel Sambuc						  imm:$src2)))), addr:$dst)]>;
6352f4a2713aSLionel Sambuc}
6353f4a2713aSLionel Sambuc
// VEX-encoded form, gated on HasAVX; the legacy form below sets no predicate
// at this point.
6354f4a2713aSLionel Sambuclet Predicates = [HasAVX] in
6355f4a2713aSLionel Sambuc  defm VPEXTRW : SS41I_extract16<0x15, "vpextrw">, VEX;
6356f4a2713aSLionel Sambuc
6357f4a2713aSLionel Sambucdefm PEXTRW      : SS41I_extract16<0x15, "pextrw">;
6358f4a2713aSLionel Sambuc
6359f4a2713aSLionel Sambuc
6360f4a2713aSLionel Sambuc/// SS41I_extract32 - SSE 4.1 extract 32 bits to int reg or memory destination
6361f4a2713aSLionel Sambucmulticlass SS41I_extract32<bits<8> opc, string OpcodeStr> {
  // Register destination: extractelt of a v4i32 at an immediate index.
6362f4a2713aSLionel Sambuc  def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
6363f4a2713aSLionel Sambuc                 (ins VR128:$src1, i32i8imm:$src2),
6364f4a2713aSLionel Sambuc                 !strconcat(OpcodeStr,
6365f4a2713aSLionel Sambuc                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6366f4a2713aSLionel Sambuc                 [(set GR32:$dst,
6367*0a6a1f1dSLionel Sambuc                  (extractelt (v4i32 VR128:$src1), imm:$src2))]>,
6368*0a6a1f1dSLionel Sambuc                  Sched<[WriteShuffle]>;
  // Memory destination: the same extractelt stored directly to addr:$dst.
6369*0a6a1f1dSLionel Sambuc  let SchedRW = [WriteShuffleLd, WriteRMW] in
6370f4a2713aSLionel Sambuc  def mr : SS4AIi8<opc, MRMDestMem, (outs),
6371f4a2713aSLionel Sambuc                 (ins i32mem:$dst, VR128:$src1, i32i8imm:$src2),
6372f4a2713aSLionel Sambuc                 !strconcat(OpcodeStr,
6373f4a2713aSLionel Sambuc                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6374f4a2713aSLionel Sambuc                 [(store (extractelt (v4i32 VR128:$src1), imm:$src2),
6375*0a6a1f1dSLionel Sambuc                          addr:$dst)]>;
6376f4a2713aSLionel Sambuc}
6377f4a2713aSLionel Sambuc
// VEX-encoded form, gated on HasAVX; the legacy form below sets no predicate
// at this point.
6378f4a2713aSLionel Sambuclet Predicates = [HasAVX] in
6379f4a2713aSLionel Sambuc  defm VPEXTRD : SS41I_extract32<0x16, "vpextrd">, VEX;
6380f4a2713aSLionel Sambuc
6381f4a2713aSLionel Sambucdefm PEXTRD      : SS41I_extract32<0x16, "pextrd">;
6382f4a2713aSLionel Sambuc
6383f4a2713aSLionel Sambuc/// SS41I_extract64 - SSE 4.1 extract 64 bits to int reg or memory destination
6384f4a2713aSLionel Sambucmulticlass SS41I_extract64<bits<8> opc, string OpcodeStr> {
  // Register destination: extractelt of a v2i64 at an immediate index into a
  // GR64; the def carries REX_W.
6385f4a2713aSLionel Sambuc  def rr : SS4AIi8<opc, MRMDestReg, (outs GR64:$dst),
6386f4a2713aSLionel Sambuc                 (ins VR128:$src1, i32i8imm:$src2),
6387f4a2713aSLionel Sambuc                 !strconcat(OpcodeStr,
6388f4a2713aSLionel Sambuc                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6389f4a2713aSLionel Sambuc                 [(set GR64:$dst,
6390*0a6a1f1dSLionel Sambuc                  (extractelt (v2i64 VR128:$src1), imm:$src2))]>,
6391*0a6a1f1dSLionel Sambuc                  Sched<[WriteShuffle]>, REX_W;
  // Memory destination: the same extractelt stored directly to addr:$dst.
6392*0a6a1f1dSLionel Sambuc  let SchedRW = [WriteShuffleLd, WriteRMW] in
6393f4a2713aSLionel Sambuc  def mr : SS4AIi8<opc, MRMDestMem, (outs),
6394f4a2713aSLionel Sambuc                 (ins i64mem:$dst, VR128:$src1, i32i8imm:$src2),
6395f4a2713aSLionel Sambuc                 !strconcat(OpcodeStr,
6396f4a2713aSLionel Sambuc                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6397f4a2713aSLionel Sambuc                 [(store (extractelt (v2i64 VR128:$src1), imm:$src2),
6398*0a6a1f1dSLionel Sambuc                          addr:$dst)]>, REX_W;
6399f4a2713aSLionel Sambuc}
6400f4a2713aSLionel Sambuc
// VEX-encoded form (with VEX_W), gated on HasAVX; the legacy form below sets
// no predicate at this point.
6401f4a2713aSLionel Sambuclet Predicates = [HasAVX] in
6402f4a2713aSLionel Sambuc  defm VPEXTRQ : SS41I_extract64<0x16, "vpextrq">, VEX, VEX_W;
6403f4a2713aSLionel Sambuc
6404f4a2713aSLionel Sambucdefm PEXTRQ      : SS41I_extract64<0x16, "pextrq">;
6405f4a2713aSLionel Sambuc
6406f4a2713aSLionel Sambuc/// SS41I_extractf32 - SSE 4.1 extract 32 bits fp value to int reg or memory
6407f4a2713aSLionel Sambuc/// destination
///   itins - itineraries for the rr / rm forms; defaults to DEFAULT_ITINS.
6408f4a2713aSLionel Sambucmulticlass SS41I_extractf32<bits<8> opc, string OpcodeStr,
6409f4a2713aSLionel Sambuc                            OpndItins itins = DEFAULT_ITINS> {
  // Register destination: the v4f32 source is bitcast to v4i32 and one
  // element is extracted as an integer into a GR32orGR64.
6410f4a2713aSLionel Sambuc  def rr : SS4AIi8<opc, MRMDestReg, (outs GR32orGR64:$dst),
6411f4a2713aSLionel Sambuc                 (ins VR128:$src1, i32i8imm:$src2),
6412f4a2713aSLionel Sambuc                 !strconcat(OpcodeStr,
6413f4a2713aSLionel Sambuc                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6414f4a2713aSLionel Sambuc                 [(set GR32orGR64:$dst,
6415f4a2713aSLionel Sambuc                    (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))],
6416*0a6a1f1dSLionel Sambuc                    itins.rr>, Sched<[WriteFBlend]>;
  // Memory destination: the same integer-typed extractelt stored to addr:$dst.
6417*0a6a1f1dSLionel Sambuc  let SchedRW = [WriteFBlendLd, WriteRMW] in
6418f4a2713aSLionel Sambuc  def mr : SS4AIi8<opc, MRMDestMem, (outs),
6419f4a2713aSLionel Sambuc                 (ins f32mem:$dst, VR128:$src1, i32i8imm:$src2),
6420f4a2713aSLionel Sambuc                 !strconcat(OpcodeStr,
6421f4a2713aSLionel Sambuc                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6422f4a2713aSLionel Sambuc                 [(store (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2),
6423*0a6a1f1dSLionel Sambuc                          addr:$dst)], itins.rm>;
6424f4a2713aSLionel Sambuc}
6425f4a2713aSLionel Sambuc
// Both encodings are placed in the SSEPackedSingle execution domain. The VEX
// form is gated on UseAVX and uses the default itineraries; the legacy form
// uses SSE_EXTRACT_ITINS.
6426f4a2713aSLionel Sambuclet ExeDomain = SSEPackedSingle in {
6427f4a2713aSLionel Sambuc  let Predicates = [UseAVX] in
6428f4a2713aSLionel Sambuc    defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX;
6429f4a2713aSLionel Sambuc  defm EXTRACTPS   : SS41I_extractf32<0x17, "extractps", SSE_EXTRACT_ITINS>;
6430f4a2713aSLionel Sambuc}
6431f4a2713aSLionel Sambuc
// An EXTRACTPS store can also show up with the stored value typed as f32
// instead of i32, i.e. with a bitconvert around the extracted element.
// Select the same memory-destination instructions for that shape.
def : Pat<(store (f32 (bitconvert
                        (extractelt (bc_v4i32 (v4f32 VR128:$src1)),
                                    imm:$src2))),
                 addr:$dst),
          (VEXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>,
      Requires<[HasAVX]>;
def : Pat<(store (f32 (bitconvert
                        (extractelt (bc_v4i32 (v4f32 VR128:$src1)),
                                    imm:$src2))),
                 addr:$dst),
          (EXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>,
      Requires<[UseSSE41]>;
6443f4a2713aSLionel Sambuc
6444f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===//
6445f4a2713aSLionel Sambuc// SSE4.1 - Insert Instructions
6446f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===//
6447f4a2713aSLionel Sambuc
// SS41I_insert8 - insert a byte into the element of $src1 selected by the
// immediate $src3. The byte comes from a GR32orGR64 register (rr) or from an
// extending 8-bit load (rm).
//   opc     - opcode byte shared by both forms.
//   asm     - mnemonic; the operand list is appended to it.
//   Is2Addr - when set, $src1 is left out of the assembly string
//             (two-operand syntax); when clear, all three sources are
//             printed.
multiclass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> {
  // Byte taken from a general-purpose register.
  def rr : SS4AIi8<opc, MRMSrcReg,
                   (outs VR128:$dst),
                   (ins VR128:$src1, GR32orGR64:$src2, i32i8imm:$src3),
                   !if(Is2Addr,
                       !strconcat(asm,
                         "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                       !strconcat(asm,
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
                   [(set VR128:$dst,
                         (X86pinsrb VR128:$src1, GR32orGR64:$src2,
                                    imm:$src3))]>,
           Sched<[WriteShuffle]>;

  // Byte loaded from memory.
  def rm : SS4AIi8<opc, MRMSrcMem,
                   (outs VR128:$dst),
                   (ins VR128:$src1, i8mem:$src2, i32i8imm:$src3),
                   !if(Is2Addr,
                       !strconcat(asm,
                         "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                       !strconcat(asm,
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
                   [(set VR128:$dst,
                         (X86pinsrb VR128:$src1, (extloadi8 addr:$src2),
                                    imm:$src3))]>,
           Sched<[WriteShuffleLd, ReadAfterLd]>;
}
6468f4a2713aSLionel Sambuc
// VEX form: three-operand syntax (Is2Addr = 0), guarded by HasAVX.
let Predicates = [HasAVX] in {
  defm VPINSRB : SS41I_insert8<0x20, "vpinsrb", 0>, VEX_4V;
}
// Legacy form: $src1 is tied to $dst and the default two-operand syntax is
// used.
let Constraints = "$src1 = $dst" in {
  defm PINSRB : SS41I_insert8<0x20, "pinsrb">;
}
6473f4a2713aSLionel Sambuc
// SS41I_insert32 - insert a 32-bit integer into the v4i32 element of $src1
// selected by the immediate $src3. The value comes from a GR32 register (rr)
// or from a 32-bit load (rm).
//   opc     - opcode byte shared by both forms.
//   asm     - mnemonic; the operand list is appended to it.
//   Is2Addr - when set, $src1 is left out of the assembly string
//             (two-operand syntax); when clear, all three sources are
//             printed.
multiclass SS41I_insert32<bits<8> opc, string asm, bit Is2Addr = 1> {
  // Value taken from a general-purpose register.
  def rr : SS4AIi8<opc, MRMSrcReg,
                   (outs VR128:$dst),
                   (ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
                   !if(Is2Addr,
                       !strconcat(asm,
                         "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                       !strconcat(asm,
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
                   [(set VR128:$dst,
                         (v4i32 (insertelt VR128:$src1, GR32:$src2,
                                           imm:$src3)))]>,
           Sched<[WriteShuffle]>;

  // Value loaded from memory.
  def rm : SS4AIi8<opc, MRMSrcMem,
                   (outs VR128:$dst),
                   (ins VR128:$src1, i32mem:$src2, i32i8imm:$src3),
                   !if(Is2Addr,
                       !strconcat(asm,
                         "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                       !strconcat(asm,
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
                   [(set VR128:$dst,
                         (v4i32 (insertelt VR128:$src1,
                                           (loadi32 addr:$src2),
                                           imm:$src3)))]>,
           Sched<[WriteShuffleLd, ReadAfterLd]>;
}
6494f4a2713aSLionel Sambuc
// VEX form: three-operand syntax (Is2Addr = 0), guarded by HasAVX.
let Predicates = [HasAVX] in {
  defm VPINSRD : SS41I_insert32<0x22, "vpinsrd", 0>, VEX_4V;
}
// Legacy form: $src1 is tied to $dst and the default two-operand syntax is
// used.
let Constraints = "$src1 = $dst" in {
  defm PINSRD : SS41I_insert32<0x22, "pinsrd">;
}
6499f4a2713aSLionel Sambuc
// SS41I_insert64 - insert a 64-bit integer into the v2i64 element of $src1
// selected by the immediate $src3. The value comes from a GR64 register (rr)
// or from a 64-bit load (rm).
//   opc     - opcode byte shared by both forms.
//   asm     - mnemonic; the operand list is appended to it.
//   Is2Addr - when set, $src1 is left out of the assembly string
//             (two-operand syntax); when clear, all three sources are
//             printed.
multiclass SS41I_insert64<bits<8> opc, string asm, bit Is2Addr = 1> {
  // Value taken from a general-purpose register.
  def rr : SS4AIi8<opc, MRMSrcReg,
                   (outs VR128:$dst),
                   (ins VR128:$src1, GR64:$src2, i32i8imm:$src3),
                   !if(Is2Addr,
                       !strconcat(asm,
                         "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                       !strconcat(asm,
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
                   [(set VR128:$dst,
                         (v2i64 (insertelt VR128:$src1, GR64:$src2,
                                           imm:$src3)))]>,
           Sched<[WriteShuffle]>;

  // Value loaded from memory.
  def rm : SS4AIi8<opc, MRMSrcMem,
                   (outs VR128:$dst),
                   (ins VR128:$src1, i64mem:$src2, i32i8imm:$src3),
                   !if(Is2Addr,
                       !strconcat(asm,
                         "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                       !strconcat(asm,
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
                   [(set VR128:$dst,
                         (v2i64 (insertelt VR128:$src1,
                                           (loadi64 addr:$src2),
                                           imm:$src3)))]>,
           Sched<[WriteShuffleLd, ReadAfterLd]>;
}
6520f4a2713aSLionel Sambuc
// VEX form: three-operand syntax (Is2Addr = 0), guarded by HasAVX, with
// VEX_W set.
let Predicates = [HasAVX] in {
  defm VPINSRQ : SS41I_insert64<0x22, "vpinsrq", 0>, VEX_4V, VEX_W;
}
// Legacy form: $src1 is tied to $dst; REX_W is set on the instantiation.
let Constraints = "$src1 = $dst" in {
  defm PINSRQ : SS41I_insert64<0x22, "pinsrq">, REX_W;
}
6525f4a2713aSLionel Sambuc
// insertps has a few different modes. The first two below are optimized
// inserts that won't zero arbitrary elements in the destination vector; the
// next one matches the intrinsic and could zero arbitrary elements in the
// target vector.
//
// SS41I_insertf32 - both forms select the X86insertps node with the raw
// immediate $src3.
//   opc     - opcode byte shared by both forms.
//   asm     - mnemonic; the operand list is appended to it.
//   Is2Addr - when set, $src1 is left out of the assembly string
//             (two-operand syntax); when clear, all three sources are
//             printed.
//   itins   - itinerary classes for the rr / rm forms (DEFAULT_ITINS when
//             not supplied).
multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1,
                           OpndItins itins = DEFAULT_ITINS> {
  // Second source is a vector register.
  def rr : SS4AIi8<opc, MRMSrcReg,
                   (outs VR128:$dst),
                   (ins VR128:$src1, VR128:$src2, i8imm:$src3),
                   !if(Is2Addr,
                       !strconcat(asm,
                         "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                       !strconcat(asm,
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
                   [(set VR128:$dst,
                         (X86insertps VR128:$src1, VR128:$src2, imm:$src3))],
                   itins.rr>,
           Sched<[WriteFShuffle]>;

  // Second source is a scalar f32 load, wrapped in scalar_to_vector.
  def rm : SS4AIi8<opc, MRMSrcMem,
                   (outs VR128:$dst),
                   (ins VR128:$src1, f32mem:$src2, i8imm:$src3),
                   !if(Is2Addr,
                       !strconcat(asm,
                         "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                       !strconcat(asm,
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
                   [(set VR128:$dst,
                         (X86insertps VR128:$src1,
                             (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
                             imm:$src3))],
                   itins.rm>,
           Sched<[WriteFShuffleLd, ReadAfterLd]>;
}
6553f4a2713aSLionel Sambuc
// INSERTPS / VINSERTPS (opcode 0x21), both in the packed-single domain.
let ExeDomain = SSEPackedSingle in {
  // VEX form: three-operand syntax (Is2Addr = 0), default itineraries.
  let Predicates = [UseAVX] in {
    defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V;
  }
  // Legacy form: $src1 tied to $dst, two-operand syntax, SSE insert
  // itineraries.
  let Constraints = "$src1 = $dst" in {
    defm INSERTPS : SS41I_insertf32<0x21, "insertps", 1, SSE_INSERT_ITINS>;
  }
}
6560f4a2713aSLionel Sambuc
let Predicates = [UseSSE41] in {
  // When the inserted element comes from a load, or from a null pshufd
  // (immediate 0) of a load, fold the load into the insertps instruction.

  // pshufd of a scalar f32 load placed in a vector.
  def : Pat<(v4f32 (X86insertps (v4f32 VR128:$src1),
                       (X86PShufd
                           (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
                           (i8 0)),
                       imm:$src3)),
            (INSERTPSrm VR128:$src1, addr:$src2, imm:$src3)>;

  // pshufd of a full v4f32 load.
  def : Pat<(v4f32 (X86insertps (v4f32 VR128:$src1),
                       (X86PShufd (loadv4f32 addr:$src2), (i8 0)),
                       imm:$src3)),
            (INSERTPSrm VR128:$src1, addr:$src2, imm:$src3)>;
}
6572*0a6a1f1dSLionel Sambuc
let Predicates = [UseAVX] in {
  // When the inserted element comes from a vbroadcast of a load, fold the
  // load into the X86insertps instruction.

  // Broadcast of a scalar f32 load.
  def : Pat<(v4f32 (X86insertps (v4f32 VR128:$src1),
                       (X86VBroadcast (loadf32 addr:$src2)),
                       imm:$src3)),
            (VINSERTPSrm VR128:$src1, addr:$src2, imm:$src3)>;

  // Broadcast of a full v4f32 load.
  def : Pat<(v4f32 (X86insertps (v4f32 VR128:$src1),
                       (X86VBroadcast (loadv4f32 addr:$src2)),
                       imm:$src3)),
            (VINSERTPSrm VR128:$src1, addr:$src2, imm:$src3)>;
}
6583*0a6a1f1dSLionel Sambuc
6584f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===//
6585f4a2713aSLionel Sambuc// SSE4.1 - Round Instructions
6586f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===//
6587f4a2713aSLionel Sambuc
// sse41_fp_unop_rm - packed FP unary operation taking an 8-bit immediate,
// in packed-single (PSr/PSm) and packed-double (PDr/PDm) variants, each with
// a register and a memory source form. All four forms select a vector
// intrinsic.
//   opcps / opcpd           - opcode bytes for the ps / pd variants.
//   OpcodeStr               - mnemonic stem; "ps" or "pd" is appended.
//   x86memop                - memory operand type for the memory forms.
//   RC                      - vector register class of source and result.
//   mem_frag32 / mem_frag64 - load fragments used by the PSm / PDm patterns.
//   V4F32Int / V2F64Int     - intrinsics matched by the ps / pd patterns.
multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
                            X86MemOperand x86memop, RegisterClass RC,
                            PatFrag mem_frag32, PatFrag mem_frag64,
                            Intrinsic V4F32Int, Intrinsic V2F64Int> {
let ExeDomain = SSEPackedSingle in {
  // Vector intrinsic operation, reg
  def PSr : SS4AIi8<opcps, MRMSrcReg,
                    (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
                    !strconcat(OpcodeStr,
                    "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set RC:$dst, (V4F32Int RC:$src1, imm:$src2))],
                    IIC_SSE_ROUNDPS_REG>, Sched<[WriteFAdd]>;

  // Vector intrinsic operation, mem
  def PSm : SS4AIi8<opcps, MRMSrcMem,
                    (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
                    !strconcat(OpcodeStr,
                    "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set RC:$dst,
                          (V4F32Int (mem_frag32 addr:$src1),imm:$src2))],
                    IIC_SSE_ROUNDPS_MEM>, Sched<[WriteFAddLd]>;
} // ExeDomain = SSEPackedSingle

let ExeDomain = SSEPackedDouble in {
  // Vector intrinsic operation, reg
  def PDr : SS4AIi8<opcpd, MRMSrcReg,
                    (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
                    !strconcat(OpcodeStr,
                    "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set RC:$dst, (V2F64Int RC:$src1, imm:$src2))],
                    IIC_SSE_ROUNDPS_REG>, Sched<[WriteFAdd]>;

  // Vector intrinsic operation, mem
  // This is a load-folding form, so it takes the memory itinerary class,
  // the same as PSm above (it was previously tagged with the register
  // class).
  def PDm : SS4AIi8<opcpd, MRMSrcMem,
                    (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
                    !strconcat(OpcodeStr,
                    "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set RC:$dst,
                          (V2F64Int (mem_frag64 addr:$src1),imm:$src2))],
                    IIC_SSE_ROUNDPS_MEM>, Sched<[WriteFAddLd]>;
} // ExeDomain = SSEPackedDouble
}
6631f4a2713aSLionel Sambuc
// sse41_fp_binop_rm - scalar FP operation with two sources and an 8-bit
// immediate, in single ("ss") and double ("sd") variants. Each variant
// provides:
//   *r     - FR32/FR64 register form with no selection pattern of its own.
//   *r_Int - VR128 register form matching the intrinsic (isCodeGenOnly).
//   *m     - VR128 form with a folded scalar load, matching the intrinsic.
// Parameters:
//   opcss / opcsd   - opcode bytes for the ss / sd variants.
//   OpcodeStr       - mnemonic stem; "ss" or "sd" is appended.
//   F32Int / F64Int - intrinsics matched by the *_Int and *m patterns.
//   Is2Addr         - when set, $src1 is left out of the assembly string
//                     (two-operand syntax); when clear, all three sources
//                     are printed.
multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
                             string OpcodeStr,
                             Intrinsic F32Int,
                             Intrinsic F64Int, bit Is2Addr = 1> {
let ExeDomain = GenericDomain in {
  //===--- single precision ---------------------------------------------===//

  // Operation, reg. Pattern list is empty, so side effects are stated
  // explicitly.
  let hasSideEffects = 0 in
  def SSr : SS4AIi8<opcss, MRMSrcReg,
                    (outs FR32:$dst),
                    (ins FR32:$src1, FR32:$src2, i32i8imm:$src3),
                    !if(Is2Addr,
                        !strconcat(OpcodeStr,
                  "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                        !strconcat(OpcodeStr,
                  "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
                    []>,
            Sched<[WriteFAdd]>;

  // Intrinsic operation, reg.
  let isCodeGenOnly = 1 in
  def SSr_Int : SS4AIi8<opcss, MRMSrcReg,
                    (outs VR128:$dst),
                    (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
                    !if(Is2Addr,
                        !strconcat(OpcodeStr,
                  "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                        !strconcat(OpcodeStr,
                  "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
                    [(set VR128:$dst,
                          (F32Int VR128:$src1, VR128:$src2, imm:$src3))]>,
                Sched<[WriteFAdd]>;

  // Intrinsic operation, mem.
  def SSm : SS4AIi8<opcss, MRMSrcMem,
                    (outs VR128:$dst),
                    (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3),
                    !if(Is2Addr,
                        !strconcat(OpcodeStr,
                  "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                        !strconcat(OpcodeStr,
                  "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
                    [(set VR128:$dst,
                          (F32Int VR128:$src1, sse_load_f32:$src2,
                                  imm:$src3))]>,
            Sched<[WriteFAddLd, ReadAfterLd]>;

  //===--- double precision ---------------------------------------------===//

  // Operation, reg. Pattern list is empty, so side effects are stated
  // explicitly.
  let hasSideEffects = 0 in
  def SDr : SS4AIi8<opcsd, MRMSrcReg,
                    (outs FR64:$dst),
                    (ins FR64:$src1, FR64:$src2, i32i8imm:$src3),
                    !if(Is2Addr,
                        !strconcat(OpcodeStr,
                  "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                        !strconcat(OpcodeStr,
                  "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
                    []>,
            Sched<[WriteFAdd]>;

  // Intrinsic operation, reg.
  let isCodeGenOnly = 1 in
  def SDr_Int : SS4AIi8<opcsd, MRMSrcReg,
                    (outs VR128:$dst),
                    (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
                    !if(Is2Addr,
                        !strconcat(OpcodeStr,
                  "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                        !strconcat(OpcodeStr,
                  "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
                    [(set VR128:$dst,
                          (F64Int VR128:$src1, VR128:$src2, imm:$src3))]>,
                Sched<[WriteFAdd]>;

  // Intrinsic operation, mem.
  def SDm : SS4AIi8<opcsd, MRMSrcMem,
                    (outs VR128:$dst),
                    (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3),
                    !if(Is2Addr,
                        !strconcat(OpcodeStr,
                  "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                        !strconcat(OpcodeStr,
                  "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
                    [(set VR128:$dst,
                          (F64Int VR128:$src1, sse_load_f64:$src2,
                                  imm:$src3))]>,
            Sched<[WriteFAddLd, ReadAfterLd]>;
} // ExeDomain = GenericDomain
}
6708f4a2713aSLionel Sambuc
// FP round - roundss, roundps, roundsd, roundpd (AVX encodings).
//
// Immediate values used by the patterns below, per the ROUNDxx imm8 layout
// (bits 1:0 = rounding mode, bit 2 = take the mode from MXCSR, bit 3 =
// suppress the precision exception):
//   0x1  ffloor      round toward negative infinity
//   0x2  fceil       round toward positive infinity
//   0x3  ftrunc      round toward zero
//   0x4  frint       MXCSR rounding mode, precision exception reported
//   0xC  fnearbyint  MXCSR rounding mode, precision exception suppressed
let Predicates = [HasAVX] in {
  // Intrinsic form
  defm VROUND  : sse41_fp_unop_rm<0x08, 0x09, "vround", f128mem, VR128,
                                  loadv4f32, loadv2f64,
                                  int_x86_sse41_round_ps,
                                  int_x86_sse41_round_pd>, VEX;
  defm VROUNDY : sse41_fp_unop_rm<0x08, 0x09, "vround", f256mem, VR256,
                                  loadv8f32, loadv4f64,
                                  int_x86_avx_round_ps_256,
                                  int_x86_avx_round_pd_256>, VEX, VEX_L;
  defm VROUND  : sse41_fp_binop_rm<0x0A, 0x0B, "vround",
                                  int_x86_sse41_round_ss,
                                  int_x86_sse41_round_sd, 0>, VEX_4V, VEX_LIG;

  // Scalar f32 / f64. The first source of the three-operand form is passed
  // as IMPLICIT_DEF. Every pattern carries an explicit result type, the
  // ffloor/f32 one included, so the group is uniform.
  def : Pat<(f32 (ffloor FR32:$src)),
            (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x1))>;
  def : Pat<(f64 (ffloor FR64:$src)),
            (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x1))>;
  def : Pat<(f32 (fnearbyint FR32:$src)),
            (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xC))>;
  def : Pat<(f64 (fnearbyint FR64:$src)),
            (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xC))>;
  def : Pat<(f32 (fceil FR32:$src)),
            (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x2))>;
  def : Pat<(f64 (fceil FR64:$src)),
            (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x2))>;
  def : Pat<(f32 (frint FR32:$src)),
            (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x4))>;
  def : Pat<(f64 (frint FR64:$src)),
            (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x4))>;
  def : Pat<(f32 (ftrunc FR32:$src)),
            (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x3))>;
  def : Pat<(f64 (ftrunc FR64:$src)),
            (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x3))>;

  // 128-bit packed single.
  def : Pat<(v4f32 (ffloor VR128:$src)),
            (VROUNDPSr VR128:$src, (i32 0x1))>;
  def : Pat<(v4f32 (fnearbyint VR128:$src)),
            (VROUNDPSr VR128:$src, (i32 0xC))>;
  def : Pat<(v4f32 (fceil VR128:$src)),
            (VROUNDPSr VR128:$src, (i32 0x2))>;
  def : Pat<(v4f32 (frint VR128:$src)),
            (VROUNDPSr VR128:$src, (i32 0x4))>;
  def : Pat<(v4f32 (ftrunc VR128:$src)),
            (VROUNDPSr VR128:$src, (i32 0x3))>;

  // 128-bit packed double.
  def : Pat<(v2f64 (ffloor VR128:$src)),
            (VROUNDPDr VR128:$src, (i32 0x1))>;
  def : Pat<(v2f64 (fnearbyint VR128:$src)),
            (VROUNDPDr VR128:$src, (i32 0xC))>;
  def : Pat<(v2f64 (fceil VR128:$src)),
            (VROUNDPDr VR128:$src, (i32 0x2))>;
  def : Pat<(v2f64 (frint VR128:$src)),
            (VROUNDPDr VR128:$src, (i32 0x4))>;
  def : Pat<(v2f64 (ftrunc VR128:$src)),
            (VROUNDPDr VR128:$src, (i32 0x3))>;

  // 256-bit packed single.
  def : Pat<(v8f32 (ffloor VR256:$src)),
            (VROUNDYPSr VR256:$src, (i32 0x1))>;
  def : Pat<(v8f32 (fnearbyint VR256:$src)),
            (VROUNDYPSr VR256:$src, (i32 0xC))>;
  def : Pat<(v8f32 (fceil VR256:$src)),
            (VROUNDYPSr VR256:$src, (i32 0x2))>;
  def : Pat<(v8f32 (frint VR256:$src)),
            (VROUNDYPSr VR256:$src, (i32 0x4))>;
  def : Pat<(v8f32 (ftrunc VR256:$src)),
            (VROUNDYPSr VR256:$src, (i32 0x3))>;

  // 256-bit packed double.
  def : Pat<(v4f64 (ffloor VR256:$src)),
            (VROUNDYPDr VR256:$src, (i32 0x1))>;
  def : Pat<(v4f64 (fnearbyint VR256:$src)),
            (VROUNDYPDr VR256:$src, (i32 0xC))>;
  def : Pat<(v4f64 (fceil VR256:$src)),
            (VROUNDYPDr VR256:$src, (i32 0x2))>;
  def : Pat<(v4f64 (frint VR256:$src)),
            (VROUNDYPDr VR256:$src, (i32 0x4))>;
  def : Pat<(v4f64 (ftrunc VR256:$src)),
            (VROUNDYPDr VR256:$src, (i32 0x3))>;
}
6789f4a2713aSLionel Sambuc
// Legacy (non-VEX) round instructions.
// Packed forms: 128-bit only, using the memop load fragments.
defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round", f128mem, VR128,
                              memopv4f32, memopv2f64,
                              int_x86_sse41_round_ps,
                              int_x86_sse41_round_pd>;
// Scalar forms: $src1 is tied to $dst and the default two-operand syntax is
// used.
let Constraints = "$src1 = $dst" in {
  defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round",
                                 int_x86_sse41_round_ss,
                                 int_x86_sse41_round_sd>;
}
6796f4a2713aSLionel Sambuc
// Selection patterns mapping the generic rounding nodes onto the legacy
// SSE4.1 round instructions.
//
// Immediate values, per the ROUNDxx imm8 layout (bits 1:0 = rounding mode,
// bit 2 = take the mode from MXCSR, bit 3 = suppress the precision
// exception):
//   0x1  ffloor      round toward negative infinity
//   0x2  fceil       round toward positive infinity
//   0x3  ftrunc      round toward zero
//   0x4  frint       MXCSR rounding mode, precision exception reported
//   0xC  fnearbyint  MXCSR rounding mode, precision exception suppressed
let Predicates = [UseSSE41] in {
  // Scalar f32 / f64. The first source operand is passed as IMPLICIT_DEF.
  // Every pattern carries an explicit result type, the ffloor/f32 one
  // included, so the group is uniform.
  def : Pat<(f32 (ffloor FR32:$src)),
            (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x1))>;
  def : Pat<(f64 (ffloor FR64:$src)),
            (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x1))>;
  def : Pat<(f32 (fnearbyint FR32:$src)),
            (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xC))>;
  def : Pat<(f64 (fnearbyint FR64:$src)),
            (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xC))>;
  def : Pat<(f32 (fceil FR32:$src)),
            (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x2))>;
  def : Pat<(f64 (fceil FR64:$src)),
            (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x2))>;
  def : Pat<(f32 (frint FR32:$src)),
            (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x4))>;
  def : Pat<(f64 (frint FR64:$src)),
            (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x4))>;
  def : Pat<(f32 (ftrunc FR32:$src)),
            (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x3))>;
  def : Pat<(f64 (ftrunc FR64:$src)),
            (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x3))>;

  // 128-bit packed single.
  def : Pat<(v4f32 (ffloor VR128:$src)),
            (ROUNDPSr VR128:$src, (i32 0x1))>;
  def : Pat<(v4f32 (fnearbyint VR128:$src)),
            (ROUNDPSr VR128:$src, (i32 0xC))>;
  def : Pat<(v4f32 (fceil VR128:$src)),
            (ROUNDPSr VR128:$src, (i32 0x2))>;
  def : Pat<(v4f32 (frint VR128:$src)),
            (ROUNDPSr VR128:$src, (i32 0x4))>;
  def : Pat<(v4f32 (ftrunc VR128:$src)),
            (ROUNDPSr VR128:$src, (i32 0x3))>;

  // 128-bit packed double.
  def : Pat<(v2f64 (ffloor VR128:$src)),
            (ROUNDPDr VR128:$src, (i32 0x1))>;
  def : Pat<(v2f64 (fnearbyint VR128:$src)),
            (ROUNDPDr VR128:$src, (i32 0xC))>;
  def : Pat<(v2f64 (fceil VR128:$src)),
            (ROUNDPDr VR128:$src, (i32 0x2))>;
  def : Pat<(v2f64 (frint VR128:$src)),
            (ROUNDPDr VR128:$src, (i32 0x4))>;
  def : Pat<(v2f64 (ftrunc VR128:$src)),
            (ROUNDPDr VR128:$src, (i32 0x3))>;
}
6841f4a2713aSLionel Sambuc
6842f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===//
6843f4a2713aSLionel Sambuc// SSE4.1 - Packed Bit Test
6844f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===//
6845f4a2713aSLionel Sambuc
// ptest: X86ISelLowering lowers to the X86ptest node, primarily from the
// corresponding Intel intrinsic. The instructions produce no register
// result; they only define EFLAGS.
let Defs = [EFLAGS], Predicates = [HasAVX] in {
// 128-bit VEX forms.
def VPTESTrr  : SS48I<0x17, MRMSrcReg,
                      (outs), (ins VR128:$src1, VR128:$src2),
                      "vptest\t{$src2, $src1|$src1, $src2}",
                      [(set EFLAGS,
                            (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>,
                Sched<[WriteVecLogic]>, VEX;
def VPTESTrm  : SS48I<0x17, MRMSrcMem,
                      (outs), (ins VR128:$src1, f128mem:$src2),
                      "vptest\t{$src2, $src1|$src1, $src2}",
                      [(set EFLAGS,
                            (X86ptest VR128:$src1, (loadv2i64 addr:$src2)))]>,
                Sched<[WriteVecLogicLd, ReadAfterLd]>, VEX;

// 256-bit VEX forms (VEX_L).
def VPTESTYrr : SS48I<0x17, MRMSrcReg,
                      (outs), (ins VR256:$src1, VR256:$src2),
                      "vptest\t{$src2, $src1|$src1, $src2}",
                      [(set EFLAGS,
                            (X86ptest VR256:$src1, (v4i64 VR256:$src2)))]>,
                Sched<[WriteVecLogic]>, VEX, VEX_L;
def VPTESTYrm : SS48I<0x17, MRMSrcMem,
                      (outs), (ins VR256:$src1, i256mem:$src2),
                      "vptest\t{$src2, $src1|$src1, $src2}",
                      [(set EFLAGS,
                            (X86ptest VR256:$src1, (loadv4i64 addr:$src2)))]>,
                Sched<[WriteVecLogicLd, ReadAfterLd]>, VEX, VEX_L;
}
6867f4a2713aSLionel Sambuc
// Legacy (non-VEX) ptest. Both forms only define EFLAGS; the memory form
// uses the memop load fragment.
let Defs = [EFLAGS] in {
def PTESTrr : SS48I<0x17, MRMSrcReg,
                    (outs), (ins VR128:$src1, VR128:$src2),
                    "ptest\t{$src2, $src1|$src1, $src2}",
                    [(set EFLAGS,
                          (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>,
              Sched<[WriteVecLogic]>;
def PTESTrm : SS48I<0x17, MRMSrcMem,
                    (outs), (ins VR128:$src1, f128mem:$src2),
                    "ptest\t{$src2, $src1|$src1, $src2}",
                    [(set EFLAGS,
                          (X86ptest VR128:$src1, (memopv2i64 addr:$src2)))]>,
              Sched<[WriteVecLogicLd, ReadAfterLd]>;
}
6878f4a2713aSLionel Sambuc
// The bit test instructions below are AVX only
//
// avx_bittest - Emits the VEX-encoded "rr" and "rm" forms of a bit-test
// instruction.  Neither form has a register output; both patterns set EFLAGS
// through X86testp.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic placed in front of the operand string.
//   RC        - register class of both register operands.
//   x86memop  - memory operand type used by the "rm" form.
//   mem_frag  - load fragment matched by the "rm" pattern.
//   vt        - value type applied to $src2 in the "rr" pattern.
multiclass avx_bittest<bits<8> opc, string OpcodeStr, RegisterClass RC,
                       X86MemOperand x86memop, PatFrag mem_frag, ValueType vt> {
  def rr : SS48I<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
                 OpcodeStr # "\t{$src2, $src1|$src1, $src2}",
                 [(set EFLAGS, (X86testp RC:$src1, (vt RC:$src2)))]>,
           Sched<[WriteVecLogic]>, VEX;

  def rm : SS48I<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
                 OpcodeStr # "\t{$src2, $src1|$src1, $src2}",
                 [(set EFLAGS, (X86testp RC:$src1, (mem_frag addr:$src2)))]>,
           Sched<[WriteVecLogicLd, ReadAfterLd]>, VEX;
}
6891f4a2713aSLionel Sambuc
// VTESTPS: 128-bit and 256-bit (VEX_L) forms in the packed-single domain.
let Defs = [EFLAGS], Predicates = [HasAVX], ExeDomain = SSEPackedSingle in {
  defm VTESTPS  : avx_bittest<0x0E, "vtestps", VR128, f128mem,
                              loadv4f32, v4f32>;
  defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem,
                              loadv8f32, v8f32>, VEX_L;
}

// VTESTPD: 128-bit and 256-bit (VEX_L) forms in the packed-double domain.
let Defs = [EFLAGS], Predicates = [HasAVX], ExeDomain = SSEPackedDouble in {
  defm VTESTPD  : avx_bittest<0x0F, "vtestpd", VR128, f128mem,
                              loadv2f64, v2f64>;
  defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem,
                              loadv4f64, v4f64>, VEX_L;
}
6904f4a2713aSLionel Sambuc
6905f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===//
6906f4a2713aSLionel Sambuc// SSE4.1 - Misc Instructions
6907f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===//
6908f4a2713aSLionel Sambuc
// POPCNT on 16/32/64-bit general purpose registers, with register and memory
// sources.  Each pattern selects ctpop and carries an implicit EFLAGS def.
let Defs = [EFLAGS], Predicates = [HasPOPCNT] in {
  // 16-bit operand size.
  def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
                     "popcnt{w}\t{$src, $dst|$dst, $src}",
                     [(set GR16:$dst, (ctpop GR16:$src)),
                      (implicit EFLAGS)],
                     IIC_SSE_POPCNT_RR>,
                   Sched<[WriteFAdd]>, OpSize16, XS;
  def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
                     "popcnt{w}\t{$src, $dst|$dst, $src}",
                     [(set GR16:$dst, (ctpop (loadi16 addr:$src))),
                      (implicit EFLAGS)],
                     IIC_SSE_POPCNT_RM>,
                   Sched<[WriteFAddLd]>, OpSize16, XS;

  // 32-bit operand size.
  def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
                     "popcnt{l}\t{$src, $dst|$dst, $src}",
                     [(set GR32:$dst, (ctpop GR32:$src)),
                      (implicit EFLAGS)],
                     IIC_SSE_POPCNT_RR>,
                   Sched<[WriteFAdd]>, OpSize32, XS;
  def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
                     "popcnt{l}\t{$src, $dst|$dst, $src}",
                     [(set GR32:$dst, (ctpop (loadi32 addr:$src))),
                      (implicit EFLAGS)],
                     IIC_SSE_POPCNT_RM>,
                   Sched<[WriteFAddLd]>, OpSize32, XS;

  // 64-bit operand size (RI format).
  def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
                      "popcnt{q}\t{$src, $dst|$dst, $src}",
                      [(set GR64:$dst, (ctpop GR64:$src)),
                       (implicit EFLAGS)],
                      IIC_SSE_POPCNT_RR>,
                   Sched<[WriteFAdd]>, XS;
  def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
                      "popcnt{q}\t{$src, $dst|$dst, $src}",
                      [(set GR64:$dst, (ctpop (loadi64 addr:$src))),
                       (implicit EFLAGS)],
                      IIC_SSE_POPCNT_RM>,
                   Sched<[WriteFAddLd]>, XS;
}
6943f4a2713aSLionel Sambuc
6944f4a2713aSLionel Sambuc

// SS41I_unop_rm_int_v16 - SSE 4.1 unary operator whose type is v8i16.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic placed in front of the operand string.
//   IntId128  - intrinsic matched by both patterns.
//   Sched     - scheduling class; the memory form uses its .Folded variant.
multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,
                                 Intrinsic IntId128,
                                 X86FoldableSchedWrite Sched> {
  // Register source.
  def rr128 : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                    OpcodeStr # "\t{$src, $dst|$dst, $src}",
                    [(set VR128:$dst, (IntId128 VR128:$src))]>,
              Sched<[Sched]>;

  // Memory source, loaded through memopv2i64 and bitconverted for the
  // intrinsic.
  def rm128 : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                    OpcodeStr # "\t{$src, $dst|$dst, $src}",
                    [(set VR128:$dst,
                       (IntId128 (bitconvert (memopv2i64 addr:$src))))]>,
              Sched<[Sched.Folded]>;
}
6962f4a2713aSLionel Sambuc
// PHMIN has the same profile as PSAD, thus we use the same scheduling
// model, although the naming is misleading.

// VEX form; the HasAVX predicate applies to this defm only.
let Predicates = [HasAVX] in
defm VPHMINPOSUW : SS41I_unop_rm_int_v16<0x41, "vphminposuw",
                                         int_x86_sse41_phminposuw,
                                         WriteVecIMul>, VEX;

// Non-VEX form.
defm PHMINPOSUW  : SS41I_unop_rm_int_v16<0x41, "phminposuw",
                                         int_x86_sse41_phminposuw,
                                         WriteVecIMul>;
6972f4a2713aSLionel Sambuc
/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator
///   opc       - opcode byte.
///   OpcodeStr - mnemonic placed in front of the operand string.
///   IntId128  - intrinsic matched by both patterns.
///   Is2Addr   - selects the two-operand asm string when set, otherwise the
///               three-operand one.
///   itins     - supplies the rr/rm itinerary classes and the scheduling class.
multiclass SS41I_binop_rm_int<bits<8> opc, string OpcodeStr,
                              Intrinsic IntId128, bit Is2Addr = 1,
                              OpndItins itins = DEFAULT_ITINS> {
  // Only the register/register form is marked commutable.
  let isCommutable = 1 in {
    def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
                   (ins VR128:$src1, VR128:$src2),
                   !strconcat(OpcodeStr,
                     !if(Is2Addr,
                         "\t{$src2, $dst|$dst, $src2}",
                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
                   [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))],
                   itins.rr>,
             Sched<[itins.Sched]>;
  }

  def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
                 (ins VR128:$src1, i128mem:$src2),
                 !strconcat(OpcodeStr,
                   !if(Is2Addr,
                       "\t{$src2, $dst|$dst, $src2}",
                       "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
                 [(set VR128:$dst,
                    (IntId128 VR128:$src1,
                              (bitconvert (memopv2i64 addr:$src2))))],
                 itins.rm>,
           Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
6994f4a2713aSLionel Sambuc
/// SS41I_binop_rm_int_y - Simple SSE 4.1 binary operator
/// This is the VR256 variant: it always uses the three-operand asm string.
///   opc       - opcode byte.
///   OpcodeStr - mnemonic placed in front of the operand string.
///   IntId256  - intrinsic matched by both patterns.
///   Sched     - scheduling class; the memory form uses its .Folded variant.
multiclass SS41I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
                                Intrinsic IntId256,
                                X86FoldableSchedWrite Sched> {
  // Only the register/register form is marked commutable.
  let isCommutable = 1 in {
    def Yrr : SS48I<opc, MRMSrcReg, (outs VR256:$dst),
                    (ins VR256:$src1, VR256:$src2),
                    OpcodeStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set VR256:$dst, (IntId256 VR256:$src1, VR256:$src2))]>,
              Sched<[Sched]>;
  }

  def Yrm : SS48I<opc, MRMSrcMem, (outs VR256:$dst),
                  (ins VR256:$src1, i256mem:$src2),
                  OpcodeStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set VR256:$dst,
                     (IntId256 VR256:$src1,
                               (bitconvert (loadv4i64 addr:$src2))))]>,
            Sched<[Sched.Folded, ReadAfterLd]>;
}
7012f4a2713aSLionel Sambuc
7013f4a2713aSLionel Sambuc
/// SS48I_binop_rm - Simple SSE41 binary operator.
///   opc        - opcode byte.
///   OpcodeStr  - mnemonic placed in front of the operand string.
///   OpNode     - SDNode matched by both patterns.
///   OpVT       - value type of the result.
///   RC         - register class of the destination and register sources.
///   memop_frag - load fragment matched (through a bitconvert) by "rm".
///   x86memop   - memory operand type used by "rm".
///   Is2Addr    - selects the two-operand asm string when set, otherwise the
///                three-operand one.
///   itins      - supplies the rr/rm itinerary classes and the scheduling
///                class.
multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
                          X86MemOperand x86memop, bit Is2Addr = 1,
                          OpndItins itins = SSE_INTALU_ITINS_P> {
  // Only the register/register form is marked commutable.
  let isCommutable = 1 in
  def rr : SS48I<opc, MRMSrcReg, (outs RC:$dst),
       (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))],
       // Forward the itinerary class; previously itins was only used for its
       // Sched field and the instruction was left without an itinerary.
       itins.rr>,
       Sched<[itins.Sched]>;
  def rm : SS48I<opc, MRMSrcMem, (outs RC:$dst),
       (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst,
         (OpVT (OpNode RC:$src1, (bitconvert (memop_frag addr:$src2)))))],
       itins.rm>,
       Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
7036f4a2713aSLionel Sambuc
/// SS48I_binop_rm2 - Simple SSE41 binary operator with different src and dst
/// types.
///   opc          - opcode byte.
///   OpcodeStr    - mnemonic placed in front of the operand string.
///   OpNode       - SDNode matched by both patterns.
///   DstVT/SrcVT  - value types of the result and of $src1.
///   RC           - register class of the destination and register sources.
///   memop_frag   - load fragment matched (through a bitconvert) by "rm".
///   x86memop     - memory operand type used by "rm".
///   itins        - supplies the rr/rm itinerary classes and the scheduling
///                  class.
///   IsCommutable - value given to isCommutable on the "rr" form.
///   Is2Addr      - selects the two-operand asm string when set, otherwise
///                  the three-operand one.
multiclass SS48I_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         ValueType DstVT, ValueType SrcVT, RegisterClass RC,
                         PatFrag memop_frag, X86MemOperand x86memop,
                         OpndItins itins,
                         bit IsCommutable = 0, bit Is2Addr = 1> {
  let isCommutable = IsCommutable in
  def rr : SS48I<opc, MRMSrcReg, (outs RC:$dst),
       (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))],
       // Forward the itinerary class; previously itins was only used for its
       // Sched field and the instruction was left without an itinerary.
       itins.rr>,
       Sched<[itins.Sched]>;
  def rm : SS48I<opc, MRMSrcMem, (outs RC:$dst),
       (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1),
                                     (bitconvert (memop_frag addr:$src2)))))],
       itins.rm>,
       Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
7061*0a6a1f1dSLionel Sambuc
// 128-bit VEX-encoded integer min/max and VPMULDQ.
//
// No "let isCommutable = 0" here: these min/max nodes are commutative, and a
// brace-less let in front of the first defm would bind to that defm only,
// leaving VPMINSB as the single non-commutable member of the group.  The
// isCommutable setting made by SS48I_binop_rm for its rr form is kept as is.
let Predicates = [HasAVX, NoVLX] in {
  defm VPMINSB   : SS48I_binop_rm<0x38, "vpminsb", X86smin, v16i8, VR128,
                                  loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
                                  VEX_4V;
  defm VPMINSD   : SS48I_binop_rm<0x39, "vpminsd", X86smin, v4i32, VR128,
                                  loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
                                  VEX_4V;
  defm VPMINUD   : SS48I_binop_rm<0x3B, "vpminud", X86umin, v4i32, VR128,
                                  loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
                                  VEX_4V;
  defm VPMINUW   : SS48I_binop_rm<0x3A, "vpminuw", X86umin, v8i16, VR128,
                                  loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
                                  VEX_4V;
  defm VPMAXSB   : SS48I_binop_rm<0x3C, "vpmaxsb", X86smax, v16i8, VR128,
                                  loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
                                  VEX_4V;
  defm VPMAXSD   : SS48I_binop_rm<0x3D, "vpmaxsd", X86smax, v4i32, VR128,
                                  loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
                                  VEX_4V;
  defm VPMAXUD   : SS48I_binop_rm<0x3F, "vpmaxud", X86umax, v4i32, VR128,
                                  loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
                                  VEX_4V;
  defm VPMAXUW   : SS48I_binop_rm<0x3E, "vpmaxuw", X86umax, v8i16, VR128,
                                  loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
                                  VEX_4V;
  // Result type (v2i64) differs from the source type (v4i32); passed as
  // commutable with the three-operand asm string.
  defm VPMULDQ   : SS48I_binop_rm2<0x28, "vpmuldq", X86pmuldq, v2i64, v4i32,
                                   VR128, loadv2i64, i128mem,
                                   SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V;
}
7092f4a2713aSLionel Sambuc
// 256-bit VEX.L-encoded integer min/max and VPMULDQ.
//
// No "let isCommutable = 0" here: these min/max nodes are commutative, and a
// brace-less let in front of the first defm would bind to that defm only,
// leaving VPMINSBY as the single non-commutable member of the group.  The
// isCommutable setting made by SS48I_binop_rm for its rr form is kept as is.
let Predicates = [HasAVX2, NoVLX] in {
  defm VPMINSBY  : SS48I_binop_rm<0x38, "vpminsb", X86smin, v32i8, VR256,
                                  loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
                                  VEX_4V, VEX_L;
  defm VPMINSDY  : SS48I_binop_rm<0x39, "vpminsd", X86smin, v8i32, VR256,
                                  loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
                                  VEX_4V, VEX_L;
  defm VPMINUDY  : SS48I_binop_rm<0x3B, "vpminud", X86umin, v8i32, VR256,
                                  loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
                                  VEX_4V, VEX_L;
  defm VPMINUWY  : SS48I_binop_rm<0x3A, "vpminuw", X86umin, v16i16, VR256,
                                  loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
                                  VEX_4V, VEX_L;
  defm VPMAXSBY  : SS48I_binop_rm<0x3C, "vpmaxsb", X86smax, v32i8, VR256,
                                  loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
                                  VEX_4V, VEX_L;
  defm VPMAXSDY  : SS48I_binop_rm<0x3D, "vpmaxsd", X86smax, v8i32, VR256,
                                  loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
                                  VEX_4V, VEX_L;
  defm VPMAXUDY  : SS48I_binop_rm<0x3F, "vpmaxud", X86umax, v8i32, VR256,
                                  loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
                                  VEX_4V, VEX_L;
  defm VPMAXUWY  : SS48I_binop_rm<0x3E, "vpmaxuw", X86umax, v16i16, VR256,
                                  loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
                                  VEX_4V, VEX_L;
  // Result type (v4i64) differs from the source type (v8i32); passed as
  // commutable with the three-operand asm string.
  defm VPMULDQY : SS48I_binop_rm2<0x28, "vpmuldq", X86pmuldq, v4i64, v8i32,
                                  VR256, loadv4i64, i256mem,
                                  SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L;
}
7123f4a2713aSLionel Sambuc
// Non-VEX (two-address) integer min/max and PMULDQ; $src1 is tied to $dst.
//
// No "let isCommutable = 0" here: these min/max nodes are commutative, and a
// brace-less let in front of the first defm would bind to that defm only,
// leaving PMINSB as the single non-commutable member of the group.  The
// isCommutable setting made by SS48I_binop_rm for its rr form is kept as is.
let Constraints = "$src1 = $dst" in {
  defm PMINSB   : SS48I_binop_rm<0x38, "pminsb", X86smin, v16i8, VR128,
                                 memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
  defm PMINSD   : SS48I_binop_rm<0x39, "pminsd", X86smin, v4i32, VR128,
                                 memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
  defm PMINUD   : SS48I_binop_rm<0x3B, "pminud", X86umin, v4i32, VR128,
                                 memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
  defm PMINUW   : SS48I_binop_rm<0x3A, "pminuw", X86umin, v8i16, VR128,
                                 memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
  defm PMAXSB   : SS48I_binop_rm<0x3C, "pmaxsb", X86smax, v16i8, VR128,
                                 memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
  defm PMAXSD   : SS48I_binop_rm<0x3D, "pmaxsd", X86smax, v4i32, VR128,
                                 memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
  defm PMAXUD   : SS48I_binop_rm<0x3F, "pmaxud", X86umax, v4i32, VR128,
                                 memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
  defm PMAXUW   : SS48I_binop_rm<0x3E, "pmaxuw", X86umax, v8i16, VR128,
                                 memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
  // Result type (v2i64) differs from the source type (v4i32); passed as
  // commutable, with Is2Addr left at its default.
  defm PMULDQ   : SS48I_binop_rm2<0x28, "pmuldq", X86pmuldq, v2i64, v4i32,
                                  VR128, memopv2i64, i128mem,
                                  SSE_INTMUL_ITINS_P, 1>;
}
7146f4a2713aSLionel Sambuc
// 128-bit VEX-encoded VPMULLD and VPCMPEQQ.
//
// The memory forms match on loadv2i64, the same fragment the other
// [HasAVX, NoVLX] SS48I_binop_rm instantiations in this file use for VEX
// encodings, instead of the memopv2i64 fragment used by the non-VEX forms.
let Predicates = [HasAVX, NoVLX] in {
  defm VPMULLD  : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, VR128,
                                 loadv2i64, i128mem, 0, SSE_PMULLD_ITINS>,
                                 VEX_4V;
  defm VPCMPEQQ : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v2i64, VR128,
                                 loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
                                 VEX_4V;
}
// 256-bit VEX.L-encoded VPMULLD and VPCMPEQQ.
//
// The memory forms match on loadv4i64, the same fragment the other 256-bit
// SS48I_binop_rm instantiations in this file use for VEX encodings, instead
// of memopv4i64.
let Predicates = [HasAVX2] in {
  defm VPMULLDY  : SS48I_binop_rm<0x40, "vpmulld", mul, v8i32, VR256,
                                  loadv4i64, i256mem, 0, SSE_PMULLD_ITINS>,
                                  VEX_4V, VEX_L;
  defm VPCMPEQQY : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v4i64, VR256,
                                  loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
                                  VEX_4V, VEX_L;
}
7163f4a2713aSLionel Sambuc
// Non-VEX (two-address) PMULLD and PCMPEQQ; $src1 is tied to $dst and the
// memory forms match on memopv2i64.
let Constraints = "$src1 = $dst" in {
  defm PMULLD  : SS48I_binop_rm<0x40, "pmulld", mul, v4i32, VR128, memopv2i64,
                                i128mem, 1, SSE_PMULLD_ITINS>;

  defm PCMPEQQ : SS48I_binop_rm<0x29, "pcmpeqq", X86pcmpeq, v2i64, VR128,
                                memopv2i64, i128mem, 1, SSE_INTALUQ_ITINS_P>;
}
7170f4a2713aSLionel Sambuc
/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate
///   opc        - opcode byte.
///   OpcodeStr  - mnemonic placed in front of the operand string.
///   IntId      - intrinsic matched by both patterns.
///   RC         - register class of the destination and register sources.
///   memop_frag - load fragment matched (through a bitconvert) by "rmi".
///   x86memop   - memory operand type used by "rmi".
///   Is2Addr    - selects the asm string without $src1 when set, otherwise
///                the one that lists $src1.
///   itins      - supplies the rr/rm itinerary classes and the scheduling
///                class.
multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
                 Intrinsic IntId, RegisterClass RC, PatFrag memop_frag,
                 X86MemOperand x86memop, bit Is2Addr = 1,
                 OpndItins itins = DEFAULT_ITINS> {
  // Only the register/register form is marked commutable.
  let isCommutable = 1 in {
    def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst),
                      (ins RC:$src1, RC:$src2, i8imm:$src3),
                      !strconcat(OpcodeStr,
                        !if(Is2Addr,
                            "\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                            "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
                      [(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))],
                      itins.rr>,
              Sched<[itins.Sched]>;
  }

  def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
                    (ins RC:$src1, x86memop:$src2, i8imm:$src3),
                    !strconcat(OpcodeStr,
                      !if(Is2Addr,
                          "\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                          "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
                    [(set RC:$dst,
                       (IntId RC:$src1,
                              (bitconvert (memop_frag addr:$src2)),
                              imm:$src3))],
                    itins.rm>,
            Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
7198f4a2713aSLionel Sambuc
// VEX-encoded SSE4.1 binary operators with an 8-bit immediate.
let Predicates = [HasAVX] in {
  // Overrides the isCommutable value the multiclass sets on its rri form.
  let isCommutable = 0 in {
    defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw,
                                        VR128, loadv2i64, i128mem, 0,
                                        DEFAULT_ITINS_MPSADSCHED>, VEX_4V;
  }

  let ExeDomain = SSEPackedSingle in {
  defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps,
                                      VR128, loadv4f32, f128mem, 0,
                                      DEFAULT_ITINS_FBLENDSCHED>, VEX_4V;
  defm VBLENDPSY : SS41I_binop_rmi_int<0x0C, "vblendps",
                                  int_x86_avx_blend_ps_256, VR256, loadv8f32,
                                  f256mem, 0, DEFAULT_ITINS_FBLENDSCHED>,
                                  VEX_4V, VEX_L;
  }
  let ExeDomain = SSEPackedDouble in {
  defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd,
                                      VR128, loadv2f64, f128mem, 0,
                                      DEFAULT_ITINS_FBLENDSCHED>, VEX_4V;
  defm VBLENDPDY : SS41I_binop_rmi_int<0x0D, "vblendpd",
                                   int_x86_avx_blend_pd_256,VR256, loadv4f64,
                                   f256mem, 0, DEFAULT_ITINS_FBLENDSCHED>,
                                   VEX_4V, VEX_L;
  }
  defm VPBLENDW : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_sse41_pblendw,
                                      VR128, loadv2i64, i128mem, 0,
                                      DEFAULT_ITINS_BLENDSCHED>, VEX_4V;

  let ExeDomain = SSEPackedSingle in
  defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
                                   VR128, loadv4f32, f128mem, 0,
                                   SSE_DPPS_ITINS>, VEX_4V;
  // Uses the DPPD itineraries, matching the non-VEX DPPD definition (this
  // previously used SSE_DPPS_ITINS).
  let ExeDomain = SSEPackedDouble in
  defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd,
                                   VR128, loadv2f64, f128mem, 0,
                                   SSE_DPPD_ITINS>, VEX_4V;
  // f256mem (not i256mem) to go with the loadv8f32 fragment, as VBLENDPSY
  // does.
  let ExeDomain = SSEPackedSingle in
  defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256,
                                    VR256, loadv8f32, f256mem, 0,
                                    SSE_DPPS_ITINS>, VEX_4V, VEX_L;
}
7241f4a2713aSLionel Sambuc
// 256-bit VEX.L-encoded forms of MPSADBW and PBLENDW.
let Predicates = [HasAVX2] in {
  // Overrides the isCommutable value the multiclass sets on its rri form.
  let isCommutable = 0 in
  defm VMPSADBWY : SS41I_binop_rmi_int<0x42, "vmpsadbw",
                                       int_x86_avx2_mpsadbw, VR256, loadv4i64,
                                       i256mem, 0, DEFAULT_ITINS_MPSADSCHED>,
                   VEX_4V, VEX_L;

  defm VPBLENDWY : SS41I_binop_rmi_int<0x0E, "vpblendw",
                                       int_x86_avx2_pblendw, VR256, loadv4i64,
                                       i256mem, 0, DEFAULT_ITINS_BLENDSCHED>,
                   VEX_4V, VEX_L;
}
7252f4a2713aSLionel Sambuc
// Non-VEX (two-address) SSE4.1 binary operators with an 8-bit immediate;
// $src1 is tied to $dst and the memory forms use memop fragments.
let Constraints = "$src1 = $dst" in {
  // Overrides the isCommutable value the multiclass sets on its rri form.
  let isCommutable = 0 in
  defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw,
                                     VR128, memopv2i64, i128mem, 1,
                                     SSE_MPSADBW_ITINS>;

  let ExeDomain = SSEPackedSingle in {
    defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps,
                                       VR128, memopv4f32, f128mem, 1,
                                       SSE_INTALU_ITINS_FBLEND_P>;
  }

  let ExeDomain = SSEPackedDouble in {
    defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", int_x86_sse41_blendpd,
                                       VR128, memopv2f64, f128mem, 1,
                                       SSE_INTALU_ITINS_FBLEND_P>;
  }

  defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw,
                                     VR128, memopv2i64, i128mem, 1,
                                     SSE_INTALU_ITINS_BLEND_P>;

  let ExeDomain = SSEPackedSingle in {
    defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps,
                                    VR128, memopv4f32, f128mem, 1,
                                    SSE_DPPS_ITINS>;
  }

  let ExeDomain = SSEPackedDouble in {
    defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd,
                                    VR128, memopv2f64, f128mem, 1,
                                    SSE_DPPD_ITINS>;
  }
}
7279f4a2713aSLionel Sambuc
/// SS41I_quaternary_int_avx - AVX SSE 4.1 with 4 operators
/// Both forms take three sources; $src3 is a register operand in each.
///   opc       - opcode byte.
///   OpcodeStr - mnemonic placed in front of the operand string.
///   RC        - register class of the destination and register sources.
///   x86memop  - memory operand type used for $src2 in "rm".
///   mem_frag  - load fragment matched (through a bitconvert) by "rm".
///   IntId     - intrinsic matched by both patterns.
///   Sched     - scheduling class; "rm" uses its .Folded variant.
multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr,
                                    RegisterClass RC, X86MemOperand x86memop,
                                    PatFrag mem_frag, Intrinsic IntId,
                                    X86FoldableSchedWrite Sched> {
  def rr : Ii8<opc, MRMSrcReg, (outs RC:$dst),
               (ins RC:$src1, RC:$src2, RC:$src3),
               OpcodeStr #
                 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
               [(set RC:$dst, (IntId RC:$src1, RC:$src2, RC:$src3))],
               NoItinerary, SSEPackedInt>,
           TAPD, VEX_4V, VEX_I8IMM, Sched<[Sched]>;

  def rm : Ii8<opc, MRMSrcMem, (outs RC:$dst),
               (ins RC:$src1, x86memop:$src2, RC:$src3),
               OpcodeStr #
                 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
               [(set RC:$dst,
                  (IntId RC:$src1, (bitconvert (mem_frag addr:$src2)),
                         RC:$src3))],
               NoItinerary, SSEPackedInt>,
           TAPD, VEX_4V, VEX_I8IMM, Sched<[Sched.Folded, ReadAfterLd]>;
}
7303f4a2713aSLionel Sambuc
// AVX variable blends. Each defm expands SS41I_quaternary_int_avx into an
// rr and an rm record; the 256-bit ("Y") variants use VR256/f256mem and add
// VEX_L. The floating-point forms use WriteFVarBlend, the byte form uses
// WriteVarBlend.
let Predicates = [HasAVX] in {
let ExeDomain = SSEPackedDouble in {
defm VBLENDVPD  : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR128, f128mem,
                                           loadv2f64, int_x86_sse41_blendvpd,
                                           WriteFVarBlend>;
defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, f256mem,
                                  loadv4f64, int_x86_avx_blendv_pd_256,
                                  WriteFVarBlend>, VEX_L;
} // ExeDomain = SSEPackedDouble
let ExeDomain = SSEPackedSingle in {
defm VBLENDVPS  : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, f128mem,
                                           loadv4f32, int_x86_sse41_blendvps,
                                           WriteFVarBlend>;
defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, f256mem,
                                  loadv8f32, int_x86_avx_blendv_ps_256,
                                  WriteFVarBlend>, VEX_L;
} // ExeDomain = SSEPackedSingle
// Integer byte blend: no ExeDomain override is applied here.
defm VPBLENDVB  : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR128, i128mem,
                                           loadv2i64, int_x86_sse41_pblendvb,
                                           WriteVarBlend>;
}
7325f4a2713aSLionel Sambuc
// 256-bit variable byte blend; gated on HasAVX2 rather than HasAVX.
let Predicates = [HasAVX2] in {
defm VPBLENDVBY : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR256, i256mem,
                                      loadv4i64, int_x86_avx2_pblendvb,
                                      WriteVarBlend>, VEX_L;
}
7331f4a2713aSLionel Sambuc
// Selection patterns for AVX blends.
//
// vselect(mask, src1, src2) is emitted with the value operands swapped: the
// instruction receives $src2 as its first source, $src1 as its second and
// $mask as its third. 32-bit and 64-bit integer element types reuse the
// VBLENDVPS / VBLENDVPD records.
let Predicates = [HasAVX] in {
  def : Pat<(v16i8 (vselect (v16i8 VR128:$mask), (v16i8 VR128:$src1),
                            (v16i8 VR128:$src2))),
            (VPBLENDVBrr VR128:$src2, VR128:$src1, VR128:$mask)>;
  def : Pat<(v4i32 (vselect (v4i32 VR128:$mask), (v4i32 VR128:$src1),
                            (v4i32 VR128:$src2))),
            (VBLENDVPSrr VR128:$src2, VR128:$src1, VR128:$mask)>;
  def : Pat<(v4f32 (vselect (v4i32 VR128:$mask), (v4f32 VR128:$src1),
                            (v4f32 VR128:$src2))),
            (VBLENDVPSrr VR128:$src2, VR128:$src1, VR128:$mask)>;
  def : Pat<(v2i64 (vselect (v2i64 VR128:$mask), (v2i64 VR128:$src1),
                            (v2i64 VR128:$src2))),
            (VBLENDVPDrr VR128:$src2, VR128:$src1, VR128:$mask)>;
  def : Pat<(v2f64 (vselect (v2i64 VR128:$mask), (v2f64 VR128:$src1),
                            (v2f64 VR128:$src2))),
            (VBLENDVPDrr VR128:$src2, VR128:$src1, VR128:$mask)>;
  def : Pat<(v8i32 (vselect (v8i32 VR256:$mask), (v8i32 VR256:$src1),
                            (v8i32 VR256:$src2))),
            (VBLENDVPSYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
  def : Pat<(v8f32 (vselect (v8i32 VR256:$mask), (v8f32 VR256:$src1),
                            (v8f32 VR256:$src2))),
            (VBLENDVPSYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
  def : Pat<(v4i64 (vselect (v4i64 VR256:$mask), (v4i64 VR256:$src1),
                            (v4i64 VR256:$src2))),
            (VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
  def : Pat<(v4f64 (vselect (v4i64 VR256:$mask), (v4f64 VR256:$src1),
                            (v4f64 VR256:$src2))),
            (VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>;

  // Immediate blends: operand order and mask are passed through unchanged.
  def : Pat<(v8f32 (X86Blendi (v8f32 VR256:$src1), (v8f32 VR256:$src2),
                               (imm:$mask))),
            (VBLENDPSYrri VR256:$src1, VR256:$src2, imm:$mask)>;
  def : Pat<(v4f64 (X86Blendi (v4f64 VR256:$src1), (v4f64 VR256:$src2),
                               (imm:$mask))),
            (VBLENDPDYrri VR256:$src1, VR256:$src2, imm:$mask)>;

  def : Pat<(v8i16 (X86Blendi (v8i16 VR128:$src1), (v8i16 VR128:$src2),
                               (imm:$mask))),
            (VPBLENDWrri VR128:$src1, VR128:$src2, imm:$mask)>;
  def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$src1), (v4f32 VR128:$src2),
                               (imm:$mask))),
            (VBLENDPSrri VR128:$src1, VR128:$src2, imm:$mask)>;
  def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$src1), (v2f64 VR128:$src2),
                               (imm:$mask))),
            (VBLENDPDrri VR128:$src1, VR128:$src2, imm:$mask)>;
}
7378f4a2713aSLionel Sambuc
// 256-bit integer blends, available only with AVX2. As in the 128-bit
// vselect patterns, the value operands are swapped when emitting VPBLENDVB;
// the immediate word blend keeps its operand order.
let Predicates = [HasAVX2] in {
  def : Pat<(v32i8 (vselect (v32i8 VR256:$mask), (v32i8 VR256:$src1),
                            (v32i8 VR256:$src2))),
            (VPBLENDVBYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
  def : Pat<(v16i16 (X86Blendi (v16i16 VR256:$src1), (v16i16 VR256:$src2),
                               (imm:$mask))),
            (VPBLENDWYrri VR256:$src1, VR256:$src2, imm:$mask)>;
}
7387f4a2713aSLionel Sambuc
// Patterns
//
// X86vzmovl lowering under UseAVX: the result is built from a zeroed
// register (V_SET0 for 128-bit, AVX_SET0 for 256-bit) combined with $src
// either through VMOVSS/VMOVSD (scalar sources) or through an immediate
// blend (vector sources).
let Predicates = [UseAVX] in {
  // The patterns in this inner group are given AddedComplexity = 15.
  let AddedComplexity = 15 in {
  // Move scalar to XMM zero-extended, zeroing a VR128 then do a
  // MOVS{S,D} to the lower bits.
  def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
            (VMOVSSrr (v4f32 (V_SET0)), FR32:$src)>;
  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
            (VBLENDPSrri (v4f32 (V_SET0)), VR128:$src, (i8 1))>;
  // NOTE(review): the word blend uses mask 3 where the dword blends use 1 --
  // presumably two 16-bit lanes cover the same low 32 bits; confirm.
  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
            (VPBLENDWrri (v4i32 (V_SET0)), VR128:$src, (i8 3))>;
  def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
            (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)>;

  // Move low f32 and clear high bits.
  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256:$src))),
            (VBLENDPSYrri (v8f32 (AVX_SET0)), VR256:$src, (i8 1))>;
  // The v8i32 case reuses the single-precision blend record.
  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))),
            (VBLENDPSYrri (v8i32 (AVX_SET0)), VR256:$src, (i8 1))>;
  }

  // Scalar inserted at index 0 of an undef 256-bit vector: do the 128-bit
  // zeroing move and wrap the result with SUBREG_TO_REG on sub_xmm.
  def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
                   (v4f32 (scalar_to_vector FR32:$src)), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0),
                           (v4f32 (VMOVSSrr (v4f32 (V_SET0)), FR32:$src)),
                           sub_xmm)>;
  def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
                   (v2f64 (scalar_to_vector FR64:$src)), (iPTR 0)))),
            (SUBREG_TO_REG (i64 0),
                           (v2f64 (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)),
                           sub_xmm)>;

  // Move low f64 and clear high bits.
  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))),
            (VBLENDPDYrri (v4f64 (AVX_SET0)), VR256:$src, (i8 1))>;

  // The v4i64 case reuses the double-precision blend record.
  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256:$src))),
            (VBLENDPDYrri (v4i64 (AVX_SET0)), VR256:$src, (i8 1))>;
}
7427*0a6a1f1dSLionel Sambuc
// Non-VEX counterparts of the 128-bit X86vzmovl blend patterns: blend $src
// into a zeroed register (V_SET0) with an immediate mask.
let Predicates = [UseSSE41] in {
  // With SSE41 we can use blends for these patterns.
  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
            (BLENDPSrri (v4f32 (V_SET0)), VR128:$src, (i8 1))>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
            (PBLENDWrri (v4i32 (V_SET0)), VR128:$src, (i8 3))>;
  def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
            (BLENDPDrri (v2f64 (V_SET0)), VR128:$src, (i8 1))>;
}
7437*0a6a1f1dSLionel Sambuc
7438*0a6a1f1dSLionel Sambuc
/// SS41I_ternary_int - SSE 4.1 ternary operator
///
/// Two-address SSE4.1 form whose third operand is the implicit XMM0
/// register: the enclosing let adds Uses = [XMM0] and ties $src1 to $dst.
/// XMM0 appears in the selection patterns but not in the assembly string.
///
/// Records emitted per instantiation:
///   rr0 - MRMSrcReg form, $src2 in VR128.
///   rm0 - MRMSrcMem form, $src2 is an x86memop matched through mem_frag.
///
/// Parameters:
///   opc       - opcode byte handed to SS48I.
///   OpcodeStr - mnemonic prepended to the operand string.
///   mem_frag  - load fragment wrapped in bitconvert to match $src2.
///   x86memop  - memory operand type of $src2 in the rm0 form.
///   IntId     - intrinsic matched by both selection patterns.
///   itins     - itinerary/scheduling bundle (default DEFAULT_ITINS); rr0
///               uses itins.rr and itins.Sched, rm0 uses itins.rm and
///               itins.Sched.Folded with ReadAfterLd.
let Uses = [XMM0], Constraints = "$src1 = $dst" in {
  multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
                               X86MemOperand x86memop, Intrinsic IntId,
                               OpndItins itins = DEFAULT_ITINS> {
    def rr0 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
                    (ins VR128:$src1, VR128:$src2),
                    !strconcat(OpcodeStr,
                     "\t{$src2, $dst|$dst, $src2}"),
                    [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0))],
                    itins.rr>, Sched<[itins.Sched]>;

    def rm0 : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
                    (ins VR128:$src1, x86memop:$src2),
                    !strconcat(OpcodeStr,
                     "\t{$src2, $dst|$dst, $src2}"),
                    [(set VR128:$dst,
                      (IntId VR128:$src1,
                       (bitconvert (mem_frag addr:$src2)), XMM0))],
                       itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
7461f4a2713aSLionel Sambuc
// SSE4.1 variable blends with the implicit XMM0 operand. Each defm yields an
// rr0 and an rm0 record. The brace-less ExeDomain lets apply only to the
// defm that directly follows them; PBLENDVB gets no override here.
let ExeDomain = SSEPackedDouble in
defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", memopv2f64, f128mem,
                                  int_x86_sse41_blendvpd,
                                  DEFAULT_ITINS_FBLENDSCHED>;
let ExeDomain = SSEPackedSingle in
defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", memopv4f32, f128mem,
                                  int_x86_sse41_blendvps,
                                  DEFAULT_ITINS_FBLENDSCHED>;
defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", memopv2i64, i128mem,
                                  int_x86_sse41_pblendvb,
                                  DEFAULT_ITINS_VARBLENDSCHED>;
7473f4a2713aSLionel Sambuc
// Aliases with the implicit xmm0 argument
//
// Each alias spells xmm0 explicitly as an extra operand (first in AT&T
// order, last in Intel order) and maps to the matching rr0/rm0 record,
// whose own assembly string omits it.
def : InstAlias<"blendvpd\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}",
                (BLENDVPDrr0 VR128:$dst, VR128:$src2)>;
def : InstAlias<"blendvpd\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}",
                (BLENDVPDrm0 VR128:$dst, f128mem:$src2)>;
def : InstAlias<"blendvps\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}",
                (BLENDVPSrr0 VR128:$dst, VR128:$src2)>;
def : InstAlias<"blendvps\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}",
                (BLENDVPSrm0 VR128:$dst, f128mem:$src2)>;
def : InstAlias<"pblendvb\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}",
                (PBLENDVBrr0 VR128:$dst, VR128:$src2)>;
def : InstAlias<"pblendvb\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}",
                (PBLENDVBrm0 VR128:$dst, i128mem:$src2)>;
7487f4a2713aSLionel Sambuc
// Selection patterns for the non-VEX SSE4.1 blends.
//
// The vselect mask must be XMM0, matching the implicit operand of the
// rr0 records. The value operands are swapped on emission: the instruction
// receives $src2 first and $src1 second. 32-bit and 64-bit integer element
// types reuse the BLENDVPS / BLENDVPD records.
let Predicates = [UseSSE41] in {
  def : Pat<(v16i8 (vselect (v16i8 XMM0), (v16i8 VR128:$src1),
                            (v16i8 VR128:$src2))),
            (PBLENDVBrr0 VR128:$src2, VR128:$src1)>;
  def : Pat<(v4i32 (vselect (v4i32 XMM0), (v4i32 VR128:$src1),
                            (v4i32 VR128:$src2))),
            (BLENDVPSrr0 VR128:$src2, VR128:$src1)>;
  def : Pat<(v4f32 (vselect (v4i32 XMM0), (v4f32 VR128:$src1),
                            (v4f32 VR128:$src2))),
            (BLENDVPSrr0 VR128:$src2, VR128:$src1)>;
  def : Pat<(v2i64 (vselect (v2i64 XMM0), (v2i64 VR128:$src1),
                            (v2i64 VR128:$src2))),
            (BLENDVPDrr0 VR128:$src2, VR128:$src1)>;
  def : Pat<(v2f64 (vselect (v2i64 XMM0), (v2f64 VR128:$src1),
                            (v2f64 VR128:$src2))),
            (BLENDVPDrr0 VR128:$src2, VR128:$src1)>;

  // Immediate blends: operand order and mask are passed through unchanged.
  def : Pat<(v8i16 (X86Blendi (v8i16 VR128:$src1), (v8i16 VR128:$src2),
                               (imm:$mask))),
            (PBLENDWrri VR128:$src1, VR128:$src2, imm:$mask)>;
  def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$src1), (v4f32 VR128:$src2),
                               (imm:$mask))),
            (BLENDPSrri VR128:$src1, VR128:$src2, imm:$mask)>;
  def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$src1), (v2f64 VR128:$src2),
                               (imm:$mask))),
            (BLENDPDrri VR128:$src1, VR128:$src2, imm:$mask)>;

}
7516f4a2713aSLionel Sambuc
// MOVNTDQA loads (memory-source only), all scheduled as WriteLoad.
// The two brace-less Predicates lets each apply only to the def that
// directly follows; MOVNTDQArm gets no explicit predicate in this block.
let SchedRW = [WriteLoad] in {
let Predicates = [HasAVX] in
def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                       "vmovntdqa\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>,
                       VEX;
let Predicates = [HasAVX2] in
def VMOVNTDQAYrm : SS48I<0x2A, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
                         "vmovntdqa\t{$src, $dst|$dst, $src}",
                         [(set VR256:$dst, (int_x86_avx2_movntdqa addr:$src))]>,
                         VEX, VEX_L;
def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                       "movntdqa\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>;
} // SchedRW
7532f4a2713aSLionel Sambuc
//===----------------------------------------------------------------------===//
// SSE4.2 - Compare Instructions
//===----------------------------------------------------------------------===//
7536f4a2713aSLionel Sambuc
/// SS42I_binop_rm - Simple SSE 4.2 binary operator
///
/// Emits an rr (MRMSrcReg) and an rm (MRMSrcMem) SS428I record whose
/// patterns apply OpNode to $src1 and $src2 and type the result as OpVT.
///
/// Parameters:
///   opc        - opcode byte handed to SS428I.
///   OpcodeStr  - mnemonic prepended to the operand string.
///   OpNode     - SelectionDAG node matched by the patterns.
///   OpVT       - value type of the result.
///   RC         - register class of $dst and the register sources.
///   memop_frag - load fragment matching $src2 in the rm form.
///   x86memop   - memory operand type of $src2 in the rm form.
///   Is2Addr    - 1 (default) prints "$dst, $src2"; 0 prints the
///                three-operand "$dst, $src1, $src2" string.
///
/// NOTE(review): neither record names a scheduling class here -- confirm
/// whether one is expected.
multiclass SS42I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
                          X86MemOperand x86memop, bit Is2Addr = 1> {
  def rr : SS428I<opc, MRMSrcReg, (outs RC:$dst),
       (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>;
  def rm : SS428I<opc, MRMSrcMem, (outs RC:$dst),
       (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst,
         (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>;
}
7555f4a2713aSLionel Sambuc
// 64-bit packed greater-than compare (X86pcmpgt on v2i64 / v4i64).
// The VEX forms pass Is2Addr = 0 (three-operand syntax) and add VEX_4V; the
// 256-bit form also adds VEX_L and requires AVX2. The SSE form keeps the
// default two-address syntax and ties $src1 to $dst.
let Predicates = [HasAVX] in
  defm VPCMPGTQ : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v2i64, VR128,
                                 loadv2i64, i128mem, 0>, VEX_4V;

let Predicates = [HasAVX2] in
  defm VPCMPGTQY : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v4i64, VR256,
                                  loadv4i64, i256mem, 0>, VEX_4V, VEX_L;

let Constraints = "$src1 = $dst" in
  defm PCMPGTQ : SS42I_binop_rm<0x37, "pcmpgtq", X86pcmpgt, v2i64, VR128,
                                memopv2i64, i128mem>;
7567f4a2713aSLionel Sambuc
//===----------------------------------------------------------------------===//
// SSE4.2 - String/text Processing Instructions
//===----------------------------------------------------------------------===//
7571f4a2713aSLionel Sambuc
// Packed Compare Implicit Length Strings, Return Mask
//
// Pseudo records carrying the selection patterns for
// int_x86_sse42_pcmpistrm128. REG takes $src2 in a register; MEM takes it
// from memory via (bc_v16i8 (memopv2i64 ...)). The asm parameter is accepted
// but not referenced by either record.
multiclass pseudo_pcmpistrm<string asm> {
  def REG : PseudoI<(outs VR128:$dst),
                    (ins VR128:$src1, VR128:$src2, i8imm:$src3),
    [(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1, VR128:$src2,
                                                  imm:$src3))]>;
  def MEM : PseudoI<(outs VR128:$dst),
                    (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
    [(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1,
                       (bc_v16i8 (memopv2i64 addr:$src2)), imm:$src3))]>;
}

// The pseudos define EFLAGS and set usesCustomInserter; the VEX flavour
// requires HasAVX, the legacy one UseSSE42.
let Defs = [EFLAGS], usesCustomInserter = 1 in {
  defm VPCMPISTRM128 : pseudo_pcmpistrm<"#VPCMPISTRM128">, Requires<[HasAVX]>;
  defm PCMPISTRM128 : pseudo_pcmpistrm<"#PCMPISTRM128">, Requires<[UseSSE42]>;
}
7588f4a2713aSLionel Sambuc
// Encodable PCMPISTRM records (opcode 0x62). Both forms have an empty outs
// list and no selection pattern; the defined registers are declared through
// Defs at the instantiation below.
//   rr - register $src2, scheduled as WritePCmpIStrM.
//   rm - memory $src2 (mayLoad), scheduled as WritePCmpIStrMLd + ReadAfterLd.
multiclass pcmpistrm_SS42AI<string asm> {
  def rr : SS42AI<0x62, MRMSrcReg, (outs),
    (ins VR128:$src1, VR128:$src2, i8imm:$src3),
    !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
    []>, Sched<[WritePCmpIStrM]>;
  let mayLoad = 1 in
  def rm : SS42AI<0x62, MRMSrcMem, (outs),
    (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
    !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
    []>, Sched<[WritePCmpIStrMLd, ReadAfterLd]>;
}

// Both flavours define XMM0 and EFLAGS. The brace-less Predicates let
// applies only to the VEX defm that directly follows it.
let Defs = [XMM0, EFLAGS], hasSideEffects = 0 in {
  let Predicates = [HasAVX] in
  defm VPCMPISTRM128 : pcmpistrm_SS42AI<"vpcmpistrm">, VEX;
  defm PCMPISTRM128  : pcmpistrm_SS42AI<"pcmpistrm">;
}
7606f4a2713aSLionel Sambuc
// Packed Compare Explicit Length Strings, Return Mask
//
// Pseudo records carrying the selection patterns for
// int_x86_sse42_pcmpestrm128. EAX and EDX appear in the pattern as fixed
// registers rather than as ins operands, which is why the explicit operands
// are numbered $src1, $src3 and $src5. The asm parameter is accepted but
// not referenced by either record.
multiclass pseudo_pcmpestrm<string asm> {
  def REG : PseudoI<(outs VR128:$dst),
                    (ins VR128:$src1, VR128:$src3, i8imm:$src5),
    [(set VR128:$dst, (int_x86_sse42_pcmpestrm128
                       VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>;
  def MEM : PseudoI<(outs VR128:$dst),
                    (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
    [(set VR128:$dst, (int_x86_sse42_pcmpestrm128 VR128:$src1, EAX,
                       (bc_v16i8 (memopv2i64 addr:$src3)), EDX, imm:$src5))]>;
}

// The pseudos define EFLAGS, use EAX/EDX and set usesCustomInserter.
let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in {
  defm VPCMPESTRM128 : pseudo_pcmpestrm<"#VPCMPESTRM128">, Requires<[HasAVX]>;
  defm PCMPESTRM128 : pseudo_pcmpestrm<"#PCMPESTRM128">, Requires<[UseSSE42]>;
}
7623f4a2713aSLionel Sambuc
// Encodable PCMPESTRM records (opcode 0x60). Both forms have an empty outs
// list and no selection pattern; implicit registers are declared through
// Defs/Uses at the instantiation below.
//   rr - register $src3, scheduled as WritePCmpEStrM.
//   rm - memory $src3 (mayLoad), scheduled as WritePCmpEStrMLd + ReadAfterLd.
multiclass SS42AI_pcmpestrm<string asm> {
  def rr : SS42AI<0x60, MRMSrcReg, (outs),
    (ins VR128:$src1, VR128:$src3, i8imm:$src5),
    !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
    []>, Sched<[WritePCmpEStrM]>;
  let mayLoad = 1 in
  def rm : SS42AI<0x60, MRMSrcMem, (outs),
    (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
    !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
    []>, Sched<[WritePCmpEStrMLd, ReadAfterLd]>;
}

// Both flavours define XMM0 and EFLAGS and use EAX and EDX. The brace-less
// Predicates let applies only to the VEX defm that directly follows it.
let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in {
  let Predicates = [HasAVX] in
  defm VPCMPESTRM128 : SS42AI_pcmpestrm<"vpcmpestrm">, VEX;
  defm PCMPESTRM128 :  SS42AI_pcmpestrm<"pcmpestrm">;
}
7641f4a2713aSLionel Sambuc
// Packed Compare Implicit Length Strings, Return Index
//
// Pseudo records carrying the selection patterns for X86pcmpistri. The
// pattern sets both a GR32 result and EFLAGS. REG takes $src2 in a
// register; MEM takes it from memory via (bc_v16i8 (memopv2i64 ...)). The
// asm parameter is accepted but not referenced by either record.
multiclass pseudo_pcmpistri<string asm> {
  def REG : PseudoI<(outs GR32:$dst),
                    (ins VR128:$src1, VR128:$src2, i8imm:$src3),
    [(set GR32:$dst, EFLAGS,
      (X86pcmpistri VR128:$src1, VR128:$src2, imm:$src3))]>;
  def MEM : PseudoI<(outs GR32:$dst),
                    (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
    [(set GR32:$dst, EFLAGS, (X86pcmpistri VR128:$src1,
                              (bc_v16i8 (memopv2i64 addr:$src2)), imm:$src3))]>;
}

// The pseudos define EFLAGS and set usesCustomInserter; the VEX flavour
// requires HasAVX, the legacy one UseSSE42.
let Defs = [EFLAGS], usesCustomInserter = 1 in {
  defm VPCMPISTRI : pseudo_pcmpistri<"#VPCMPISTRI">, Requires<[HasAVX]>;
  defm PCMPISTRI  : pseudo_pcmpistri<"#PCMPISTRI">, Requires<[UseSSE42]>;
}
7658f4a2713aSLionel Sambuc
// Encodable PCMPISTRI records (opcode 0x63). Both forms have an empty outs
// list and no selection pattern; the defined registers are declared through
// Defs at the instantiation below.
//   rr - register $src2, scheduled as WritePCmpIStrI.
//   rm - memory $src2 (mayLoad), scheduled as WritePCmpIStrILd + ReadAfterLd.
multiclass SS42AI_pcmpistri<string asm> {
  def rr : SS42AI<0x63, MRMSrcReg, (outs),
    (ins VR128:$src1, VR128:$src2, i8imm:$src3),
    !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
    []>, Sched<[WritePCmpIStrI]>;
  let mayLoad = 1 in
  def rm : SS42AI<0x63, MRMSrcMem, (outs),
    (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
    !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
    []>, Sched<[WritePCmpIStrILd, ReadAfterLd]>;
}

// Both flavours define ECX and EFLAGS. The brace-less Predicates let
// applies only to the VEX defm that directly follows it.
let Defs = [ECX, EFLAGS], hasSideEffects = 0 in {
  let Predicates = [HasAVX] in
  defm VPCMPISTRI : SS42AI_pcmpistri<"vpcmpistri">, VEX;
  defm PCMPISTRI  : SS42AI_pcmpistri<"pcmpistri">;
}
7676f4a2713aSLionel Sambuc
// Packed Compare Explicit Length Strings, Return Index
//
// Pseudo records carrying the selection patterns for X86pcmpestri. The
// pattern sets both a GR32 result and EFLAGS. EAX and EDX appear in the
// pattern as fixed registers rather than as ins operands, which is why the
// explicit operands are numbered $src1, $src3 and $src5. The asm parameter
// is accepted but not referenced by either record.
multiclass pseudo_pcmpestri<string asm> {
  def REG : PseudoI<(outs GR32:$dst),
                    (ins VR128:$src1, VR128:$src3, i8imm:$src5),
    [(set GR32:$dst, EFLAGS,
      (X86pcmpestri VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>;
  def MEM : PseudoI<(outs GR32:$dst),
                    (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
    [(set GR32:$dst, EFLAGS,
      (X86pcmpestri VR128:$src1, EAX, (bc_v16i8 (memopv2i64 addr:$src3)), EDX,
       imm:$src5))]>;
}

// The pseudos define EFLAGS, use EAX/EDX and set usesCustomInserter.
let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in {
  defm VPCMPESTRI : pseudo_pcmpestri<"#VPCMPESTRI">, Requires<[HasAVX]>;
  defm PCMPESTRI  : pseudo_pcmpestri<"#PCMPESTRI">, Requires<[UseSSE42]>;
}
7694f4a2713aSLionel Sambuc
// Encodable PCMPESTRI records (opcode 0x61). Both forms have an empty outs
// list and no selection pattern; implicit registers are declared through
// Defs/Uses at the instantiation below.
//   rr - register $src3, scheduled as WritePCmpEStrI.
//   rm - memory $src3 (mayLoad), scheduled as WritePCmpEStrILd + ReadAfterLd.
multiclass SS42AI_pcmpestri<string asm> {
  def rr : SS42AI<0x61, MRMSrcReg, (outs),
    (ins VR128:$src1, VR128:$src3, i8imm:$src5),
    !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
    []>, Sched<[WritePCmpEStrI]>;
  let mayLoad = 1 in
  def rm : SS42AI<0x61, MRMSrcMem, (outs),
    (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
    !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
    []>, Sched<[WritePCmpEStrILd, ReadAfterLd]>;
}

// Both flavours define ECX and EFLAGS and use EAX and EDX. The brace-less
// Predicates let applies only to the VEX defm that directly follows it.
let Defs = [ECX, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in {
  let Predicates = [HasAVX] in
  defm VPCMPESTRI : SS42AI_pcmpestri<"vpcmpestri">, VEX;
  defm PCMPESTRI  : SS42AI_pcmpestri<"pcmpestri">;
}
7712f4a2713aSLionel Sambuc
//===----------------------------------------------------------------------===//
// SSE4.2 - CRC Instructions
//===----------------------------------------------------------------------===//

// No CRC instructions have AVX equivalents
7718f4a2713aSLionel Sambuc
// crc intrinsic instruction
// This set of instructions are only rm, the only difference is the size
// of r and m.
//
// SS42I_crc32r - register-source CRC32 form (MRMSrcReg).
//   opc   - opcode byte handed to SS42FI.
//   asm   - mnemonic prepended to the "$src1, $src2" operand string.
//   RCOut - register class of $dst and of the accumulator $src1.
//   RCIn  - register class of the data operand $src2.
//   Int   - operator matched by the selection pattern.
// Uses the IIC_CRC32_REG itinerary.
// NOTE(review): the scheduling class is WriteFAdd although both operands are
// general register classes at the visible uses -- confirm this is intended.
class SS42I_crc32r<bits<8> opc, string asm, RegisterClass RCOut,
                   RegisterClass RCIn, SDPatternOperator Int> :
  SS42FI<opc, MRMSrcReg, (outs RCOut:$dst), (ins RCOut:$src1, RCIn:$src2),
         !strconcat(asm, "\t{$src2, $src1|$src1, $src2}"),
         [(set RCOut:$dst, (Int RCOut:$src1, RCIn:$src2))], IIC_CRC32_REG>,
         Sched<[WriteFAdd]>;
7728f4a2713aSLionel Sambuc
// Memory-source form of crc32.
//   opc      - opcode byte.
//   asm      - mnemonic.
//   RCOut    - register class of $dst and of $src1.
//   x86memop - memory operand type of $src2.
//   Int      - pattern operator applied to ($src1, load of $src2) to
//              produce $dst.
class SS42I_crc32m<bits<8> opc, string asm, RegisterClass RCOut,
                   X86MemOperand x86memop, SDPatternOperator Int> :
  SS42FI<opc, MRMSrcMem, (outs RCOut:$dst), (ins RCOut:$src1, x86memop:$src2),
         !strconcat(asm, "\t{$src2, $src1|$src1, $src2}"),
         [(set RCOut:$dst, (Int RCOut:$src1, (load addr:$src2)))],
         IIC_CRC32_MEM>, Sched<[WriteFAddLd, ReadAfterLd]>;
7735f4a2713aSLionel Sambuc
// Concrete crc32 instructions.  $src1 is tied to $dst for all of them.
// Naming: CRC32r<dst width><r|m><source width>.
let Constraints = "$src1 = $dst" in {
  // 32-bit destination, 8-bit source (opcode 0xF0).
  def CRC32r32m8  : SS42I_crc32m<0xF0, "crc32{b}", GR32, i8mem,
                                 int_x86_sse42_crc32_32_8>;
  def CRC32r32r8  : SS42I_crc32r<0xF0, "crc32{b}", GR32, GR8,
                                 int_x86_sse42_crc32_32_8>;
  // 32-bit destination, 16-bit source (opcode 0xF1, OpSize16).
  def CRC32r32m16 : SS42I_crc32m<0xF1, "crc32{w}", GR32, i16mem,
                                 int_x86_sse42_crc32_32_16>, OpSize16;
  def CRC32r32r16 : SS42I_crc32r<0xF1, "crc32{w}", GR32, GR16,
                                 int_x86_sse42_crc32_32_16>, OpSize16;
  // 32-bit destination, 32-bit source (opcode 0xF1, OpSize32).
  def CRC32r32m32 : SS42I_crc32m<0xF1, "crc32{l}", GR32, i32mem,
                                 int_x86_sse42_crc32_32_32>, OpSize32;
  def CRC32r32r32 : SS42I_crc32r<0xF1, "crc32{l}", GR32, GR32,
                                 int_x86_sse42_crc32_32_32>, OpSize32;
  // 64-bit destination, 64-bit source (opcode 0xF1, REX.W).
  def CRC32r64m64 : SS42I_crc32m<0xF1, "crc32{q}", GR64, i64mem,
                                 int_x86_sse42_crc32_64_64>, REX_W;
  def CRC32r64r64 : SS42I_crc32r<0xF1, "crc32{q}", GR64, GR64,
                                 int_x86_sse42_crc32_64_64>, REX_W;
  // 64-bit destination, 8-bit source.  These use null_frag instead of an
  // intrinsic, so hasSideEffects and mayLoad are spelled out by hand.
  let hasSideEffects = 0 in {
    let mayLoad = 1 in
    def CRC32r64m8 : SS42I_crc32m<0xF0, "crc32{b}", GR64, i8mem,
                                   null_frag>, REX_W;
    def CRC32r64r8 : SS42I_crc32r<0xF0, "crc32{b}", GR64, GR8,
                                   null_frag>, REX_W;
  }
}
7761f4a2713aSLionel Sambuc
//===----------------------------------------------------------------------===//
// SHA-NI Instructions
//===----------------------------------------------------------------------===//

// Two-operand SHA instruction in register (rr) and memory (rm) forms.
//   Opc       - opcode byte (both forms are tagged T8).
//   OpcodeStr - mnemonic.
//   IntId     - intrinsic that the instruction is selected from.
//   UsesXMM0  - when set, XMM0 is passed to IntId as an extra third operand;
//               the assembly string is the same either way.
multiclass SHAI_binop<bits<8> Opc, string OpcodeStr, Intrinsic IntId,
                      bit UsesXMM0 = 0> {
  def rr : I<Opc, MRMSrcReg, (outs VR128:$dst),
             (ins VR128:$src1, VR128:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
             [!if(UsesXMM0,
                  (set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0)),
                  (set VR128:$dst, (IntId VR128:$src1, VR128:$src2)))]>, T8;

  // The memory operand is loaded as v2i64 and bitcast to v4i32 for IntId.
  def rm : I<Opc, MRMSrcMem, (outs VR128:$dst),
             (ins VR128:$src1, i128mem:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
             [!if(UsesXMM0,
                  (set VR128:$dst, (IntId VR128:$src1,
                    (bc_v4i32 (memopv2i64 addr:$src2)), XMM0)),
                  (set VR128:$dst, (IntId VR128:$src1,
                    (bc_v4i32 (memopv2i64 addr:$src2)))))]>, T8;
}
7784f4a2713aSLionel Sambuc
// SHA instruction definitions.  All are gated on HasSHA and tie $src1 to
// $dst.
let Constraints = "$src1 = $dst", Predicates = [HasSHA] in {
  // sha1rnds4 takes an extra 8-bit immediate, so it is defined directly
  // with Ii8 (tagged TA) instead of going through SHAI_binop.
  def SHA1RNDS4rri : Ii8<0xCC, MRMSrcReg, (outs VR128:$dst),
                         (ins VR128:$src1, VR128:$src2, i8imm:$src3),
                         "sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                         [(set VR128:$dst,
                           (int_x86_sha1rnds4 VR128:$src1, VR128:$src2,
                            (i8 imm:$src3)))]>, TA;
  def SHA1RNDS4rmi : Ii8<0xCC, MRMSrcMem, (outs VR128:$dst),
                         (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
                         "sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                         [(set VR128:$dst,
                           (int_x86_sha1rnds4 VR128:$src1,
                            (bc_v4i32 (memopv2i64 addr:$src2)),
                            (i8 imm:$src3)))]>, TA;

  defm SHA1NEXTE : SHAI_binop<0xC8, "sha1nexte", int_x86_sha1nexte>;
  defm SHA1MSG1  : SHAI_binop<0xC9, "sha1msg1", int_x86_sha1msg1>;
  defm SHA1MSG2  : SHAI_binop<0xCA, "sha1msg2", int_x86_sha1msg2>;

  // sha256rnds2 reads XMM0 implicitly: it is listed in Uses and passed to
  // the intrinsic through UsesXMM0 = 1.
  let Uses=[XMM0] in
  defm SHA256RNDS2 : SHAI_binop<0xCB, "sha256rnds2", int_x86_sha256rnds2, 1>;

  defm SHA256MSG1 : SHAI_binop<0xCC, "sha256msg1", int_x86_sha256msg1>;
  defm SHA256MSG2 : SHAI_binop<0xCD, "sha256msg2", int_x86_sha256msg2>;
}
7810f4a2713aSLionel Sambuc
// Aliases with explicit %xmm0
// These accept the three-operand spelling of sha256rnds2, in which xmm0 is
// written out, and map it onto the two-operand rr/rm definitions.
def : InstAlias<"sha256rnds2\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}",
                (SHA256RNDS2rr VR128:$dst, VR128:$src2)>;
def : InstAlias<"sha256rnds2\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}",
                (SHA256RNDS2rm VR128:$dst, i128mem:$src2)>;
7816f4a2713aSLionel Sambuc
//===----------------------------------------------------------------------===//
// AES-NI Instructions
//===----------------------------------------------------------------------===//

// Binary AES operation selected from an intrinsic, in register (rr) and
// memory (rm) forms.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   IntId128  - 128-bit intrinsic the instruction is selected from.
//   Is2Addr   - 1 selects the two-operand assembly string ($src2, $dst);
//               0 selects the three-operand string ($src2, $src1, $dst).
multiclass AESI_binop_rm_int<bits<8> opc, string OpcodeStr,
                              Intrinsic IntId128, bit Is2Addr = 1> {
  def rr : AES8I<opc, MRMSrcReg, (outs VR128:$dst),
       (ins VR128:$src1, VR128:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
       Sched<[WriteAESDecEnc]>;
  def rm : AES8I<opc, MRMSrcMem, (outs VR128:$dst),
       (ins VR128:$src1, i128mem:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set VR128:$dst,
         (IntId128 VR128:$src1, (memopv2i64 addr:$src2)))]>,
       Sched<[WriteAESDecEncLd, ReadAfterLd]>;
}
7839f4a2713aSLionel Sambuc
// Perform One Round of an AES Encryption/Decryption Flow
// VEX-encoded forms: three-operand syntax (Is2Addr = 0), VEX_4V, and gated
// on both HasAVX and HasAES.
let Predicates = [HasAVX, HasAES] in {
  defm VAESENC          : AESI_binop_rm_int<0xDC, "vaesenc",
                         int_x86_aesni_aesenc, 0>, VEX_4V;
  defm VAESENCLAST      : AESI_binop_rm_int<0xDD, "vaesenclast",
                         int_x86_aesni_aesenclast, 0>, VEX_4V;
  defm VAESDEC          : AESI_binop_rm_int<0xDE, "vaesdec",
                         int_x86_aesni_aesdec, 0>, VEX_4V;
  defm VAESDECLAST      : AESI_binop_rm_int<0xDF, "vaesdeclast",
                         int_x86_aesni_aesdeclast, 0>, VEX_4V;
}
7851f4a2713aSLionel Sambuc
// Legacy-encoded forms of the AES round instructions.  They use the default
// two-operand syntax (Is2Addr = 1) with $src1 tied to $dst.
let Constraints = "$src1 = $dst" in {
  defm AESENC          : AESI_binop_rm_int<0xDC, "aesenc",
                         int_x86_aesni_aesenc>;
  defm AESENCLAST      : AESI_binop_rm_int<0xDD, "aesenclast",
                         int_x86_aesni_aesenclast>;
  defm AESDEC          : AESI_binop_rm_int<0xDE, "aesdec",
                         int_x86_aesni_aesdec>;
  defm AESDECLAST      : AESI_binop_rm_int<0xDF, "aesdeclast",
                         int_x86_aesni_aesdeclast>;
}
7862f4a2713aSLionel Sambuc
// Perform the AES InvMixColumn Transformation
// VEX-encoded register and memory forms, gated on HasAVX and HasAES.  The
// memory form matches a loadv2i64 of the operand.
let Predicates = [HasAVX, HasAES] in {
  def VAESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
      (ins VR128:$src1),
      "vaesimc\t{$src1, $dst|$dst, $src1}",
      [(set VR128:$dst,
        (int_x86_aesni_aesimc VR128:$src1))]>, Sched<[WriteAESIMC]>,
      VEX;
  def VAESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
      (ins i128mem:$src1),
      "vaesimc\t{$src1, $dst|$dst, $src1}",
      [(set VR128:$dst, (int_x86_aesni_aesimc (loadv2i64 addr:$src1)))]>,
      Sched<[WriteAESIMCLd]>, VEX;
}
// Legacy-encoded aesimc, register and memory forms.  The memory form matches
// memopv2i64, where the VEX form matches loadv2i64.
def AESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
  (ins VR128:$src1),
  "aesimc\t{$src1, $dst|$dst, $src1}",
  [(set VR128:$dst,
    (int_x86_aesni_aesimc VR128:$src1))]>, Sched<[WriteAESIMC]>;
def AESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
  (ins i128mem:$src1),
  "aesimc\t{$src1, $dst|$dst, $src1}",
  [(set VR128:$dst, (int_x86_aesni_aesimc (memopv2i64 addr:$src1)))]>,
  Sched<[WriteAESIMCLd]>;
7887f4a2713aSLionel Sambuc
// AES Round Key Generation Assist
// VEX-encoded register and memory forms, gated on HasAVX and HasAES.
// $src2 is the 8-bit immediate passed to the intrinsic.
let Predicates = [HasAVX, HasAES] in {
  def VAESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
      (ins VR128:$src1, i8imm:$src2),
      "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(set VR128:$dst,
        (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>,
      Sched<[WriteAESKeyGen]>, VEX;
  def VAESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
      (ins i128mem:$src1, i8imm:$src2),
      "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(set VR128:$dst,
        (int_x86_aesni_aeskeygenassist (loadv2i64 addr:$src1), imm:$src2))]>,
      Sched<[WriteAESKeyGenLd]>, VEX;
}
// Legacy-encoded aeskeygenassist, register and memory forms.  The memory
// form matches memopv2i64, where the VEX form matches loadv2i64.
def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
  (ins VR128:$src1, i8imm:$src2),
  "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
  [(set VR128:$dst,
    (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>,
  Sched<[WriteAESKeyGen]>;
def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
  (ins i128mem:$src1, i8imm:$src2),
  "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
  [(set VR128:$dst,
    (int_x86_aesni_aeskeygenassist (memopv2i64 addr:$src1), imm:$src2))]>,
  Sched<[WriteAESKeyGenLd]>;
7915f4a2713aSLionel Sambuc
//===----------------------------------------------------------------------===//
// PCLMUL Instructions
//===----------------------------------------------------------------------===//

// AVX carry-less Multiplication instructions
// Three-operand forms with a separate $dst; $src3 is the 8-bit immediate
// passed to int_x86_pclmulqdq.
def VPCLMULQDQrr : AVXPCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
           (ins VR128:$src1, VR128:$src2, i8imm:$src3),
           "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
           [(set VR128:$dst,
             (int_x86_pclmulqdq VR128:$src1, VR128:$src2, imm:$src3))]>,
           Sched<[WriteCLMul]>;

def VPCLMULQDQrm : AVXPCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
           (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
           "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
           [(set VR128:$dst, (int_x86_pclmulqdq VR128:$src1,
                              (loadv2i64 addr:$src2), imm:$src3))]>,
           Sched<[WriteCLMulLd, ReadAfterLd]>;
7934f4a2713aSLionel Sambuc
// Carry-less Multiplication instructions
// Legacy two-operand forms: $src1 is tied to $dst and is not printed in the
// assembly string.
let Constraints = "$src1 = $dst" in {
def PCLMULQDQrr : PCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
           (ins VR128:$src1, VR128:$src2, i8imm:$src3),
           "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
           [(set VR128:$dst,
             (int_x86_pclmulqdq VR128:$src1, VR128:$src2, imm:$src3))],
             IIC_SSE_PCLMULQDQ_RR>, Sched<[WriteCLMul]>;

def PCLMULQDQrm : PCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
           (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
           "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
           [(set VR128:$dst, (int_x86_pclmulqdq VR128:$src1,
                              (memopv2i64 addr:$src2), imm:$src3))],
                              IIC_SSE_PCLMULQDQ_RM>,
           Sched<[WriteCLMulLd, ReadAfterLd]>;
} // Constraints = "$src1 = $dst"
7952f4a2713aSLionel Sambuc
7953f4a2713aSLionel Sambuc
// Assembler aliases of the form "pclmul<asm>dq" / "vpclmul<asm>dq" that
// expand to (V)PCLMULQDQ with the immediate operand fixed to immop.
//   asm   - infix inserted between "pclmul" and "dq" (e.g. "hqlq").
//   immop - immediate value supplied in place of $src3.
// NOTE(review): the trailing 0 passed to InstAlias is presumably its
// emit/printing flag; confirm against the InstAlias class definition.
multiclass pclmul_alias<string asm, int immop> {
  def : InstAlias<!strconcat("pclmul", asm, "dq {$src, $dst|$dst, $src}"),
                  (PCLMULQDQrr VR128:$dst, VR128:$src, immop), 0>;

  def : InstAlias<!strconcat("pclmul", asm, "dq {$src, $dst|$dst, $src}"),
                  (PCLMULQDQrm VR128:$dst, i128mem:$src, immop), 0>;

  def : InstAlias<!strconcat("vpclmul", asm,
                             "dq {$src2, $src1, $dst|$dst, $src1, $src2}"),
                  (VPCLMULQDQrr VR128:$dst, VR128:$src1, VR128:$src2, immop),
                  0>;

  def : InstAlias<!strconcat("vpclmul", asm,
                             "dq {$src2, $src1, $dst|$dst, $src1, $src2}"),
                  (VPCLMULQDQrm VR128:$dst, VR128:$src1, i128mem:$src2, immop),
                  0>;
}
// One alias set per quadword-selector spelling and its immediate.
defm : pclmul_alias<"hqhq", 0x11>;
defm : pclmul_alias<"hqlq", 0x01>;
defm : pclmul_alias<"lqhq", 0x10>;
defm : pclmul_alias<"lqlq", 0x00>;
7975f4a2713aSLionel Sambuc
//===----------------------------------------------------------------------===//
// SSE4A Instructions
//===----------------------------------------------------------------------===//

// Everything in this section is gated on HasSSE4A.
let Predicates = [HasSSE4A] in {

// extrq / insertq: $src is tied to $dst.  The immediate forms (EXTRQI,
// INSERTQI) take two 8-bit immediates, $len and $idx.
let Constraints = "$src = $dst" in {
def EXTRQI : Ii8<0x78, MRMXr, (outs VR128:$dst),
                 (ins VR128:$src, i8imm:$len, i8imm:$idx),
                 "extrq\t{$idx, $len, $src|$src, $len, $idx}",
                 [(set VR128:$dst, (int_x86_sse4a_extrqi VR128:$src, imm:$len,
                                    imm:$idx))]>, PD;
def EXTRQ  : I<0x79, MRMSrcReg, (outs VR128:$dst),
              (ins VR128:$src, VR128:$mask),
              "extrq\t{$mask, $src|$src, $mask}",
              [(set VR128:$dst, (int_x86_sse4a_extrq VR128:$src,
                                 VR128:$mask))]>, PD;

def INSERTQI : Ii8<0x78, MRMSrcReg, (outs VR128:$dst),
                   (ins VR128:$src, VR128:$src2, i8imm:$len, i8imm:$idx),
                   "insertq\t{$idx, $len, $src2, $src|$src, $src2, $len, $idx}",
                   [(set VR128:$dst, (int_x86_sse4a_insertqi VR128:$src,
                                      VR128:$src2, imm:$len, imm:$idx))]>, XD;
def INSERTQ  : I<0x79, MRMSrcReg, (outs VR128:$dst),
                 (ins VR128:$src, VR128:$mask),
                 "insertq\t{$mask, $src|$src, $mask}",
                 [(set VR128:$dst, (int_x86_sse4a_insertq VR128:$src,
                                    VR128:$mask))]>, XD;
}

// movntss / movntsd: store forms (MRMDestMem) with no outputs, selected from
// the sse4a movnt intrinsics.
def MOVNTSS : I<0x2B, MRMDestMem, (outs), (ins f32mem:$dst, VR128:$src),
                "movntss\t{$src, $dst|$dst, $src}",
                [(int_x86_sse4a_movnt_ss addr:$dst, VR128:$src)]>, XS;

def MOVNTSD : I<0x2B, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                "movntsd\t{$src, $dst|$dst, $src}",
                [(int_x86_sse4a_movnt_sd addr:$dst, VR128:$src)]>, XD;
}
8014f4a2713aSLionel Sambuc
//===----------------------------------------------------------------------===//
// AVX Instructions
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// VBROADCAST - Load from memory and broadcast to all elements of the
//              destination operand
//
// Memory-source broadcast selected from an intrinsic.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   RC        - destination register class.
//   x86memop  - memory operand type of $src.
//   Int       - intrinsic applied to the address of $src.
//   Sched     - scheduling write class; note that the parameter has the same
//               name as the Sched<> class it is passed to.
class avx_broadcast<bits<8> opc, string OpcodeStr, RegisterClass RC,
                    X86MemOperand x86memop, Intrinsic Int, SchedWrite Sched> :
  AVX8I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
        !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
        [(set RC:$dst, (Int addr:$src))]>, Sched<[Sched]>, VEX;
8028*0a6a1f1dSLionel Sambuc
// Memory-source broadcast selected from the X86VBroadcast node rather than
// from an intrinsic.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   RC        - destination register class.
//   x86memop  - memory operand type of $src.
//   VT        - result value type of the broadcast.
//   ld_frag   - load fragment applied to the address of $src.
//   Sched     - scheduling write class.
class avx_broadcast_no_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
                           X86MemOperand x86memop, ValueType VT,
                           PatFrag ld_frag, SchedWrite Sched> :
  AVX8I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
        !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
        [(set RC:$dst, (VT (X86VBroadcast (ld_frag addr:$src))))]>,
        Sched<[Sched]>, VEX {
    let mayLoad = 1;
}
8038f4a2713aSLionel Sambuc
// AVX2 adds register forms
// Register-source broadcast: the source is always a VR128 register and the
// result is produced by applying Int to it.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   RC        - destination register class.
//   Int       - intrinsic applied to $src.
//   Sched     - scheduling write class.
class avx2_broadcast_reg<bits<8> opc, string OpcodeStr, RegisterClass RC,
                         Intrinsic Int, SchedWrite Sched> :
  AVX28I<opc, MRMSrcReg, (outs RC:$dst), (ins VR128:$src),
         !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
         [(set RC:$dst, (Int VR128:$src))]>, Sched<[Sched]>, VEX;
8045f4a2713aSLionel Sambuc
// Memory-source broadcasts.  The scalar ss/sd forms are selected from
// X86VBroadcast of a scalar load; vbroadcastf128 is selected from the
// vbroadcastf128_pd_256 intrinsic.  256-bit destinations carry VEX_L.
let ExeDomain = SSEPackedSingle in {
  def VBROADCASTSSrm  : avx_broadcast_no_int<0x18, "vbroadcastss", VR128,
                                             f32mem, v4f32, loadf32, WriteLoad>;
  def VBROADCASTSSYrm : avx_broadcast_no_int<0x18, "vbroadcastss", VR256,
                                             f32mem, v8f32, loadf32,
                                             WriteFShuffleLd>, VEX_L;
}
let ExeDomain = SSEPackedDouble in
def VBROADCASTSDYrm  : avx_broadcast_no_int<0x19, "vbroadcastsd", VR256, f64mem,
                                    v4f64, loadf64, WriteFShuffleLd>, VEX_L;
def VBROADCASTF128 : avx_broadcast<0x1A, "vbroadcastf128", VR256, f128mem,
                                   int_x86_avx_vbroadcastf128_pd_256,
                                   WriteFShuffleLd>, VEX_L;
8059f4a2713aSLionel Sambuc
// Register-source broadcasts, selected from the avx2 vbroadcast intrinsics.
// 256-bit destinations carry VEX_L.
let ExeDomain = SSEPackedSingle in {
  def VBROADCASTSSrr  : avx2_broadcast_reg<0x18, "vbroadcastss", VR128,
                                           int_x86_avx2_vbroadcast_ss_ps,
                                           WriteFShuffle>;
  def VBROADCASTSSYrr : avx2_broadcast_reg<0x18, "vbroadcastss", VR256,
                                      int_x86_avx2_vbroadcast_ss_ps_256,
                                      WriteFShuffle256>, VEX_L;
}
let ExeDomain = SSEPackedDouble in
def VBROADCASTSDYrr  : avx2_broadcast_reg<0x19, "vbroadcastsd", VR256,
                                      int_x86_avx2_vbroadcast_sd_pd_256,
                                      WriteFShuffle256>, VEX_L;
8072f4a2713aSLionel Sambuc
// Integer 128-bit broadcast from memory into a 256-bit register; gated on
// HasAVX2 and selected from int_x86_avx2_vbroadcasti128.
let Predicates = [HasAVX2] in
def VBROADCASTI128 : avx_broadcast<0x5A, "vbroadcasti128", VR256, i128mem,
                                   int_x86_avx2_vbroadcasti128, WriteLoad>,
                                   VEX_L;
8077f4a2713aSLionel Sambuc
// VBROADCASTF128 itself is defined with the _pd_256 intrinsic; this pattern
// maps the _ps_256 intrinsic onto the same instruction.
let Predicates = [HasAVX] in
def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src),
          (VBROADCASTF128 addr:$src)>;
8081f4a2713aSLionel Sambuc
8082f4a2713aSLionel Sambuc
//===----------------------------------------------------------------------===//
// VINSERTF128 - Insert packed floating-point values
//
// Both forms have an empty pattern list; selection is done by the separate
// vinsert128_insert Pat definitions.  hasSideEffects (and mayLoad for the
// memory form) are therefore stated explicitly.
let hasSideEffects = 0, ExeDomain = SSEPackedSingle in {
def VINSERTF128rr : AVXAIi8<0x18, MRMSrcReg, (outs VR256:$dst),
          (ins VR256:$src1, VR128:$src2, i8imm:$src3),
          "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          []>, Sched<[WriteFShuffle]>, VEX_4V, VEX_L;
let mayLoad = 1 in
def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst),
          (ins VR256:$src1, f128mem:$src2, i8imm:$src3),
          "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          []>, Sched<[WriteFShuffleLd, ReadAfterLd]>, VEX_4V, VEX_L;
}
8097f4a2713aSLionel Sambuc
// Select vinsert128_insert on floating-point vectors (v8f32, v4f64) to
// VINSERTF128.  The instruction immediate is derived from the matched node
// ($ins) through INSERT_get_vinsert128_imm.
let Predicates = [HasAVX] in {
// Register source.
def : Pat<(vinsert128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2),
                                   (iPTR imm)),
          (VINSERTF128rr VR256:$src1, VR128:$src2,
                         (INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2),
                                   (iPTR imm)),
          (VINSERTF128rr VR256:$src1, VR128:$src2,
                         (INSERT_get_vinsert128_imm VR256:$ins))>;

// Memory source: the inserted half comes from a load.
def : Pat<(vinsert128_insert:$ins (v8f32 VR256:$src1), (loadv4f32 addr:$src2),
                                   (iPTR imm)),
          (VINSERTF128rm VR256:$src1, addr:$src2,
                         (INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (v4f64 VR256:$src1), (loadv2f64 addr:$src2),
                                   (iPTR imm)),
          (VINSERTF128rm VR256:$src1, addr:$src2,
                         (INSERT_get_vinsert128_imm VR256:$ins))>;
}
8117f4a2713aSLionel Sambuc
// Combine two consecutive 16-byte loads with a common destination register into
// one 32-byte load to that register.
// Each pattern matches a 256-bit vector whose low half is a load from $src
// (inserted into undef at index 0) and whose high half is a load from
// $src + 16 (inserted at the element index where the upper half begins).
// It is replaced with a single unaligned 256-bit load.  Gated on HasAVX and
// HasFastMem32.
let Predicates = [HasAVX, HasFastMem32] in {
  // v8f32: upper half starts at element 4.
  def : Pat<(insert_subvector
              (v8f32 (insert_subvector undef, (loadv4f32 addr:$src), (iPTR 0))),
              (loadv4f32 (add addr:$src, (iPTR 16))),
              (iPTR 4)),
            (VMOVUPSYrm addr:$src)>;

  // v4f64: upper half starts at element 2.
  def : Pat<(insert_subvector
              (v4f64 (insert_subvector undef, (loadv2f64 addr:$src), (iPTR 0))),
              (loadv2f64 (add addr:$src, (iPTR 16))),
              (iPTR 2)),
            (VMOVUPDYrm addr:$src)>;

  // v32i8: upper half starts at element 16.
  def : Pat<(insert_subvector
              (v32i8 (insert_subvector
                undef, (bc_v16i8 (loadv2i64 addr:$src)), (iPTR 0))),
              (bc_v16i8 (loadv2i64 (add addr:$src, (iPTR 16)))),
              (iPTR 16)),
            (VMOVDQUYrm addr:$src)>;

  // v16i16: upper half starts at element 8.
  def : Pat<(insert_subvector
              (v16i16 (insert_subvector
                undef, (bc_v8i16 (loadv2i64 addr:$src)), (iPTR 0))),
              (bc_v8i16 (loadv2i64 (add addr:$src, (iPTR 16)))),
              (iPTR 8)),
            (VMOVDQUYrm addr:$src)>;

  // v8i32: upper half starts at element 4.
  def : Pat<(insert_subvector
              (v8i32 (insert_subvector
                undef, (bc_v4i32 (loadv2i64 addr:$src)), (iPTR 0))),
              (bc_v4i32 (loadv2i64 (add addr:$src, (iPTR 16)))),
              (iPTR 4)),
            (VMOVDQUYrm addr:$src)>;

  // v4i64: upper half starts at element 2.
  def : Pat<(insert_subvector
              (v4i64 (insert_subvector undef, (loadv2i64 addr:$src), (iPTR 0))),
              (loadv2i64 (add addr:$src, (iPTR 16))),
              (iPTR 2)),
            (VMOVDQUYrm addr:$src)>;
}
8160*0a6a1f1dSLionel Sambuc
// Integer-typed 128-bit subvector insertions, restricted to HasAVX1Only, are
// selected to the VINSERTF128 instruction forms. The instruction immediate is
// produced from the matched node ($ins) by INSERT_get_vinsert128_imm.
// NOTE(review): presumably targets with AVX2 use an integer-domain insert
// defined elsewhere in the file - confirm.
8161f4a2713aSLionel Sambuclet Predicates = [HasAVX1Only] in {
// Register source: one pattern per integer element type.
8162f4a2713aSLionel Sambucdef : Pat<(vinsert128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
8163f4a2713aSLionel Sambuc                                   (iPTR imm)),
8164f4a2713aSLionel Sambuc          (VINSERTF128rr VR256:$src1, VR128:$src2,
8165f4a2713aSLionel Sambuc                         (INSERT_get_vinsert128_imm VR256:$ins))>;
8166f4a2713aSLionel Sambucdef : Pat<(vinsert128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2),
8167f4a2713aSLionel Sambuc                                   (iPTR imm)),
8168f4a2713aSLionel Sambuc          (VINSERTF128rr VR256:$src1, VR128:$src2,
8169f4a2713aSLionel Sambuc                         (INSERT_get_vinsert128_imm VR256:$ins))>;
8170f4a2713aSLionel Sambucdef : Pat<(vinsert128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2),
8171f4a2713aSLionel Sambuc                                   (iPTR imm)),
8172f4a2713aSLionel Sambuc          (VINSERTF128rr VR256:$src1, VR128:$src2,
8173f4a2713aSLionel Sambuc                         (INSERT_get_vinsert128_imm VR256:$ins))>;
8174f4a2713aSLionel Sambucdef : Pat<(vinsert128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
8175f4a2713aSLionel Sambuc                                   (iPTR imm)),
8176f4a2713aSLionel Sambuc          (VINSERTF128rr VR256:$src1, VR128:$src2,
8177f4a2713aSLionel Sambuc                         (INSERT_get_vinsert128_imm VR256:$ins))>;
8178f4a2713aSLionel Sambuc
// Memory source: the inserted half is a loadv2i64 (wrapped in the matching
// bc_* fragment for non-v2i64 element types) and is folded into VINSERTF128rm.
8179f4a2713aSLionel Sambucdef : Pat<(vinsert128_insert:$ins (v4i64 VR256:$src1), (loadv2i64 addr:$src2),
8180f4a2713aSLionel Sambuc                                   (iPTR imm)),
8181f4a2713aSLionel Sambuc          (VINSERTF128rm VR256:$src1, addr:$src2,
8182f4a2713aSLionel Sambuc                         (INSERT_get_vinsert128_imm VR256:$ins))>;
8183f4a2713aSLionel Sambucdef : Pat<(vinsert128_insert:$ins (v8i32 VR256:$src1),
8184f4a2713aSLionel Sambuc                                   (bc_v4i32 (loadv2i64 addr:$src2)),
8185f4a2713aSLionel Sambuc                                   (iPTR imm)),
8186f4a2713aSLionel Sambuc          (VINSERTF128rm VR256:$src1, addr:$src2,
8187f4a2713aSLionel Sambuc                         (INSERT_get_vinsert128_imm VR256:$ins))>;
8188f4a2713aSLionel Sambucdef : Pat<(vinsert128_insert:$ins (v32i8 VR256:$src1),
8189f4a2713aSLionel Sambuc                                   (bc_v16i8 (loadv2i64 addr:$src2)),
8190f4a2713aSLionel Sambuc                                   (iPTR imm)),
8191f4a2713aSLionel Sambuc          (VINSERTF128rm VR256:$src1, addr:$src2,
8192f4a2713aSLionel Sambuc                         (INSERT_get_vinsert128_imm VR256:$ins))>;
8193f4a2713aSLionel Sambucdef : Pat<(vinsert128_insert:$ins (v16i16 VR256:$src1),
8194f4a2713aSLionel Sambuc                                   (bc_v8i16 (loadv2i64 addr:$src2)),
8195f4a2713aSLionel Sambuc                                   (iPTR imm)),
8196f4a2713aSLionel Sambuc          (VINSERTF128rm VR256:$src1, addr:$src2,
8197f4a2713aSLionel Sambuc                         (INSERT_get_vinsert128_imm VR256:$ins))>;
8198f4a2713aSLionel Sambuc}
8199f4a2713aSLionel Sambuc
8200f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===//
8201f4a2713aSLionel Sambuc// VEXTRACTF128 - Extract packed floating-point values
8202f4a2713aSLionel Sambuc//
// Register-destination (rr) and memory-destination (mr) forms of vextractf128.
// Both are declared with an empty pattern list; instruction selection for them
// is provided by separate Pat records in this file. hasSideEffects is cleared
// explicitly for both forms.
8203*0a6a1f1dSLionel Sambuclet hasSideEffects = 0, ExeDomain = SSEPackedSingle in {
8204f4a2713aSLionel Sambucdef VEXTRACTF128rr : AVXAIi8<0x19, MRMDestReg, (outs VR128:$dst),
8205f4a2713aSLionel Sambuc          (ins VR256:$src1, i8imm:$src2),
8206f4a2713aSLionel Sambuc          "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
8207*0a6a1f1dSLionel Sambuc          []>, Sched<[WriteFShuffle]>, VEX, VEX_L;
// The store form has no outs; $dst is a memory operand in the ins list, and
// mayStore is set by hand since the pattern list is empty.
8208f4a2713aSLionel Sambuclet mayStore = 1 in
8209f4a2713aSLionel Sambucdef VEXTRACTF128mr : AVXAIi8<0x19, MRMDestMem, (outs),
8210f4a2713aSLionel Sambuc          (ins f128mem:$dst, VR256:$src1, i8imm:$src2),
8211f4a2713aSLionel Sambuc          "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
8212*0a6a1f1dSLionel Sambuc          []>, Sched<[WriteStore]>, VEX, VEX_L;
8213f4a2713aSLionel Sambuc}
8214f4a2713aSLionel Sambuc
8215f4a2713aSLionel Sambuc// AVX1 patterns
// Floating-point 128-bit subvector extraction for any AVX target.
// The source side of the register patterns is untyped; the vector types
// (v8f32 -> v4f32, v4f64 -> v2f64) are written on the output dag. The
// instruction immediate is derived from the matched node ($ext) by
// EXTRACT_get_vextract128_imm.
8216f4a2713aSLionel Sambuclet Predicates = [HasAVX] in {
8217f4a2713aSLionel Sambucdef : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
8218f4a2713aSLionel Sambuc          (v4f32 (VEXTRACTF128rr
8219f4a2713aSLionel Sambuc                    (v8f32 VR256:$src1),
8220f4a2713aSLionel Sambuc                    (EXTRACT_get_vextract128_imm VR128:$ext)))>;
8221f4a2713aSLionel Sambucdef : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
8222f4a2713aSLionel Sambuc          (v2f64 (VEXTRACTF128rr
8223f4a2713aSLionel Sambuc                    (v4f64 VR256:$src1),
8224f4a2713aSLionel Sambuc                    (EXTRACT_get_vextract128_imm VR128:$ext)))>;
8225f4a2713aSLionel Sambuc
// An extract whose result is stored is folded into the memory-destination
// form. These patterns match the plain `store` fragment.
8226f4a2713aSLionel Sambucdef : Pat<(store (v4f32 (vextract128_extract:$ext (v8f32 VR256:$src1),
8227f4a2713aSLionel Sambuc                         (iPTR imm))), addr:$dst),
8228f4a2713aSLionel Sambuc          (VEXTRACTF128mr addr:$dst, VR256:$src1,
8229f4a2713aSLionel Sambuc           (EXTRACT_get_vextract128_imm VR128:$ext))>;
8230f4a2713aSLionel Sambucdef : Pat<(store (v2f64 (vextract128_extract:$ext (v4f64 VR256:$src1),
8231f4a2713aSLionel Sambuc                         (iPTR imm))), addr:$dst),
8232f4a2713aSLionel Sambuc          (VEXTRACTF128mr addr:$dst, VR256:$src1,
8233f4a2713aSLionel Sambuc           (EXTRACT_get_vextract128_imm VR128:$ext))>;
8234f4a2713aSLionel Sambuc}
8235f4a2713aSLionel Sambuc
// Integer 128-bit subvector extraction, restricted to HasAVX1Only, reuses the
// VEXTRACTF128 instruction forms. One register pattern per integer element
// type; the types are given on the output dag.
8236f4a2713aSLionel Sambuclet Predicates = [HasAVX1Only] in {
8237f4a2713aSLionel Sambucdef : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
8238f4a2713aSLionel Sambuc          (v2i64 (VEXTRACTF128rr
8239f4a2713aSLionel Sambuc                  (v4i64 VR256:$src1),
8240f4a2713aSLionel Sambuc                  (EXTRACT_get_vextract128_imm VR128:$ext)))>;
8241f4a2713aSLionel Sambucdef : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
8242f4a2713aSLionel Sambuc          (v4i32 (VEXTRACTF128rr
8243f4a2713aSLionel Sambuc                  (v8i32 VR256:$src1),
8244f4a2713aSLionel Sambuc                  (EXTRACT_get_vextract128_imm VR128:$ext)))>;
8245f4a2713aSLionel Sambucdef : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
8246f4a2713aSLionel Sambuc          (v8i16 (VEXTRACTF128rr
8247f4a2713aSLionel Sambuc                  (v16i16 VR256:$src1),
8248f4a2713aSLionel Sambuc                  (EXTRACT_get_vextract128_imm VR128:$ext)))>;
8249f4a2713aSLionel Sambucdef : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
8250f4a2713aSLionel Sambuc          (v16i8 (VEXTRACTF128rr
8251f4a2713aSLionel Sambuc                  (v32i8 VR256:$src1),
8252f4a2713aSLionel Sambuc                  (EXTRACT_get_vextract128_imm VR128:$ext)))>;
8253f4a2713aSLionel Sambuc
// Extract-and-store folding for the integer types. These match `alignedstore`,
// whereas the floating-point store patterns under HasAVX match plain `store`.
// NOTE(review): the reason for the alignment restriction is not visible in this
// excerpt - confirm it is intentional.
8254f4a2713aSLionel Sambucdef : Pat<(alignedstore (v2i64 (vextract128_extract:$ext (v4i64 VR256:$src1),
8255f4a2713aSLionel Sambuc                                (iPTR imm))), addr:$dst),
8256f4a2713aSLionel Sambuc          (VEXTRACTF128mr addr:$dst, VR256:$src1,
8257f4a2713aSLionel Sambuc           (EXTRACT_get_vextract128_imm VR128:$ext))>;
8258f4a2713aSLionel Sambucdef : Pat<(alignedstore (v4i32 (vextract128_extract:$ext (v8i32 VR256:$src1),
8259f4a2713aSLionel Sambuc                                (iPTR imm))), addr:$dst),
8260f4a2713aSLionel Sambuc          (VEXTRACTF128mr addr:$dst, VR256:$src1,
8261f4a2713aSLionel Sambuc           (EXTRACT_get_vextract128_imm VR128:$ext))>;
8262f4a2713aSLionel Sambucdef : Pat<(alignedstore (v8i16 (vextract128_extract:$ext (v16i16 VR256:$src1),
8263f4a2713aSLionel Sambuc                                (iPTR imm))), addr:$dst),
8264f4a2713aSLionel Sambuc          (VEXTRACTF128mr addr:$dst, VR256:$src1,
8265f4a2713aSLionel Sambuc           (EXTRACT_get_vextract128_imm VR128:$ext))>;
8266f4a2713aSLionel Sambucdef : Pat<(alignedstore (v16i8 (vextract128_extract:$ext (v32i8 VR256:$src1),
8267f4a2713aSLionel Sambuc                                (iPTR imm))), addr:$dst),
8268f4a2713aSLionel Sambuc          (VEXTRACTF128mr addr:$dst, VR256:$src1,
8269f4a2713aSLionel Sambuc           (EXTRACT_get_vextract128_imm VR128:$ext))>;
8270f4a2713aSLionel Sambuc}
8271f4a2713aSLionel Sambuc
8272f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===//
8273f4a2713aSLionel Sambuc// VMASKMOV - Conditional SIMD Packed Loads and Stores
8274f4a2713aSLionel Sambuc//
// avx_movmask_rm - Defines the four forms of one VMASKMOV instruction:
//   rm / Yrm : 128-bit / 256-bit load forms, opcode opc_rm, selected from the
//              IntLd / IntLd256 intrinsics.
//   mr / Ymr : 128-bit / 256-bit store forms, opcode opc_mr, selected from the
//              IntSt / IntSt256 intrinsics.
// The intrinsics take the address first; the instruction operand order keeps
// $src1 ahead of the memory operand for loads. $src1 is presumably the mask
// register - confirm against the intrinsic definitions.
// NOTE(review): unlike the neighbouring VEXTRACTF128 and VPERMIL definitions,
// none of these defs carries a Sched<> annotation - confirm that is intended.
8275f4a2713aSLionel Sambucmulticlass avx_movmask_rm<bits<8> opc_rm, bits<8> opc_mr, string OpcodeStr,
8276f4a2713aSLionel Sambuc                          Intrinsic IntLd, Intrinsic IntLd256,
8277f4a2713aSLionel Sambuc                          Intrinsic IntSt, Intrinsic IntSt256> {
8278f4a2713aSLionel Sambuc  def rm  : AVX8I<opc_rm, MRMSrcMem, (outs VR128:$dst),
8279f4a2713aSLionel Sambuc             (ins VR128:$src1, f128mem:$src2),
8280f4a2713aSLionel Sambuc             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
8281f4a2713aSLionel Sambuc             [(set VR128:$dst, (IntLd addr:$src2, VR128:$src1))]>,
8282f4a2713aSLionel Sambuc             VEX_4V;
8283f4a2713aSLionel Sambuc  def Yrm : AVX8I<opc_rm, MRMSrcMem, (outs VR256:$dst),
8284f4a2713aSLionel Sambuc             (ins VR256:$src1, f256mem:$src2),
8285f4a2713aSLionel Sambuc             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
8286f4a2713aSLionel Sambuc             [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>,
8287f4a2713aSLionel Sambuc             VEX_4V, VEX_L;
8288f4a2713aSLionel Sambuc  def mr  : AVX8I<opc_mr, MRMDestMem, (outs),
8289f4a2713aSLionel Sambuc             (ins f128mem:$dst, VR128:$src1, VR128:$src2),
8290f4a2713aSLionel Sambuc             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
8291f4a2713aSLionel Sambuc             [(IntSt addr:$dst, VR128:$src1, VR128:$src2)]>, VEX_4V;
8292f4a2713aSLionel Sambuc  def Ymr : AVX8I<opc_mr, MRMDestMem, (outs),
8293f4a2713aSLionel Sambuc             (ins f256mem:$dst, VR256:$src1, VR256:$src2),
8294f4a2713aSLionel Sambuc             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
8295f4a2713aSLionel Sambuc             [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, VEX_4V, VEX_L;
8296f4a2713aSLionel Sambuc}
8297f4a2713aSLionel Sambuc
// Instantiate avx_movmask_rm for the single- and double-precision variants.
// Load opcodes are 0x2C / 0x2D, store opcodes 0x2E / 0x2F; each variant is
// placed in the matching execution domain and bound to its maskload /
// maskstore intrinsics.
8298f4a2713aSLionel Sambuclet ExeDomain = SSEPackedSingle in
8299f4a2713aSLionel Sambucdefm VMASKMOVPS : avx_movmask_rm<0x2C, 0x2E, "vmaskmovps",
8300f4a2713aSLionel Sambuc                                 int_x86_avx_maskload_ps,
8301f4a2713aSLionel Sambuc                                 int_x86_avx_maskload_ps_256,
8302f4a2713aSLionel Sambuc                                 int_x86_avx_maskstore_ps,
8303f4a2713aSLionel Sambuc                                 int_x86_avx_maskstore_ps_256>;
8304f4a2713aSLionel Sambuclet ExeDomain = SSEPackedDouble in
8305f4a2713aSLionel Sambucdefm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd",
8306f4a2713aSLionel Sambuc                                 int_x86_avx_maskload_pd,
8307f4a2713aSLionel Sambuc                                 int_x86_avx_maskload_pd_256,
8308f4a2713aSLionel Sambuc                                 int_x86_avx_maskstore_pd,
8309f4a2713aSLionel Sambuc                                 int_x86_avx_maskstore_pd_256>;
8310f4a2713aSLionel Sambuc
8311f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===//
8312f4a2713aSLionel Sambuc// VPERMIL - Permute Single and Double Floating-Point Values
8313f4a2713aSLionel Sambuc//
// avx_permil - Defines the four forms of one VPERMILPS/VPERMILPD variant.
//   opc_rm     : opcode of the variable-control forms (rr, rm).
//   opc_rmi    : opcode of the immediate-control forms (ri, mi).
//   RC         : register class for data, control and result.
//   x86memop_f : memory operand type for the data operand of `mi`.
//   x86memop_i : memory operand type for the control operand of `rm`.
//   i_frag     : load fragment for the control vector of `rm`; its result is
//                passed through bitconvert.
//   IntVar     : intrinsic matched by the variable-control forms.
//   vt         : result value type used for the X86VPermilpi patterns.
// rr/rm are VEX_4V (two sources); ri/mi are VEX (one source plus imm8).
8314f4a2713aSLionel Sambucmulticlass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
8315f4a2713aSLionel Sambuc                      RegisterClass RC, X86MemOperand x86memop_f,
8316f4a2713aSLionel Sambuc                      X86MemOperand x86memop_i, PatFrag i_frag,
8317f4a2713aSLionel Sambuc                      Intrinsic IntVar, ValueType vt> {
8318f4a2713aSLionel Sambuc  def rr  : AVX8I<opc_rm, MRMSrcReg, (outs RC:$dst),
8319f4a2713aSLionel Sambuc             (ins RC:$src1, RC:$src2),
8320f4a2713aSLionel Sambuc             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
8321*0a6a1f1dSLionel Sambuc             [(set RC:$dst, (IntVar RC:$src1, RC:$src2))]>, VEX_4V,
8322*0a6a1f1dSLionel Sambuc             Sched<[WriteFShuffle]>;
8323f4a2713aSLionel Sambuc  def rm  : AVX8I<opc_rm, MRMSrcMem, (outs RC:$dst),
8324f4a2713aSLionel Sambuc             (ins RC:$src1, x86memop_i:$src2),
8325f4a2713aSLionel Sambuc             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
8326f4a2713aSLionel Sambuc             [(set RC:$dst, (IntVar RC:$src1,
8327*0a6a1f1dSLionel Sambuc                             (bitconvert (i_frag addr:$src2))))]>, VEX_4V,
8328*0a6a1f1dSLionel Sambuc             Sched<[WriteFShuffleLd, ReadAfterLd]>;
8329f4a2713aSLionel Sambuc
// Immediate-control forms: matched from the X86VPermilpi node with result
// type vt rather than from an intrinsic.
8330f4a2713aSLionel Sambuc  def ri  : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst),
8331f4a2713aSLionel Sambuc             (ins RC:$src1, i8imm:$src2),
8332f4a2713aSLionel Sambuc             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
8333*0a6a1f1dSLionel Sambuc             [(set RC:$dst, (vt (X86VPermilpi RC:$src1, (i8 imm:$src2))))]>, VEX,
8334*0a6a1f1dSLionel Sambuc             Sched<[WriteFShuffle]>;
8335f4a2713aSLionel Sambuc  def mi  : AVXAIi8<opc_rmi, MRMSrcMem, (outs RC:$dst),
8336f4a2713aSLionel Sambuc             (ins x86memop_f:$src1, i8imm:$src2),
8337f4a2713aSLionel Sambuc             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
8338f4a2713aSLionel Sambuc             [(set RC:$dst,
8339*0a6a1f1dSLionel Sambuc               (vt (X86VPermilpi (memop addr:$src1), (i8 imm:$src2))))]>, VEX,
8340*0a6a1f1dSLionel Sambuc             Sched<[WriteFShuffleLd]>;
8341f4a2713aSLionel Sambuc}
8342f4a2713aSLionel Sambuc
// Instantiate avx_permil for 128-bit and 256-bit (Y suffix, VEX_L) variants.
// PS uses opcodes 0x0C (variable) / 0x04 (immediate); PD uses 0x0D / 0x05.
// The control vector of the rm form is loaded as loadv2i64 (128-bit) or
// loadv4i64 (256-bit) in all four instantiations.
8343f4a2713aSLionel Sambuclet ExeDomain = SSEPackedSingle in {
8344f4a2713aSLionel Sambuc  defm VPERMILPS  : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem,
8345f4a2713aSLionel Sambuc                               loadv2i64, int_x86_avx_vpermilvar_ps, v4f32>;
8346f4a2713aSLionel Sambuc  defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem,
8347f4a2713aSLionel Sambuc                       loadv4i64, int_x86_avx_vpermilvar_ps_256, v8f32>, VEX_L;
8348f4a2713aSLionel Sambuc}
8349f4a2713aSLionel Sambuclet ExeDomain = SSEPackedDouble in {
8350f4a2713aSLionel Sambuc  defm VPERMILPD  : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem, i128mem,
8351f4a2713aSLionel Sambuc                               loadv2i64, int_x86_avx_vpermilvar_pd, v2f64>;
8352f4a2713aSLionel Sambuc  defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem,
8353f4a2713aSLionel Sambuc                       loadv4i64, int_x86_avx_vpermilvar_pd_256, v4f64>, VEX_L;
8354f4a2713aSLionel Sambuc}
8355f4a2713aSLionel Sambuc
// Additional VPERMIL selection patterns for any AVX target.
8356f4a2713aSLionel Sambuclet Predicates = [HasAVX] in {
// 256-bit variable permute: the X86VPermilpv node (as opposed to the
// intrinsic used inside avx_permil) with a register or loaded control vector.
8357*0a6a1f1dSLionel Sambucdef : Pat<(v8f32 (X86VPermilpv VR256:$src1, (v8i32 VR256:$src2))),
8358*0a6a1f1dSLionel Sambuc          (VPERMILPSYrr VR256:$src1, VR256:$src2)>;
8359*0a6a1f1dSLionel Sambucdef : Pat<(v8f32 (X86VPermilpv VR256:$src1, (bc_v8i32 (loadv4i64 addr:$src2)))),
8360*0a6a1f1dSLionel Sambuc          (VPERMILPSYrm VR256:$src1, addr:$src2)>;
8361*0a6a1f1dSLionel Sambucdef : Pat<(v4f64 (X86VPermilpv VR256:$src1, (v4i64 VR256:$src2))),
8362*0a6a1f1dSLionel Sambuc          (VPERMILPDYrr VR256:$src1, VR256:$src2)>;
8363*0a6a1f1dSLionel Sambucdef : Pat<(v4f64 (X86VPermilpv VR256:$src1, (loadv4i64 addr:$src2))),
8364*0a6a1f1dSLionel Sambuc          (VPERMILPDYrm VR256:$src1, addr:$src2)>;
8365*0a6a1f1dSLionel Sambuc
// 256-bit immediate permute on integer-typed vectors: v8i32 maps to the PS
// forms and v4i64 to the PD forms (same element width).
8366*0a6a1f1dSLionel Sambucdef : Pat<(v8i32 (X86VPermilpi VR256:$src1, (i8 imm:$imm))),
8367f4a2713aSLionel Sambuc          (VPERMILPSYri VR256:$src1, imm:$imm)>;
8368*0a6a1f1dSLionel Sambucdef : Pat<(v4i64 (X86VPermilpi VR256:$src1, (i8 imm:$imm))),
8369f4a2713aSLionel Sambuc          (VPERMILPDYri VR256:$src1, imm:$imm)>;
8370*0a6a1f1dSLionel Sambucdef : Pat<(v8i32 (X86VPermilpi (bc_v8i32 (loadv4i64 addr:$src1)),
8371f4a2713aSLionel Sambuc                               (i8 imm:$imm))),
8372f4a2713aSLionel Sambuc          (VPERMILPSYmi addr:$src1, imm:$imm)>;
8373*0a6a1f1dSLionel Sambucdef : Pat<(v4i64 (X86VPermilpi (loadv4i64 addr:$src1), (i8 imm:$imm))),
8374f4a2713aSLionel Sambuc          (VPERMILPDYmi addr:$src1, imm:$imm)>;
8375f4a2713aSLionel Sambuc
// 128-bit variable permute via the X86VPermilpv node.
8376*0a6a1f1dSLionel Sambucdef : Pat<(v4f32 (X86VPermilpv VR128:$src1, (v4i32 VR128:$src2))),
8377*0a6a1f1dSLionel Sambuc          (VPERMILPSrr VR128:$src1, VR128:$src2)>;
8378*0a6a1f1dSLionel Sambucdef : Pat<(v4f32 (X86VPermilpv VR128:$src1, (bc_v4i32 (loadv2i64 addr:$src2)))),
8379*0a6a1f1dSLionel Sambuc          (VPERMILPSrm VR128:$src1, addr:$src2)>;
8380*0a6a1f1dSLionel Sambucdef : Pat<(v2f64 (X86VPermilpv VR128:$src1, (v2i64 VR128:$src2))),
8381*0a6a1f1dSLionel Sambuc          (VPERMILPDrr VR128:$src1, VR128:$src2)>;
8382*0a6a1f1dSLionel Sambucdef : Pat<(v2f64 (X86VPermilpv VR128:$src1, (loadv2i64 addr:$src2))),
8383*0a6a1f1dSLionel Sambuc          (VPERMILPDrm VR128:$src1, addr:$src2)>;
8384*0a6a1f1dSLionel Sambuc
// 128-bit immediate permute on v2i64. This block has no v4i32 counterpart.
8385*0a6a1f1dSLionel Sambucdef : Pat<(v2i64 (X86VPermilpi VR128:$src1, (i8 imm:$imm))),
8386f4a2713aSLionel Sambuc          (VPERMILPDri VR128:$src1, imm:$imm)>;
8387*0a6a1f1dSLionel Sambucdef : Pat<(v2i64 (X86VPermilpi (loadv2i64 addr:$src1), (i8 imm:$imm))),
8388f4a2713aSLionel Sambuc          (VPERMILPDmi addr:$src1, imm:$imm)>;
8389f4a2713aSLionel Sambuc}
8390f4a2713aSLionel Sambuc
8391f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===//
8392f4a2713aSLionel Sambuc// VPERM2F128 - Permute Floating-Point Values in 128-bit chunks
8393f4a2713aSLionel Sambuc//
// Register (rr) and memory (rm) forms of vperm2f128. Both take two 256-bit
// sources plus an imm8 and are matched from the X86VPerm2x128 node. The
// built-in patterns cover v8f32 only (explicit type on rr, loadv8f32 on rm);
// other element types are handled by separate Pat records.
8394f4a2713aSLionel Sambuclet ExeDomain = SSEPackedSingle in {
8395f4a2713aSLionel Sambucdef VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst),
8396f4a2713aSLionel Sambuc          (ins VR256:$src1, VR256:$src2, i8imm:$src3),
8397f4a2713aSLionel Sambuc          "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
8398f4a2713aSLionel Sambuc          [(set VR256:$dst, (v8f32 (X86VPerm2x128 VR256:$src1, VR256:$src2,
8399*0a6a1f1dSLionel Sambuc                              (i8 imm:$src3))))]>, VEX_4V, VEX_L,
8400*0a6a1f1dSLionel Sambuc          Sched<[WriteFShuffle]>;
8401f4a2713aSLionel Sambucdef VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst),
8402f4a2713aSLionel Sambuc          (ins VR256:$src1, f256mem:$src2, i8imm:$src3),
8403f4a2713aSLionel Sambuc          "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
8404f4a2713aSLionel Sambuc          [(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (loadv8f32 addr:$src2),
8405*0a6a1f1dSLionel Sambuc                             (i8 imm:$src3)))]>, VEX_4V, VEX_L,
8406*0a6a1f1dSLionel Sambuc          Sched<[WriteFShuffleLd, ReadAfterLd]>;
8407f4a2713aSLionel Sambuc}
8408f4a2713aSLionel Sambuc
// v4f64 variants of X86VPerm2x128 (register and folded-load second source),
// selected to the same VPERM2F128 instructions as the v8f32 case.
8409f4a2713aSLionel Sambuclet Predicates = [HasAVX] in {
8410f4a2713aSLionel Sambucdef : Pat<(v4f64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
8411f4a2713aSLionel Sambuc          (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
8412f4a2713aSLionel Sambucdef : Pat<(v4f64 (X86VPerm2x128 VR256:$src1,
8413f4a2713aSLionel Sambuc                  (loadv4f64 addr:$src2), (i8 imm:$imm))),
8414f4a2713aSLionel Sambuc          (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
8415f4a2713aSLionel Sambuc}
8416f4a2713aSLionel Sambuc
// Integer-typed X86VPerm2x128, restricted to HasAVX1Only, is selected to the
// VPERM2F128 instructions.
// NOTE(review): presumably AVX2 targets use an integer-domain instruction
// defined elsewhere - confirm.
8417f4a2713aSLionel Sambuclet Predicates = [HasAVX1Only] in {
// Register second source.
8418f4a2713aSLionel Sambucdef : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
8419f4a2713aSLionel Sambuc          (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
8420f4a2713aSLionel Sambucdef : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
8421f4a2713aSLionel Sambuc          (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
8422f4a2713aSLionel Sambucdef : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
8423f4a2713aSLionel Sambuc          (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
8424f4a2713aSLionel Sambucdef : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
8425f4a2713aSLionel Sambuc          (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
8426f4a2713aSLionel Sambuc
// Folded-load second source: a loadv4i64, wrapped in the matching bc_*
// fragment for element types other than v4i64.
8427f4a2713aSLionel Sambucdef : Pat<(v8i32 (X86VPerm2x128 VR256:$src1,
8428f4a2713aSLionel Sambuc                  (bc_v8i32 (loadv4i64 addr:$src2)), (i8 imm:$imm))),
8429f4a2713aSLionel Sambuc          (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
8430f4a2713aSLionel Sambucdef : Pat<(v4i64 (X86VPerm2x128 VR256:$src1,
8431f4a2713aSLionel Sambuc                  (loadv4i64 addr:$src2), (i8 imm:$imm))),
8432f4a2713aSLionel Sambuc          (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
8433f4a2713aSLionel Sambucdef : Pat<(v32i8 (X86VPerm2x128 VR256:$src1,
8434f4a2713aSLionel Sambuc                  (bc_v32i8 (loadv4i64 addr:$src2)), (i8 imm:$imm))),
8435f4a2713aSLionel Sambuc          (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
8436f4a2713aSLionel Sambucdef : Pat<(v16i16 (X86VPerm2x128 VR256:$src1,
8437f4a2713aSLionel Sambuc                  (bc_v16i16 (loadv4i64 addr:$src2)), (i8 imm:$imm))),
8438f4a2713aSLionel Sambuc          (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
8439f4a2713aSLionel Sambuc}
8440f4a2713aSLionel Sambuc
8441f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===//
8442f4a2713aSLionel Sambuc// VZERO - Zero YMM registers
8443f4a2713aSLionel Sambuc//
// vzeroall / vzeroupper take no explicit operands; both are modelled as
// defining all of YMM0-YMM15 through the implicit Defs list. They share opcode
// 0x77 with RawFrm encoding and are told apart only by VEX_L, which is present
// on VZEROALL and absent on VZEROUPPER. Each is matched from its intrinsic and
// requires HasAVX.
8444f4a2713aSLionel Sambuclet Defs = [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
8445f4a2713aSLionel Sambuc            YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15] in {
8446f4a2713aSLionel Sambuc  // Zero All YMM registers
8447f4a2713aSLionel Sambuc  def VZEROALL : I<0x77, RawFrm, (outs), (ins), "vzeroall",
8448*0a6a1f1dSLionel Sambuc                  [(int_x86_avx_vzeroall)]>, PS, VEX, VEX_L, Requires<[HasAVX]>;
8449f4a2713aSLionel Sambuc
8450f4a2713aSLionel Sambuc  // Zero Upper bits of YMM registers
8451f4a2713aSLionel Sambuc  def VZEROUPPER : I<0x77, RawFrm, (outs), (ins), "vzeroupper",
8452*0a6a1f1dSLionel Sambuc                     [(int_x86_avx_vzeroupper)]>, PS, VEX, Requires<[HasAVX]>;
8453f4a2713aSLionel Sambuc}
8454f4a2713aSLionel Sambuc
8455f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===//
8456f4a2713aSLionel Sambuc// Half precision conversion instructions
8457f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===//
// f16c_ph2ps - Register and memory forms of vcvtph2ps (half -> single).
//   RC       : destination register class; the register source is always VR128.
//   x86memop : memory operand type of the load form.
//   Int      : intrinsic matched by the register form.
// The memory form has an empty pattern list, so hasSideEffects and mayLoad are
// set by hand; load folding is done by separate Pat records.
8458f4a2713aSLionel Sambucmulticlass f16c_ph2ps<RegisterClass RC, X86MemOperand x86memop, Intrinsic Int> {
8459f4a2713aSLionel Sambuc  def rr : I<0x13, MRMSrcReg, (outs RC:$dst), (ins VR128:$src),
8460f4a2713aSLionel Sambuc             "vcvtph2ps\t{$src, $dst|$dst, $src}",
8461f4a2713aSLionel Sambuc             [(set RC:$dst, (Int VR128:$src))]>,
8462*0a6a1f1dSLionel Sambuc             T8PD, VEX, Sched<[WriteCvtF2F]>;
8463*0a6a1f1dSLionel Sambuc  let hasSideEffects = 0, mayLoad = 1 in
8464f4a2713aSLionel Sambuc  def rm : I<0x13, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
8465*0a6a1f1dSLionel Sambuc             "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8PD, VEX,
8466*0a6a1f1dSLionel Sambuc             Sched<[WriteCvtF2FLd]>;
8467f4a2713aSLionel Sambuc}
8468f4a2713aSLionel Sambuc
// f16c_ps2ph - Register and memory-destination forms of vcvtps2ph
// (single -> half).
//   RC       : source register class; the register destination is always VR128.
//   x86memop : memory operand type of the store form.
//   Int      : intrinsic matched by the register form; takes the source and
//              the imm8 operand.
// The store form has an empty pattern list, so hasSideEffects, mayStore and
// the scheduling classes (via SchedRW) are all set by hand.
8469f4a2713aSLionel Sambucmulticlass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop, Intrinsic Int> {
8470f4a2713aSLionel Sambuc  def rr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst),
8471f4a2713aSLionel Sambuc               (ins RC:$src1, i32i8imm:$src2),
8472f4a2713aSLionel Sambuc               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
8473f4a2713aSLionel Sambuc               [(set VR128:$dst, (Int RC:$src1, imm:$src2))]>,
8474*0a6a1f1dSLionel Sambuc               TAPD, VEX, Sched<[WriteCvtF2F]>;
8475*0a6a1f1dSLionel Sambuc  let hasSideEffects = 0, mayStore = 1,
8476*0a6a1f1dSLionel Sambuc      SchedRW = [WriteCvtF2FLd, WriteRMW] in
8477f4a2713aSLionel Sambuc  def mr : Ii8<0x1D, MRMDestMem, (outs),
8478f4a2713aSLionel Sambuc               (ins x86memop:$dst, RC:$src1, i32i8imm:$src2),
8479f4a2713aSLionel Sambuc               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
8480*0a6a1f1dSLionel Sambuc               TAPD, VEX;
8481f4a2713aSLionel Sambuc}
8482f4a2713aSLionel Sambuc
// F16C instantiations. The 128-bit variants use a 64-bit memory operand
// (f64mem) and the 256-bit variants (Y suffix, VEX_L) a 128-bit one (f128mem),
// i.e. the half-precision side is half the width of the single-precision side.
8483f4a2713aSLionel Sambuclet Predicates = [HasF16C] in {
8484f4a2713aSLionel Sambuc  defm VCVTPH2PS  : f16c_ph2ps<VR128, f64mem, int_x86_vcvtph2ps_128>;
8485f4a2713aSLionel Sambuc  defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem, int_x86_vcvtph2ps_256>, VEX_L;
8486f4a2713aSLionel Sambuc  defm VCVTPS2PH  : f16c_ps2ph<VR128, f64mem, int_x86_vcvtps2ph_128>;
8487f4a2713aSLionel Sambuc  defm VCVTPS2PHY : f16c_ps2ph<VR256, f128mem, int_x86_vcvtps2ph_256>, VEX_L;
8488*0a6a1f1dSLionel Sambuc
8489*0a6a1f1dSLionel Sambuc  // Pattern match vcvtph2ps of a scalar i64 load.
// Both the vzmovl_v2i64 and vzload_v2i64 forms of the load are folded into
// the memory form VCVTPH2PSrm.
8490*0a6a1f1dSLionel Sambuc  def : Pat<(int_x86_vcvtph2ps_128 (vzmovl_v2i64 addr:$src)),
8491*0a6a1f1dSLionel Sambuc            (VCVTPH2PSrm addr:$src)>;
8492*0a6a1f1dSLionel Sambuc  def : Pat<(int_x86_vcvtph2ps_128 (vzload_v2i64 addr:$src)),
8493*0a6a1f1dSLionel Sambuc            (VCVTPH2PSrm addr:$src)>;
8494*0a6a1f1dSLionel Sambuc}
8495*0a6a1f1dSLionel Sambuc
8496*0a6a1f1dSLionel Sambuc// Patterns for matching conversions from float to half-float and vice versa.
// Scalar float <-> half conversions built from the packed F16C instructions.
8497*0a6a1f1dSLionel Sambuclet Predicates = [HasF16C] in {
// float -> half: copy the FR32 value into VR128, convert with VCVTPS2PHrr
// (imm8 = 0), move the low 32 bits out with VMOVPDI2DIrr and take the 16-bit
// subregister as the i16 result.
// NOTE(review): imm8 = 0 should select round-to-nearest-even per the
// VCVTPS2PH encoding - confirm against the ISA reference.
8498*0a6a1f1dSLionel Sambuc  def : Pat<(fp_to_f16 FR32:$src),
8499*0a6a1f1dSLionel Sambuc            (i16 (EXTRACT_SUBREG (VMOVPDI2DIrr (VCVTPS2PHrr
8500*0a6a1f1dSLionel Sambuc              (COPY_TO_REGCLASS FR32:$src, VR128), 0)), sub_16bit))>;
8501*0a6a1f1dSLionel Sambuc
// half -> float: widen the GR16 value with MOVSX32rr16, copy it into VR128,
// convert with VCVTPH2PSrr and copy the result to FR32.
8502*0a6a1f1dSLionel Sambuc  def : Pat<(f16_to_fp GR16:$src),
8503*0a6a1f1dSLionel Sambuc            (f32 (COPY_TO_REGCLASS (VCVTPH2PSrr
8504*0a6a1f1dSLionel Sambuc              (COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128)), FR32)) >;
8505*0a6a1f1dSLionel Sambuc
// float -> half -> float round trip: the two conversions are chained directly
// in vector registers, with no move through a general-purpose register.
8506*0a6a1f1dSLionel Sambuc  def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32:$src))),
8507*0a6a1f1dSLionel Sambuc            (f32 (COPY_TO_REGCLASS (VCVTPH2PSrr
8508*0a6a1f1dSLionel Sambuc              (VCVTPS2PHrr (COPY_TO_REGCLASS FR32:$src, VR128), 0)), FR32)) >;
8509f4a2713aSLionel Sambuc}
8510f4a2713aSLionel Sambuc
8511f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===//
8512f4a2713aSLionel Sambuc// AVX2 Instructions
8513f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===//
8514f4a2713aSLionel Sambuc
8515f4a2713aSLionel Sambuc/// AVX2_binop_rmi_int - AVX2 binary operator with 8-bit immediate
//   opc        : instruction opcode shared by both forms.
//   IntId      : intrinsic matched by both forms (two sources plus immediate).
//   RC         : register class of the sources and the result.
//   memop_frag : load fragment for the memory form; its result goes through
//                bitconvert.
//   x86memop   : memory operand type of the second source in the rmi form.
// NOTE(review): rri is marked isCommutable while also taking an immediate
// ($src3). Confirm that the code performing the commutation accounts for the
// immediate when $src1 and $src2 are swapped.
8516f4a2713aSLionel Sambucmulticlass AVX2_binop_rmi_int<bits<8> opc, string OpcodeStr,
8517f4a2713aSLionel Sambuc                 Intrinsic IntId, RegisterClass RC, PatFrag memop_frag,
8518f4a2713aSLionel Sambuc                 X86MemOperand x86memop> {
8519f4a2713aSLionel Sambuc  let isCommutable = 1 in
8520f4a2713aSLionel Sambuc  def rri : AVX2AIi8<opc, MRMSrcReg, (outs RC:$dst),
8521*0a6a1f1dSLionel Sambuc        (ins RC:$src1, RC:$src2, i8imm:$src3),
8522f4a2713aSLionel Sambuc        !strconcat(OpcodeStr,
8523f4a2713aSLionel Sambuc            "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
8524f4a2713aSLionel Sambuc        [(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))]>,
8525*0a6a1f1dSLionel Sambuc        Sched<[WriteBlend]>, VEX_4V;
8526f4a2713aSLionel Sambuc  def rmi : AVX2AIi8<opc, MRMSrcMem, (outs RC:$dst),
8527*0a6a1f1dSLionel Sambuc        (ins RC:$src1, x86memop:$src2, i8imm:$src3),
8528f4a2713aSLionel Sambuc        !strconcat(OpcodeStr,
8529f4a2713aSLionel Sambuc            "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
8530f4a2713aSLionel Sambuc        [(set RC:$dst,
8531f4a2713aSLionel Sambuc          (IntId RC:$src1,
8532f4a2713aSLionel Sambuc           (bitconvert (memop_frag addr:$src2)), imm:$src3))]>,
8533*0a6a1f1dSLionel Sambuc        Sched<[WriteBlendLd, ReadAfterLd]>, VEX_4V;
8534f4a2713aSLionel Sambuc}
8535f4a2713aSLionel Sambuc
// vpblendd, 128-bit and 256-bit (Y suffix, VEX_L) variants, matched from the
// pblendd intrinsics through AVX2_binop_rmi_int.
8536f4a2713aSLionel Sambucdefm VPBLENDD : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_128,
8537f4a2713aSLionel Sambuc                                   VR128, loadv2i64, i128mem>;
8538f4a2713aSLionel Sambucdefm VPBLENDDY : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_256,
8539f4a2713aSLionel Sambuc                                    VR256, loadv4i64, i256mem>, VEX_L;
8540f4a2713aSLionel Sambuc
// The X86Blendi node on v4i32 / v8i32 is selected to the register forms.
// NOTE(review): these two patterns are not wrapped in a `let Predicates`
// block in this excerpt - confirm that no explicit predicate is required.
8541f4a2713aSLionel Sambucdef : Pat<(v4i32 (X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2),
8542f4a2713aSLionel Sambuc                  imm:$mask)),
8543f4a2713aSLionel Sambuc          (VPBLENDDrri VR128:$src1, VR128:$src2, imm:$mask)>;
8544f4a2713aSLionel Sambucdef : Pat<(v8i32 (X86Blendi (v8i32 VR256:$src1), (v8i32 VR256:$src2),
8545f4a2713aSLionel Sambuc                  imm:$mask)),
8546f4a2713aSLionel Sambuc          (VPBLENDDYrri VR256:$src1, VR256:$src2, imm:$mask)>;
8547f4a2713aSLionel Sambuc
8548f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===//
8549f4a2713aSLionel Sambuc// VPBROADCAST - Load from memory and broadcast to all elements of the
8550f4a2713aSLionel Sambuc//               destination operand
8551f4a2713aSLionel Sambuc//
// avx2_broadcast - Defines the four forms of one VPBROADCAST instruction.
//   opc      : opcode shared by all four forms.
//   x86memop : memory operand type of the scalar source in rm / Yrm.
//   ld_frag  : scalar load fragment; its result is wrapped in scalar_to_vector
//              before being passed to the intrinsic.
//   Int128   : intrinsic matched by the 128-bit forms (rr, rm).
//   Int256   : intrinsic matched by the 256-bit forms (Yrr, Yrm).
// The register source is VR128 for both result widths. The 256-bit forms add
// VEX_L, and Yrr uses WriteShuffle256 where rr uses WriteShuffle.
8552f4a2713aSLionel Sambucmulticlass avx2_broadcast<bits<8> opc, string OpcodeStr,
8553f4a2713aSLionel Sambuc                          X86MemOperand x86memop, PatFrag ld_frag,
8554f4a2713aSLionel Sambuc                          Intrinsic Int128, Intrinsic Int256> {
8555f4a2713aSLionel Sambuc  def rr : AVX28I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
8556f4a2713aSLionel Sambuc                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
8557*0a6a1f1dSLionel Sambuc                  [(set VR128:$dst, (Int128 VR128:$src))]>,
8558*0a6a1f1dSLionel Sambuc                  Sched<[WriteShuffle]>, VEX;
8559f4a2713aSLionel Sambuc  def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src),
8560f4a2713aSLionel Sambuc                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
8561f4a2713aSLionel Sambuc                  [(set VR128:$dst,
8562*0a6a1f1dSLionel Sambuc                    (Int128 (scalar_to_vector (ld_frag addr:$src))))]>,
8563*0a6a1f1dSLionel Sambuc                  Sched<[WriteLoad]>, VEX;
8564f4a2713aSLionel Sambuc  def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
8565f4a2713aSLionel Sambuc                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
8566*0a6a1f1dSLionel Sambuc                   [(set VR256:$dst, (Int256 VR128:$src))]>,
8567*0a6a1f1dSLionel Sambuc                   Sched<[WriteShuffle256]>, VEX, VEX_L;
8568f4a2713aSLionel Sambuc  def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst), (ins x86memop:$src),
8569f4a2713aSLionel Sambuc                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
8570f4a2713aSLionel Sambuc                   [(set VR256:$dst,
8571f4a2713aSLionel Sambuc                    (Int256 (scalar_to_vector (ld_frag addr:$src))))]>,
8572*0a6a1f1dSLionel Sambuc                   Sched<[WriteLoad]>, VEX, VEX_L;
8573f4a2713aSLionel Sambuc}
8574f4a2713aSLionel Sambuc
// Instantiate avx2_broadcast for byte, word, dword and qword elements. Each
// variant pairs its opcode with the memory operand type and scalar load
// fragment of the matching element width.
8575f4a2713aSLionel Sambucdefm VPBROADCASTB  : avx2_broadcast<0x78, "vpbroadcastb", i8mem, loadi8,
8576f4a2713aSLionel Sambuc                                    int_x86_avx2_pbroadcastb_128,
8577f4a2713aSLionel Sambuc                                    int_x86_avx2_pbroadcastb_256>;
8578f4a2713aSLionel Sambucdefm VPBROADCASTW  : avx2_broadcast<0x79, "vpbroadcastw", i16mem, loadi16,
8579f4a2713aSLionel Sambuc                                    int_x86_avx2_pbroadcastw_128,
8580f4a2713aSLionel Sambuc                                    int_x86_avx2_pbroadcastw_256>;
8581f4a2713aSLionel Sambucdefm VPBROADCASTD  : avx2_broadcast<0x58, "vpbroadcastd", i32mem, loadi32,
8582f4a2713aSLionel Sambuc                                    int_x86_avx2_pbroadcastd_128,
8583f4a2713aSLionel Sambuc                                    int_x86_avx2_pbroadcastd_256>;
8584f4a2713aSLionel Sambucdefm VPBROADCASTQ  : avx2_broadcast<0x59, "vpbroadcastq", i64mem, loadi64,
8585f4a2713aSLionel Sambuc                                    int_x86_avx2_pbroadcastq_128,
8586f4a2713aSLionel Sambuc                                    int_x86_avx2_pbroadcastq_256>;
8587f4a2713aSLionel Sambuc
// Selection patterns for the X86VBroadcast node, all guarded by HasAVX2.
// Four groups follow: broadcast of a scalar load, broadcast from a 128-bit
// vector register, broadcast from a 256-bit vector register, and broadcast
// from a scalar (FR32/FR64/GRx) register.
let Predicates = [HasAVX2] in {
  // Broadcast of a scalar integer load: use the memory form of VPBROADCAST*.
  def : Pat<(v16i8 (X86VBroadcast (loadi8 addr:$src))),
          (VPBROADCASTBrm addr:$src)>;
  def : Pat<(v32i8 (X86VBroadcast (loadi8 addr:$src))),
          (VPBROADCASTBYrm addr:$src)>;
  def : Pat<(v8i16 (X86VBroadcast (loadi16 addr:$src))),
          (VPBROADCASTWrm addr:$src)>;
  def : Pat<(v16i16 (X86VBroadcast (loadi16 addr:$src))),
          (VPBROADCASTWYrm addr:$src)>;
  def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
          (VPBROADCASTDrm addr:$src)>;
  def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))),
          (VPBROADCASTDYrm addr:$src)>;
  def : Pat<(v2i64 (X86VBroadcast (loadi64 addr:$src))),
          (VPBROADCASTQrm addr:$src)>;
  def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))),
          (VPBROADCASTQYrm addr:$src)>;

  // Broadcast from a 128-bit vector register to a 128-bit or 256-bit result.
  def : Pat<(v16i8 (X86VBroadcast (v16i8 VR128:$src))),
          (VPBROADCASTBrr VR128:$src)>;
  def : Pat<(v32i8 (X86VBroadcast (v16i8 VR128:$src))),
          (VPBROADCASTBYrr VR128:$src)>;
  def : Pat<(v8i16 (X86VBroadcast (v8i16 VR128:$src))),
          (VPBROADCASTWrr VR128:$src)>;
  def : Pat<(v16i16 (X86VBroadcast (v8i16 VR128:$src))),
          (VPBROADCASTWYrr VR128:$src)>;
  def : Pat<(v4i32 (X86VBroadcast (v4i32 VR128:$src))),
          (VPBROADCASTDrr VR128:$src)>;
  def : Pat<(v8i32 (X86VBroadcast (v4i32 VR128:$src))),
          (VPBROADCASTDYrr VR128:$src)>;
  def : Pat<(v2i64 (X86VBroadcast (v2i64 VR128:$src))),
          (VPBROADCASTQrr VR128:$src)>;
  def : Pat<(v4i64 (X86VBroadcast (v2i64 VR128:$src))),
          (VPBROADCASTQYrr VR128:$src)>;
  def : Pat<(v4f32 (X86VBroadcast (v4f32 VR128:$src))),
          (VBROADCASTSSrr VR128:$src)>;
  def : Pat<(v8f32 (X86VBroadcast (v4f32 VR128:$src))),
          (VBROADCASTSSYrr VR128:$src)>;
  // NOTE(review): the v2f64 case is mapped to the integer VPBROADCASTQrr,
  // presumably because no 128-bit register form of VBROADCASTSD is defined
  // in this file -- confirm.
  def : Pat<(v2f64 (X86VBroadcast (v2f64 VR128:$src))),
          (VPBROADCASTQrr VR128:$src)>;
  def : Pat<(v4f64 (X86VBroadcast (v2f64 VR128:$src))),
          (VBROADCASTSDYrr VR128:$src)>;

  // Provide aliases for broadcast from the same register class that
  // automatically do the extract: the low 128 bits (sub_xmm) of the 256-bit
  // source are taken with EXTRACT_SUBREG and fed to the Y*rr form.
  def : Pat<(v32i8 (X86VBroadcast (v32i8 VR256:$src))),
            (VPBROADCASTBYrr (v16i8 (EXTRACT_SUBREG (v32i8 VR256:$src),
                                                    sub_xmm)))>;
  def : Pat<(v16i16 (X86VBroadcast (v16i16 VR256:$src))),
            (VPBROADCASTWYrr (v8i16 (EXTRACT_SUBREG (v16i16 VR256:$src),
                                                    sub_xmm)))>;
  def : Pat<(v8i32 (X86VBroadcast (v8i32 VR256:$src))),
            (VPBROADCASTDYrr (v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src),
                                                    sub_xmm)))>;
  def : Pat<(v4i64 (X86VBroadcast (v4i64 VR256:$src))),
            (VPBROADCASTQYrr (v2i64 (EXTRACT_SUBREG (v4i64 VR256:$src),
                                                    sub_xmm)))>;
  def : Pat<(v8f32 (X86VBroadcast (v8f32 VR256:$src))),
            (VBROADCASTSSYrr (v4f32 (EXTRACT_SUBREG (v8f32 VR256:$src),
                                                    sub_xmm)))>;
  def : Pat<(v4f64 (X86VBroadcast (v4f64 VR256:$src))),
            (VBROADCASTSDYrr (v2f64 (EXTRACT_SUBREG (v4f64 VR256:$src),
                                                    sub_xmm)))>;

  // Provide fallback in case the load node that is used in the patterns above
  // is used by additional users, which prevents the pattern selection.
  // The scalar register is first copied into VR128 and then broadcast with
  // the register form.
  let AddedComplexity = 20 in {
    def : Pat<(v4f32 (X86VBroadcast FR32:$src)),
              (VBROADCASTSSrr (COPY_TO_REGCLASS FR32:$src, VR128))>;
    def : Pat<(v8f32 (X86VBroadcast FR32:$src)),
              (VBROADCASTSSYrr (COPY_TO_REGCLASS FR32:$src, VR128))>;
    def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
              (VBROADCASTSDYrr (COPY_TO_REGCLASS FR64:$src, VR128))>;

    // 32/64-bit general-purpose sources reuse the FP broadcast instructions.
    def : Pat<(v4i32 (X86VBroadcast GR32:$src)),
              (VBROADCASTSSrr (COPY_TO_REGCLASS GR32:$src, VR128))>;
    def : Pat<(v8i32 (X86VBroadcast GR32:$src)),
              (VBROADCASTSSYrr (COPY_TO_REGCLASS GR32:$src, VR128))>;
    def : Pat<(v4i64 (X86VBroadcast GR64:$src)),
              (VBROADCASTSDYrr (COPY_TO_REGCLASS GR64:$src, VR128))>;

    // 8-bit sources are widened to a 32-bit register with SUBREG_TO_REG
    // before being copied into VR128.
    def : Pat<(v16i8 (X86VBroadcast GR8:$src)),
          (VPBROADCASTBrr (COPY_TO_REGCLASS
                           (i32 (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit)),
                           VR128))>;
    def : Pat<(v32i8 (X86VBroadcast GR8:$src)),
          (VPBROADCASTBYrr (COPY_TO_REGCLASS
                            (i32 (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit)),
                            VR128))>;

    // 16-bit sources are widened the same way through sub_16bit.
    def : Pat<(v8i16 (X86VBroadcast GR16:$src)),
          (VPBROADCASTWrr (COPY_TO_REGCLASS
                           (i32 (SUBREG_TO_REG (i32 0), GR16:$src, sub_16bit)),
                           VR128))>;
    def : Pat<(v16i16 (X86VBroadcast GR16:$src)),
          (VPBROADCASTWYrr (COPY_TO_REGCLASS
                            (i32 (SUBREG_TO_REG (i32 0), GR16:$src, sub_16bit)),
                            VR128))>;

    // The patterns for VPBROADCASTD are not needed because they would match
    // the exact same thing as VBROADCASTSS patterns.

    def : Pat<(v2i64 (X86VBroadcast GR64:$src)),
          (VPBROADCASTQrr (COPY_TO_REGCLASS GR64:$src, VR128))>;
    // The v4i64 pattern is not needed because VBROADCASTSDYrr already matches.
  }
}
8695f4a2713aSLionel Sambuc
// AVX1 broadcast patterns
// Guarded by HasAVX1Only: broadcasts of 32/64-bit integer loads are selected
// to the memory forms of the floating-point broadcast instructions
// (VBROADCASTSS / VBROADCASTSD).
let Predicates = [HasAVX1Only] in {
def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))),
          (VBROADCASTSSYrm addr:$src)>;
def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))),
          (VBROADCASTSDYrm addr:$src)>;
def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
          (VBROADCASTSSrm addr:$src)>;
}
8705f4a2713aSLionel Sambuc
// Register-source X86VBroadcast patterns guarded by HasAVX.
// NOTE(review): the source DAGs here duplicate several of the HasAVX2
// register-source patterns at the same AddedComplexity; presumably both sets
// are live on AVX2 targets -- confirm the intended precedence.
let Predicates = [HasAVX] in {
  // Provide fallback in case the load node that is used in the patterns above
  // is used by additional users, which prevents the pattern selection.
  let AddedComplexity = 20 in {
  // The scalar is copied into VR128 and splatted with VPSHUFD (immediate 0
  // repeats dword 0; 0x44 repeats dwords 0,1, i.e. the low qword).  For
  // 256-bit results the 128-bit splat is placed in the low half via
  // INSERT_SUBREG/sub_xmm and inserted again with VINSERTF128 immediate 1.
  def : Pat<(v4f32 (X86VBroadcast FR32:$src)),
            (VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0)>;
  def : Pat<(v8f32 (X86VBroadcast FR32:$src)),
            (VINSERTF128rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
              (VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0), sub_xmm),
              (VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0), 1)>;
  def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
            (VINSERTF128rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)),
              (VPSHUFDri (COPY_TO_REGCLASS FR64:$src, VR128), 0x44), sub_xmm),
              (VPSHUFDri (COPY_TO_REGCLASS FR64:$src, VR128), 0x44), 1)>;

  // Same expansions for general-purpose register sources.
  def : Pat<(v4i32 (X86VBroadcast GR32:$src)),
            (VPSHUFDri (COPY_TO_REGCLASS GR32:$src, VR128), 0)>;
  def : Pat<(v8i32 (X86VBroadcast GR32:$src)),
            (VINSERTF128rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
              (VPSHUFDri (COPY_TO_REGCLASS GR32:$src, VR128), 0), sub_xmm),
              (VPSHUFDri (COPY_TO_REGCLASS GR32:$src, VR128), 0), 1)>;
  def : Pat<(v4i64 (X86VBroadcast GR64:$src)),
            (VINSERTF128rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)),
              (VPSHUFDri (COPY_TO_REGCLASS GR64:$src, VR128), 0x44), sub_xmm),
              (VPSHUFDri (COPY_TO_REGCLASS GR64:$src, VR128), 0x44), 1)>;
  }

  // v2f64 splat of a scalar f64 uses VMOVDDUP; this pattern is outside the
  // AddedComplexity scope above.
  def : Pat<(v2f64 (X86VBroadcast f64:$src)),
            (VMOVDDUPrr (COPY_TO_REGCLASS FR64:$src, VR128))>;
}
8737f4a2713aSLionel Sambuc
//===----------------------------------------------------------------------===//
// VPERM - Permute instructions
//

// avx2_perm - 256-bit permute taking both operands as vectors (X86VPermv).
//   opc       - opcode byte.
//   OpcodeStr - assembly mnemonic.
//   mem_frag  - load fragment used for the folded memory operand; its result
//               is bitconverted before being fed to X86VPermv.
//   OpVT      - value type of the permute result.
//   Sched     - foldable scheduling class; Yrr uses it directly, Yrm uses its
//               .Folded variant together with ReadAfterLd.  Note that this
//               template argument has the same name as the Sched<> class
//               applied below.
// Defines Yrr (register source) and Yrm (second source folded from memory).
multiclass avx2_perm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
                     ValueType OpVT, X86FoldableSchedWrite Sched> {
  def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst),
                   (ins VR256:$src1, VR256:$src2),
                   !strconcat(OpcodeStr,
                       "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(set VR256:$dst,
                     (OpVT (X86VPermv VR256:$src1, VR256:$src2)))]>,
                   Sched<[Sched]>, VEX_4V, VEX_L;
  def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst),
                   (ins VR256:$src1, i256mem:$src2),
                   !strconcat(OpcodeStr,
                       "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(set VR256:$dst,
                     (OpVT (X86VPermv VR256:$src1,
                            (bitconvert (mem_frag addr:$src2)))))]>,
                   Sched<[Sched.Folded, ReadAfterLd]>, VEX_4V, VEX_L;
}
8760f4a2713aSLionel Sambuc
// Variable permutes: VPERMD on v8i32 (integer shuffle scheduling class) and
// VPERMPS on v8f32 (FP shuffle scheduling class, SSEPackedSingle domain).
defm VPERMD : avx2_perm<0x36, "vpermd", loadv4i64, v8i32, WriteShuffle256>;
let ExeDomain = SSEPackedSingle in
defm VPERMPS : avx2_perm<0x16, "vpermps", loadv8f32, v8f32, WriteFShuffle256>;
8764f4a2713aSLionel Sambuc
// avx2_perm_imm - 256-bit permute controlled by an 8-bit immediate
// (X86VPermi).
//   opc       - opcode byte.
//   OpcodeStr - assembly mnemonic.
//   mem_frag  - load fragment used for the folded source operand.
//   OpVT      - value type of the permute result.
//   Sched     - foldable scheduling class; Yri uses it directly, Ymi uses its
//               .Folded variant together with ReadAfterLd.
// Defines Yri (register source) and Ymi (source folded from memory).
multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
                         ValueType OpVT, X86FoldableSchedWrite Sched> {
  def Yri : AVX2AIi8<opc, MRMSrcReg, (outs VR256:$dst),
                     (ins VR256:$src1, i8imm:$src2),
                     !strconcat(OpcodeStr,
                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                     [(set VR256:$dst,
                       (OpVT (X86VPermi VR256:$src1, (i8 imm:$src2))))]>,
                     Sched<[Sched]>, VEX, VEX_L;
  def Ymi : AVX2AIi8<opc, MRMSrcMem, (outs VR256:$dst),
                     (ins i256mem:$src1, i8imm:$src2),
                     !strconcat(OpcodeStr,
                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                     [(set VR256:$dst,
                       (OpVT (X86VPermi (mem_frag addr:$src1),
                              (i8 imm:$src2))))]>,
                     Sched<[Sched.Folded, ReadAfterLd]>, VEX, VEX_L;
}
8783f4a2713aSLionel Sambuc
// Immediate permutes on 64-bit elements: VPERMQ on v4i64 and VPERMPD on v4f64
// (SSEPackedDouble domain).  Both instantiations add VEX_W.
defm VPERMQ : avx2_perm_imm<0x00, "vpermq", loadv4i64, v4i64,
                            WriteShuffle256>, VEX_W;
let ExeDomain = SSEPackedDouble in
defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", loadv4f64, v4f64,
                             WriteFShuffle256>, VEX_W;
8789f4a2713aSLionel Sambuc
//===----------------------------------------------------------------------===//
// VPERM2I128 - Permute Integer Values in 128-bit chunks
//
// Register and memory forms; both select X86VPerm2x128 with an 8-bit
// immediate.  The rr pattern is typed v4i64; other integer element types are
// covered by separate Pat<> definitions in this file.
def VPERM2I128rr : AVX2AIi8<0x46, MRMSrcReg, (outs VR256:$dst),
          (ins VR256:$src1, VR256:$src2, i8imm:$src3),
          "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          [(set VR256:$dst, (v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2,
                            (i8 imm:$src3))))]>, Sched<[WriteShuffle256]>,
          VEX_4V, VEX_L;
// NOTE(review): the memory operand is declared f256mem although the folded
// load is loadv4i64 -- confirm whether i256mem was intended.
def VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst),
          (ins VR256:$src1, f256mem:$src2, i8imm:$src3),
          "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          [(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (loadv4i64 addr:$src2),
                             (i8 imm:$src3)))]>,
          Sched<[WriteShuffle256Ld, ReadAfterLd]>, VEX_4V, VEX_L;
8805f4a2713aSLionel Sambuc
// Extend VPERM2I128 selection to the remaining 256-bit integer element types
// (v8i32, v32i8, v16i16), for both register and folded-load second operands.
// The memory patterns look through a bitcast of a loadv4i64.
let Predicates = [HasAVX2] in {
def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
          (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>;
def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
          (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>;
def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
          (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>;

def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, (bc_v32i8 (loadv4i64 addr:$src2)),
                  (i8 imm:$imm))),
          (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>;
def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1,
                   (bc_v16i16 (loadv4i64 addr:$src2)), (i8 imm:$imm))),
          (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>;
def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, (bc_v8i32 (loadv4i64 addr:$src2)),
                  (i8 imm:$imm))),
          (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>;
}
8824f4a2713aSLionel Sambuc
8825f4a2713aSLionel Sambuc
//===----------------------------------------------------------------------===//
// VINSERTI128 - Insert packed integer values
//
// Both forms carry an empty pattern list, so hasSideEffects (and mayLoad for
// the memory form) are set explicitly.  Selection is done through separate
// Pat<> definitions in this file.
let hasSideEffects = 0 in {
def VINSERTI128rr : AVX2AIi8<0x38, MRMSrcReg, (outs VR256:$dst),
          (ins VR256:$src1, VR128:$src2, i8imm:$src3),
          "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          []>, Sched<[WriteShuffle256]>, VEX_4V, VEX_L;
let mayLoad = 1 in
def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst),
          (ins VR256:$src1, i128mem:$src2, i8imm:$src3),
          "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          []>, Sched<[WriteShuffle256Ld, ReadAfterLd]>, VEX_4V, VEX_L;
}
8840f4a2713aSLionel Sambuc
// Select vinsert128_insert on integer vector types to VINSERTI128.  The
// instruction immediate is produced from the matched node ($ins) by the
// INSERT_get_vinsert128_imm transform.
let Predicates = [HasAVX2] in {
// Register source, one pattern per integer element type.
def : Pat<(vinsert128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
                                   (iPTR imm)),
          (VINSERTI128rr VR256:$src1, VR128:$src2,
                         (INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2),
                                   (iPTR imm)),
          (VINSERTI128rr VR256:$src1, VR128:$src2,
                         (INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2),
                                   (iPTR imm)),
          (VINSERTI128rr VR256:$src1, VR128:$src2,
                         (INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
                                   (iPTR imm)),
          (VINSERTI128rr VR256:$src1, VR128:$src2,
                         (INSERT_get_vinsert128_imm VR256:$ins))>;

// Folded 128-bit load as the inserted value; non-v2i64 types are matched
// through a bitcast of loadv2i64.
def : Pat<(vinsert128_insert:$ins (v4i64 VR256:$src1), (loadv2i64 addr:$src2),
                                   (iPTR imm)),
          (VINSERTI128rm VR256:$src1, addr:$src2,
                         (INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (v8i32 VR256:$src1),
                                   (bc_v4i32 (loadv2i64 addr:$src2)),
                                   (iPTR imm)),
          (VINSERTI128rm VR256:$src1, addr:$src2,
                         (INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (v32i8 VR256:$src1),
                                   (bc_v16i8 (loadv2i64 addr:$src2)),
                                   (iPTR imm)),
          (VINSERTI128rm VR256:$src1, addr:$src2,
                         (INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (v16i16 VR256:$src1),
                                   (bc_v8i16 (loadv2i64 addr:$src2)),
                                   (iPTR imm)),
          (VINSERTI128rm VR256:$src1, addr:$src2,
                         (INSERT_get_vinsert128_imm VR256:$ins))>;
}
8879f4a2713aSLionel Sambuc
//===----------------------------------------------------------------------===//
// VEXTRACTI128 - Extract packed integer values
//
// The register form is selected from the int_x86_avx2_vextracti128 intrinsic.
// The store form has an empty pattern list, so hasSideEffects and mayStore
// are set explicitly; it is selected through separate Pat<> definitions in
// this file.
def VEXTRACTI128rr : AVX2AIi8<0x39, MRMDestReg, (outs VR128:$dst),
          (ins VR256:$src1, i8imm:$src2),
          "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          [(set VR128:$dst,
            (int_x86_avx2_vextracti128 VR256:$src1, imm:$src2))]>,
          Sched<[WriteShuffle256]>, VEX, VEX_L;
let hasSideEffects = 0, mayStore = 1 in
def VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs),
          (ins i128mem:$dst, VR256:$src1, i8imm:$src2),
          "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
          Sched<[WriteStore]>, VEX, VEX_L;
8894f4a2713aSLionel Sambuc
// Select vextract128_extract on integer vector types to VEXTRACTI128.  The
// instruction immediate is produced from the matched node ($ext) by the
// EXTRACT_get_vextract128_imm transform.
let Predicates = [HasAVX2] in {
// Register destination.  The four source DAGs are textually identical; the
// element type is only spelled on the result side of each pattern.
def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
          (v2i64 (VEXTRACTI128rr
                    (v4i64 VR256:$src1),
                    (EXTRACT_get_vextract128_imm VR128:$ext)))>;
def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
          (v4i32 (VEXTRACTI128rr
                    (v8i32 VR256:$src1),
                    (EXTRACT_get_vextract128_imm VR128:$ext)))>;
def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
          (v8i16 (VEXTRACTI128rr
                    (v16i16 VR256:$src1),
                    (EXTRACT_get_vextract128_imm VR128:$ext)))>;
def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
          (v16i8 (VEXTRACTI128rr
                    (v32i8 VR256:$src1),
                    (EXTRACT_get_vextract128_imm VR128:$ext)))>;

// Extract followed by a store: fold into the memory-destination form.
def : Pat<(store (v2i64 (vextract128_extract:$ext (v4i64 VR256:$src1),
                         (iPTR imm))), addr:$dst),
          (VEXTRACTI128mr addr:$dst, VR256:$src1,
           (EXTRACT_get_vextract128_imm VR128:$ext))>;
def : Pat<(store (v4i32 (vextract128_extract:$ext (v8i32 VR256:$src1),
                         (iPTR imm))), addr:$dst),
          (VEXTRACTI128mr addr:$dst, VR256:$src1,
           (EXTRACT_get_vextract128_imm VR128:$ext))>;
def : Pat<(store (v8i16 (vextract128_extract:$ext (v16i16 VR256:$src1),
                         (iPTR imm))), addr:$dst),
          (VEXTRACTI128mr addr:$dst, VR256:$src1,
           (EXTRACT_get_vextract128_imm VR128:$ext))>;
def : Pat<(store (v16i8 (vextract128_extract:$ext (v32i8 VR256:$src1),
                         (iPTR imm))), addr:$dst),
          (VEXTRACTI128mr addr:$dst, VR256:$src1,
           (EXTRACT_get_vextract128_imm VR128:$ext))>;
}
8930f4a2713aSLionel Sambuc
//===----------------------------------------------------------------------===//
// VPMASKMOV - Conditional SIMD Integer Packed Loads and Stores
//
// avx2_pmovmask - masked integer load/store instructions.
//   OpcodeStr          - assembly mnemonic.
//   IntLd128, IntLd256 - masked-load intrinsics, called as (addr, $src1).
//   IntSt128, IntSt256 - masked-store intrinsics, called as
//                        (addr, $src1, $src2).
// Defines rm/Yrm (opcode 0x8c, loads) and mr/Ymr (opcode 0x8e, stores) in
// 128-bit and 256-bit widths.
// NOTE(review): no Sched<> is attached to these definitions, unlike the
// neighbouring AVX2 instructions -- confirm this is intentional.
multiclass avx2_pmovmask<string OpcodeStr,
                         Intrinsic IntLd128, Intrinsic IntLd256,
                         Intrinsic IntSt128, Intrinsic IntSt256> {
  def rm  : AVX28I<0x8c, MRMSrcMem, (outs VR128:$dst),
             (ins VR128:$src1, i128mem:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set VR128:$dst, (IntLd128 addr:$src2, VR128:$src1))]>, VEX_4V;
  def Yrm : AVX28I<0x8c, MRMSrcMem, (outs VR256:$dst),
             (ins VR256:$src1, i256mem:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>,
             VEX_4V, VEX_L;
  def mr  : AVX28I<0x8e, MRMDestMem, (outs),
             (ins i128mem:$dst, VR128:$src1, VR128:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(IntSt128 addr:$dst, VR128:$src1, VR128:$src2)]>, VEX_4V;
  def Ymr : AVX28I<0x8e, MRMDestMem, (outs),
             (ins i256mem:$dst, VR256:$src1, VR256:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, VEX_4V, VEX_L;
}
8955f4a2713aSLionel Sambuc
// Instantiate the masked moves for 32-bit (D) and 64-bit (Q) elements; the Q
// variant adds VEX_W.
defm VPMASKMOVD : avx2_pmovmask<"vpmaskmovd",
                                int_x86_avx2_maskload_d,
                                int_x86_avx2_maskload_d_256,
                                int_x86_avx2_maskstore_d,
                                int_x86_avx2_maskstore_d_256>;
defm VPMASKMOVQ : avx2_pmovmask<"vpmaskmovq",
                                int_x86_avx2_maskload_q,
                                int_x86_avx2_maskload_q_256,
                                int_x86_avx2_maskstore_q,
                                int_x86_avx2_maskstore_q_256>, VEX_W;
8966f4a2713aSLionel Sambuc
// Lowering of the generic masked_store / masked_load nodes for vectors of
// 32-bit elements.  FP data uses VMASKMOVPS, integer data uses VPMASKMOVD.
// NOTE(review): these patterns are not wrapped in a Predicates scope here;
// confirm that they are only reachable on subtargets providing the selected
// instructions.

// Masked stores: operands are (pointer, mask, value).
def: Pat<(masked_store addr:$ptr, (v8i32 VR256:$mask), (v8f32 VR256:$src)),
         (VMASKMOVPSYmr addr:$ptr, VR256:$mask, VR256:$src)>;

def: Pat<(masked_store addr:$ptr, (v8i32 VR256:$mask), (v8i32 VR256:$src)),
         (VPMASKMOVDYmr addr:$ptr, VR256:$mask, VR256:$src)>;

def: Pat<(masked_store addr:$ptr, (v4i32 VR128:$mask), (v4f32 VR128:$src)),
         (VMASKMOVPSmr addr:$ptr, VR128:$mask, VR128:$src)>;

def: Pat<(masked_store addr:$ptr, (v4i32 VR128:$mask), (v4i32 VR128:$src)),
         (VPMASKMOVDmr addr:$ptr, VR128:$mask, VR128:$src)>;

// Masked loads come in three flavours per type, keyed on the pass-through
// operand: undef and all-zeros map to the bare masked load; any other
// pass-through value is blended with the loaded value under the same mask.

// v8f32
def: Pat<(v8f32 (masked_load addr:$ptr, (v8i32 VR256:$mask), undef)),
         (VMASKMOVPSYrm VR256:$mask, addr:$ptr)>;

def: Pat<(v8f32 (masked_load addr:$ptr, (v8i32 VR256:$mask),
                             (bc_v8f32 (v8i32 immAllZerosV)))),
         (VMASKMOVPSYrm VR256:$mask, addr:$ptr)>;

def: Pat<(v8f32 (masked_load addr:$ptr, (v8i32 VR256:$mask), (v8f32 VR256:$src0))),
         (VBLENDVPSYrr VR256:$src0, (VMASKMOVPSYrm VR256:$mask, addr:$ptr),
                       VR256:$mask)>;

// v8i32 (the blend below is the FP VBLENDVPS form even for integer data)
def: Pat<(v8i32 (masked_load addr:$ptr, (v8i32 VR256:$mask), undef)),
         (VPMASKMOVDYrm VR256:$mask, addr:$ptr)>;

def: Pat<(v8i32 (masked_load addr:$ptr, (v8i32 VR256:$mask), (v8i32 immAllZerosV))),
         (VPMASKMOVDYrm VR256:$mask, addr:$ptr)>;

def: Pat<(v8i32 (masked_load addr:$ptr, (v8i32 VR256:$mask), (v8i32 VR256:$src0))),
         (VBLENDVPSYrr VR256:$src0, (VPMASKMOVDYrm VR256:$mask, addr:$ptr),
                       VR256:$mask)>;

// v4f32
def: Pat<(v4f32 (masked_load addr:$ptr, (v4i32 VR128:$mask), undef)),
         (VMASKMOVPSrm VR128:$mask, addr:$ptr)>;

def: Pat<(v4f32 (masked_load addr:$ptr, (v4i32 VR128:$mask),
                             (bc_v4f32 (v4i32 immAllZerosV)))),
         (VMASKMOVPSrm VR128:$mask, addr:$ptr)>;

def: Pat<(v4f32 (masked_load addr:$ptr, (v4i32 VR128:$mask), (v4f32 VR128:$src0))),
         (VBLENDVPSrr VR128:$src0, (VMASKMOVPSrm VR128:$mask, addr:$ptr),
                       VR128:$mask)>;

// v4i32 (again blended with the FP VBLENDVPS form)
def: Pat<(v4i32 (masked_load addr:$ptr, (v4i32 VR128:$mask), undef)),
         (VPMASKMOVDrm VR128:$mask, addr:$ptr)>;

def: Pat<(v4i32 (masked_load addr:$ptr, (v4i32 VR128:$mask), (v4i32 immAllZerosV))),
         (VPMASKMOVDrm VR128:$mask, addr:$ptr)>;

def: Pat<(v4i32 (masked_load addr:$ptr, (v4i32 VR128:$mask), (v4i32 VR128:$src0))),
         (VBLENDVPSrr VR128:$src0, (VPMASKMOVDrm VR128:$mask, addr:$ptr),
                       VR128:$mask)>;
9020*0a6a1f1dSLionel Sambuc
// Masked stores and loads, 256-bit vectors with 64-bit lanes (v4f64 and v4i64),
// all using a v4i64 mask.
//   * Stores select VMASKMOVPDYmr (v4f64) or VPMASKMOVQYmr (v4i64).
//   * Loads with an undef or all-zeros pass-through select the bare
//     VMASKMOVPDYrm / VPMASKMOVQYrm.
//   * Loads with a register pass-through $src0 add a VBLENDVPDYrr of $src0 with
//     the loaded value, controlled by the same mask.

// Store v4f64 under mask.
9021*0a6a1f1dSLionel Sambucdef: Pat<(masked_store addr:$ptr, (v4i64 VR256:$mask), (v4f64 VR256:$src)),
9022*0a6a1f1dSLionel Sambuc         (VMASKMOVPDYmr addr:$ptr, VR256:$mask, VR256:$src)>;
9023*0a6a1f1dSLionel Sambuc
// Store v4i64 under mask.
9024*0a6a1f1dSLionel Sambucdef: Pat<(masked_store addr:$ptr, (v4i64 VR256:$mask), (v4i64 VR256:$src)),
9025*0a6a1f1dSLionel Sambuc         (VPMASKMOVQYmr addr:$ptr, VR256:$mask, VR256:$src)>;
9026*0a6a1f1dSLionel Sambuc
// Load v4f64, pass-through undef.
9027*0a6a1f1dSLionel Sambucdef: Pat<(v4f64 (masked_load addr:$ptr, (v4i64 VR256:$mask), undef)),
9028*0a6a1f1dSLionel Sambuc         (VMASKMOVPDYrm VR256:$mask, addr:$ptr)>;
9029*0a6a1f1dSLionel Sambuc
// Load v4f64, pass-through all zeros.
// NOTE(review): the zero vector is typed directly as v4f64 here, whereas the
// f32 and i64 zero patterns wrap a v8i32/v4i32 immAllZerosV in a bc_* bitcast --
// confirm this form matches as intended.
9030*0a6a1f1dSLionel Sambucdef: Pat<(v4f64 (masked_load addr:$ptr, (v4i64 VR256:$mask),
9031*0a6a1f1dSLionel Sambuc                             (v4f64 immAllZerosV))),
9032*0a6a1f1dSLionel Sambuc         (VMASKMOVPDYrm VR256:$mask, addr:$ptr)>;
9033*0a6a1f1dSLionel Sambuc
// Load v4f64, pass-through in a register: blend $src0 with the loaded value.
9034*0a6a1f1dSLionel Sambucdef: Pat<(v4f64 (masked_load addr:$ptr, (v4i64 VR256:$mask), (v4f64 VR256:$src0))),
9035*0a6a1f1dSLionel Sambuc         (VBLENDVPDYrr VR256:$src0, (VMASKMOVPDYrm VR256:$mask, addr:$ptr),
9036*0a6a1f1dSLionel Sambuc                       VR256:$mask)>;
9037*0a6a1f1dSLionel Sambuc
// Load v4i64, pass-through undef.
9038*0a6a1f1dSLionel Sambucdef: Pat<(v4i64 (masked_load addr:$ptr, (v4i64 VR256:$mask), undef)),
9039*0a6a1f1dSLionel Sambuc         (VPMASKMOVQYrm VR256:$mask, addr:$ptr)>;
9040*0a6a1f1dSLionel Sambuc
// Load v4i64, pass-through all zeros (a v8i32 zero vector bitcast to v4i64).
9041*0a6a1f1dSLionel Sambucdef: Pat<(v4i64 (masked_load addr:$ptr, (v4i64 VR256:$mask),
9042*0a6a1f1dSLionel Sambuc                             (bc_v4i64 (v8i32 immAllZerosV)))),
9043*0a6a1f1dSLionel Sambuc         (VPMASKMOVQYrm VR256:$mask, addr:$ptr)>;
9044*0a6a1f1dSLionel Sambuc
// Load v4i64, pass-through in a register.
// NOTE(review): the integer result is blended with VBLENDVPDYrr (the same blend
// used for v4f64 above) -- confirm this is the intended blend for integer data.
9045*0a6a1f1dSLionel Sambucdef: Pat<(v4i64 (masked_load addr:$ptr, (v4i64 VR256:$mask), (v4i64 VR256:$src0))),
9046*0a6a1f1dSLionel Sambuc         (VBLENDVPDYrr VR256:$src0, (VPMASKMOVQYrm VR256:$mask, addr:$ptr),
9047*0a6a1f1dSLionel Sambuc                       VR256:$mask)>;
9048*0a6a1f1dSLionel Sambuc
// Masked stores and loads, 128-bit vectors with 64-bit lanes (v2f64 and v2i64),
// all using a v2i64 mask.
//   * Stores select VMASKMOVPDmr (v2f64) or VPMASKMOVQmr (v2i64).
//   * Loads with an undef or all-zeros pass-through select the bare
//     VMASKMOVPDrm / VPMASKMOVQrm.
//   * Loads with a register pass-through $src0 add a VBLENDVPDrr of $src0 with
//     the loaded value, controlled by the same mask.

// Store v2f64 under mask.
9049*0a6a1f1dSLionel Sambucdef: Pat<(masked_store addr:$ptr, (v2i64 VR128:$mask), (v2f64 VR128:$src)),
9050*0a6a1f1dSLionel Sambuc         (VMASKMOVPDmr addr:$ptr, VR128:$mask, VR128:$src)>;
9051*0a6a1f1dSLionel Sambuc
// Store v2i64 under mask.
9052*0a6a1f1dSLionel Sambucdef: Pat<(masked_store addr:$ptr, (v2i64 VR128:$mask), (v2i64 VR128:$src)),
9053*0a6a1f1dSLionel Sambuc         (VPMASKMOVQmr addr:$ptr, VR128:$mask, VR128:$src)>;
9054*0a6a1f1dSLionel Sambuc
// Load v2f64, pass-through undef.
9055*0a6a1f1dSLionel Sambucdef: Pat<(v2f64 (masked_load addr:$ptr, (v2i64 VR128:$mask), undef)),
9056*0a6a1f1dSLionel Sambuc         (VMASKMOVPDrm VR128:$mask, addr:$ptr)>;
9057*0a6a1f1dSLionel Sambuc
// Load v2f64, pass-through all zeros.
// NOTE(review): the zero vector is typed directly as v2f64 here, whereas the
// f32 and i64 zero patterns wrap a v4i32 immAllZerosV in a bc_* bitcast --
// confirm this form matches as intended.
9058*0a6a1f1dSLionel Sambucdef: Pat<(v2f64 (masked_load addr:$ptr, (v2i64 VR128:$mask),
9059*0a6a1f1dSLionel Sambuc                             (v2f64 immAllZerosV))),
9060*0a6a1f1dSLionel Sambuc         (VMASKMOVPDrm VR128:$mask, addr:$ptr)>;
9061*0a6a1f1dSLionel Sambuc
// Load v2f64, pass-through in a register: blend $src0 with the loaded value.
9062*0a6a1f1dSLionel Sambucdef: Pat<(v2f64 (masked_load addr:$ptr, (v2i64 VR128:$mask), (v2f64 VR128:$src0))),
9063*0a6a1f1dSLionel Sambuc         (VBLENDVPDrr VR128:$src0, (VMASKMOVPDrm VR128:$mask, addr:$ptr),
9064*0a6a1f1dSLionel Sambuc                       VR128:$mask)>;
9065*0a6a1f1dSLionel Sambuc
// Load v2i64, pass-through undef.
9066*0a6a1f1dSLionel Sambucdef: Pat<(v2i64 (masked_load addr:$ptr, (v2i64 VR128:$mask), undef)),
9067*0a6a1f1dSLionel Sambuc         (VPMASKMOVQrm VR128:$mask, addr:$ptr)>;
9068*0a6a1f1dSLionel Sambuc
// Load v2i64, pass-through all zeros (a v4i32 zero vector bitcast to v2i64).
9069*0a6a1f1dSLionel Sambucdef: Pat<(v2i64 (masked_load addr:$ptr, (v2i64 VR128:$mask),
9070*0a6a1f1dSLionel Sambuc                             (bc_v2i64 (v4i32 immAllZerosV)))),
9071*0a6a1f1dSLionel Sambuc         (VPMASKMOVQrm VR128:$mask, addr:$ptr)>;
9072*0a6a1f1dSLionel Sambuc
// Load v2i64, pass-through in a register.
// NOTE(review): the integer result is blended with VBLENDVPDrr (the same blend
// used for v2f64 above) -- confirm this is the intended blend for integer data.
9073*0a6a1f1dSLionel Sambucdef: Pat<(v2i64 (masked_load addr:$ptr, (v2i64 VR128:$mask), (v2i64 VR128:$src0))),
9074*0a6a1f1dSLionel Sambuc         (VBLENDVPDrr VR128:$src0, (VPMASKMOVQrm VR128:$mask, addr:$ptr),
9075*0a6a1f1dSLionel Sambuc                       VR128:$mask)>;
9076f4a2713aSLionel Sambuc
9077f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===//
9078f4a2713aSLionel Sambuc// Variable Bit Shifts
9079f4a2713aSLionel Sambuc//
// avx2_var_shift - defines the four forms of one per-element variable shift:
//   rr  : 128-bit, both operands in registers
//   rm  : 128-bit, shift-count vector loaded from memory
//   Yrr : 256-bit, both operands in registers
//   Yrm : 256-bit, shift-count vector loaded from memory
// Parameters:
//   opc       - opcode byte shared by all four forms
//   OpcodeStr - assembly mnemonic
//   OpNode    - SelectionDAG node matched by the patterns
//   vt128     - value type used for the 128-bit forms
//   vt256     - value type used for the 256-bit forms
// All forms are VEX_4V; the 256-bit forms additionally set VEX_L.
9080f4a2713aSLionel Sambucmulticlass avx2_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
9081f4a2713aSLionel Sambuc                          ValueType vt128, ValueType vt256> {
  // 128-bit register/register form.
9082f4a2713aSLionel Sambuc  def rr  : AVX28I<opc, MRMSrcReg, (outs VR128:$dst),
9083f4a2713aSLionel Sambuc             (ins VR128:$src1, VR128:$src2),
9084f4a2713aSLionel Sambuc             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
9085f4a2713aSLionel Sambuc             [(set VR128:$dst,
9086f4a2713aSLionel Sambuc               (vt128 (OpNode VR128:$src1, (vt128 VR128:$src2))))]>,
9087*0a6a1f1dSLionel Sambuc             VEX_4V, Sched<[WriteVarVecShift]>;
  // 128-bit register/memory form: $src2 is matched as a v2i64 load that is
  // bitconverted to vt128.
9088f4a2713aSLionel Sambuc  def rm  : AVX28I<opc, MRMSrcMem, (outs VR128:$dst),
9089f4a2713aSLionel Sambuc             (ins VR128:$src1, i128mem:$src2),
9090f4a2713aSLionel Sambuc             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
9091f4a2713aSLionel Sambuc             [(set VR128:$dst,
9092f4a2713aSLionel Sambuc               (vt128 (OpNode VR128:$src1,
9093f4a2713aSLionel Sambuc                       (vt128 (bitconvert (loadv2i64 addr:$src2))))))]>,
9094*0a6a1f1dSLionel Sambuc             VEX_4V, Sched<[WriteVarVecShiftLd, ReadAfterLd]>;
  // 256-bit register/register form.
9095f4a2713aSLionel Sambuc  def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst),
9096f4a2713aSLionel Sambuc             (ins VR256:$src1, VR256:$src2),
9097f4a2713aSLionel Sambuc             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
9098f4a2713aSLionel Sambuc             [(set VR256:$dst,
9099f4a2713aSLionel Sambuc               (vt256 (OpNode VR256:$src1, (vt256 VR256:$src2))))]>,
9100*0a6a1f1dSLionel Sambuc             VEX_4V, VEX_L, Sched<[WriteVarVecShift]>;
  // 256-bit register/memory form: $src2 is matched as a v4i64 load that is
  // bitconverted to vt256.
9101f4a2713aSLionel Sambuc  def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst),
9102f4a2713aSLionel Sambuc             (ins VR256:$src1, i256mem:$src2),
9103f4a2713aSLionel Sambuc             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
9104f4a2713aSLionel Sambuc             [(set VR256:$dst,
9105f4a2713aSLionel Sambuc               (vt256 (OpNode VR256:$src1,
9106f4a2713aSLionel Sambuc                       (vt256 (bitconvert (loadv4i64 addr:$src2))))))]>,
9107*0a6a1f1dSLionel Sambuc             VEX_4V, VEX_L, Sched<[WriteVarVecShiftLd, ReadAfterLd]>;
9108f4a2713aSLionel Sambuc}
9109f4a2713aSLionel Sambuc
// Instantiations of avx2_var_shift. Each defm yields the rr/rm/Yrr/Yrm forms.
// The D (32-bit element) and Q (64-bit element) variants of a shift share an
// opcode byte; the Q variants are distinguished by VEX_W.
//   0x47 - shl (vpsllvd / vpsllvq)
//   0x45 - srl (vpsrlvd / vpsrlvq)
//   0x46 - sra (vpsravd only; no Q variant is instantiated in this group)
9110f4a2713aSLionel Sambucdefm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", shl, v4i32, v8i32>;
9111f4a2713aSLionel Sambucdefm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", shl, v2i64, v4i64>, VEX_W;
9112f4a2713aSLionel Sambucdefm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", srl, v4i32, v8i32>;
9113f4a2713aSLionel Sambucdefm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", srl, v2i64, v4i64>, VEX_W;
9114f4a2713aSLionel Sambucdefm VPSRAVD : avx2_var_shift<0x46, "vpsravd", sra, v4i32, v8i32>;
9115f4a2713aSLionel Sambuc
9116f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===//
9117f4a2713aSLionel Sambuc// VGATHER - GATHER Operations
// avx2_gather - defines the two forms of one gather instruction:
//   rm  : destination and mask in VR128, memory operand of kind memop128
//   Yrm : destination and mask in RC256, memory operand of kind memop256
// Parameters:
//   opc       - opcode byte shared by both forms
//   OpcodeStr - assembly mnemonic
//   RC256     - register class for $dst/$src1/$mask of the Yrm form (callers
//               may pass VR128 here as well as VR256)
//   memop128  - memory operand type for the rm form
//   memop256  - memory operand type for the Yrm form
// Each form has two outputs: $dst and $mask_wb (a written-back copy of the
// mask). The pattern list is empty, so no selection pattern is attached to
// these records here. The asm string prints only $mask, $src2 and $dst;
// $src1 is not printed.
9118f4a2713aSLionel Sambucmulticlass avx2_gather<bits<8> opc, string OpcodeStr, RegisterClass RC256,
9119f4a2713aSLionel Sambuc                       X86MemOperand memop128, X86MemOperand memop256> {
  // Form using VR128 registers and memop128.
9120f4a2713aSLionel Sambuc  def rm  : AVX28I<opc, MRMSrcMem, (outs VR128:$dst, VR128:$mask_wb),
9121f4a2713aSLionel Sambuc            (ins VR128:$src1, memop128:$src2, VR128:$mask),
9122f4a2713aSLionel Sambuc            !strconcat(OpcodeStr,
9123f4a2713aSLionel Sambuc              "\t{$mask, $src2, $dst|$dst, $src2, $mask}"),
9124f4a2713aSLionel Sambuc            []>, VEX_4VOp3;
  // Form using RC256 registers and memop256; additionally sets VEX_L.
9125f4a2713aSLionel Sambuc  def Yrm : AVX28I<opc, MRMSrcMem, (outs RC256:$dst, RC256:$mask_wb),
9126f4a2713aSLionel Sambuc            (ins RC256:$src1, memop256:$src2, RC256:$mask),
9127f4a2713aSLionel Sambuc            !strconcat(OpcodeStr,
9128f4a2713aSLionel Sambuc              "\t{$mask, $src2, $dst|$dst, $src2, $mask}"),
9129f4a2713aSLionel Sambuc            []>, VEX_4VOp3, VEX_L;
9130f4a2713aSLionel Sambuc}
9131f4a2713aSLionel Sambuc
// Instantiations of avx2_gather. Every record defined in this block gets:
//   * mayLoad = 1
//   * Constraints that tie $src1 to $dst and $mask to $mask_wb, and mark both
//     outputs ($dst and $mask_wb) as early-clobber.
// Integer gathers use opcodes 0x90/0x91 and FP gathers use 0x92/0x93; within
// each pair the 64-bit-element variant is distinguished by VEX_W.
9132f4a2713aSLionel Sambuclet mayLoad = 1, Constraints
9133f4a2713aSLionel Sambuc  = "@earlyclobber $dst,@earlyclobber $mask_wb, $src1 = $dst, $mask = $mask_wb"
9134f4a2713aSLionel Sambuc  in {
  // Integer gathers. Note that VPGATHERQD passes VR128 as the RC256 argument,
  // so its Yrm form also uses 128-bit registers for $dst/$src1/$mask.
9135f4a2713aSLionel Sambuc  defm VPGATHERDQ : avx2_gather<0x90, "vpgatherdq", VR256, vx64mem, vx64mem>, VEX_W;
9136f4a2713aSLionel Sambuc  defm VPGATHERQQ : avx2_gather<0x91, "vpgatherqq", VR256, vx64mem, vy64mem>, VEX_W;
9137f4a2713aSLionel Sambuc  defm VPGATHERDD : avx2_gather<0x90, "vpgatherdd", VR256, vx32mem, vy32mem>;
9138f4a2713aSLionel Sambuc  defm VPGATHERQD : avx2_gather<0x91, "vpgatherqd", VR128, vx32mem, vy32mem>;
9139*0a6a1f1dSLionel Sambuc
  // Double-precision FP gathers, tagged with the packed-double execution domain.
9140*0a6a1f1dSLionel Sambuc  let ExeDomain = SSEPackedDouble in {
9141*0a6a1f1dSLionel Sambuc    defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd", VR256, vx64mem, vx64mem>, VEX_W;
9142*0a6a1f1dSLionel Sambuc    defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd", VR256, vx64mem, vy64mem>, VEX_W;
9143*0a6a1f1dSLionel Sambuc  }
9144*0a6a1f1dSLionel Sambuc
  // Single-precision FP gathers, tagged with the packed-single execution
  // domain. VGATHERQPS passes VR128 as the RC256 argument, like VPGATHERQD.
9145*0a6a1f1dSLionel Sambuc  let ExeDomain = SSEPackedSingle in {
9146*0a6a1f1dSLionel Sambuc    defm VGATHERDPS : avx2_gather<0x92, "vgatherdps", VR256, vx32mem, vy32mem>;
9147*0a6a1f1dSLionel Sambuc    defm VGATHERQPS : avx2_gather<0x93, "vgatherqps", VR128, vx32mem, vy32mem>;
9148*0a6a1f1dSLionel Sambuc  }
9149f4a2713aSLionel Sambuc}
9150