xref: /llvm-project/llvm/test/CodeGen/X86/sse-intrinsics-x86.ll (revision 2f448bf509432c1a19ec46ab8cbc7353c03c6280)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,SSE,X86-SSE
3; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,AVX,X86-AVX,AVX1
4; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,AVX,X86-AVX,AVX512
5; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=-sse2 -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,SSE,X64-SSE
6; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,AVX,X64-AVX,AVX1
7; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,AVX,X64-AVX,AVX512
8
; Exercises @llvm.x86.sse.cmp.ps with predicate immediate 7. The assertions
; below expect cmpordps for the SSE runs and vcmpordps for the AVX runs.
9define <4 x float> @test_x86_sse_cmp_ps(<4 x float> %a0, <4 x float> %a1) {
10; SSE-LABEL: test_x86_sse_cmp_ps:
11; SSE:       ## %bb.0:
12; SSE-NEXT:    cmpordps %xmm1, %xmm0 ## encoding: [0x0f,0xc2,0xc1,0x07]
13; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
14;
15; AVX-LABEL: test_x86_sse_cmp_ps:
16; AVX:       ## %bb.0:
17; AVX-NEXT:    vcmpordps %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0xc2,0xc1,0x07]
18; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
19  %res = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
20  ret <4 x float> %res
21}
22declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone
23
24
; Exercises @llvm.x86.sse.cmp.ss with predicate immediate 7. The assertions
; below expect cmpordss for the SSE runs and vcmpordss for the AVX runs.
25define <4 x float> @test_x86_sse_cmp_ss(<4 x float> %a0, <4 x float> %a1) {
26; SSE-LABEL: test_x86_sse_cmp_ss:
27; SSE:       ## %bb.0:
28; SSE-NEXT:    cmpordss %xmm1, %xmm0 ## encoding: [0xf3,0x0f,0xc2,0xc1,0x07]
29; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
30;
31; AVX-LABEL: test_x86_sse_cmp_ss:
32; AVX:       ## %bb.0:
33; AVX-NEXT:    vcmpordss %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0xc2,0xc1,0x07]
34; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
35  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
36  ret <4 x float> %res
37}
38declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone
39
40
; Exercises @llvm.x86.sse.comieq.ss. The expected sequence is comiss/vcomiss
; followed by setnp and sete whose results are combined with andb and then
; zero-extended into %eax.
41define i32 @test_x86_sse_comieq_ss(<4 x float> %a0, <4 x float> %a1) {
42; SSE-LABEL: test_x86_sse_comieq_ss:
43; SSE:       ## %bb.0:
44; SSE-NEXT:    comiss %xmm1, %xmm0 ## encoding: [0x0f,0x2f,0xc1]
45; SSE-NEXT:    setnp %al ## encoding: [0x0f,0x9b,0xc0]
46; SSE-NEXT:    sete %cl ## encoding: [0x0f,0x94,0xc1]
47; SSE-NEXT:    andb %al, %cl ## encoding: [0x20,0xc1]
48; SSE-NEXT:    movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
49; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
50;
51; AVX1-LABEL: test_x86_sse_comieq_ss:
52; AVX1:       ## %bb.0:
53; AVX1-NEXT:    vcomiss %xmm1, %xmm0 ## encoding: [0xc5,0xf8,0x2f,0xc1]
54; AVX1-NEXT:    setnp %al ## encoding: [0x0f,0x9b,0xc0]
55; AVX1-NEXT:    sete %cl ## encoding: [0x0f,0x94,0xc1]
56; AVX1-NEXT:    andb %al, %cl ## encoding: [0x20,0xc1]
57; AVX1-NEXT:    movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
58; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
59;
60; AVX512-LABEL: test_x86_sse_comieq_ss:
61; AVX512:       ## %bb.0:
62; AVX512-NEXT:    vcomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc1]
63; AVX512-NEXT:    setnp %al ## encoding: [0x0f,0x9b,0xc0]
64; AVX512-NEXT:    sete %cl ## encoding: [0x0f,0x94,0xc1]
65; AVX512-NEXT:    andb %al, %cl ## encoding: [0x20,0xc1]
66; AVX512-NEXT:    movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
67; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
68  %res = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
69  ret i32 %res
70}
71declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone
72
73
; Exercises @llvm.x86.sse.comige.ss. The expected sequence zeroes %eax, runs
; comiss/vcomiss on (%xmm1, %xmm0) and materializes the result with setae.
74define i32 @test_x86_sse_comige_ss(<4 x float> %a0, <4 x float> %a1) {
75; SSE-LABEL: test_x86_sse_comige_ss:
76; SSE:       ## %bb.0:
77; SSE-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
78; SSE-NEXT:    comiss %xmm1, %xmm0 ## encoding: [0x0f,0x2f,0xc1]
79; SSE-NEXT:    setae %al ## encoding: [0x0f,0x93,0xc0]
80; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
81;
82; AVX1-LABEL: test_x86_sse_comige_ss:
83; AVX1:       ## %bb.0:
84; AVX1-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
85; AVX1-NEXT:    vcomiss %xmm1, %xmm0 ## encoding: [0xc5,0xf8,0x2f,0xc1]
86; AVX1-NEXT:    setae %al ## encoding: [0x0f,0x93,0xc0]
87; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
88;
89; AVX512-LABEL: test_x86_sse_comige_ss:
90; AVX512:       ## %bb.0:
91; AVX512-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
92; AVX512-NEXT:    vcomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc1]
93; AVX512-NEXT:    setae %al ## encoding: [0x0f,0x93,0xc0]
94; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
95  %res = call i32 @llvm.x86.sse.comige.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
96  ret i32 %res
97}
98declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>) nounwind readnone
99
100
; Exercises @llvm.x86.sse.comigt.ss. The expected sequence zeroes %eax, runs
; comiss/vcomiss on (%xmm1, %xmm0) and materializes the result with seta.
101define i32 @test_x86_sse_comigt_ss(<4 x float> %a0, <4 x float> %a1) {
102; SSE-LABEL: test_x86_sse_comigt_ss:
103; SSE:       ## %bb.0:
104; SSE-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
105; SSE-NEXT:    comiss %xmm1, %xmm0 ## encoding: [0x0f,0x2f,0xc1]
106; SSE-NEXT:    seta %al ## encoding: [0x0f,0x97,0xc0]
107; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
108;
109; AVX1-LABEL: test_x86_sse_comigt_ss:
110; AVX1:       ## %bb.0:
111; AVX1-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
112; AVX1-NEXT:    vcomiss %xmm1, %xmm0 ## encoding: [0xc5,0xf8,0x2f,0xc1]
113; AVX1-NEXT:    seta %al ## encoding: [0x0f,0x97,0xc0]
114; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
115;
116; AVX512-LABEL: test_x86_sse_comigt_ss:
117; AVX512:       ## %bb.0:
118; AVX512-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
119; AVX512-NEXT:    vcomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc1]
120; AVX512-NEXT:    seta %al ## encoding: [0x0f,0x97,0xc0]
121; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
122  %res = call i32 @llvm.x86.sse.comigt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
123  ret i32 %res
124}
125declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>) nounwind readnone
126
127
; Exercises @llvm.x86.sse.comile.ss. Note the expected comiss/vcomiss takes its
; operands in the order (%xmm0, %xmm1) and the result is produced with setae.
128define i32 @test_x86_sse_comile_ss(<4 x float> %a0, <4 x float> %a1) {
129; SSE-LABEL: test_x86_sse_comile_ss:
130; SSE:       ## %bb.0:
131; SSE-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
132; SSE-NEXT:    comiss %xmm0, %xmm1 ## encoding: [0x0f,0x2f,0xc8]
133; SSE-NEXT:    setae %al ## encoding: [0x0f,0x93,0xc0]
134; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
135;
136; AVX1-LABEL: test_x86_sse_comile_ss:
137; AVX1:       ## %bb.0:
138; AVX1-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
139; AVX1-NEXT:    vcomiss %xmm0, %xmm1 ## encoding: [0xc5,0xf8,0x2f,0xc8]
140; AVX1-NEXT:    setae %al ## encoding: [0x0f,0x93,0xc0]
141; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
142;
143; AVX512-LABEL: test_x86_sse_comile_ss:
144; AVX512:       ## %bb.0:
145; AVX512-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
146; AVX512-NEXT:    vcomiss %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc8]
147; AVX512-NEXT:    setae %al ## encoding: [0x0f,0x93,0xc0]
148; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
149  %res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
150  ret i32 %res
151}
152declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>) nounwind readnone
153
154
; Exercises @llvm.x86.sse.comilt.ss. Note the expected comiss/vcomiss takes its
; operands in the order (%xmm0, %xmm1) and the result is produced with seta.
155define i32 @test_x86_sse_comilt_ss(<4 x float> %a0, <4 x float> %a1) {
156; SSE-LABEL: test_x86_sse_comilt_ss:
157; SSE:       ## %bb.0:
158; SSE-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
159; SSE-NEXT:    comiss %xmm0, %xmm1 ## encoding: [0x0f,0x2f,0xc8]
160; SSE-NEXT:    seta %al ## encoding: [0x0f,0x97,0xc0]
161; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
162;
163; AVX1-LABEL: test_x86_sse_comilt_ss:
164; AVX1:       ## %bb.0:
165; AVX1-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
166; AVX1-NEXT:    vcomiss %xmm0, %xmm1 ## encoding: [0xc5,0xf8,0x2f,0xc8]
167; AVX1-NEXT:    seta %al ## encoding: [0x0f,0x97,0xc0]
168; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
169;
170; AVX512-LABEL: test_x86_sse_comilt_ss:
171; AVX512:       ## %bb.0:
172; AVX512-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
173; AVX512-NEXT:    vcomiss %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc8]
174; AVX512-NEXT:    seta %al ## encoding: [0x0f,0x97,0xc0]
175; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
176  %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
177  ret i32 %res
178}
179declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>) nounwind readnone
180
181
; Exercises @llvm.x86.sse.comineq.ss. The expected sequence is comiss/vcomiss
; followed by setp and setne whose results are combined with orb and then
; zero-extended into %eax.
182define i32 @test_x86_sse_comineq_ss(<4 x float> %a0, <4 x float> %a1) {
183; SSE-LABEL: test_x86_sse_comineq_ss:
184; SSE:       ## %bb.0:
185; SSE-NEXT:    comiss %xmm1, %xmm0 ## encoding: [0x0f,0x2f,0xc1]
186; SSE-NEXT:    setp %al ## encoding: [0x0f,0x9a,0xc0]
187; SSE-NEXT:    setne %cl ## encoding: [0x0f,0x95,0xc1]
188; SSE-NEXT:    orb %al, %cl ## encoding: [0x08,0xc1]
189; SSE-NEXT:    movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
190; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
191;
192; AVX1-LABEL: test_x86_sse_comineq_ss:
193; AVX1:       ## %bb.0:
194; AVX1-NEXT:    vcomiss %xmm1, %xmm0 ## encoding: [0xc5,0xf8,0x2f,0xc1]
195; AVX1-NEXT:    setp %al ## encoding: [0x0f,0x9a,0xc0]
196; AVX1-NEXT:    setne %cl ## encoding: [0x0f,0x95,0xc1]
197; AVX1-NEXT:    orb %al, %cl ## encoding: [0x08,0xc1]
198; AVX1-NEXT:    movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
199; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
200;
201; AVX512-LABEL: test_x86_sse_comineq_ss:
202; AVX512:       ## %bb.0:
203; AVX512-NEXT:    vcomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc1]
204; AVX512-NEXT:    setp %al ## encoding: [0x0f,0x9a,0xc0]
205; AVX512-NEXT:    setne %cl ## encoding: [0x0f,0x95,0xc1]
206; AVX512-NEXT:    orb %al, %cl ## encoding: [0x08,0xc1]
207; AVX512-NEXT:    movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
208; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
209  %res = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
210  ret i32 %res
211}
212declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>) nounwind readnone
213
214
; Exercises @llvm.x86.sse.cvtss2si. The assertions expect a single
; cvtss2si/vcvtss2si from %xmm0 into %eax.
215define i32 @test_x86_sse_cvtss2si(<4 x float> %a0) {
216; SSE-LABEL: test_x86_sse_cvtss2si:
217; SSE:       ## %bb.0:
218; SSE-NEXT:    cvtss2si %xmm0, %eax ## encoding: [0xf3,0x0f,0x2d,0xc0]
219; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
220;
221; AVX1-LABEL: test_x86_sse_cvtss2si:
222; AVX1:       ## %bb.0:
223; AVX1-NEXT:    vcvtss2si %xmm0, %eax ## encoding: [0xc5,0xfa,0x2d,0xc0]
224; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
225;
226; AVX512-LABEL: test_x86_sse_cvtss2si:
227; AVX512:       ## %bb.0:
228; AVX512-NEXT:    vcvtss2si %xmm0, %eax ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2d,0xc0]
229; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
230  %res = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0) ; <i32> [#uses=1]
231  ret i32 %res
232}
233declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone
234
235
; Exercises @llvm.x86.sse.cvttss2si. The assertions expect a single
; cvttss2si/vcvttss2si from %xmm0 into %eax.
236define i32 @test_x86_sse_cvttss2si(<4 x float> %a0) {
237; SSE-LABEL: test_x86_sse_cvttss2si:
238; SSE:       ## %bb.0:
239; SSE-NEXT:    cvttss2si %xmm0, %eax ## encoding: [0xf3,0x0f,0x2c,0xc0]
240; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
241;
242; AVX1-LABEL: test_x86_sse_cvttss2si:
243; AVX1:       ## %bb.0:
244; AVX1-NEXT:    vcvttss2si %xmm0, %eax ## encoding: [0xc5,0xfa,0x2c,0xc0]
245; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
246;
247; AVX512-LABEL: test_x86_sse_cvttss2si:
248; AVX512:       ## %bb.0:
249; AVX512-NEXT:    vcvttss2si %xmm0, %eax ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2c,0xc0]
250; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
251  %res = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0) ; <i32> [#uses=1]
252  ret i32 %res
253}
254declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone
255
256
; Exercises @llvm.x86.sse.ldmxcsr with a pointer argument. The expectations are
; split per target: the i386 runs first load the pointer from the stack into
; %eax, while the x86_64 runs use %rdi directly. AVX runs expect vldmxcsr.
257define void @test_x86_sse_ldmxcsr(ptr %a0) {
258; X86-SSE-LABEL: test_x86_sse_ldmxcsr:
259; X86-SSE:       ## %bb.0:
260; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
261; X86-SSE-NEXT:    ldmxcsr (%eax) ## encoding: [0x0f,0xae,0x10]
262; X86-SSE-NEXT:    retl ## encoding: [0xc3]
263;
264; X86-AVX-LABEL: test_x86_sse_ldmxcsr:
265; X86-AVX:       ## %bb.0:
266; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
267; X86-AVX-NEXT:    vldmxcsr (%eax) ## encoding: [0xc5,0xf8,0xae,0x10]
268; X86-AVX-NEXT:    retl ## encoding: [0xc3]
269;
270; X64-SSE-LABEL: test_x86_sse_ldmxcsr:
271; X64-SSE:       ## %bb.0:
272; X64-SSE-NEXT:    ldmxcsr (%rdi) ## encoding: [0x0f,0xae,0x17]
273; X64-SSE-NEXT:    retq ## encoding: [0xc3]
274;
275; X64-AVX-LABEL: test_x86_sse_ldmxcsr:
276; X64-AVX:       ## %bb.0:
277; X64-AVX-NEXT:    vldmxcsr (%rdi) ## encoding: [0xc5,0xf8,0xae,0x17]
278; X64-AVX-NEXT:    retq ## encoding: [0xc3]
279  call void @llvm.x86.sse.ldmxcsr(ptr %a0)
280  ret void
281}
282declare void @llvm.x86.sse.ldmxcsr(ptr) nounwind
283
284
285
; Exercises @llvm.x86.sse.max.ps. The assertions expect a single maxps/vmaxps
; of %xmm1 into %xmm0.
286define <4 x float> @test_x86_sse_max_ps(<4 x float> %a0, <4 x float> %a1) {
287; SSE-LABEL: test_x86_sse_max_ps:
288; SSE:       ## %bb.0:
289; SSE-NEXT:    maxps %xmm1, %xmm0 ## encoding: [0x0f,0x5f,0xc1]
290; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
291;
292; AVX1-LABEL: test_x86_sse_max_ps:
293; AVX1:       ## %bb.0:
294; AVX1-NEXT:    vmaxps %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x5f,0xc1]
295; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
296;
297; AVX512-LABEL: test_x86_sse_max_ps:
298; AVX512:       ## %bb.0:
299; AVX512-NEXT:    vmaxps %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5f,0xc1]
300; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
301  %res = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
302  ret <4 x float> %res
303}
304declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone
305
306
; Exercises @llvm.x86.sse.max.ss. The assertions expect a single maxss/vmaxss
; of %xmm1 into %xmm0.
307define <4 x float> @test_x86_sse_max_ss(<4 x float> %a0, <4 x float> %a1) {
308; SSE-LABEL: test_x86_sse_max_ss:
309; SSE:       ## %bb.0:
310; SSE-NEXT:    maxss %xmm1, %xmm0 ## encoding: [0xf3,0x0f,0x5f,0xc1]
311; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
312;
313; AVX1-LABEL: test_x86_sse_max_ss:
314; AVX1:       ## %bb.0:
315; AVX1-NEXT:    vmaxss %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5f,0xc1]
316; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
317;
318; AVX512-LABEL: test_x86_sse_max_ss:
319; AVX512:       ## %bb.0:
320; AVX512-NEXT:    vmaxss %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5f,0xc1]
321; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
322  %res = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
323  ret <4 x float> %res
324}
325declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone
326
327
; Exercises @llvm.x86.sse.min.ps. The assertions expect a single minps/vminps
; of %xmm1 into %xmm0.
328define <4 x float> @test_x86_sse_min_ps(<4 x float> %a0, <4 x float> %a1) {
329; SSE-LABEL: test_x86_sse_min_ps:
330; SSE:       ## %bb.0:
331; SSE-NEXT:    minps %xmm1, %xmm0 ## encoding: [0x0f,0x5d,0xc1]
332; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
333;
334; AVX1-LABEL: test_x86_sse_min_ps:
335; AVX1:       ## %bb.0:
336; AVX1-NEXT:    vminps %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x5d,0xc1]
337; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
338;
339; AVX512-LABEL: test_x86_sse_min_ps:
340; AVX512:       ## %bb.0:
341; AVX512-NEXT:    vminps %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5d,0xc1]
342; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
343  %res = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
344  ret <4 x float> %res
345}
346declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone
347
348
; Exercises @llvm.x86.sse.min.ss. The assertions expect a single minss/vminss
; of %xmm1 into %xmm0.
349define <4 x float> @test_x86_sse_min_ss(<4 x float> %a0, <4 x float> %a1) {
350; SSE-LABEL: test_x86_sse_min_ss:
351; SSE:       ## %bb.0:
352; SSE-NEXT:    minss %xmm1, %xmm0 ## encoding: [0xf3,0x0f,0x5d,0xc1]
353; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
354;
355; AVX1-LABEL: test_x86_sse_min_ss:
356; AVX1:       ## %bb.0:
357; AVX1-NEXT:    vminss %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5d,0xc1]
358; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
359;
360; AVX512-LABEL: test_x86_sse_min_ss:
361; AVX512:       ## %bb.0:
362; AVX512-NEXT:    vminss %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5d,0xc1]
363; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
364  %res = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
365  ret <4 x float> %res
366}
367declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone
368
369
; Exercises @llvm.x86.sse.movmsk.ps. The assertions expect a single
; movmskps/vmovmskps from %xmm0 into %eax; both AVX configurations share one
; set of expectations here.
370define i32 @test_x86_sse_movmsk_ps(<4 x float> %a0) {
371; SSE-LABEL: test_x86_sse_movmsk_ps:
372; SSE:       ## %bb.0:
373; SSE-NEXT:    movmskps %xmm0, %eax ## encoding: [0x0f,0x50,0xc0]
374; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
375;
376; AVX-LABEL: test_x86_sse_movmsk_ps:
377; AVX:       ## %bb.0:
378; AVX-NEXT:    vmovmskps %xmm0, %eax ## encoding: [0xc5,0xf8,0x50,0xc0]
379; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
380  %res = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0) ; <i32> [#uses=1]
381  ret i32 %res
382}
383declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone
384
385
386
; Exercises @llvm.x86.sse.rcp.ps. The assertions expect a single rcpps/vrcpps
; operating in place on %xmm0.
387define <4 x float> @test_x86_sse_rcp_ps(<4 x float> %a0) {
388; SSE-LABEL: test_x86_sse_rcp_ps:
389; SSE:       ## %bb.0:
390; SSE-NEXT:    rcpps %xmm0, %xmm0 ## encoding: [0x0f,0x53,0xc0]
391; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
392;
393; AVX-LABEL: test_x86_sse_rcp_ps:
394; AVX:       ## %bb.0:
395; AVX-NEXT:    vrcpps %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x53,0xc0]
396; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
397  %res = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
398  ret <4 x float> %res
399}
400declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone
401
402
; Exercises @llvm.x86.sse.rcp.ss. The assertions expect a single rcpss/vrcpss
; operating in place on %xmm0.
403define <4 x float> @test_x86_sse_rcp_ss(<4 x float> %a0) {
404; SSE-LABEL: test_x86_sse_rcp_ss:
405; SSE:       ## %bb.0:
406; SSE-NEXT:    rcpss %xmm0, %xmm0 ## encoding: [0xf3,0x0f,0x53,0xc0]
407; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
408;
409; AVX-LABEL: test_x86_sse_rcp_ss:
410; AVX:       ## %bb.0:
411; AVX-NEXT:    vrcpss %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x53,0xc0]
412; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
413  %res = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
414  ret <4 x float> %res
415}
416declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone
417
418
; Exercises @llvm.x86.sse.rsqrt.ps. The assertions expect a single
; rsqrtps/vrsqrtps operating in place on %xmm0.
419define <4 x float> @test_x86_sse_rsqrt_ps(<4 x float> %a0) {
420; SSE-LABEL: test_x86_sse_rsqrt_ps:
421; SSE:       ## %bb.0:
422; SSE-NEXT:    rsqrtps %xmm0, %xmm0 ## encoding: [0x0f,0x52,0xc0]
423; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
424;
425; AVX-LABEL: test_x86_sse_rsqrt_ps:
426; AVX:       ## %bb.0:
427; AVX-NEXT:    vrsqrtps %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x52,0xc0]
428; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
429  %res = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
430  ret <4 x float> %res
431}
432declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone
433
434
; Exercises @llvm.x86.sse.rsqrt.ss. The assertions expect a single
; rsqrtss/vrsqrtss operating in place on %xmm0.
435define <4 x float> @test_x86_sse_rsqrt_ss(<4 x float> %a0) {
436; SSE-LABEL: test_x86_sse_rsqrt_ss:
437; SSE:       ## %bb.0:
438; SSE-NEXT:    rsqrtss %xmm0, %xmm0 ## encoding: [0xf3,0x0f,0x52,0xc0]
439; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
440;
441; AVX-LABEL: test_x86_sse_rsqrt_ss:
442; AVX:       ## %bb.0:
443; AVX-NEXT:    vrsqrtss %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x52,0xc0]
444; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
445  %res = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
446  ret <4 x float> %res
447}
448declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
449
450
; Exercises @llvm.x86.sse.stmxcsr with a pointer argument. The expectations are
; split per target: the i386 runs first load the pointer from the stack into
; %eax, while the x86_64 runs use %rdi directly. AVX runs expect vstmxcsr.
451define void @test_x86_sse_stmxcsr(ptr %a0) {
452; X86-SSE-LABEL: test_x86_sse_stmxcsr:
453; X86-SSE:       ## %bb.0:
454; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
455; X86-SSE-NEXT:    stmxcsr (%eax) ## encoding: [0x0f,0xae,0x18]
456; X86-SSE-NEXT:    retl ## encoding: [0xc3]
457;
458; X86-AVX-LABEL: test_x86_sse_stmxcsr:
459; X86-AVX:       ## %bb.0:
460; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
461; X86-AVX-NEXT:    vstmxcsr (%eax) ## encoding: [0xc5,0xf8,0xae,0x18]
462; X86-AVX-NEXT:    retl ## encoding: [0xc3]
463;
464; X64-SSE-LABEL: test_x86_sse_stmxcsr:
465; X64-SSE:       ## %bb.0:
466; X64-SSE-NEXT:    stmxcsr (%rdi) ## encoding: [0x0f,0xae,0x1f]
467; X64-SSE-NEXT:    retq ## encoding: [0xc3]
468;
469; X64-AVX-LABEL: test_x86_sse_stmxcsr:
470; X64-AVX:       ## %bb.0:
471; X64-AVX-NEXT:    vstmxcsr (%rdi) ## encoding: [0xc5,0xf8,0xae,0x1f]
472; X64-AVX-NEXT:    retq ## encoding: [0xc3]
473  call void @llvm.x86.sse.stmxcsr(ptr %a0)
474  ret void
475}
476declare void @llvm.x86.sse.stmxcsr(ptr) nounwind
477
478
; Exercises @llvm.x86.sse.ucomieq.ss. The expected sequence is ucomiss/vucomiss
; followed by setnp and sete whose results are combined with andb and then
; zero-extended into %eax.
479define i32 @test_x86_sse_ucomieq_ss(<4 x float> %a0, <4 x float> %a1) {
480; SSE-LABEL: test_x86_sse_ucomieq_ss:
481; SSE:       ## %bb.0:
482; SSE-NEXT:    ucomiss %xmm1, %xmm0 ## encoding: [0x0f,0x2e,0xc1]
483; SSE-NEXT:    setnp %al ## encoding: [0x0f,0x9b,0xc0]
484; SSE-NEXT:    sete %cl ## encoding: [0x0f,0x94,0xc1]
485; SSE-NEXT:    andb %al, %cl ## encoding: [0x20,0xc1]
486; SSE-NEXT:    movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
487; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
488;
489; AVX1-LABEL: test_x86_sse_ucomieq_ss:
490; AVX1:       ## %bb.0:
491; AVX1-NEXT:    vucomiss %xmm1, %xmm0 ## encoding: [0xc5,0xf8,0x2e,0xc1]
492; AVX1-NEXT:    setnp %al ## encoding: [0x0f,0x9b,0xc0]
493; AVX1-NEXT:    sete %cl ## encoding: [0x0f,0x94,0xc1]
494; AVX1-NEXT:    andb %al, %cl ## encoding: [0x20,0xc1]
495; AVX1-NEXT:    movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
496; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
497;
498; AVX512-LABEL: test_x86_sse_ucomieq_ss:
499; AVX512:       ## %bb.0:
500; AVX512-NEXT:    vucomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1]
501; AVX512-NEXT:    setnp %al ## encoding: [0x0f,0x9b,0xc0]
502; AVX512-NEXT:    sete %cl ## encoding: [0x0f,0x94,0xc1]
503; AVX512-NEXT:    andb %al, %cl ## encoding: [0x20,0xc1]
504; AVX512-NEXT:    movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
505; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
506  %res = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
507  ret i32 %res
508}
509declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone
510
511
; Exercises @llvm.x86.sse.ucomige.ss. The expected sequence zeroes %eax, runs
; ucomiss/vucomiss on (%xmm1, %xmm0) and materializes the result with setae.
512define i32 @test_x86_sse_ucomige_ss(<4 x float> %a0, <4 x float> %a1) {
513; SSE-LABEL: test_x86_sse_ucomige_ss:
514; SSE:       ## %bb.0:
515; SSE-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
516; SSE-NEXT:    ucomiss %xmm1, %xmm0 ## encoding: [0x0f,0x2e,0xc1]
517; SSE-NEXT:    setae %al ## encoding: [0x0f,0x93,0xc0]
518; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
519;
520; AVX1-LABEL: test_x86_sse_ucomige_ss:
521; AVX1:       ## %bb.0:
522; AVX1-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
523; AVX1-NEXT:    vucomiss %xmm1, %xmm0 ## encoding: [0xc5,0xf8,0x2e,0xc1]
524; AVX1-NEXT:    setae %al ## encoding: [0x0f,0x93,0xc0]
525; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
526;
527; AVX512-LABEL: test_x86_sse_ucomige_ss:
528; AVX512:       ## %bb.0:
529; AVX512-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
530; AVX512-NEXT:    vucomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1]
531; AVX512-NEXT:    setae %al ## encoding: [0x0f,0x93,0xc0]
532; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
533  %res = call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
534  ret i32 %res
535}
536declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>) nounwind readnone
537
538
; Exercises @llvm.x86.sse.ucomigt.ss. The expected sequence zeroes %eax, runs
; ucomiss/vucomiss on (%xmm1, %xmm0) and materializes the result with seta.
539define i32 @test_x86_sse_ucomigt_ss(<4 x float> %a0, <4 x float> %a1) {
540; SSE-LABEL: test_x86_sse_ucomigt_ss:
541; SSE:       ## %bb.0:
542; SSE-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
543; SSE-NEXT:    ucomiss %xmm1, %xmm0 ## encoding: [0x0f,0x2e,0xc1]
544; SSE-NEXT:    seta %al ## encoding: [0x0f,0x97,0xc0]
545; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
546;
547; AVX1-LABEL: test_x86_sse_ucomigt_ss:
548; AVX1:       ## %bb.0:
549; AVX1-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
550; AVX1-NEXT:    vucomiss %xmm1, %xmm0 ## encoding: [0xc5,0xf8,0x2e,0xc1]
551; AVX1-NEXT:    seta %al ## encoding: [0x0f,0x97,0xc0]
552; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
553;
554; AVX512-LABEL: test_x86_sse_ucomigt_ss:
555; AVX512:       ## %bb.0:
556; AVX512-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
557; AVX512-NEXT:    vucomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1]
558; AVX512-NEXT:    seta %al ## encoding: [0x0f,0x97,0xc0]
559; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
560  %res = call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
561  ret i32 %res
562}
563declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>) nounwind readnone
564
565
; Exercises @llvm.x86.sse.ucomile.ss. Note the expected ucomiss/vucomiss takes
; its operands in the order (%xmm0, %xmm1) and the result is produced with
; setae.
566define i32 @test_x86_sse_ucomile_ss(<4 x float> %a0, <4 x float> %a1) {
567; SSE-LABEL: test_x86_sse_ucomile_ss:
568; SSE:       ## %bb.0:
569; SSE-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
570; SSE-NEXT:    ucomiss %xmm0, %xmm1 ## encoding: [0x0f,0x2e,0xc8]
571; SSE-NEXT:    setae %al ## encoding: [0x0f,0x93,0xc0]
572; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
573;
574; AVX1-LABEL: test_x86_sse_ucomile_ss:
575; AVX1:       ## %bb.0:
576; AVX1-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
577; AVX1-NEXT:    vucomiss %xmm0, %xmm1 ## encoding: [0xc5,0xf8,0x2e,0xc8]
578; AVX1-NEXT:    setae %al ## encoding: [0x0f,0x93,0xc0]
579; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
580;
581; AVX512-LABEL: test_x86_sse_ucomile_ss:
582; AVX512:       ## %bb.0:
583; AVX512-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
584; AVX512-NEXT:    vucomiss %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc8]
585; AVX512-NEXT:    setae %al ## encoding: [0x0f,0x93,0xc0]
586; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
587  %res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
588  ret i32 %res
589}
590declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>) nounwind readnone
591
592
; Exercises @llvm.x86.sse.ucomilt.ss. Note the expected ucomiss/vucomiss takes
; its operands in the order (%xmm0, %xmm1) and the result is produced with
; seta.
593define i32 @test_x86_sse_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) {
594; SSE-LABEL: test_x86_sse_ucomilt_ss:
595; SSE:       ## %bb.0:
596; SSE-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
597; SSE-NEXT:    ucomiss %xmm0, %xmm1 ## encoding: [0x0f,0x2e,0xc8]
598; SSE-NEXT:    seta %al ## encoding: [0x0f,0x97,0xc0]
599; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
600;
601; AVX1-LABEL: test_x86_sse_ucomilt_ss:
602; AVX1:       ## %bb.0:
603; AVX1-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
604; AVX1-NEXT:    vucomiss %xmm0, %xmm1 ## encoding: [0xc5,0xf8,0x2e,0xc8]
605; AVX1-NEXT:    seta %al ## encoding: [0x0f,0x97,0xc0]
606; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
607;
608; AVX512-LABEL: test_x86_sse_ucomilt_ss:
609; AVX512:       ## %bb.0:
610; AVX512-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
611; AVX512-NEXT:    vucomiss %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc8]
612; AVX512-NEXT:    seta %al ## encoding: [0x0f,0x97,0xc0]
613; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
614  %res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
615  ret i32 %res
616}
617declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>) nounwind readnone
618
619
; Exercises @llvm.x86.sse.ucomineq.ss. The expected sequence is
; ucomiss/vucomiss followed by setp and setne whose results are combined with
; orb and then zero-extended into %eax.
620define i32 @test_x86_sse_ucomineq_ss(<4 x float> %a0, <4 x float> %a1) {
621; SSE-LABEL: test_x86_sse_ucomineq_ss:
622; SSE:       ## %bb.0:
623; SSE-NEXT:    ucomiss %xmm1, %xmm0 ## encoding: [0x0f,0x2e,0xc1]
624; SSE-NEXT:    setp %al ## encoding: [0x0f,0x9a,0xc0]
625; SSE-NEXT:    setne %cl ## encoding: [0x0f,0x95,0xc1]
626; SSE-NEXT:    orb %al, %cl ## encoding: [0x08,0xc1]
627; SSE-NEXT:    movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
628; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
629;
630; AVX1-LABEL: test_x86_sse_ucomineq_ss:
631; AVX1:       ## %bb.0:
632; AVX1-NEXT:    vucomiss %xmm1, %xmm0 ## encoding: [0xc5,0xf8,0x2e,0xc1]
633; AVX1-NEXT:    setp %al ## encoding: [0x0f,0x9a,0xc0]
634; AVX1-NEXT:    setne %cl ## encoding: [0x0f,0x95,0xc1]
635; AVX1-NEXT:    orb %al, %cl ## encoding: [0x08,0xc1]
636; AVX1-NEXT:    movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
637; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
638;
639; AVX512-LABEL: test_x86_sse_ucomineq_ss:
640; AVX512:       ## %bb.0:
641; AVX512-NEXT:    vucomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1]
642; AVX512-NEXT:    setp %al ## encoding: [0x0f,0x9a,0xc0]
643; AVX512-NEXT:    setne %cl ## encoding: [0x0f,0x95,0xc1]
644; AVX512-NEXT:    orb %al, %cl ## encoding: [0x08,0xc1]
645; AVX512-NEXT:    movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
646; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
647  %res = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
648  ret i32 %res
649}
650declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>) nounwind readnone
651
652
; Exercises @llvm.x86.sse.sfence via a tail call. The expected output is one
; sfence instruction followed by the return, checked under the common prefix
; that every RUN line in this file enables.
653define void @sfence() nounwind {
654; CHECK-LABEL: sfence:
655; CHECK:       ## %bb.0:
656; CHECK-NEXT:    sfence ## encoding: [0x0f,0xae,0xf8]
657; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
658  tail call void @llvm.x86.sse.sfence()
659  ret void
660}
661declare void @llvm.x86.sse.sfence() nounwind
662