xref: /llvm-project/llvm/test/Transforms/InstCombine/X86/x86-sse.ll (revision acdc419c897f8a9414c7a00c8908ac32312afee2)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -passes=instcombine -mtriple=x86_64-unknown-unknown -S | FileCheck %s
; Data layout string that applies to every function defined in this file.
3target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
4
; rcp.ss where only lane 0 of the result is used. The operand carries %a in
; lane 0 and the constants 1.0, 2.0, 3.0 in lanes 1-3. The expected output
; keeps the intrinsic call but feeds it %a inserted into a poison vector, so
; the three constant inserts are gone.
5define float @test_rcp_ss_0(float %a) {
6; CHECK-LABEL: @test_rcp_ss_0(
7; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i64 0
8; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> [[TMP1]])
9; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i64 0
10; CHECK-NEXT:    ret float [[TMP3]]
11;
12  %1 = insertelement <4 x float> undef, float %a, i32 0
13  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
14  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
15  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
16  %5 = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %4)
17  %6 = extractelement <4 x float> %5, i32 0
18  ret float %6
19}
20
; rcp.ss where only lane 1 of the result is used. The expected output is just
; the constant 1.0 that was inserted into lane 1 of the operand; no intrinsic
; call or vector instruction remains.
21define float @test_rcp_ss_1(float %a) {
22; CHECK-LABEL: @test_rcp_ss_1(
23; CHECK-NEXT:    ret float 1.000000e+00
24;
25  %1 = insertelement <4 x float> undef, float %a, i32 0
26  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
27  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
28  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
29  %5 = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %4)
30  %6 = extractelement <4 x float> %5, i32 1
31  ret float %6
32}
33
; sqrt.ss where only lane 0 of the result is used. The operand carries %a in
; lane 0 and constants in lanes 1-3. The expected output is a single scalar
; call to @llvm.sqrt.f32 on %a, with no vector instructions left.
34define float @test_sqrt_ss_0(float %a) {
35; CHECK-LABEL: @test_sqrt_ss_0(
36; CHECK-NEXT:    [[TMP1:%.*]] = call float @llvm.sqrt.f32(float [[A:%.*]])
37; CHECK-NEXT:    ret float [[TMP1]]
38;
39  %1 = insertelement <4 x float> undef, float %a, i32 0
40  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
41  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
42  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
43  %5 = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %4)
44  %6 = extractelement <4 x float> %5, i32 0
45  ret float %6
46}
47
; sqrt.ss where only lane 2 of the result is used. The expected output is just
; the constant 2.0 that was inserted into lane 2 of the operand; the intrinsic
; call is gone.
48define float @test_sqrt_ss_2(float %a) {
49; CHECK-LABEL: @test_sqrt_ss_2(
50; CHECK-NEXT:    ret float 2.000000e+00
51;
52  %1 = insertelement <4 x float> undef, float %a, i32 0
53  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
54  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
55  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
56  %5 = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %4)
57  %6 = extractelement <4 x float> %5, i32 2
58  ret float %6
59}
60
; rsqrt.ss where only lane 0 of the result is used. The operand carries %a in
; lane 0 and the constants 1.0, 2.0, 3.0 in lanes 1-3. The expected output
; keeps the intrinsic call but feeds it %a inserted into a poison vector, so
; the three constant inserts are gone.
61define float @test_rsqrt_ss_0(float %a) {
62; CHECK-LABEL: @test_rsqrt_ss_0(
63; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i64 0
64; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> [[TMP1]])
65; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i64 0
66; CHECK-NEXT:    ret float [[TMP3]]
67;
68  %1 = insertelement <4 x float> undef, float %a, i32 0
69  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
70  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
71  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
72  %5 = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %4)
73  %6 = extractelement <4 x float> %5, i32 0
74  ret float %6
75}
76
; rsqrt.ss where only lane 3 of the result is used. The expected output is
; just the constant 3.0 that was inserted into lane 3 of the operand; the
; intrinsic call is gone.
77define float @test_rsqrt_ss_3(float %a) {
78; CHECK-LABEL: @test_rsqrt_ss_3(
79; CHECK-NEXT:    ret float 3.000000e+00
80;
81  %1 = insertelement <4 x float> undef, float %a, i32 0
82  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
83  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
84  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
85  %5 = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %4)
86  %6 = extractelement <4 x float> %5, i32 3
87  ret float %6
88}
89
; add.ss where only lane 0 of the result is used. Both operands carry a scalar
; argument in lane 0 (%a, %b) and constants in lanes 1-3. The expected output
; is a single scalar fadd of %a and %b with no vector instructions left.
90define float @test_add_ss_0(float %a, float %b) {
91; CHECK-LABEL: @test_add_ss_0(
92; CHECK-NEXT:    [[TMP1:%.*]] = fadd float [[A:%.*]], [[B:%.*]]
93; CHECK-NEXT:    ret float [[TMP1]]
94;
95  %1 = insertelement <4 x float> undef, float %a, i32 0
96  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
97  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
98  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
99  %5 = insertelement <4 x float> undef, float %b, i32 0
100  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
101  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
102  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
103  %9 = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %4, <4 x float> %8)
104  %r = extractelement <4 x float> %9, i32 0
105  ret float %r
106}
107
; add.ss where only lane 1 of the result is used. The second operand has only
; lane 0 set (%b inserted into undef). The expected output is the constant 1.0
; that was inserted into lane 1 of the first operand; the call is gone.
108define float @test_add_ss_1(float %a, float %b) {
109; CHECK-LABEL: @test_add_ss_1(
110; CHECK-NEXT:    ret float 1.000000e+00
111;
112  %1 = insertelement <4 x float> undef, float %a, i32 0
113  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
114  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
115  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
116  %5 = insertelement <4 x float> undef, float %b, i32 0
117  %6 = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %4, <4 x float> %5)
118  %7 = extractelement <4 x float> %6, i32 1
119  ret float %7
120}
121
; add.ss with the same vector passed as both operands: %a inserted into lane 0
; of a zeroinitializer vector. Only lane 0 of the result is used, and the
; expected output is the scalar fadd of %a with itself.
122define float @test_add_ss_2(float %a) {
123; CHECK-LABEL: @test_add_ss_2(
124; CHECK-NEXT:    [[TMP1:%.*]] = fadd float [[A:%.*]], [[A]]
125; CHECK-NEXT:    ret float [[TMP1]]
126;
127  %1 = insertelement <4 x float> zeroinitializer, float %a, i32 0
128  %2 = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %1, <4 x float> %1)
129  %3 = extractelement <4 x float> %2, i32 0
130  ret float %3
131}
132
; sub.ss where only lane 0 of the result is used. Both operands carry a scalar
; argument in lane 0 (%a, %b) and constants in lanes 1-3. The expected output
; is a single scalar fsub of %a and %b with no vector instructions left.
133define float @test_sub_ss_0(float %a, float %b) {
134; CHECK-LABEL: @test_sub_ss_0(
135; CHECK-NEXT:    [[TMP1:%.*]] = fsub float [[A:%.*]], [[B:%.*]]
136; CHECK-NEXT:    ret float [[TMP1]]
137;
138  %1 = insertelement <4 x float> undef, float %a, i32 0
139  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
140  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
141  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
142  %5 = insertelement <4 x float> undef, float %b, i32 0
143  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
144  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
145  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
146  %9 = tail call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %4, <4 x float> %8)
147  %r = extractelement <4 x float> %9, i32 0
148  ret float %r
149}
150
; sub.ss where only lane 2 of the result is used. The second operand has only
; lane 0 set (%b inserted into undef). The expected output is the constant 2.0
; that was inserted into lane 2 of the first operand; the call is gone.
151define float @test_sub_ss_2(float %a, float %b) {
152; CHECK-LABEL: @test_sub_ss_2(
153; CHECK-NEXT:    ret float 2.000000e+00
154;
155  %1 = insertelement <4 x float> undef, float %a, i32 0
156  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
157  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
158  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
159  %5 = insertelement <4 x float> undef, float %b, i32 0
160  %6 = tail call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %4, <4 x float> %5)
161  %7 = extractelement <4 x float> %6, i32 2
162  ret float %7
163}
164
; sub.ss with the same vector passed as both operands: %a inserted into lane 0
; of a zeroinitializer vector. Only lane 0 of the result is used, and the
; expected output is the scalar fsub of %a from itself (not folded further).
165define float @test_sub_ss_3(float %a) {
166; CHECK-LABEL: @test_sub_ss_3(
167; CHECK-NEXT:    [[TMP1:%.*]] = fsub float [[A:%.*]], [[A]]
168; CHECK-NEXT:    ret float [[TMP1]]
169;
170  %1 = insertelement <4 x float> zeroinitializer, float %a, i32 0
171  %2 = tail call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %1, <4 x float> %1)
172  %3 = extractelement <4 x float> %2, i32 0
173  ret float %3
174}
175
; mul.ss where only lane 0 of the result is used. Both operands carry a scalar
; argument in lane 0 (%a, %b) and constants in lanes 1-3. The expected output
; is a single scalar fmul of %a and %b with no vector instructions left.
176define float @test_mul_ss_0(float %a, float %b) {
177; CHECK-LABEL: @test_mul_ss_0(
178; CHECK-NEXT:    [[TMP1:%.*]] = fmul float [[A:%.*]], [[B:%.*]]
179; CHECK-NEXT:    ret float [[TMP1]]
180;
181  %1 = insertelement <4 x float> undef, float %a, i32 0
182  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
183  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
184  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
185  %5 = insertelement <4 x float> undef, float %b, i32 0
186  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
187  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
188  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
189  %9 = tail call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %4, <4 x float> %8)
190  %r = extractelement <4 x float> %9, i32 0
191  ret float %r
192}
193
; mul.ss where only lane 3 of the result is used. The second operand has only
; lane 0 set (%b inserted into undef). The expected output is the constant 3.0
; that was inserted into lane 3 of the first operand; the call is gone.
194define float @test_mul_ss_3(float %a, float %b) {
195; CHECK-LABEL: @test_mul_ss_3(
196; CHECK-NEXT:    ret float 3.000000e+00
197;
198  %1 = insertelement <4 x float> undef, float %a, i32 0
199  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
200  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
201  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
202  %5 = insertelement <4 x float> undef, float %b, i32 0
203  %6 = tail call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %4, <4 x float> %5)
204  %7 = extractelement <4 x float> %6, i32 3
205  ret float %7
206}
207
; mul.ss with the same vector passed as both operands: %a inserted into lane 0
; of a zeroinitializer vector. Only lane 0 of the result is used, and the
; expected output is the scalar fmul of %a with itself.
208define float @test_mul_ss_4(float %a) {
209; CHECK-LABEL: @test_mul_ss_4(
210; CHECK-NEXT:    [[TMP1:%.*]] = fmul float [[A:%.*]], [[A]]
211; CHECK-NEXT:    ret float [[TMP1]]
212;
213  %1 = insertelement <4 x float> zeroinitializer, float %a, i32 0
214  %2 = tail call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %1, <4 x float> %1)
215  %3 = extractelement <4 x float> %2, i32 0
216  ret float %3
217}
218
; div.ss where only lane 0 of the result is used. Both operands carry a scalar
; argument in lane 0 (%a, %b) and constants in lanes 1-3. The expected output
; is a single scalar fdiv of %a by %b with no vector instructions left.
219define float @test_div_ss_0(float %a, float %b) {
220; CHECK-LABEL: @test_div_ss_0(
221; CHECK-NEXT:    [[TMP1:%.*]] = fdiv float [[A:%.*]], [[B:%.*]]
222; CHECK-NEXT:    ret float [[TMP1]]
223;
224  %1 = insertelement <4 x float> undef, float %a, i32 0
225  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
226  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
227  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
228  %5 = insertelement <4 x float> undef, float %b, i32 0
229  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
230  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
231  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
232  %9 = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %4, <4 x float> %8)
233  %r = extractelement <4 x float> %9, i32 0
234  ret float %r
235}
236
; div.ss where only lane 1 of the result is used. The second operand has only
; lane 0 set (%b inserted into undef). The expected output is the constant 1.0
; that was inserted into lane 1 of the first operand; the call is gone.
237define float @test_div_ss_1(float %a, float %b) {
238; CHECK-LABEL: @test_div_ss_1(
239; CHECK-NEXT:    ret float 1.000000e+00
240;
241  %1 = insertelement <4 x float> undef, float %a, i32 0
242  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
243  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
244  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
245  %5 = insertelement <4 x float> undef, float %b, i32 0
246  %6 = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %4, <4 x float> %5)
247  %7 = extractelement <4 x float> %6, i32 1
248  ret float %7
249}
250
; div.ss with the same vector passed as both operands: %a inserted into lane 0
; of a zeroinitializer vector. Only lane 0 of the result is used, and the
; expected output is the scalar fdiv of %a by itself (not folded further).
251define float @test_div_ss_2(float %a) {
252; CHECK-LABEL: @test_div_ss_2(
253; CHECK-NEXT:    [[TMP1:%.*]] = fdiv float [[A:%.*]], [[A]]
254; CHECK-NEXT:    ret float [[TMP1]]
255;
256  %1 = insertelement <4 x float> zeroinitializer, float %a, i32 0
257  %2 = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %1, <4 x float> %1)
258  %3 = extractelement <4 x float> %2, i32 0
259  ret float %3
260}
261
; min.ss returning the whole result vector. Lanes 1-3 of the second operand
; %b are overwritten with constants before the call. The expected output
; passes %b to the intrinsic directly, i.e. the three inserts are removed.
262define <4 x float> @test_min_ss(<4 x float> %a, <4 x float> %b) {
263; CHECK-LABEL: @test_min_ss(
264; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]])
265; CHECK-NEXT:    ret <4 x float> [[TMP1]]
266;
267  %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
268  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
269  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
270  %4 = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a, <4 x float> %3)
271  ret <4 x float> %4
272}
273
; min.ss where only lane 0 of the result is used. Both operands carry a scalar
; argument in lane 0 and constants in lanes 1-3. The expected output keeps the
; intrinsic call (it is not turned into a scalar operation) but each operand
; becomes a single insert of the scalar into a poison vector.
274define float @test_min_ss_0(float %a, float %b) {
275; CHECK-LABEL: @test_min_ss_0(
276; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i64 0
277; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
278; CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
279; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP3]], i64 0
280; CHECK-NEXT:    ret float [[TMP4]]
281;
282  %1 = insertelement <4 x float> undef, float %a, i32 0
283  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
284  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
285  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
286  %5 = insertelement <4 x float> undef, float %b, i32 0
287  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
288  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
289  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
290  %9 = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %4, <4 x float> %8)
291  %10 = extractelement <4 x float> %9, i32 0
292  ret float %10
293}
294
; min.ss where only lane 2 of the result is used. The second operand has only
; lane 0 set (%b inserted into undef). The expected output is the constant 2.0
; that was inserted into lane 2 of the first operand; the call is gone.
295define float @test_min_ss_2(float %a, float %b) {
296; CHECK-LABEL: @test_min_ss_2(
297; CHECK-NEXT:    ret float 2.000000e+00
298;
299  %1 = insertelement <4 x float> undef, float %a, i32 0
300  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
301  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
302  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
303  %5 = insertelement <4 x float> undef, float %b, i32 0
304  %6 = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %4, <4 x float> %5)
305  %7 = extractelement <4 x float> %6, i32 2
306  ret float %7
307}
308
; min.ss with the same vector passed as both operands: %a inserted into lane 0
; of a zeroinitializer vector, with only lane 0 of the result used. The
; expected output keeps the call and the shared operand; lanes 1-3 of the
; insert base stay 0.0 and only its lane 0 is shown as poison.
309define float @test_min_ss_3(float %a) {
310; CHECK-LABEL: @test_min_ss_3(
311; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> <float poison, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float [[A:%.*]], i64 0
312; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> [[TMP1]], <4 x float> [[TMP1]])
313; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i64 0
314; CHECK-NEXT:    ret float [[TMP3]]
315;
316  %1 = insertelement <4 x float> zeroinitializer, float %a, i32 0
317  %2 = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %1, <4 x float> %1)
318  %3 = extractelement <4 x float> %2, i32 0
319  ret float %3
320}
321
; max.ss returning the whole result vector. Lanes 1-3 of the second operand
; %b are overwritten with constants before the call. The expected output
; passes %b to the intrinsic directly, i.e. the three inserts are removed.
322define <4 x float> @test_max_ss(<4 x float> %a, <4 x float> %b) {
323; CHECK-LABEL: @test_max_ss(
324; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]])
325; CHECK-NEXT:    ret <4 x float> [[TMP1]]
326;
327  %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
328  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
329  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
330  %4 = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a, <4 x float> %3)
331  ret <4 x float> %4
332}
333
; max.ss where only lane 0 of the result is used. Both operands carry a scalar
; argument in lane 0 and constants in lanes 1-3. The expected output keeps the
; intrinsic call (it is not turned into a scalar operation) but each operand
; becomes a single insert of the scalar into a poison vector.
334define float @test_max_ss_0(float %a, float %b) {
335; CHECK-LABEL: @test_max_ss_0(
336; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i64 0
337; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
338; CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
339; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP3]], i64 0
340; CHECK-NEXT:    ret float [[TMP4]]
341;
342  %1 = insertelement <4 x float> undef, float %a, i32 0
343  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
344  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
345  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
346  %5 = insertelement <4 x float> undef, float %b, i32 0
347  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
348  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
349  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
350  %9 = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %4, <4 x float> %8)
351  %10 = extractelement <4 x float> %9, i32 0
352  ret float %10
353}
354
; max.ss where only lane 3 of the result is used. The second operand has only
; lane 0 set (%b inserted into undef). The expected output is the constant 3.0
; that was inserted into lane 3 of the first operand; the call is gone.
355define float @test_max_ss_3(float %a, float %b) {
356; CHECK-LABEL: @test_max_ss_3(
357; CHECK-NEXT:    ret float 3.000000e+00
358;
359  %1 = insertelement <4 x float> undef, float %a, i32 0
360  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
361  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
362  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
363  %5 = insertelement <4 x float> undef, float %b, i32 0
364  %6 = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %4, <4 x float> %5)
365  %7 = extractelement <4 x float> %6, i32 3
366  ret float %7
367}
368
; max.ss with the same vector passed as both operands: %a inserted into lane 0
; of a zeroinitializer vector, with only lane 0 of the result used. The
; expected output keeps the call and the shared operand; lanes 1-3 of the
; insert base stay 0.0 and only its lane 0 is shown as poison.
369define float @test_max_ss_4(float %a) {
370; CHECK-LABEL: @test_max_ss_4(
371; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> <float poison, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float [[A:%.*]], i64 0
372; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> [[TMP1]], <4 x float> [[TMP1]])
373; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i64 0
374; CHECK-NEXT:    ret float [[TMP3]]
375;
376  %1 = insertelement <4 x float> zeroinitializer, float %a, i32 0
377  %2 = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %1, <4 x float> %1)
378  %3 = extractelement <4 x float> %2, i32 0
379  ret float %3
380}
381
; cmp.ss (immediate operand i8 0) returning the whole result vector. Lanes 1-3
; of the second operand %b are overwritten with constants before the call. The
; expected output passes %b to the intrinsic directly, i.e. the three inserts
; are removed.
382define <4 x float> @test_cmp_ss(<4 x float> %a, <4 x float> %b) {
383; CHECK-LABEL: @test_cmp_ss(
384; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i8 0)
385; CHECK-NEXT:    ret <4 x float> [[TMP1]]
386;
387  %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
388  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
389  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
390  %4 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a, <4 x float> %3, i8 0)
391  ret <4 x float> %4
392}
393
; cmp.ss (immediate operand i8 0) where only lane 0 of the result is used.
; Both operands carry a scalar argument in lane 0 and constants in lanes 1-3.
; The expected output keeps the intrinsic call but each operand becomes a
; single insert of the scalar into a poison vector.
394define float @test_cmp_ss_0(float %a, float %b) {
395; CHECK-LABEL: @test_cmp_ss_0(
396; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i64 0
397; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
398; CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]], i8 0)
399; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x float> [[TMP3]], i64 0
400; CHECK-NEXT:    ret float [[R]]
401;
402  %1 = insertelement <4 x float> undef, float %a, i32 0
403  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
404  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
405  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
406  %5 = insertelement <4 x float> undef, float %b, i32 0
407  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
408  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
409  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
410  %9 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %4, <4 x float> %8, i8 0)
411  %r = extractelement <4 x float> %9, i32 0
412  ret float %r
413}
414
; cmp.ss (immediate operand i8 0) where only lane 1 of the result is used. The
; second operand has only lane 0 set (%b inserted into undef). The expected
; output is the constant 1.0 that was inserted into lane 1 of the first
; operand; the call is gone.
415define float @test_cmp_ss_1(float %a, float %b) {
416; CHECK-LABEL: @test_cmp_ss_1(
417; CHECK-NEXT:    ret float 1.000000e+00
418;
419  %1 = insertelement <4 x float> undef, float %a, i32 0
420  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
421  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
422  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
423  %5 = insertelement <4 x float> undef, float %b, i32 0
424  %6 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %4, <4 x float> %5, i8 0)
425  %7 = extractelement <4 x float> %6, i32 1
426  ret float %7
427}
428
; cmp.ss (immediate operand i8 3) with the same vector passed as both
; operands: %a inserted into lane 0 of a zeroinitializer vector, with only
; lane 0 of the result used. The expected output keeps the call and the shared
; operand; lanes 1-3 of the insert base stay 0.0 and only its lane 0 is shown
; as poison.
429define float @test_cmp_ss_2(float %a) {
430; CHECK-LABEL: @test_cmp_ss_2(
431; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> <float poison, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float [[A:%.*]], i64 0
432; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> [[TMP1]], <4 x float> [[TMP1]], i8 3)
433; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i64 0
434; CHECK-NEXT:    ret float [[TMP3]]
435;
436  %1 = insertelement <4 x float> zeroinitializer, float %a, i32 0
437  %2 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %1, <4 x float> %1, i8 3)
438  %3 = extractelement <4 x float> %2, i32 0
439  ret float %3
440}
441
; comieq.ss returning its i32 result. Both operands carry a scalar argument in
; lane 0 (%a, %b) and constants in lanes 1-3. The expected output keeps the
; intrinsic call but each operand becomes a single insert of the scalar into a
; poison vector, so the constant lane inserts are gone.
442define i32 @test_comieq_ss_0(float %a, float %b) {
443; CHECK-LABEL: @test_comieq_ss_0(
444; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i64 0
445; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
446; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comieq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
447; CHECK-NEXT:    ret i32 [[TMP3]]
448;
449  %1 = insertelement <4 x float> undef, float %a, i32 0
450  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
451  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
452  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
453  %5 = insertelement <4 x float> undef, float %b, i32 0
454  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
455  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
456  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
457  %9 = tail call i32 @llvm.x86.sse.comieq.ss(<4 x float> %4, <4 x float> %8)
458  ret i32 %9
459}
460
; comige.ss returning its i32 result. Both operands carry a scalar argument in
; lane 0 (%a, %b) and constants in lanes 1-3. The expected output keeps the
; intrinsic call but each operand becomes a single insert of the scalar into a
; poison vector, so the constant lane inserts are gone.
461define i32 @test_comige_ss_0(float %a, float %b) {
462; CHECK-LABEL: @test_comige_ss_0(
463; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i64 0
464; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
465; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comige.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
466; CHECK-NEXT:    ret i32 [[TMP3]]
467;
468  %1 = insertelement <4 x float> undef, float %a, i32 0
469  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
470  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
471  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
472  %5 = insertelement <4 x float> undef, float %b, i32 0
473  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
474  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
475  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
476  %9 = tail call i32 @llvm.x86.sse.comige.ss(<4 x float> %4, <4 x float> %8)
477  ret i32 %9
478}
479
; comigt.ss returning its i32 result. Both operands carry a scalar argument in
; lane 0 (%a, %b) and constants in lanes 1-3. The expected output keeps the
; intrinsic call but each operand becomes a single insert of the scalar into a
; poison vector, so the constant lane inserts are gone.
480define i32 @test_comigt_ss_0(float %a, float %b) {
481; CHECK-LABEL: @test_comigt_ss_0(
482; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i64 0
483; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
484; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comigt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
485; CHECK-NEXT:    ret i32 [[TMP3]]
486;
487  %1 = insertelement <4 x float> undef, float %a, i32 0
488  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
489  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
490  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
491  %5 = insertelement <4 x float> undef, float %b, i32 0
492  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
493  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
494  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
495  %9 = tail call i32 @llvm.x86.sse.comigt.ss(<4 x float> %4, <4 x float> %8)
496  ret i32 %9
497}
498
; comile.ss returning its i32 result. Both operands carry a scalar argument in
; lane 0 (%a, %b) and constants in lanes 1-3. The expected output keeps the
; intrinsic call but each operand becomes a single insert of the scalar into a
; poison vector, so the constant lane inserts are gone.
499define i32 @test_comile_ss_0(float %a, float %b) {
500; CHECK-LABEL: @test_comile_ss_0(
501; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i64 0
502; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
503; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comile.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
504; CHECK-NEXT:    ret i32 [[TMP3]]
505;
506  %1 = insertelement <4 x float> undef, float %a, i32 0
507  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
508  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
509  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
510  %5 = insertelement <4 x float> undef, float %b, i32 0
511  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
512  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
513  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
514  %9 = tail call i32 @llvm.x86.sse.comile.ss(<4 x float> %4, <4 x float> %8)
515  ret i32 %9
516}
517
; comilt.ss returning its i32 result. Both operands carry a scalar argument in
; lane 0 (%a, %b) and constants in lanes 1-3. The expected output keeps the
; intrinsic call but each operand becomes a single insert of the scalar into a
; poison vector, so the constant lane inserts are gone.
518define i32 @test_comilt_ss_0(float %a, float %b) {
519; CHECK-LABEL: @test_comilt_ss_0(
520; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i64 0
521; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
522; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comilt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
523; CHECK-NEXT:    ret i32 [[TMP3]]
524;
525  %1 = insertelement <4 x float> undef, float %a, i32 0
526  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
527  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
528  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
529  %5 = insertelement <4 x float> undef, float %b, i32 0
530  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
531  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
532  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
533  %9 = tail call i32 @llvm.x86.sse.comilt.ss(<4 x float> %4, <4 x float> %8)
534  ret i32 %9
535}
536
; comineq.ss returning its i32 result. Both operands carry a scalar argument
; in lane 0 (%a, %b) and constants in lanes 1-3. The expected output keeps the
; intrinsic call but each operand becomes a single insert of the scalar into a
; poison vector, so the constant lane inserts are gone.
537define i32 @test_comineq_ss_0(float %a, float %b) {
538; CHECK-LABEL: @test_comineq_ss_0(
539; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i64 0
540; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
541; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comineq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
542; CHECK-NEXT:    ret i32 [[TMP3]]
543;
544  %1 = insertelement <4 x float> undef, float %a, i32 0
545  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
546  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
547  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
548  %5 = insertelement <4 x float> undef, float %b, i32 0
549  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
550  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
551  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
552  %9 = tail call i32 @llvm.x86.sse.comineq.ss(<4 x float> %4, <4 x float> %8)
553  ret i32 %9
554}
555
; ucomieq.ss returning its i32 result. Both operands carry a scalar argument
; in lane 0 (%a, %b) and constants in lanes 1-3. The expected output keeps the
; intrinsic call but each operand becomes a single insert of the scalar into a
; poison vector, so the constant lane inserts are gone.
556define i32 @test_ucomieq_ss_0(float %a, float %b) {
557; CHECK-LABEL: @test_ucomieq_ss_0(
558; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i64 0
559; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
560; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
561; CHECK-NEXT:    ret i32 [[TMP3]]
562;
563  %1 = insertelement <4 x float> undef, float %a, i32 0
564  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
565  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
566  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
567  %5 = insertelement <4 x float> undef, float %b, i32 0
568  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
569  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
570  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
571  %9 = tail call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %4, <4 x float> %8)
572  ret i32 %9
573}
574
; ucomige.ss returning its i32 result. Both operands carry a scalar argument
; in lane 0 (%a, %b) and constants in lanes 1-3. The expected output keeps the
; intrinsic call but each operand becomes a single insert of the scalar into a
; poison vector, so the constant lane inserts are gone.
575define i32 @test_ucomige_ss_0(float %a, float %b) {
576; CHECK-LABEL: @test_ucomige_ss_0(
577; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i64 0
578; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
579; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomige.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
580; CHECK-NEXT:    ret i32 [[TMP3]]
581;
582  %1 = insertelement <4 x float> undef, float %a, i32 0
583  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
584  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
585  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
586  %5 = insertelement <4 x float> undef, float %b, i32 0
587  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
588  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
589  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
590  %9 = tail call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %4, <4 x float> %8)
591  ret i32 %9
592}
593
; ucomigt.ss returning its i32 result. Both operands carry a scalar argument
; in lane 0 (%a, %b) and constants in lanes 1-3. The expected output keeps the
; intrinsic call but each operand becomes a single insert of the scalar into a
; poison vector, so the constant lane inserts are gone.
594define i32 @test_ucomigt_ss_0(float %a, float %b) {
595; CHECK-LABEL: @test_ucomigt_ss_0(
596; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i64 0
597; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
598; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
599; CHECK-NEXT:    ret i32 [[TMP3]]
600;
601  %1 = insertelement <4 x float> undef, float %a, i32 0
602  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
603  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
604  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
605  %5 = insertelement <4 x float> undef, float %b, i32 0
606  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
607  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
608  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
609  %9 = tail call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %4, <4 x float> %8)
610  ret i32 %9
611}
612
; ucomile.ss returning its i32 result. Both operands carry a scalar argument
; in lane 0 (%a, %b) and constants in lanes 1-3. The expected output keeps the
; intrinsic call but each operand becomes a single insert of the scalar into a
; poison vector, so the constant lane inserts are gone.
613define i32 @test_ucomile_ss_0(float %a, float %b) {
614; CHECK-LABEL: @test_ucomile_ss_0(
615; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i64 0
616; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
617; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomile.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
618; CHECK-NEXT:    ret i32 [[TMP3]]
619;
620  %1 = insertelement <4 x float> undef, float %a, i32 0
621  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
622  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
623  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
624  %5 = insertelement <4 x float> undef, float %b, i32 0
625  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
626  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
627  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
628  %9 = tail call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %4, <4 x float> %8)
629  ret i32 %9
630}
631
; ucomilt.ss returning its i32 result. Both operands carry a scalar argument
; in lane 0 (%a, %b) and constants in lanes 1-3. The expected output keeps the
; intrinsic call but each operand becomes a single insert of the scalar into a
; poison vector, so the constant lane inserts are gone.
632define i32 @test_ucomilt_ss_0(float %a, float %b) {
633; CHECK-LABEL: @test_ucomilt_ss_0(
634; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i64 0
635; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
636; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
637; CHECK-NEXT:    ret i32 [[TMP3]]
638;
639  %1 = insertelement <4 x float> undef, float %a, i32 0
640  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
641  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
642  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
643  %5 = insertelement <4 x float> undef, float %b, i32 0
644  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
645  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
646  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
647  %9 = tail call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %4, <4 x float> %8)
648  ret i32 %9
649}
650
; ucomineq.ss returning its i32 result. Both operands carry a scalar argument
; in lane 0 (%a, %b) and constants in lanes 1-3. The expected output keeps the
; intrinsic call but each operand becomes a single insert of the scalar into a
; poison vector, so the constant lane inserts are gone.
651define i32 @test_ucomineq_ss_0(float %a, float %b) {
652; CHECK-LABEL: @test_ucomineq_ss_0(
653; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i64 0
654; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
655; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
656; CHECK-NEXT:    ret i32 [[TMP3]]
657;
658  %1 = insertelement <4 x float> undef, float %a, i32 0
659  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
660  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
661  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
662  %5 = insertelement <4 x float> undef, float %b, i32 0
663  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
664  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
665  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
666  %9 = tail call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %4, <4 x float> %8)
667  ret i32 %9
668}
669
; Single-operand intrinsics used by the tests above: each takes one
; <4 x float> and returns a <4 x float>.
670declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>)
671declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>)
672declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>)
673
; Two-operand intrinsics used by the tests above: each takes two <4 x float>
; operands and returns a <4 x float>. cmp.ss takes an additional i8 operand.
674declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>)
675declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>)
676declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>)
677declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>)
678declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>)
679declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>)
680declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8)
681
; comi* intrinsics used by the tests above: each takes two <4 x float>
; operands and returns an i32.
682declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>)
683declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>)
684declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>)
685declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>)
686declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>)
687declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>)
688
; ucomi* intrinsics used by the tests above: each takes two <4 x float>
; operands and returns an i32.
689declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>)
690declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>)
691declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>)
692declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>)
693declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>)
694declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>)
695