; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=slp-vectorizer -S | FileCheck %s

; Check propagation of optional IR flags (PR20802). For a flag to
; propagate from scalar instructions to their vector replacement,
; *all* scalar instructions must have the flag.

target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"

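; All four scalar lshr ops carry 'exact', so the vectorized lshr keeps it.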
define void @exact(ptr %x) {
; CHECK-LABEL: @exact(
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = lshr exact <4 x i32> [[TMP2]], splat (i32 1)
; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[X]], align 4
; CHECK-NEXT:    ret void
;
  %idx2 = getelementptr inbounds i32, ptr %x, i64 1
  %idx3 = getelementptr inbounds i32, ptr %x, i64 2
  %idx4 = getelementptr inbounds i32, ptr %x, i64 3

  %load1 = load i32, ptr %x, align 4
  %load2 = load i32, ptr %idx2, align 4
  %load3 = load i32, ptr %idx3, align 4
  %load4 = load i32, ptr %idx4, align 4

  %op1 = lshr exact i32 %load1, 1
  %op2 = lshr exact i32 %load2, 1
  %op3 = lshr exact i32 %load3, 1
  %op4 = lshr exact i32 %load4, 1

  store i32 %op1, ptr %x, align 4
  store i32 %op2, ptr %idx2, align 4
  store i32 %op3, ptr %idx3, align 4
  store i32 %op4, ptr %idx4, align 4

  ret void
}

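; The second lshr is missing 'exact', so the flag is dropped from the vectorized lshr.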
define void @not_exact(ptr %x) {
; CHECK-LABEL: @not_exact(
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = lshr <4 x i32> [[TMP2]], splat (i32 1)
; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[X]], align 4
; CHECK-NEXT:    ret void
;
  %idx2 = getelementptr inbounds i32, ptr %x, i64 1
  %idx3 = getelementptr inbounds i32, ptr %x, i64 2
  %idx4 = getelementptr inbounds i32, ptr %x, i64 3

  %load1 = load i32, ptr %x, align 4
  %load2 = load i32, ptr %idx2, align 4
  %load3 = load i32, ptr %idx3, align 4
  %load4 = load i32, ptr %idx4, align 4

  %op1 = lshr exact i32 %load1, 1
  %op2 = lshr i32 %load2, 1
  %op3 = lshr exact i32 %load3, 1
  %op4 = lshr exact i32 %load4, 1

  store i32 %op1, ptr %x, align 4
  store i32 %op2, ptr %idx2, align 4
  store i32 %op3, ptr %idx3, align 4
  store i32 %op4, ptr %idx4, align 4

  ret void
}

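; All four adds carry 'nsw', so the vectorized add keeps it.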
define void @nsw(ptr %x) {
; CHECK-LABEL: @nsw(
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = add nsw <4 x i32> [[TMP2]], splat (i32 1)
; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[X]], align 4
; CHECK-NEXT:    ret void
;
  %idx2 = getelementptr inbounds i32, ptr %x, i64 1
  %idx3 = getelementptr inbounds i32, ptr %x, i64 2
  %idx4 = getelementptr inbounds i32, ptr %x, i64 3

  %load1 = load i32, ptr %x, align 4
  %load2 = load i32, ptr %idx2, align 4
  %load3 = load i32, ptr %idx3, align 4
  %load4 = load i32, ptr %idx4, align 4

  %op1 = add nsw i32 %load1, 1
  %op2 = add nsw i32 %load2, 1
  %op3 = add nsw i32 %load3, 1
  %op4 = add nsw i32 %load4, 1

  store i32 %op1, ptr %x, align 4
  store i32 %op2, ptr %idx2, align 4
  store i32 %op3, ptr %idx3, align 4
  store i32 %op4, ptr %idx4, align 4

  ret void
}

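; The last add is missing 'nsw', so the flag is dropped from the vectorized add.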
define void @not_nsw(ptr %x) {
; CHECK-LABEL: @not_nsw(
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = add <4 x i32> [[TMP2]], splat (i32 1)
; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[X]], align 4
; CHECK-NEXT:    ret void
;
  %idx2 = getelementptr inbounds i32, ptr %x, i64 1
  %idx3 = getelementptr inbounds i32, ptr %x, i64 2
  %idx4 = getelementptr inbounds i32, ptr %x, i64 3

  %load1 = load i32, ptr %x, align 4
  %load2 = load i32, ptr %idx2, align 4
  %load3 = load i32, ptr %idx3, align 4
  %load4 = load i32, ptr %idx4, align 4

  %op1 = add nsw i32 %load1, 1
  %op2 = add nsw i32 %load2, 1
  %op3 = add nsw i32 %load3, 1
  %op4 = add i32 %load4, 1

  store i32 %op1, ptr %x, align 4
  store i32 %op2, ptr %idx2, align 4
  store i32 %op3, ptr %idx3, align 4
  store i32 %op4, ptr %idx4, align 4

  ret void
}

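; All four adds carry 'nuw', so the vectorized add keeps it.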
define void @nuw(ptr %x) {
; CHECK-LABEL: @nuw(
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = add nuw <4 x i32> [[TMP2]], splat (i32 1)
; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[X]], align 4
; CHECK-NEXT:    ret void
;
  %idx2 = getelementptr inbounds i32, ptr %x, i64 1
  %idx3 = getelementptr inbounds i32, ptr %x, i64 2
  %idx4 = getelementptr inbounds i32, ptr %x, i64 3

  %load1 = load i32, ptr %x, align 4
  %load2 = load i32, ptr %idx2, align 4
  %load3 = load i32, ptr %idx3, align 4
  %load4 = load i32, ptr %idx4, align 4

  %op1 = add nuw i32 %load1, 1
  %op2 = add nuw i32 %load2, 1
  %op3 = add nuw i32 %load3, 1
  %op4 = add nuw i32 %load4, 1

  store i32 %op1, ptr %x, align 4
  store i32 %op2, ptr %idx2, align 4
  store i32 %op3, ptr %idx3, align 4
  store i32 %op4, ptr %idx4, align 4

  ret void
}

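; Two of the adds are missing 'nuw', so the flag is dropped from the vectorized add.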
define void @not_nuw(ptr %x) {
; CHECK-LABEL: @not_nuw(
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = add <4 x i32> [[TMP2]], splat (i32 1)
; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[X]], align 4
; CHECK-NEXT:    ret void
;
  %idx2 = getelementptr inbounds i32, ptr %x, i64 1
  %idx3 = getelementptr inbounds i32, ptr %x, i64 2
  %idx4 = getelementptr inbounds i32, ptr %x, i64 3

  %load1 = load i32, ptr %x, align 4
  %load2 = load i32, ptr %idx2, align 4
  %load3 = load i32, ptr %idx3, align 4
  %load4 = load i32, ptr %idx4, align 4

  %op1 = add nuw i32 %load1, 1
  %op2 = add i32 %load2, 1
  %op3 = add i32 %load3, 1
  %op4 = add nuw i32 %load4, 1

  store i32 %op1, ptr %x, align 4
  store i32 %op2, ptr %idx2, align 4
  store i32 %op3, ptr %idx3, align 4
  store i32 %op4, ptr %idx4, align 4

  ret void
}

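; 'nuw' is on all four adds and is kept; 'nsw' is only on two of them and is dropped.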
define void @not_nsw_but_nuw(ptr %x) {
; CHECK-LABEL: @not_nsw_but_nuw(
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = add nuw <4 x i32> [[TMP2]], splat (i32 1)
; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[X]], align 4
; CHECK-NEXT:    ret void
;
  %idx2 = getelementptr inbounds i32, ptr %x, i64 1
  %idx3 = getelementptr inbounds i32, ptr %x, i64 2
  %idx4 = getelementptr inbounds i32, ptr %x, i64 3

  %load1 = load i32, ptr %x, align 4
  %load2 = load i32, ptr %idx2, align 4
  %load3 = load i32, ptr %idx3, align 4
  %load4 = load i32, ptr %idx4, align 4

  %op1 = add nuw i32 %load1, 1
  %op2 = add nuw nsw i32 %load2, 1
  %op3 = add nuw nsw i32 %load3, 1
  %op4 = add nuw i32 %load4, 1

  store i32 %op1, ptr %x, align 4
  store i32 %op2, ptr %idx2, align 4
  store i32 %op3, ptr %idx3, align 4
  store i32 %op4, ptr %idx4, align 4

  ret void
}

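; 'nnan' is the only fast-math flag common to all four fadds ('fast' implies it), so only 'nnan' propagates.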
define void @nnan(ptr %x) {
; CHECK-LABEL: @nnan(
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[X:%.*]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = fadd nnan <4 x float> [[TMP2]], splat (float 1.000000e+00)
; CHECK-NEXT:    store <4 x float> [[TMP3]], ptr [[X]], align 4
; CHECK-NEXT:    ret void
;
  %idx2 = getelementptr inbounds float, ptr %x, i64 1
  %idx3 = getelementptr inbounds float, ptr %x, i64 2
  %idx4 = getelementptr inbounds float, ptr %x, i64 3

  %load1 = load float, ptr %x, align 4
  %load2 = load float, ptr %idx2, align 4
  %load3 = load float, ptr %idx3, align 4
  %load4 = load float, ptr %idx4, align 4

  %op1 = fadd fast nnan float %load1, 1.0
  %op2 = fadd nnan ninf float %load2, 1.0
  %op3 = fadd nsz nnan float %load3, 1.0
  %op4 = fadd arcp nnan float %load4, 1.0

  store float %op1, ptr %x, align 4
  store float %op2, ptr %idx2, align 4
  store float %op3, ptr %idx3, align 4
  store float %op4, ptr %idx4, align 4

  ret void
}

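; No fast-math flag is common to all four fadds, so none propagate.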
define void @not_nnan(ptr %x) {
; CHECK-LABEL: @not_nnan(
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[X:%.*]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = fadd <4 x float> [[TMP2]], splat (float 1.000000e+00)
; CHECK-NEXT:    store <4 x float> [[TMP3]], ptr [[X]], align 4
; CHECK-NEXT:    ret void
;
  %idx2 = getelementptr inbounds float, ptr %x, i64 1
  %idx3 = getelementptr inbounds float, ptr %x, i64 2
  %idx4 = getelementptr inbounds float, ptr %x, i64 3

  %load1 = load float, ptr %x, align 4
  %load2 = load float, ptr %idx2, align 4
  %load3 = load float, ptr %idx3, align 4
  %load4 = load float, ptr %idx4, align 4

  %op1 = fadd nnan float %load1, 1.0
  %op2 = fadd ninf float %load2, 1.0
  %op3 = fadd nsz float %load3, 1.0
  %op4 = fadd arcp float %load4, 1.0

  store float %op1, ptr %x, align 4
  store float %op2, ptr %idx2, align 4
  store float %op3, ptr %idx3, align 4
  store float %op4, ptr %idx4, align 4

  ret void
}

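; All four fadds carry 'fast' (which implies the other fast-math flags), so the vectorized fadd is 'fast'.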
define void @only_fast(ptr %x) {
; CHECK-LABEL: @only_fast(
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[X:%.*]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = fadd fast <4 x float> [[TMP2]], splat (float 1.000000e+00)
; CHECK-NEXT:    store <4 x float> [[TMP3]], ptr [[X]], align 4
; CHECK-NEXT:    ret void
;
  %idx2 = getelementptr inbounds float, ptr %x, i64 1
  %idx3 = getelementptr inbounds float, ptr %x, i64 2
  %idx4 = getelementptr inbounds float, ptr %x, i64 3

  %load1 = load float, ptr %x, align 4
  %load2 = load float, ptr %idx2, align 4
  %load3 = load float, ptr %idx3, align 4
  %load4 = load float, ptr %idx4, align 4

  %op1 = fadd fast nnan float %load1, 1.0
  %op2 = fadd fast nnan ninf float %load2, 1.0
  %op3 = fadd fast nsz nnan float %load3, 1.0
  %op4 = fadd arcp nnan fast float %load4, 1.0

  store float %op1, ptr %x, align 4
  store float %op2, ptr %idx2, align 4
  store float %op3, ptr %idx3, align 4
  store float %op4, ptr %idx4, align 4

  ret void
}

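; The last fadd is only 'arcp', so 'arcp' is the only flag common to all four and the only one kept.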
define void @only_arcp(ptr %x) {
; CHECK-LABEL: @only_arcp(
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[X:%.*]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = fadd arcp <4 x float> [[TMP2]], splat (float 1.000000e+00)
; CHECK-NEXT:    store <4 x float> [[TMP3]], ptr [[X]], align 4
; CHECK-NEXT:    ret void
;
  %idx2 = getelementptr inbounds float, ptr %x, i64 1
  %idx3 = getelementptr inbounds float, ptr %x, i64 2
  %idx4 = getelementptr inbounds float, ptr %x, i64 3

  %load1 = load float, ptr %x, align 4
  %load2 = load float, ptr %idx2, align 4
  %load3 = load float, ptr %idx3, align 4
  %load4 = load float, ptr %idx4, align 4

  %op1 = fadd fast float %load1, 1.0
  %op2 = fadd fast float %load2, 1.0
  %op3 = fadd fast float %load3, 1.0
  %op4 = fadd arcp float %load4, 1.0

  store float %op1, ptr %x, align 4
  store float %op2, ptr %idx2, align 4
  store float %op3, ptr %idx3, align 4
  store float %op4, ptr %idx4, align 4

  ret void
}

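; Alternating add/sub where every op is 'nsw': both vectorized halves keep the flag.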
define void @addsub_all_nsw(ptr %x) {
; CHECK-LABEL: @addsub_all_nsw(
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = add nsw <4 x i32> [[TMP2]], splat (i32 1)
; CHECK-NEXT:    [[TMP4:%.*]] = sub nsw <4 x i32> [[TMP2]], splat (i32 1)
; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT:    store <4 x i32> [[TMP5]], ptr [[X]], align 4
; CHECK-NEXT:    ret void
;
  %idx2 = getelementptr inbounds i32, ptr %x, i64 1
  %idx3 = getelementptr inbounds i32, ptr %x, i64 2
  %idx4 = getelementptr inbounds i32, ptr %x, i64 3

  %load1 = load i32, ptr %x, align 4
  %load2 = load i32, ptr %idx2, align 4
  %load3 = load i32, ptr %idx3, align 4
  %load4 = load i32, ptr %idx4, align 4

  %op1 = add nsw i32 %load1, 1
  %op2 = sub nsw i32 %load2, 1
  %op3 = add nsw i32 %load3, 1
  %op4 = sub nsw i32 %load4, 1

  store i32 %op1, ptr %x, align 4
  store i32 %op2, ptr %idx2, align 4
  store i32 %op3, ptr %idx3, align 4
  store i32 %op4, ptr %idx4, align 4

  ret void
}

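; The adds are all 'nsw' but the last sub is not, so only the vectorized add keeps the flag.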
define void @addsub_some_nsw(ptr %x) {
; CHECK-LABEL: @addsub_some_nsw(
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = add nsw <4 x i32> [[TMP2]], splat (i32 1)
; CHECK-NEXT:    [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], splat (i32 1)
; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT:    store <4 x i32> [[TMP5]], ptr [[X]], align 4
; CHECK-NEXT:    ret void
;
  %idx2 = getelementptr inbounds i32, ptr %x, i64 1
  %idx3 = getelementptr inbounds i32, ptr %x, i64 2
  %idx4 = getelementptr inbounds i32, ptr %x, i64 3

  %load1 = load i32, ptr %x, align 4
  %load2 = load i32, ptr %idx2, align 4
  %load3 = load i32, ptr %idx3, align 4
  %load4 = load i32, ptr %idx4, align 4

  %op1 = add nsw i32 %load1, 1
  %op2 = sub nsw i32 %load2, 1
  %op3 = add nsw i32 %load3, 1
  %op4 = sub i32 %load4, 1

  store i32 %op1, ptr %x, align 4
  store i32 %op2, ptr %idx2, align 4
  store i32 %op3, ptr %idx3, align 4
  store i32 %op4, ptr %idx4, align 4

  ret void
}

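; Neither the adds nor the subs are uniformly 'nsw', so the flag is dropped from both vectorized ops.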
define void @addsub_no_nsw(ptr %x) {
; CHECK-LABEL: @addsub_no_nsw(
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = add <4 x i32> [[TMP2]], splat (i32 1)
; CHECK-NEXT:    [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], splat (i32 1)
; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT:    store <4 x i32> [[TMP5]], ptr [[X]], align 4
; CHECK-NEXT:    ret void
;
  %idx2 = getelementptr inbounds i32, ptr %x, i64 1
  %idx3 = getelementptr inbounds i32, ptr %x, i64 2
  %idx4 = getelementptr inbounds i32, ptr %x, i64 3

  %load1 = load i32, ptr %x, align 4
  %load2 = load i32, ptr %idx2, align 4
  %load3 = load i32, ptr %idx3, align 4
  %load4 = load i32, ptr %idx4, align 4

  %op1 = add i32 %load1, 1
  %op2 = sub nsw i32 %load2, 1
  %op3 = add nsw i32 %load3, 1
  %op4 = sub i32 %load4, 1

  store i32 %op1, ptr %x, align 4
  store i32 %op2, ptr %idx2, align 4
  store i32 %op3, ptr %idx3, align 4
  store i32 %op4, ptr %idx4, align 4

  ret void
}

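; Both fcmps and both fsub-based negations are 'fast', so the vectorized fcmp and fsub keep the flag.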
define void @fcmp_fast(ptr %x) #1 {
; CHECK-LABEL: @fcmp_fast(
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[X:%.*]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = fcmp fast oge <2 x double> [[TMP2]], zeroinitializer
; CHECK-NEXT:    [[TMP4:%.*]] = fsub fast <2 x double> splat (double -0.000000e+00), [[TMP2]]
; CHECK-NEXT:    [[TMP5:%.*]] = select <2 x i1> [[TMP3]], <2 x double> [[TMP2]], <2 x double> [[TMP4]]
; CHECK-NEXT:    store <2 x double> [[TMP5]], ptr [[X]], align 8
; CHECK-NEXT:    ret void
;
  %idx2 = getelementptr inbounds double, ptr %x, i64 1

  %load1 = load double, ptr %x, align 8
  %load2 = load double, ptr %idx2, align 8

  %cmp1 = fcmp fast oge double %load1, 0.000000e+00
  %cmp2 = fcmp fast oge double %load2, 0.000000e+00

  %sub1 = fsub fast double -0.000000e+00, %load1
  %sub2 = fsub fast double -0.000000e+00, %load2

  %sel1 = select i1 %cmp1, double %load1, double %sub1
  %sel2 = select i1 %cmp2, double %load2, double %sub2

  store double %sel1, ptr %x, align 8
  store double %sel2, ptr %idx2, align 8

  ret void
}

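; Same as @fcmp_fast, but negating with unary fneg; 'fast' is on all scalars and is kept.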
define void @fcmp_fast_unary_fneg(ptr %x) #1 {
; CHECK-LABEL: @fcmp_fast_unary_fneg(
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[X:%.*]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = fcmp fast oge <2 x double> [[TMP2]], zeroinitializer
; CHECK-NEXT:    [[TMP4:%.*]] = fneg fast <2 x double> [[TMP2]]
; CHECK-NEXT:    [[TMP5:%.*]] = select <2 x i1> [[TMP3]], <2 x double> [[TMP2]], <2 x double> [[TMP4]]
; CHECK-NEXT:    store <2 x double> [[TMP5]], ptr [[X]], align 8
; CHECK-NEXT:    ret void
;
  %idx2 = getelementptr inbounds double, ptr %x, i64 1

  %load1 = load double, ptr %x, align 8
  %load2 = load double, ptr %idx2, align 8

  %cmp1 = fcmp fast oge double %load1, 0.000000e+00
  %cmp2 = fcmp fast oge double %load2, 0.000000e+00

  %sub1 = fneg fast double %load1
  %sub2 = fneg fast double %load2

  %sel1 = select i1 %cmp1, double %load1, double %sub1
  %sel2 = select i1 %cmp2, double %load2, double %sub2

  store double %sel1, ptr %x, align 8
  store double %sel2, ptr %idx2, align 8

  ret void
}

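; Only one fcmp and one fsub are 'fast', so the flag does not propagate to either vectorized op.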
define void @fcmp_no_fast(ptr %x) #1 {
; CHECK-LABEL: @fcmp_no_fast(
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[X:%.*]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = fcmp oge <2 x double> [[TMP2]], zeroinitializer
; CHECK-NEXT:    [[TMP4:%.*]] = fsub <2 x double> splat (double -0.000000e+00), [[TMP2]]
; CHECK-NEXT:    [[TMP5:%.*]] = select <2 x i1> [[TMP3]], <2 x double> [[TMP2]], <2 x double> [[TMP4]]
; CHECK-NEXT:    store <2 x double> [[TMP5]], ptr [[X]], align 8
; CHECK-NEXT:    ret void
;
  %idx2 = getelementptr inbounds double, ptr %x, i64 1

  %load1 = load double, ptr %x, align 8
  %load2 = load double, ptr %idx2, align 8

  %cmp1 = fcmp fast oge double %load1, 0.000000e+00
  %cmp2 = fcmp oge double %load2, 0.000000e+00

  %sub1 = fsub fast double -0.000000e+00, %load1
  %sub2 = fsub double -0.000000e+00, %load2

  %sel1 = select i1 %cmp1, double %load1, double %sub1
  %sel2 = select i1 %cmp2, double %load2, double %sub2

  store double %sel1, ptr %x, align 8
  store double %sel2, ptr %idx2, align 8

  ret void
}

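; Same as @fcmp_no_fast with unary fneg: 'fast' is not on all scalars, so it is dropped.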
define void @fcmp_no_fast_unary_fneg(ptr %x) #1 {
; CHECK-LABEL: @fcmp_no_fast_unary_fneg(
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[X:%.*]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = fcmp oge <2 x double> [[TMP2]], zeroinitializer
; CHECK-NEXT:    [[TMP4:%.*]] = fneg <2 x double> [[TMP2]]
; CHECK-NEXT:    [[TMP5:%.*]] = select <2 x i1> [[TMP3]], <2 x double> [[TMP2]], <2 x double> [[TMP4]]
; CHECK-NEXT:    store <2 x double> [[TMP5]], ptr [[X]], align 8
; CHECK-NEXT:    ret void
;
  %idx2 = getelementptr inbounds double, ptr %x, i64 1

  %load1 = load double, ptr %x, align 8
  %load2 = load double, ptr %idx2, align 8

  %cmp1 = fcmp fast oge double %load1, 0.000000e+00
  %cmp2 = fcmp oge double %load2, 0.000000e+00

  %sub1 = fneg double %load1
  %sub2 = fneg double %load2

  %sel1 = select i1 %cmp1, double %load1, double %sub1
  %sel2 = select i1 %cmp2, double %load2, double %sub2

  store double %sel1, ptr %x, align 8
  store double %sel2, ptr %idx2, align 8

  ret void
}

declare double @llvm.fabs.f64(double) nounwind readnone

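; Both fabs calls are 'fast', so the vectorized llvm.fabs call keeps the flag.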
define void @call_fast(ptr %x) {
; CHECK-LABEL: @call_fast(
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[X:%.*]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = call fast <2 x double> @llvm.fabs.v2f64(<2 x double> [[TMP2]])
; CHECK-NEXT:    store <2 x double> [[TMP3]], ptr [[X]], align 8
; CHECK-NEXT:    ret void
;
  %idx2 = getelementptr inbounds double, ptr %x, i64 1

  %load1 = load double, ptr %x, align 8
  %load2 = load double, ptr %idx2, align 8

  %call1 = tail call fast double @llvm.fabs.f64(double %load1) nounwind readnone
  %call2 = tail call fast double @llvm.fabs.f64(double %load2) nounwind readnone

  store double %call1, ptr %x, align 8
  store double %call2, ptr %idx2, align 8

  ret void
}

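; Only the first fabs call is 'fast', so the flag is dropped from the vectorized call.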
define void @call_no_fast(ptr %x) {
; CHECK-LABEL: @call_no_fast(
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[X:%.*]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[TMP2]])
; CHECK-NEXT:    store <2 x double> [[TMP3]], ptr [[X]], align 8
; CHECK-NEXT:    ret void
;
  %idx2 = getelementptr inbounds double, ptr %x, i64 1

  %load1 = load double, ptr %x, align 8
  %load2 = load double, ptr %idx2, align 8

  %call1 = tail call fast double @llvm.fabs.f64(double %load1) nounwind readnone
  %call2 = tail call double @llvm.fabs.f64(double %load2) nounwind readnone

  store double %call1, ptr %x, align 8
  store double %call2, ptr %idx2, align 8

  ret void
}

attributes #1 = { "target-features"="+avx" }