xref: /llvm-project/llvm/test/Transforms/SLPVectorizer/X86/control-dependence.ll (revision e39f6c1844fab59c638d8059a6cf139adb42279a)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -passes=slp-vectorizer -slp-threshold=-999 -S -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake < %s | FileCheck %s
3
; External callees used by the tests in this file. Neither may_inf_loop_*
; declaration carries willreturn, so (as the names suggest) a call to either
; one is not guaranteed to return. The _ro variant is additionally readonly.
4declare i64 @may_inf_loop_ro() nounwind readonly
5declare i64 @may_inf_loop_rw() nounwind
; Declared willreturn but without nounwind, so a call to it may unwind.
6declare i64 @may_throw() willreturn
7
8; Base case with no interesting control dependencies
; Two adjacent i64 elements are loaded from %a and from %c, added lane by
; lane, and stored to two adjacent elements of %b. The body contains no calls.
; The expected output is one <2 x i64> load per input pointer, one <2 x i64>
; add, and one <2 x i64> store.
9define void @test_no_control(ptr %a, ptr %b, ptr %c) {
10; CHECK-LABEL: @test_no_control(
11; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A:%.*]], align 8
12; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr [[C:%.*]], align 8
13; CHECK-NEXT:    [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]]
14; CHECK-NEXT:    store <2 x i64> [[TMP3]], ptr [[B:%.*]], align 8
15; CHECK-NEXT:    ret void
16;
17  %v1 = load i64, ptr %a
18  %a2 = getelementptr i64, ptr %a, i32 1
19  %v2 = load i64, ptr %a2
20
21  %c1 = load i64, ptr %c
22  %ca2 = getelementptr i64, ptr %c, i32 1
23  %c2 = load i64, ptr %ca2
24  %add1 = add i64 %v1, %c1
25  %add2 = add i64 %v2, %c2
26
27  store i64 %add1, ptr %b
28  %b2 = getelementptr i64, ptr %b, i32 1
29  store i64 %add2, ptr %b2
30  ret void
31}
32
; The lane-1 operand of the add comes from a call to @may_inf_loop_ro instead
; of a second load from %c. In the input both scalar loads of %a precede the
; call. The expected output keeps the load of %c and the call scalar, combines
; them with insertelement, and emits the <2 x i64> load of %a after the call.
33define void @test1(ptr %a, ptr %b, ptr %c) {
34; CHECK-LABEL: @test1(
35; CHECK-NEXT:    [[C1:%.*]] = load i64, ptr [[C:%.*]], align 8
36; CHECK-NEXT:    [[C2:%.*]] = call i64 @may_inf_loop_ro()
37; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A:%.*]], align 8
38; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i64> poison, i64 [[C1]], i32 0
39; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[C2]], i32 1
40; CHECK-NEXT:    [[TMP4:%.*]] = add <2 x i64> [[TMP1]], [[TMP3]]
41; CHECK-NEXT:    store <2 x i64> [[TMP4]], ptr [[B:%.*]], align 8
42; CHECK-NEXT:    ret void
43;
44  %v1 = load i64, ptr %a
45  %a2 = getelementptr i64, ptr %a, i32 1
46  %v2 = load i64, ptr %a2
47
48  %c1 = load i64, ptr %c
49  %c2 = call i64 @may_inf_loop_ro()
50  %add1 = add i64 %v1, %c1
51  %add2 = add i64 %v2, %c2
52
53  store i64 %add1, ptr %b
54  %b2 = getelementptr i64, ptr %b, i32 1
55  store i64 %add2, ptr %b2
56  ret void
57}
58
; Here the load of %c and the call to @may_inf_loop_ro come before the two
; scalar loads of %a. The expected output emits the scalar load of %c and the
; call first, then a <2 x i64> load of %a, an insertelement pair, and a
; <2 x i64> add and store.
59define void @test2(ptr %a, ptr %b, ptr %c) {
60; CHECK-LABEL: @test2(
61; CHECK-NEXT:    [[C1:%.*]] = load i64, ptr [[C:%.*]], align 8
62; CHECK-NEXT:    [[C2:%.*]] = call i64 @may_inf_loop_ro()
63; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A:%.*]], align 8
64; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i64> poison, i64 [[C1]], i32 0
65; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[C2]], i32 1
66; CHECK-NEXT:    [[TMP4:%.*]] = add <2 x i64> [[TMP1]], [[TMP3]]
67; CHECK-NEXT:    store <2 x i64> [[TMP4]], ptr [[B:%.*]], align 8
68; CHECK-NEXT:    ret void
69;
70  %c1 = load i64, ptr %c
71  %c2 = call i64 @may_inf_loop_ro()
72
73  %v1 = load i64, ptr %a
74  %a2 = getelementptr i64, ptr %a, i32 1
75  %v2 = load i64, ptr %a2
76
77  %add1 = add i64 %v1, %c1
78  %add2 = add i64 %v2, %c2
79
80  store i64 %add1, ptr %b
81  %b2 = getelementptr i64, ptr %b, i32 1
82  store i64 %add2, ptr %b2
83  ret void
84}
85
; The scalar computation is written lane by lane: lane 0 (load of %a, load of
; %c, add) is complete before lane 1 starts, and in lane 1 the call to
; @may_inf_loop_ro sits between the load through %a2 and the add. The expected
; output emits the call before the <2 x i64> load of %a.
86define void @test3(ptr %a, ptr %b, ptr %c) {
87; CHECK-LABEL: @test3(
88; CHECK-NEXT:    [[C1:%.*]] = load i64, ptr [[C:%.*]], align 8
89; CHECK-NEXT:    [[C2:%.*]] = call i64 @may_inf_loop_ro()
90; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A:%.*]], align 8
91; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i64> poison, i64 [[C1]], i32 0
92; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[C2]], i32 1
93; CHECK-NEXT:    [[TMP4:%.*]] = add <2 x i64> [[TMP1]], [[TMP3]]
94; CHECK-NEXT:    store <2 x i64> [[TMP4]], ptr [[B:%.*]], align 8
95; CHECK-NEXT:    ret void
96;
97  %v1 = load i64, ptr %a
98  %c1 = load i64, ptr %c
99  %add1 = add i64 %v1, %c1
100
101  %a2 = getelementptr i64, ptr %a, i32 1
102  %v2 = load i64, ptr %a2
103  %c2 = call i64 @may_inf_loop_ro()
104  %add2 = add i64 %v2, %c2
105
106  store i64 %add1, ptr %b
107  %b2 = getelementptr i64, ptr %b, i32 1
108  store i64 %add2, ptr %b2
109  ret void
110}
111
; Lane-by-lane layout in which the call to @may_inf_loop_ro is the first
; instruction of lane 1, i.e. it separates the scalar load of %a (lane 0) from
; the scalar load through %a2 (lane 1). The expected output emits the call
; before a single <2 x i64> load of %a.
112define void @test4(ptr %a, ptr %b, ptr %c) {
113; CHECK-LABEL: @test4(
114; CHECK-NEXT:    [[C1:%.*]] = load i64, ptr [[C:%.*]], align 8
115; CHECK-NEXT:    [[C2:%.*]] = call i64 @may_inf_loop_ro()
116; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A:%.*]], align 8
117; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i64> poison, i64 [[C1]], i32 0
118; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[C2]], i32 1
119; CHECK-NEXT:    [[TMP4:%.*]] = add <2 x i64> [[TMP1]], [[TMP3]]
120; CHECK-NEXT:    store <2 x i64> [[TMP4]], ptr [[B:%.*]], align 8
121; CHECK-NEXT:    ret void
122;
123  %v1 = load i64, ptr %a
124  %c1 = load i64, ptr %c
125  %add1 = add i64 %v1, %c1
126
127  %c2 = call i64 @may_inf_loop_ro()
128  %a2 = getelementptr i64, ptr %a, i32 1
129  %v2 = load i64, ptr %a2
130  %add2 = add i64 %v2, %c2
131
132  store i64 %add1, ptr %b
133  %b2 = getelementptr i64, ptr %b, i32 1
134  store i64 %add2, ptr %b2
135  ret void
136}
137
; Lane 1 (load through %a2, call to @may_inf_loop_ro, add) is written before
; lane 0 (load of %a, load of %c, add). The expected output emits the call
; first, then the scalar load of %c, then the <2 x i64> load of %a.
138define void @test5(ptr %a, ptr %b, ptr %c) {
139; CHECK-LABEL: @test5(
140; CHECK-NEXT:    [[C2:%.*]] = call i64 @may_inf_loop_ro()
141; CHECK-NEXT:    [[C1:%.*]] = load i64, ptr [[C:%.*]], align 8
142; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A:%.*]], align 8
143; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i64> poison, i64 [[C1]], i32 0
144; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[C2]], i32 1
145; CHECK-NEXT:    [[TMP4:%.*]] = add <2 x i64> [[TMP1]], [[TMP3]]
146; CHECK-NEXT:    store <2 x i64> [[TMP4]], ptr [[B:%.*]], align 8
147; CHECK-NEXT:    ret void
148;
149  %a2 = getelementptr i64, ptr %a, i32 1
150  %v2 = load i64, ptr %a2
151  %c2 = call i64 @may_inf_loop_ro()
152  %add2 = add i64 %v2, %c2
153
154  %v1 = load i64, ptr %a
155  %c1 = load i64, ptr %c
156  %add1 = add i64 %v1, %c1
157
158  store i64 %add1, ptr %b
159  %b2 = getelementptr i64, ptr %b, i32 1
160  store i64 %add2, ptr %b2
161  ret void
162}
163
; A call to @may_inf_loop_ro whose result is unused sits between the two
; scalar loads of %a; both add operands come from loads. The expected output
; emits the call first, followed by <2 x i64> loads of %a and %c, a <2 x i64>
; add, and a <2 x i64> store.
164define void @test6(ptr %a, ptr %b, ptr %c) {
165; CHECK-LABEL: @test6(
166; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @may_inf_loop_ro()
167; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr [[A:%.*]], align 8
168; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i64>, ptr [[C:%.*]], align 8
169; CHECK-NEXT:    [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
170; CHECK-NEXT:    store <2 x i64> [[TMP4]], ptr [[B:%.*]], align 8
171; CHECK-NEXT:    ret void
172;
173  %v1 = load i64, ptr %a
174  call i64 @may_inf_loop_ro()
175  %a2 = getelementptr i64, ptr %a, i32 1
176  %v2 = load i64, ptr %a2
177
178  %c1 = load i64, ptr %c
179  %ca2 = getelementptr i64, ptr %c, i32 1
180  %c2 = load i64, ptr %ca2
181  %add1 = add i64 %v1, %c1
182  %add2 = add i64 %v2, %c2
183
184  store i64 %add1, ptr %b
185  %b2 = getelementptr i64, ptr %b, i32 1
186  store i64 %add2, ptr %b2
187  ret void
188}
189
190; In this case, we can't vectorize the load pair because there's no valid
191; scheduling point which respects both memory and control dependence.  If
192; we scheduled the second load before the store holding the first one in place,
193; we'd have hoisted a potentially faulting load above a potentially infinite
194; call and thus have introduced a possible fault which didn't previously
195; exist in the program.
; The first load of %a is followed by a store to %a and then by a call to
; @may_inf_loop_ro; the second load (through %a2) comes after the call. The
; expected output keeps both loads of %a scalar, in their input order relative
; to the store and the call, and combines them with insertelement. The loads
; of %c, the add, and the store to %b are still emitted as <2 x i64>.
196define void @test7(ptr %a, ptr %b, ptr %c) {
197; CHECK-LABEL: @test7(
198; CHECK-NEXT:    [[A2:%.*]] = getelementptr i64, ptr [[A:%.*]], i32 1
199; CHECK-NEXT:    [[V1:%.*]] = load i64, ptr [[A]], align 8
200; CHECK-NEXT:    store i64 0, ptr [[A]], align 8
201; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @may_inf_loop_ro()
202; CHECK-NEXT:    [[V2:%.*]] = load i64, ptr [[A2]], align 8
203; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr [[C:%.*]], align 8
204; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[V1]], i32 0
205; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[V2]], i32 1
206; CHECK-NEXT:    [[TMP5:%.*]] = add <2 x i64> [[TMP4]], [[TMP2]]
207; CHECK-NEXT:    store <2 x i64> [[TMP5]], ptr [[B:%.*]], align 8
208; CHECK-NEXT:    ret void
209;
210  %v1 = load i64, ptr %a
211  store i64 0, ptr %a
212  call i64 @may_inf_loop_ro()
213  %a2 = getelementptr i64, ptr %a, i32 1
214  %v2 = load i64, ptr %a2
215
216  %c1 = load i64, ptr %c
217  %ca2 = getelementptr i64, ptr %c, i32 1
218  %c2 = load i64, ptr %ca2
219  %add1 = add i64 %v1, %c1
220  %add2 = add i64 %v2, %c2
221
222  store i64 %add1, ptr %b
223  %b2 = getelementptr i64, ptr %b, i32 1
224  store i64 %add2, ptr %b2
225  ret void
226}
227
228; Same as test7, but with a throwing call
; The call between the two loads of %a targets @may_throw and carries a
; call-site readonly attribute. The expected output keeps both loads of %a
; scalar on either side of the store and the call, while the loads of %c, the
; add, and the store to %b are emitted as <2 x i64>.
229define void @test8(ptr %a, ptr %b, ptr %c) {
230; CHECK-LABEL: @test8(
231; CHECK-NEXT:    [[A2:%.*]] = getelementptr i64, ptr [[A:%.*]], i32 1
232; CHECK-NEXT:    [[V1:%.*]] = load i64, ptr [[A]], align 8
233; CHECK-NEXT:    store i64 0, ptr [[A]], align 8
234; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @may_throw() #[[ATTR4:[0-9]+]]
235; CHECK-NEXT:    [[V2:%.*]] = load i64, ptr [[A2]], align 8
236; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr [[C:%.*]], align 8
237; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[V1]], i32 0
238; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[V2]], i32 1
239; CHECK-NEXT:    [[TMP5:%.*]] = add <2 x i64> [[TMP4]], [[TMP2]]
240; CHECK-NEXT:    store <2 x i64> [[TMP5]], ptr [[B:%.*]], align 8
241; CHECK-NEXT:    ret void
242;
243  %v1 = load i64, ptr %a
244  store i64 0, ptr %a
245  call i64 @may_throw() readonly
246  %a2 = getelementptr i64, ptr %a, i32 1
247  %v2 = load i64, ptr %a2
248
249  %c1 = load i64, ptr %c
250  %ca2 = getelementptr i64, ptr %c, i32 1
251  %c2 = load i64, ptr %ca2
252  %add1 = add i64 %v1, %c1
253  %add2 = add i64 %v2, %c2
254
255  store i64 %add1, ptr %b
256  %b2 = getelementptr i64, ptr %b, i32 1
257  store i64 %add2, ptr %b2
258  ret void
259}
260
261; Same as test8, but with a readwrite maythrow call
; The call to @may_throw between the two loads of %a has no call-site
; attributes. The expected output keeps both loads of %a scalar on either side
; of the store and the call, while the loads of %c, the add, and the store to
; %b are emitted as <2 x i64>.
262define void @test9(ptr %a, ptr %b, ptr %c) {
263; CHECK-LABEL: @test9(
264; CHECK-NEXT:    [[A2:%.*]] = getelementptr i64, ptr [[A:%.*]], i32 1
265; CHECK-NEXT:    [[V1:%.*]] = load i64, ptr [[A]], align 8
266; CHECK-NEXT:    store i64 0, ptr [[A]], align 8
267; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @may_throw()
268; CHECK-NEXT:    [[V2:%.*]] = load i64, ptr [[A2]], align 8
269; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr [[C:%.*]], align 8
270; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[V1]], i32 0
271; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[V2]], i32 1
272; CHECK-NEXT:    [[TMP5:%.*]] = add <2 x i64> [[TMP4]], [[TMP2]]
273; CHECK-NEXT:    store <2 x i64> [[TMP5]], ptr [[B:%.*]], align 8
274; CHECK-NEXT:    ret void
275;
276  %v1 = load i64, ptr %a
277  store i64 0, ptr %a
278  call i64 @may_throw()
279  %a2 = getelementptr i64, ptr %a, i32 1
280  %v2 = load i64, ptr %a2
281
282  %c1 = load i64, ptr %c
283  %ca2 = getelementptr i64, ptr %c, i32 1
284  %c2 = load i64, ptr %ca2
285  %add1 = add i64 %v1, %c1
286  %add2 = add i64 %v2, %c2
287
288  store i64 %add1, ptr %b
289  %b2 = getelementptr i64, ptr %b, i32 1
290  store i64 %add2, ptr %b2
291  ret void
292}
293
294; A variant of test7 which shows the same problem with a non-load instruction
; Both loads of %a come first and are adjacent. The instructions separated by
; the store and the call to @may_inf_loop_ro are the two udivs, whose divisors
; are the loaded values. The expected output keeps the two loads of %a and
; both udivs scalar, with the second udiv after the call, while the loads of
; %c, the add, and the store to %b are emitted as <2 x i64>.
295define void @test10(ptr %a, ptr %b, ptr %c) {
296; CHECK-LABEL: @test10(
297; CHECK-NEXT:    [[V1:%.*]] = load i64, ptr [[A:%.*]], align 8
298; CHECK-NEXT:    [[A2:%.*]] = getelementptr i64, ptr [[A]], i32 1
299; CHECK-NEXT:    [[V2:%.*]] = load i64, ptr [[A2]], align 8
300; CHECK-NEXT:    [[U1:%.*]] = udiv i64 200, [[V1]]
301; CHECK-NEXT:    store i64 [[U1]], ptr [[A]], align 8
302; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @may_inf_loop_ro()
303; CHECK-NEXT:    [[U2:%.*]] = udiv i64 200, [[V2]]
304; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr [[C:%.*]], align 8
305; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[U1]], i32 0
306; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[U2]], i32 1
307; CHECK-NEXT:    [[TMP5:%.*]] = add <2 x i64> [[TMP4]], [[TMP2]]
308; CHECK-NEXT:    store <2 x i64> [[TMP5]], ptr [[B:%.*]], align 8
309; CHECK-NEXT:    ret void
310;
311  %v1 = load i64, ptr %a
312  %a2 = getelementptr i64, ptr %a, i32 1
313  %v2 = load i64, ptr %a2
314
315  %u1 = udiv i64 200, %v1
316  store i64 %u1, ptr %a
317  call i64 @may_inf_loop_ro()
318  %u2 = udiv i64 200, %v2
319
320  %c1 = load i64, ptr %c
321  %ca2 = getelementptr i64, ptr %c, i32 1
322  %c2 = load i64, ptr %ca2
323  %add1 = add i64 %u1, %c1
324  %add2 = add i64 %u2, %c2
325
326  store i64 %add1, ptr %b
327  %b2 = getelementptr i64, ptr %b, i32 1
328  store i64 %add2, ptr %b2
329  ret void
330}
331
332; Variant of test10 with block invariant operands to the udivs
333; NOTE: Hoisting the possibly faulting udiv of %y above the potentially infinite call would be wrong; the expected output below keeps it after the call.
; The udiv divisors are the function arguments %x and %y rather than loaded
; values, and the intermediate store of %u1 goes to %b. The expected output
; keeps both udivs scalar, with the udiv by %y after the call to
; @may_inf_loop_ro, and combines them with insertelement. The loads of %c, the
; add, and the final store to %b are emitted as <2 x i64>.
334define void @test11(i64 %x, i64 %y, ptr %b, ptr %c) {
335; CHECK-LABEL: @test11(
336; CHECK-NEXT:    [[U1:%.*]] = udiv i64 200, [[X:%.*]]
337; CHECK-NEXT:    store i64 [[U1]], ptr [[B:%.*]], align 8
338; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @may_inf_loop_ro()
339; CHECK-NEXT:    [[U2:%.*]] = udiv i64 200, [[Y:%.*]]
340; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr [[C:%.*]], align 8
341; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[U1]], i32 0
342; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[U2]], i32 1
343; CHECK-NEXT:    [[TMP5:%.*]] = add <2 x i64> [[TMP4]], [[TMP2]]
344; CHECK-NEXT:    store <2 x i64> [[TMP5]], ptr [[B]], align 8
345; CHECK-NEXT:    ret void
346;
347  %u1 = udiv i64 200, %x
348  store i64 %u1, ptr %b
349  call i64 @may_inf_loop_ro()
350  %u2 = udiv i64 200, %y
351
352  %c1 = load i64, ptr %c
353  %ca2 = getelementptr i64, ptr %c, i32 1
354  %c2 = load i64, ptr %ca2
355  %add1 = add i64 %u1, %c1
356  %add2 = add i64 %u2, %c2
357
358  store i64 %add1, ptr %b
359  %b2 = getelementptr i64, ptr %b, i32 1
360  store i64 %add2, ptr %b2
361  ret void
362}
363