; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=slp-vectorizer -slp-threshold=-999 -S -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake < %s | FileCheck %s
;
; Source: llvm/test/Transforms/SLPVectorizer/X86/stacksave-dependence.ll
; (llvm-project revision 38fffa630ee80163dc65e759392ad29798905679).
;
; Every function below is run through the SLP vectorizer by the opt command
; above, and the printed result is matched against the assertion comments at
; the top of that function's body.
; NOTE(review): the very negative -slp-threshold presumably makes the pass
; vectorize regardless of estimated cost -- confirm against the option's docs.

; External callee that only reads memory and does not unwind. It is not marked
; willreturn; @allocas_speculation uses it as a call that might never return.
declare i64 @may_inf_loop_ro() nounwind readonly

; Base case without allocas or stacksave.
;
; Loads two adjacent pointers from %a (with a null store to %a between the two
; loads), advances each by one byte, and writes the results to two adjacent
; slots of %b. The expected output is one <2 x ptr> load, the null store, one
; vector getelementptr and one <2 x ptr> store. %c is unused.
define void @basecase(ptr %a, ptr %b, ptr %c) {
; CHECK-LABEL: @basecase(
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x ptr>, ptr [[A:%.*]], align 8
; CHECK-NEXT:    store ptr null, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, <2 x ptr> [[TMP1]], <2 x i32> splat (i32 1)
; CHECK-NEXT:    store <2 x ptr> [[TMP2]], ptr [[B:%.*]], align 8
; CHECK-NEXT:    ret void
;

  ; Lane 0 load, then slot 0 of %a is overwritten before lane 1 is loaded.
  %v1 = load ptr, ptr %a
  store ptr zeroinitializer, ptr %a
  %a2 = getelementptr ptr, ptr %a, i32 1
  %v2 = load ptr, ptr %a2

  ; Same one-byte offset applied to both loaded pointers.
  %add1 = getelementptr i8, ptr %v1, i32 1
  %add2 = getelementptr i8, ptr %v2, i32 1

  ; Adjacent stores into %b[0] and %b[1].
  store ptr %add1, ptr %b
  %b2 = getelementptr ptr, ptr %b, i32 1
  store ptr %add2, ptr %b2
  ret void
}

; Using two allocas and a buildvector.
;
; The two pointers being offset are the allocas themselves, so the expected
; output assembles a <2 x ptr> with two insertelement instructions, applies one
; vector getelementptr, and finishes with one <2 x ptr> store to %b. A scalar
; getelementptr on the first alloca is still emitted for the separate store to
; %a. In the expected output both allocas come first, although the input
; places the second alloca after the store to %a. %c is unused.
define void @allocas(ptr %a, ptr %b, ptr %c) {
; CHECK-LABEL: @allocas(
; CHECK-NEXT:    [[V1:%.*]] = alloca i8, align 1
; CHECK-NEXT:    [[V2:%.*]] = alloca i8, align 1
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[V1]], i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x ptr> [[TMP1]], ptr [[V2]], i32 1
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i32> splat (i32 1)
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[V1]], i32 1
; CHECK-NEXT:    store ptr [[TMP4]], ptr [[A:%.*]], align 8
; CHECK-NEXT:    store <2 x ptr> [[TMP3]], ptr [[B:%.*]], align 8
; CHECK-NEXT:    ret void
;

  %v1 = alloca i8
  %add1 = getelementptr i8, ptr %v1, i32 1
  ; %add1 has a second, scalar user besides the store to %b below.
  store ptr %add1, ptr %a
  %v2 = alloca i8

  %add2 = getelementptr i8, ptr %v2, i32 1

  ; Adjacent stores into %b[0] and %b[1].
  store ptr %add1, ptr %b
  %b2 = getelementptr ptr, ptr %b, i32 1
  store ptr %add2, ptr %b2
  ret void
}

; Allocas cannot be speculated above a potentially non-returning call.
;
; Same shape as @allocas, except that a call to @may_inf_loop_ro sits between
; the store to %a and the second alloca. The expected output keeps every
; instruction scalar and in its original order; no vector instructions are
; expected. %c is unused.
define void @allocas_speculation(ptr %a, ptr %b, ptr %c) {
; CHECK-LABEL: @allocas_speculation(
; CHECK-NEXT:    [[V1:%.*]] = alloca i8, align 1
; CHECK-NEXT:    [[ADD1:%.*]] = getelementptr i8, ptr [[V1]], i32 1
; CHECK-NEXT:    store ptr [[ADD1]], ptr [[A:%.*]], align 8
; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @may_inf_loop_ro()
; CHECK-NEXT:    [[V2:%.*]] = alloca i8, align 1
; CHECK-NEXT:    [[ADD2:%.*]] = getelementptr i8, ptr [[V2]], i32 1
; CHECK-NEXT:    store ptr [[ADD1]], ptr [[B:%.*]], align 8
; CHECK-NEXT:    [[B2:%.*]] = getelementptr ptr, ptr [[B]], i32 1
; CHECK-NEXT:    store ptr [[ADD2]], ptr [[B2]], align 8
; CHECK-NEXT:    ret void
;

  %v1 = alloca i8
  %add1 = getelementptr i8, ptr %v1, i32 1
  store ptr %add1, ptr %a
  ; The call result is unused; the call exists only as a barrier that the
  ; second alloca must not be moved above.
  call i64 @may_inf_loop_ro()
  %v2 = alloca i8

  %add2 = getelementptr i8, ptr %v2, i32 1

  ; Adjacent stores into %b[0] and %b[1].
  store ptr %add1, ptr %b
  %b2 = getelementptr ptr, ptr %b, i32 1
  store ptr %add2, ptr %b2
  ret void
}

; We must be careful not to lift the inalloca alloca above the stacksave here.
; We used to miscompile this example before adding explicit dependency handling
; for stacksave.
;
; The second pointer comes from an inalloca alloca that sits between a
; stacksave / stackrestore pair. The expected output keeps every instruction
; scalar and in its original order, so the inalloca alloca stays below the
; stacksave call. %c is unused.
define void @stacksave(ptr %a, ptr %b, ptr %c) {
; CHECK-LABEL: @stacksave(
; CHECK-NEXT:    [[V1:%.*]] = alloca i8, align 1
; CHECK-NEXT:    [[ADD1:%.*]] = getelementptr i8, ptr [[V1]], i32 1
; CHECK-NEXT:    store ptr [[ADD1]], ptr [[A:%.*]], align 8
; CHECK-NEXT:    [[STACK:%.*]] = call ptr @llvm.stacksave.p0()
; CHECK-NEXT:    [[V2:%.*]] = alloca inalloca i8, align 1
; CHECK-NEXT:    call void @use(ptr inalloca(i8) [[V2]]) #[[ATTR4:[0-9]+]]
; CHECK-NEXT:    call void @llvm.stackrestore.p0(ptr [[STACK]])
; CHECK-NEXT:    [[ADD2:%.*]] = getelementptr i8, ptr [[V2]], i32 1
; CHECK-NEXT:    store ptr [[ADD1]], ptr [[B:%.*]], align 8
; CHECK-NEXT:    [[B2:%.*]] = getelementptr ptr, ptr [[B]], i32 1
; CHECK-NEXT:    store ptr [[ADD2]], ptr [[B2]], align 8
; CHECK-NEXT:    ret void
;

  %v1 = alloca i8
  %add1 = getelementptr i8, ptr %v1, i32 1
  store ptr %add1, ptr %a

  ; Save the stack, allocate the inalloca argument, pass it to @use (call site
  ; is readnone), then restore the stack.
  %stack = call ptr @llvm.stacksave()
  %v2 = alloca inalloca i8
  call void @use(ptr inalloca(i8) %v2) readnone
  call void @llvm.stackrestore(ptr %stack)

  %add2 = getelementptr i8, ptr %v2, i32 1

  ; Adjacent stores into %b[0] and %b[1].
  store ptr %add1, ptr %b
  %b2 = getelementptr ptr, ptr %b, i32 1
  store ptr %add2, ptr %b2
  ret void
}

; Variant of @stacksave in which the store to %a comes after the stacksave
; call and the @use call site is readonly instead of readnone.
;
; The expected output is vectorized: the plain alloca, the stacksave call and
; the inalloca alloca appear in that order, followed by the two insertelement
; instructions and the vector getelementptr. The scalar getelementptr and the
; store to %a, the @use call and the stackrestore follow, and the <2 x ptr>
; store to %b comes last. %c is unused.
define void @stacksave2(ptr %a, ptr %b, ptr %c) {
; CHECK-LABEL: @stacksave2(
; CHECK-NEXT:    [[V1:%.*]] = alloca i8, align 1
; CHECK-NEXT:    [[STACK:%.*]] = call ptr @llvm.stacksave.p0()
; CHECK-NEXT:    [[V2:%.*]] = alloca inalloca i8, align 1
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[V1]], i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x ptr> [[TMP1]], ptr [[V2]], i32 1
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i32> splat (i32 1)
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[V1]], i32 1
; CHECK-NEXT:    store ptr [[TMP4]], ptr [[A:%.*]], align 8
; CHECK-NEXT:    call void @use(ptr inalloca(i8) [[V2]]) #[[ATTR5:[0-9]+]]
; CHECK-NEXT:    call void @llvm.stackrestore.p0(ptr [[STACK]])
; CHECK-NEXT:    store <2 x ptr> [[TMP3]], ptr [[B:%.*]], align 8
; CHECK-NEXT:    ret void
;

  %v1 = alloca i8
  %add1 = getelementptr i8, ptr %v1, i32 1

  ; The store to %a is inside the stacksave / stackrestore region here.
  %stack = call ptr @llvm.stacksave()
  store ptr %add1, ptr %a
  %v2 = alloca inalloca i8
  call void @use(ptr inalloca(i8) %v2) readonly
  call void @llvm.stackrestore(ptr %stack)

  %add2 = getelementptr i8, ptr %v2, i32 1

  ; Adjacent stores into %b[0] and %b[1].
  store ptr %add1, ptr %b
  %b2 = getelementptr ptr, ptr %b, i32 1
  store ptr %add2, ptr %b2
  ret void
}

; Variant in which the stacksave call is the first instruction, so both the
; plain alloca and the inalloca alloca come after it, and nothing is stored
; to %a.
;
; The expected output keeps the stacksave, both allocas, the @use call and the
; stackrestore in their original order, and emits the insertelement pair, the
; vector getelementptr and the <2 x ptr> store to %b after the stackrestore.
; %a and %c are unused.
define void @stacksave3(ptr %a, ptr %b, ptr %c) {
; CHECK-LABEL: @stacksave3(
; CHECK-NEXT:    [[STACK:%.*]] = call ptr @llvm.stacksave.p0()
; CHECK-NEXT:    [[V1:%.*]] = alloca i8, align 1
; CHECK-NEXT:    [[V2:%.*]] = alloca inalloca i8, align 1
; CHECK-NEXT:    call void @use(ptr inalloca(i8) [[V2]]) #[[ATTR4]]
; CHECK-NEXT:    call void @llvm.stackrestore.p0(ptr [[STACK]])
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[V1]], i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x ptr> [[TMP1]], ptr [[V2]], i32 1
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i32> splat (i32 1)
; CHECK-NEXT:    store <2 x ptr> [[TMP3]], ptr [[B:%.*]], align 8
; CHECK-NEXT:    ret void
;

  ; Both allocas are created after the stack pointer has been saved.
  %stack = call ptr @llvm.stacksave()
  %v1 = alloca i8

  %v2 = alloca inalloca i8
  call void @use(ptr inalloca(i8) %v2) readnone
  call void @llvm.stackrestore(ptr %stack)

  %add1 = getelementptr i8, ptr %v1, i32 1
  %add2 = getelementptr i8, ptr %v2, i32 1

  ; Adjacent stores into %b[0] and %b[1].
  store ptr %add1, ptr %b
  %b2 = getelementptr ptr, ptr %b, i32 1
  store ptr %add2, ptr %b2
  ret void
}

; Here we have an alloca which needs to stay under the stacksave, but is not
; directly part of the vectorization tree.  Instead, the stacksave is
; encountered during dependency scanning via the memory chain.
;
; The vectorized values are two pointers loaded from %a; the inalloca alloca
; %x only feeds the @use call. The expected output has the <2 x ptr> load and
; the vector getelementptr before the stacksave call, the stacksave / alloca /
; @use / stackrestore sequence unchanged, and the <2 x ptr> store to %b after
; the stackrestore. %c is unused.
define void @stacksave4(ptr %a, ptr %b, ptr %c) {
; CHECK-LABEL: @stacksave4(
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x ptr>, ptr [[A:%.*]], align 8
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, <2 x ptr> [[TMP1]], <2 x i32> splat (i32 1)
; CHECK-NEXT:    [[STACK:%.*]] = call ptr @llvm.stacksave.p0()
; CHECK-NEXT:    [[X:%.*]] = alloca inalloca i8, align 1
; CHECK-NEXT:    call void @use(ptr inalloca(i8) [[X]]) #[[ATTR4]]
; CHECK-NEXT:    call void @llvm.stackrestore.p0(ptr [[STACK]])
; CHECK-NEXT:    store <2 x ptr> [[TMP2]], ptr [[B:%.*]], align 8
; CHECK-NEXT:    ret void
;

  ; Two adjacent pointer loads from %a[0] and %a[1].
  %v1 = load ptr, ptr %a
  %a2 = getelementptr ptr, ptr %a, i32 1
  %v2 = load ptr, ptr %a2

  %add1 = getelementptr i8, ptr %v1, i32 1
  %add2 = getelementptr i8, ptr %v2, i32 1

  ; Stack region lying between the offset computations and the stores; %x is
  ; not an operand of anything that gets vectorized.
  %stack = call ptr @llvm.stacksave()
  %x = alloca inalloca i8
  call void @use(ptr inalloca(i8) %x) readnone
  call void @llvm.stackrestore(ptr %stack)

  ; Adjacent stores into %b[0] and %b[1].
  store ptr %add1, ptr %b
  %b2 = getelementptr ptr, ptr %b, i32 1
  store ptr %add2, ptr %b2
  ret void
}

; Two pointers loaded from %a are offset by one byte and stored to %b, with a
; stacksave / inalloca alloca / @use / stackrestore sequence between the
; offset computations and the stores. The expected output has the <2 x ptr>
; load and vector getelementptr before the stacksave call and the <2 x ptr>
; store after the stackrestore. %c is unused.
; NOTE(review): apart from the function name, the input IR and the expected
; output here are textually identical to @stacksave4 -- confirm whether the
; duplication is intentional.
define void @stacksave5(ptr %a, ptr %b, ptr %c) {
; CHECK-LABEL: @stacksave5(
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x ptr>, ptr [[A:%.*]], align 8
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, <2 x ptr> [[TMP1]], <2 x i32> splat (i32 1)
; CHECK-NEXT:    [[STACK:%.*]] = call ptr @llvm.stacksave.p0()
; CHECK-NEXT:    [[X:%.*]] = alloca inalloca i8, align 1
; CHECK-NEXT:    call void @use(ptr inalloca(i8) [[X]]) #[[ATTR4]]
; CHECK-NEXT:    call void @llvm.stackrestore.p0(ptr [[STACK]])
; CHECK-NEXT:    store <2 x ptr> [[TMP2]], ptr [[B:%.*]], align 8
; CHECK-NEXT:    ret void
;

  ; Two adjacent pointer loads from %a[0] and %a[1].
  %v1 = load ptr, ptr %a
  %a2 = getelementptr ptr, ptr %a, i32 1
  %v2 = load ptr, ptr %a2

  %add1 = getelementptr i8, ptr %v1, i32 1
  %add2 = getelementptr i8, ptr %v2, i32 1

  ; Stack region; %x only feeds the @use call.
  %stack = call ptr @llvm.stacksave()
  %x = alloca inalloca i8
  call void @use(ptr inalloca(i8) %x) readnone
  call void @llvm.stackrestore(ptr %stack)

  ; Adjacent stores into %b[0] and %b[1].
  store ptr %add1, ptr %b
  %b2 = getelementptr ptr, ptr %b, i32 1
  store ptr %add2, ptr %b2
  ret void
}

; Reordering the second alloca above the stackrestore while
; leaving the write to it below would introduce a write-after-free
; bug.
;
; The first alloca and its initializing store are inside the stacksave /
; stackrestore region; the second alloca and its store come after the
; stackrestore. The expected output leaves those instructions in their
; original order and emits the insertelement pair, the vector getelementptr
; and the <2 x ptr> store to %b after the store to the second alloca.
; %a and %c are unused.
define void @stackrestore1(ptr %a, ptr %b, ptr %c) {
; CHECK-LABEL: @stackrestore1(
; CHECK-NEXT:    [[STACK:%.*]] = call ptr @llvm.stacksave.p0()
; CHECK-NEXT:    [[V1:%.*]] = alloca i8, align 1
; CHECK-NEXT:    store i8 0, ptr [[V1]], align 1
; CHECK-NEXT:    call void @llvm.stackrestore.p0(ptr [[STACK]])
; CHECK-NEXT:    [[V2:%.*]] = alloca i8, align 1
; CHECK-NEXT:    store i8 0, ptr [[V2]], align 1
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[V1]], i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x ptr> [[TMP1]], ptr [[V2]], i32 1
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i32> splat (i32 1)
; CHECK-NEXT:    store <2 x ptr> [[TMP3]], ptr [[B:%.*]], align 8
; CHECK-NEXT:    ret void
;

  ; First alloca: created and written before the stack is restored.
  %stack = call ptr @llvm.stacksave()
  %v1 = alloca i8
  store i8 0, ptr %v1
  call void @llvm.stackrestore(ptr %stack)
  ; Second alloca: created and written after the stack is restored.
  %v2 = alloca i8
  store i8 0, ptr %v2

  %add1 = getelementptr i8, ptr %v1, i32 1
  %add2 = getelementptr i8, ptr %v2, i32 1

  ; Adjacent stores into %b[0] and %b[1].
  store ptr %add1, ptr %b
  %b2 = getelementptr ptr, ptr %b, i32 1
  store ptr %add2, ptr %b2
  ret void
}

; External callee taking a single inalloca(i8) pointer argument; called from
; the stacksave tests above.
declare void @use(ptr inalloca(i8))
; Stack save / restore intrinsics. The expected output of the tests refers to
; them with a .p0 suffix (@llvm.stacksave.p0, @llvm.stackrestore.p0).
declare ptr @llvm.stacksave()
declare void @llvm.stackrestore(ptr)

; The next set are reduced from previous regressions.

; External callees used by @ham: @wibble takes and returns a pointer, @quux
; takes a single inalloca(i32) pointer argument.
declare ptr @wibble(ptr)
declare void @quux(ptr inalloca(i32))

; Fills the first eight slots of a [12 x ptr] stack array: slots 0-4 receive
; %var4 and slots 5-7 receive %var5. Before the stores there are three calls
; to @wibble (results unused) and a stacksave / inalloca alloca / @quux /
; stackrestore sequence.
;
; The expected output keeps the allocas, the @wibble calls and the stack
; region in their original order, then performs the eight stores as two
; <4 x ptr> stores: a splat of %var4 to slot 0, and
; <%var4, %var5, %var5, %var5> to slot 4. The function uses attribute
; group #1.
define void @ham() #1 {
; CHECK-LABEL: @ham(
; CHECK-NEXT:    [[VAR2:%.*]] = alloca i8, align 1
; CHECK-NEXT:    [[VAR3:%.*]] = alloca i8, align 1
; CHECK-NEXT:    [[VAR4:%.*]] = alloca i8, align 1
; CHECK-NEXT:    [[VAR5:%.*]] = alloca i8, align 1
; CHECK-NEXT:    [[VAR12:%.*]] = alloca [12 x ptr], align 8
; CHECK-NEXT:    [[VAR15:%.*]] = call ptr @wibble(ptr [[VAR2]])
; CHECK-NEXT:    [[VAR16:%.*]] = call ptr @wibble(ptr [[VAR3]])
; CHECK-NEXT:    [[VAR17:%.*]] = call ptr @wibble(ptr [[VAR4]])
; CHECK-NEXT:    [[VAR23:%.*]] = call ptr @llvm.stacksave.p0()
; CHECK-NEXT:    [[VAR24:%.*]] = alloca inalloca i32, align 4
; CHECK-NEXT:    call void @quux(ptr inalloca(i32) [[VAR24]])
; CHECK-NEXT:    call void @llvm.stackrestore.p0(ptr [[VAR23]])
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x ptr> poison, ptr [[VAR4]], i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x ptr> [[TMP1]], <4 x ptr> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    store <4 x ptr> [[TMP2]], ptr [[VAR12]], align 8
; CHECK-NEXT:    [[VAR36:%.*]] = getelementptr inbounds [12 x ptr], ptr [[VAR12]], i32 0, i32 4
; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x ptr> [[TMP1]], ptr [[VAR5]], i32 1
; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x ptr> [[TMP3]], <4 x ptr> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
; CHECK-NEXT:    store <4 x ptr> [[TMP4]], ptr [[VAR36]], align 8
; CHECK-NEXT:    ret void
;
  %var2 = alloca i8
  %var3 = alloca i8
  %var4 = alloca i8
  %var5 = alloca i8
  %var12 = alloca [12 x ptr]
  ; The returned pointers are never used.
  %var15 = call ptr @wibble(ptr %var2)
  %var16 = call ptr @wibble(ptr %var3)
  %var17 = call ptr @wibble(ptr %var4)
  ; Stack region containing an inalloca alloca, ahead of the stores below.
  %var23 = call ptr @llvm.stacksave()
  %var24 = alloca inalloca i32
  call void @quux(ptr inalloca(i32) %var24)
  call void @llvm.stackrestore(ptr %var23)
  ; Slots 0-4 receive %var4.
  store ptr %var4, ptr %var12
  %var33 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 1
  store ptr %var4, ptr %var33
  %var34 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 2
  store ptr %var4, ptr %var34
  %var35 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 3
  store ptr %var4, ptr %var35
  %var36 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 4
  store ptr %var4, ptr %var36
  ; Slots 5-7 receive %var5.
  %var37 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 5
  store ptr %var5, ptr %var37
  %var38 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 6
  store ptr %var5, ptr %var38
  %var39 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 7
  store ptr %var5, ptr %var39
  ret void
}

; Stores %var4 to slot 4 and %var5 to slots 5-7 of a [12 x ptr] stack array,
; with no calls and no stacksave / stackrestore.
;
; The expected output builds <%var4, %var5, %var5, %var5> with two
; insertelement instructions and a shufflevector, and writes it with a single
; <4 x ptr> store at slot 4. The function uses attribute group #1.
define void @spam() #1 {
; CHECK-LABEL: @spam(
; CHECK-NEXT:    [[VAR4:%.*]] = alloca i8, align 1
; CHECK-NEXT:    [[VAR5:%.*]] = alloca i8, align 1
; CHECK-NEXT:    [[VAR12:%.*]] = alloca [12 x ptr], align 8
; CHECK-NEXT:    [[VAR36:%.*]] = getelementptr inbounds [12 x ptr], ptr [[VAR12]], i32 0, i32 4
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x ptr> poison, ptr [[VAR4]], i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x ptr> [[TMP1]], ptr [[VAR5]], i32 1
; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x ptr> [[TMP2]], <4 x ptr> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
; CHECK-NEXT:    store <4 x ptr> [[TMP3]], ptr [[VAR36]], align 8
; CHECK-NEXT:    ret void
;
  %var4 = alloca i8
  %var5 = alloca i8
  %var12 = alloca [12 x ptr]
  ; Slot 4 receives %var4.
  %var36 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 4
  store ptr %var4, ptr %var36
  ; Slots 5-7 receive %var5.
  %var37 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 5
  store ptr %var5, ptr %var37
  %var38 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 6
  store ptr %var5, ptr %var38
  %var39 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 7
  store ptr %var5, ptr %var39
  ret void
}

; Attribute groups.
; NOTE(review): #0 is not referenced by any function visible in this file --
; confirm whether it is still needed.
attributes #0 = { nofree nosync nounwind willreturn }
; #1 is attached to @ham and @spam and sets their target feature string.
attributes #1 = { "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+sse3,+x87" }
