xref: /llvm-project/llvm/test/Transforms/SimplifyCFG/speculate-store.ll (revision f445e39ab271d07733f0f45048badd9e58905aec)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -passes=simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S < %s | FileCheck %s
3
; Positive test: if.then stores to the same pointer (%A) that entry has just
; stored to, with only the compare in between. The expected output has no
; branch: a select between %C and the previously stored %B feeds a second,
; unconditional store, and the select carries !prof metadata.
4define void @ifconvertstore(ptr %A, i32 %B, i32 %C, i32 %D) {
5; CHECK-LABEL: @ifconvertstore(
6; CHECK-NEXT:  entry:
7; CHECK-NEXT:    store i32 [[B:%.*]], ptr [[A:%.*]], align 4
8; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[D:%.*]], 42
9; CHECK-NEXT:    [[SPEC_STORE_SELECT:%.*]] = select i1 [[CMP]], i32 [[C:%.*]], i32 [[B]], !prof [[PROF0:![0-9]+]]
10; CHECK-NEXT:    store i32 [[SPEC_STORE_SELECT]], ptr [[A]], align 4
11; CHECK-NEXT:    ret void
12;
13entry:
14; First store to the location.
15  store i32 %B, ptr %A
16  %cmp = icmp sgt i32 %D, 42
17  br i1 %cmp, label %if.then, label %ret.end, !prof !0
18
19; Make sure we speculate stores like the following one. It is cheap compared to
20; a mispredicted branch.
21if.then:
22  store i32 %C, ptr %A
23  br label %ret.end
24
25ret.end:
26  ret void
27}
28
29; Store to a different location.
30
; Negative test: entry stores to %A1 while if.then stores to %A2, a separate
; pointer argument. The expected output keeps the conditional branch and the
; store inside if.then.
31define void @noifconvertstore1(ptr %A1, ptr %A2, i32 %B, i32 %C, i32 %D) {
32; CHECK-LABEL: @noifconvertstore1(
33; CHECK-NEXT:  entry:
34; CHECK-NEXT:    store i32 [[B:%.*]], ptr [[A1:%.*]], align 4
35; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[D:%.*]], 42
36; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[RET_END:%.*]]
37; CHECK:       if.then:
38; CHECK-NEXT:    store i32 [[C:%.*]], ptr [[A2:%.*]], align 4
39; CHECK-NEXT:    br label [[RET_END]]
40; CHECK:       ret.end:
41; CHECK-NEXT:    ret void
42;
43entry:
; Unconditional store goes to %A1 ...
44  store i32 %B, ptr %A1
45  %cmp = icmp sgt i32 %D, 42
46  br i1 %cmp, label %if.then, label %ret.end
47
48if.then:
; ... but the conditional store goes to %A2.
49  store i32 %C, ptr %A2
50  br label %ret.end
51
52ret.end:
53  ret void
54}
55
56; This function could store to our address, so we can't repeat the first store a second time.
57declare void @unknown_fun()
58
; Negative test: both stores target %A, but a call to the opaque function
; @unknown_fun sits between the first store and the branch. The expected
; output keeps the conditional branch and the store inside if.then.
59define void @noifconvertstore2(ptr %A, i32 %B, i32 %C, i32 %D) {
60; CHECK-LABEL: @noifconvertstore2(
61; CHECK-NEXT:  entry:
62; CHECK-NEXT:    store i32 [[B:%.*]], ptr [[A:%.*]], align 4
63; CHECK-NEXT:    call void @unknown_fun()
64; CHECK-NEXT:    [[CMP6:%.*]] = icmp sgt i32 [[D:%.*]], 42
65; CHECK-NEXT:    br i1 [[CMP6]], label [[IF_THEN:%.*]], label [[RET_END:%.*]]
66; CHECK:       if.then:
67; CHECK-NEXT:    store i32 [[C:%.*]], ptr [[A]], align 4
68; CHECK-NEXT:    br label [[RET_END]]
69; CHECK:       ret.end:
70; CHECK-NEXT:    ret void
71;
72entry:
73; First store to the location.
74  store i32 %B, ptr %A
; Call with no visible body, placed after the first store.
75  call void @unknown_fun()
76  %cmp6 = icmp sgt i32 %D, 42
77  br i1 %cmp6, label %if.then, label %ret.end
78
79if.then:
80  store i32 %C, ptr %A
81  br label %ret.end
82
83ret.end:
84  ret void
85}
86
87; Make sure we don't speculate volatile stores.
88
; Negative test: both stores target %A and nothing but the compare separates
; them, but the store in if.then is volatile. The expected output keeps the
; conditional branch and the volatile store inside if.then.
89define void @noifconvertstore_volatile(ptr %A, i32 %B, i32 %C, i32 %D) {
90; CHECK-LABEL: @noifconvertstore_volatile(
91; CHECK-NEXT:  entry:
92; CHECK-NEXT:    store i32 [[B:%.*]], ptr [[A:%.*]], align 4
93; CHECK-NEXT:    [[CMP6:%.*]] = icmp sgt i32 [[D:%.*]], 42
94; CHECK-NEXT:    br i1 [[CMP6]], label [[IF_THEN:%.*]], label [[RET_END:%.*]]
95; CHECK:       if.then:
96; CHECK-NEXT:    store volatile i32 [[C:%.*]], ptr [[A]], align 4
97; CHECK-NEXT:    br label [[RET_END]]
98; CHECK:       ret.end:
99; CHECK-NEXT:    ret void
100;
101entry:
102; First store to the location.
103  store i32 %B, ptr %A
104  %cmp6 = icmp sgt i32 %D, 42
105  br i1 %cmp6, label %if.then, label %ret.end
106
107if.then:
; The volatile qualifier is the only difference from the plain conditional
; store used in @noifconvertstore2's if.then block.
108  store volatile i32 %C, ptr %A
109  br label %ret.end
110
111ret.end:
112  ret void
113}
114
115
116;; Speculate a store, preceded by a local, non-escaping load
; Positive test: %a is an alloca that this function only uses through loads,
; stores and getelementptrs. The conditional store to %arrayidx is preceded by
; a load from the same address; the expected output replaces the branch with a
; select between %b and that loaded value, followed by an unconditional store.
117define i32 @load_before_store_noescape(i64 %i, i32 %b)  {
118; CHECK-LABEL: @load_before_store_noescape(
119; CHECK-NEXT:  entry:
120; CHECK-NEXT:    [[A:%.*]] = alloca [2 x i32], align 8
121; CHECK-NEXT:    store i64 4294967296, ptr [[A]], align 8
122; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 0, i64 [[I:%.*]]
123; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
124; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP0]], [[B:%.*]]
125; CHECK-NEXT:    [[SPEC_STORE_SELECT:%.*]] = select i1 [[CMP]], i32 [[B]], i32 [[TMP0]]
126; CHECK-NEXT:    store i32 [[SPEC_STORE_SELECT]], ptr [[ARRAYIDX]], align 4
127; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A]], align 4
128; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 0, i64 1
129; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
130; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP2]]
131; CHECK-NEXT:    ret i32 [[ADD]]
132;
133entry:
134  %a = alloca [2 x i32], align 8
; 4294967296 is 1 << 32; this single i64 store initializes both i32 elements
; of %a.
135  store i64 4294967296, ptr %a, align 8
136  %arrayidx = getelementptr inbounds [2 x i32], ptr %a, i64 0, i64 %i
; Load from the address that if.then conditionally stores to.
137  %0 = load i32, ptr %arrayidx, align 4
138  %cmp = icmp slt i32 %0, %b
139  br i1 %cmp, label %if.then, label %if.end
140
141if.then:
142  store i32 %b, ptr %arrayidx, align 4
143  br label %if.end
144
145if.end:
146  %1 = load i32, ptr %a, align 4
147  %arrayidx2 = getelementptr inbounds [2 x i32], ptr %a, i64 0, i64 1
148  %2 = load i32, ptr %arrayidx2, align 4
149  %add = add nsw i32 %1, %2
150  ret i32 %add
151}
152
153;; Don't speculate a store, preceded by a local, escaping load
; Negative test: same load/compare/conditional-store shape as
; @load_before_store_noescape, except that %a is passed to
; @fork_some_threads before the load and @join_some_threads is called before
; returning. The expected output keeps the branch and the store in if.then.
154define i32 @load_before_store_escape(i64 %i, i32 %b)  {
155; CHECK-LABEL: @load_before_store_escape(
156; CHECK-NEXT:  entry:
157; CHECK-NEXT:    [[A:%.*]] = alloca [2 x i32], align 8
158; CHECK-NEXT:    store i64 4294967296, ptr [[A]], align 8
159; CHECK-NEXT:    call void @fork_some_threads(ptr [[A]])
160; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 0, i64 [[I:%.*]]
161; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
162; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP0]], [[B:%.*]]
163; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
164; CHECK:       if.then:
165; CHECK-NEXT:    store i32 [[B]], ptr [[ARRAYIDX]], align 4
166; CHECK-NEXT:    br label [[IF_END]]
167; CHECK:       if.end:
168; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A]], align 4
169; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 0, i64 1
170; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
171; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP2]]
172; CHECK-NEXT:    call void @join_some_threads()
173; CHECK-NEXT:    ret i32 [[ADD]]
174;
175entry:
176  %a = alloca [2 x i32], align 8
177  store i64 4294967296, ptr %a, align 8
; The address of %a is handed to an external function here.
178  call void @fork_some_threads(ptr %a)
179  %arrayidx = getelementptr inbounds [2 x i32], ptr %a, i64 0, i64 %i
180  %0 = load i32, ptr %arrayidx, align 4
181  %cmp = icmp slt i32 %0, %b
182  br i1 %cmp, label %if.then, label %if.end
183
184if.then:
185  store i32 %b, ptr %arrayidx, align 4
186  br label %if.end
187
188if.end:
189  %1 = load i32, ptr %a, align 4
190  %arrayidx2 = getelementptr inbounds [2 x i32], ptr %a, i64 0, i64 1
191  %2 = load i32, ptr %arrayidx2, align 4
192  %add = add nsw i32 %1, %2
193  call void @join_some_threads()
194  ret i32 %add
195}
196
; Positive test: the memory being written is a byval([2 x i32]) argument
; rather than an alloca. A load from %arrayidx precedes the conditional store
; to it; the expected output replaces the branch with a select between %b and
; the loaded value %v, followed by an unconditional store.
197define i64 @load_before_store_noescape_byval(ptr byval([2 x i32]) %a, i64 %i, i32 %b)  {
198; CHECK-LABEL: @load_before_store_noescape_byval(
199; CHECK-NEXT:  entry:
200; CHECK-NEXT:    store i64 -1, ptr [[A:%.*]], align 8
201; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 0, i64 [[I:%.*]]
202; CHECK-NEXT:    [[V:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
203; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[V]], [[B:%.*]]
204; CHECK-NEXT:    [[SPEC_STORE_SELECT:%.*]] = select i1 [[CMP]], i32 [[B]], i32 [[V]]
205; CHECK-NEXT:    store i32 [[SPEC_STORE_SELECT]], ptr [[ARRAYIDX]], align 4
206; CHECK-NEXT:    [[V2:%.*]] = load i64, ptr [[A]], align 8
207; CHECK-NEXT:    ret i64 [[V2]]
208;
209entry:
210  store i64 -1, ptr %a, align 8
211  %arrayidx = getelementptr inbounds [2 x i32], ptr %a, i64 0, i64 %i
; Load from the address that if.then conditionally stores to.
212  %v = load i32, ptr %arrayidx, align 4
213  %cmp = icmp slt i32 %v, %b
214  br i1 %cmp, label %if.then, label %if.end
215
216if.then:
217  store i32 %b, ptr %arrayidx, align 4
218  br label %if.end
219
220if.end:
221  %v2 = load i64, ptr %a, align 8
222  ret i64 %v2
223}
224
225declare noalias ptr @malloc(i64 %size)
226
; Positive test: the memory being written is the result of a call to @malloc
; (declared with a noalias return), and %a is used here only by stores, a
; getelementptr and loads. The expected output replaces the branch with a
; select between %b and the loaded value %v, followed by an unconditional
; store.
227define i64 @load_before_store_noescape_malloc(i64 %i, i32 %b)  {
228; CHECK-LABEL: @load_before_store_noescape_malloc(
229; CHECK-NEXT:  entry:
230; CHECK-NEXT:    [[A:%.*]] = call ptr @malloc(i64 8)
231; CHECK-NEXT:    store i64 -1, ptr [[A]], align 8
232; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 0, i64 [[I:%.*]]
233; CHECK-NEXT:    [[V:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
234; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[V]], [[B:%.*]]
235; CHECK-NEXT:    [[SPEC_STORE_SELECT:%.*]] = select i1 [[CMP]], i32 [[B]], i32 [[V]]
236; CHECK-NEXT:    store i32 [[SPEC_STORE_SELECT]], ptr [[ARRAYIDX]], align 4
237; CHECK-NEXT:    [[V2:%.*]] = load i64, ptr [[A]], align 8
238; CHECK-NEXT:    ret i64 [[V2]]
239;
240entry:
; 8 bytes requested, matching the i64 store and the [2 x i32] indexing below.
241  %a = call ptr @malloc(i64 8)
242  store i64 -1, ptr %a, align 8
243  %arrayidx = getelementptr inbounds [2 x i32], ptr %a, i64 0, i64 %i
244  %v = load i32, ptr %arrayidx, align 4
245  %cmp = icmp slt i32 %v, %b
246  br i1 %cmp, label %if.then, label %if.end
247
248if.then:
249  store i32 %b, ptr %arrayidx, align 4
250  br label %if.end
251
252if.end:
253  %v2 = load i64, ptr %a, align 8
254  ret i64 %v2
255}
256
; Positive test: %a is a plain pointer argument carrying all three of
; noalias, writable and dereferenceable(8). The getelementptr uses the
; constant index 1 (%i is unused). The expected output replaces the branch
; with a select between %b and the loaded value %v, followed by an
; unconditional store.
257define i64 @load_before_store_noescape_writable(ptr noalias writable dereferenceable(8) %a, i64 %i, i32 %b)  {
258; CHECK-LABEL: @load_before_store_noescape_writable(
259; CHECK-NEXT:  entry:
260; CHECK-NEXT:    store i64 -1, ptr [[A:%.*]], align 8
261; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 0, i64 1
262; CHECK-NEXT:    [[V:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
263; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[V]], [[B:%.*]]
264; CHECK-NEXT:    [[SPEC_STORE_SELECT:%.*]] = select i1 [[CMP]], i32 [[B]], i32 [[V]]
265; CHECK-NEXT:    store i32 [[SPEC_STORE_SELECT]], ptr [[ARRAYIDX]], align 4
266; CHECK-NEXT:    [[V2:%.*]] = load i64, ptr [[A]], align 8
267; CHECK-NEXT:    ret i64 [[V2]]
268;
269entry:
270  store i64 -1, ptr %a, align 8
271  %arrayidx = getelementptr inbounds [2 x i32], ptr %a, i64 0, i64 1
272  %v = load i32, ptr %arrayidx, align 4
273  %cmp = icmp slt i32 %v, %b
274  br i1 %cmp, label %if.then, label %if.end
275
276if.then:
277  store i32 %b, ptr %arrayidx, align 4
278  br label %if.end
279
280if.end:
281  %v2 = load i64, ptr %a, align 8
282  ret i64 %v2
283}
284
; Negative test: identical body to @load_before_store_noescape_writable, but
; %a carries only writable and dereferenceable(8); the noalias attribute is
; absent. The expected output keeps the branch and the store in if.then.
285define i64 @load_before_store_noescape_writable_missing_noalias(ptr writable dereferenceable(8) %a, i64 %i, i32 %b)  {
286; CHECK-LABEL: @load_before_store_noescape_writable_missing_noalias(
287; CHECK-NEXT:  entry:
288; CHECK-NEXT:    store i64 -1, ptr [[A:%.*]], align 8
289; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 0, i64 1
290; CHECK-NEXT:    [[V:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
291; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[V]], [[B:%.*]]
292; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
293; CHECK:       if.then:
294; CHECK-NEXT:    store i32 [[B]], ptr [[ARRAYIDX]], align 4
295; CHECK-NEXT:    br label [[IF_END]]
296; CHECK:       if.end:
297; CHECK-NEXT:    [[V2:%.*]] = load i64, ptr [[A]], align 8
298; CHECK-NEXT:    ret i64 [[V2]]
299;
300entry:
301  store i64 -1, ptr %a, align 8
302  %arrayidx = getelementptr inbounds [2 x i32], ptr %a, i64 0, i64 1
303  %v = load i32, ptr %arrayidx, align 4
304  %cmp = icmp slt i32 %v, %b
305  br i1 %cmp, label %if.then, label %if.end
306
307if.then:
308  store i32 %b, ptr %arrayidx, align 4
309  br label %if.end
310
311if.end:
312  %v2 = load i64, ptr %a, align 8
313  ret i64 %v2
314}
315
; Negative test: identical body to @load_before_store_noescape_writable, but
; %a carries only noalias and writable; the dereferenceable(8) attribute is
; absent. The expected output keeps the branch and the store in if.then.
316define i64 @load_before_store_noescape_writable_missing_derefable(ptr noalias writable %a, i64 %i, i32 %b)  {
317; CHECK-LABEL: @load_before_store_noescape_writable_missing_derefable(
318; CHECK-NEXT:  entry:
319; CHECK-NEXT:    store i64 -1, ptr [[A:%.*]], align 8
320; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 0, i64 1
321; CHECK-NEXT:    [[V:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
322; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[V]], [[B:%.*]]
323; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
324; CHECK:       if.then:
325; CHECK-NEXT:    store i32 [[B]], ptr [[ARRAYIDX]], align 4
326; CHECK-NEXT:    br label [[IF_END]]
327; CHECK:       if.end:
328; CHECK-NEXT:    [[V2:%.*]] = load i64, ptr [[A]], align 8
329; CHECK-NEXT:    ret i64 [[V2]]
330;
331entry:
332  store i64 -1, ptr %a, align 8
333  %arrayidx = getelementptr inbounds [2 x i32], ptr %a, i64 0, i64 1
334  %v = load i32, ptr %arrayidx, align 4
335  %cmp = icmp slt i32 %v, %b
336  br i1 %cmp, label %if.then, label %if.end
337
338if.then:
339  store i32 %b, ptr %arrayidx, align 4
340  br label %if.end
341
342if.end:
343  %v2 = load i64, ptr %a, align 8
344  ret i64 %v2
345}
346
347declare void @fork_some_threads(ptr);
348declare void @join_some_threads();
349
350; Don't speculate if it's not the only instruction in the block (not counting
351; the terminator)
; Same alloca/load/compare setup as @load_before_store_noescape, but if.then
; holds two stores (one to %arrayidx, one to %a). The expected output keeps
; the branch and both stores in if.then.
352define i32 @not_alone_in_block(i64 %i, i32 %b)  {
353; CHECK-LABEL: @not_alone_in_block(
354; CHECK-NEXT:  entry:
355; CHECK-NEXT:    [[A:%.*]] = alloca [2 x i32], align 8
356; CHECK-NEXT:    store i64 4294967296, ptr [[A]], align 8
357; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 0, i64 [[I:%.*]]
358; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
359; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP0]], [[B:%.*]]
360; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
361; CHECK:       if.then:
362; CHECK-NEXT:    store i32 [[B]], ptr [[ARRAYIDX]], align 4
363; CHECK-NEXT:    store i32 [[B]], ptr [[A]], align 4
364; CHECK-NEXT:    br label [[IF_END]]
365; CHECK:       if.end:
366; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A]], align 4
367; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 0, i64 1
368; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
369; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP2]]
370; CHECK-NEXT:    ret i32 [[ADD]]
371;
372entry:
373  %a = alloca [2 x i32], align 8
374  store i64 4294967296, ptr %a, align 8
375  %arrayidx = getelementptr inbounds [2 x i32], ptr %a, i64 0, i64 %i
376  %0 = load i32, ptr %arrayidx, align 4
377  %cmp = icmp slt i32 %0, %b
378  br i1 %cmp, label %if.then, label %if.end
379
380if.then:
381  store i32 %b, ptr %arrayidx, align 4
; Second store in the conditional block, in addition to the one above.
382  store i32 %b, ptr %a, align 4
383  br label %if.end
384
385if.end:
386  %1 = load i32, ptr %a, align 4
387  %arrayidx2 = getelementptr inbounds [2 x i32], ptr %a, i64 0, i64 1
388  %2 = load i32, ptr %arrayidx2, align 4
389  %add = add nsw i32 %1, %2
390  ret i32 %add
391}
392
; Negative test: both stores target %A, but the unconditional store in entry
; is align 4 while the conditional store in if.then is align 8. The expected
; output keeps the branch and the align-8 store in if.then.
393define void @wrong_align_store(ptr %A, i32 %B, i32 %C, i32 %D) {
394; CHECK-LABEL: @wrong_align_store(
395; CHECK-NEXT:  entry:
396; CHECK-NEXT:    store i32 [[B:%.*]], ptr [[A:%.*]], align 4
397; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[D:%.*]], 42
398; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[RET_END:%.*]]
399; CHECK:       if.then:
400; CHECK-NEXT:    store i32 [[C:%.*]], ptr [[A]], align 8
401; CHECK-NEXT:    br label [[RET_END]]
402; CHECK:       ret.end:
403; CHECK-NEXT:    ret void
404;
405entry:
; Preceding store uses the smaller alignment (4).
406  store i32 %B, ptr %A, align 4
407  %cmp = icmp sgt i32 %D, 42
408  br i1 %cmp, label %if.then, label %ret.end
409
410if.then:
; Conditional store claims the larger alignment (8).
411  store i32 %C, ptr %A, align 8
412  br label %ret.end
413
414ret.end:
415  ret void
416}
417
; Negative test: %A is an alloca with align 4. An align-4 load from it
; precedes the branch, while the conditional store in if.then is align 8. The
; expected output keeps the branch and the align-8 store in if.then.
418define void @wrong_align_load(i32 %C, i32 %D) {
419; CHECK-LABEL: @wrong_align_load(
420; CHECK-NEXT:  entry:
421; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
422; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
423; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[D:%.*]], 42
424; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[RET_END:%.*]]
425; CHECK:       if.then:
426; CHECK-NEXT:    store i32 [[C:%.*]], ptr [[A]], align 8
427; CHECK-NEXT:    br label [[RET_END]]
428; CHECK:       ret.end:
429; CHECK-NEXT:    ret void
430;
431entry:
432  %A = alloca i32, align 4
; The loaded value is unnamed and has no users in this function.
433  load i32, ptr %A, align 4
434  %cmp = icmp sgt i32 %D, 42
435  br i1 %cmp, label %if.then, label %ret.end
436
437if.then:
438  store i32 %C, ptr %A, align 8
439  br label %ret.end
440
441ret.end:
442  ret void
443}
444
; Branch-weight node referenced by the !prof attachment on the conditional
; branch in @ifconvertstore; the assertion on the next line expects a node
; with the same weights to be printed in the output module.
445; CHECK: !0 = !{!"branch_weights", i32 3, i32 5}
446!0 = !{!"branch_weights", i32 3, i32 5}
447
448