xref: /llvm-project/llvm/test/Transforms/EarlyCSE/basic.ll (revision 297c10fd175a2f0ff7cb293fcb2d149b065eaabe)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -S -passes=early-cse -earlycse-debug-hash | FileCheck %s
3; RUN: opt < %s -S -passes='early-cse<memssa>' | FileCheck %s
4; RUN: opt < %s -S -passes=early-cse | FileCheck %s
5
;; Assume intrinsic called by the test2a, test3a and test6a variants in this file.
6declare void @llvm.assume(i1) nounwind
7
;; Single-block test mixing a dead instruction, a constant-foldable add and
;; duplicated zext/add instructions. The expected output keeps one zext and
;; one add, and keeps every volatile store.
8define void @test1(i8 %V, ptr%P) {
9; CHECK-LABEL: @test1(
10; CHECK-NEXT:    store i32 23, ptr [[P:%.*]], align 4
11; CHECK-NEXT:    [[C:%.*]] = zext i8 [[V:%.*]] to i32
12; CHECK-NEXT:    store volatile i32 [[C]], ptr [[P]], align 4
13; CHECK-NEXT:    store volatile i32 [[C]], ptr [[P]], align 4
14; CHECK-NEXT:    [[E:%.*]] = add i32 [[C]], [[C]]
15; CHECK-NEXT:    store volatile i32 [[E]], ptr [[P]], align 4
16; CHECK-NEXT:    store volatile i32 [[E]], ptr [[P]], align 4
17; CHECK-NEXT:    store volatile i32 [[E]], ptr [[P]], align 4
18; CHECK-NEXT:    ret void
19;
20  %A = bitcast i64 42 to double  ;; dead
21  %B = add i32 4, 19             ;; constant folds
22  store i32 %B, ptr %P
23
24  %C = zext i8 %V to i32
25  %D = zext i8 %V to i32  ;; CSE
26  store volatile i32 %C, ptr %P
27  store volatile i32 %D, ptr %P  ;; expected to store %C after %D is replaced
28
29  %E = add i32 %C, %C
30  %F = add i32 %C, %C  ;; identical to %E
31  store volatile i32 %E, ptr %P
32  store volatile i32 %F, ptr %P
33
34  %G = add nuw i32 %C, %C  ;; differs from %E only by nuw; expected output reuses %E
35  store volatile i32 %G, ptr %P
36  ret void
37}
38
39
40;; Simple load value numbering.
;; Two loads of %P with nothing in between: the expected output keeps a single
;; load and returns the constant 0.
41define i32 @test2(ptr%P) {
42; CHECK-LABEL: @test2(
43; CHECK-NEXT:    [[V1:%.*]] = load i32, ptr [[P:%.*]], align 4
44; CHECK-NEXT:    ret i32 0
45;
46  %V1 = load i32, ptr %P
47  %V2 = load i32, ptr %P  ;; redundant with %V1
48  %Diff = sub i32 %V1, %V2  ;; both operands become %V1, so this folds to 0
49  ret i32 %Diff
50}
51
;; Variant of test2 with an llvm.assume call between the two loads. The
;; expected output still returns 0, so the assume does not block load reuse.
52define i32 @test2a(ptr%P, i1 %b) {
53; CHECK-LABEL: @test2a(
54; CHECK-NEXT:    [[V1:%.*]] = load i32, ptr [[P:%.*]], align 4
55; CHECK-NEXT:    tail call void @llvm.assume(i1 [[B:%.*]])
56; CHECK-NEXT:    ret i32 0
57;
58  %V1 = load i32, ptr %P
59  tail call void @llvm.assume(i1 %b)  ;; kept in the expected output
60  %V2 = load i32, ptr %P  ;; redundant with %V1
61  %Diff = sub i32 %V1, %V2
62  ret i32 %Diff
63}
64
65;; Cross block load value numbering.
;; The entry block loads %P; block F reloads it. The store to %P sits in the
;; sibling block T, so the expected output for F simply returns 0.
66define i32 @test3(ptr%P, i1 %Cond) {
67; CHECK-LABEL: @test3(
68; CHECK-NEXT:    [[V1:%.*]] = load i32, ptr [[P:%.*]], align 4
69; CHECK-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
70; CHECK:       T:
71; CHECK-NEXT:    store i32 4, ptr [[P]], align 4
72; CHECK-NEXT:    ret i32 42
73; CHECK:       F:
74; CHECK-NEXT:    ret i32 0
75;
76  %V1 = load i32, ptr %P
77  br i1 %Cond, label %T, label %F
78T:
79  store i32 4, ptr %P  ;; only on the T path, which returns without reaching F
80  ret i32 42
81F:
82  %V2 = load i32, ptr %P  ;; redundant with %V1 from the entry block
83  %Diff = sub i32 %V1, %V2
84  ret i32 %Diff
85}
86
;; Variant of test3 with an llvm.assume call in F before the reload. The
;; expected output keeps the assume and still returns 0 from F.
87define i32 @test3a(ptr%P, i1 %Cond, i1 %b) {
88; CHECK-LABEL: @test3a(
89; CHECK-NEXT:    [[V1:%.*]] = load i32, ptr [[P:%.*]], align 4
90; CHECK-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
91; CHECK:       T:
92; CHECK-NEXT:    store i32 4, ptr [[P]], align 4
93; CHECK-NEXT:    ret i32 42
94; CHECK:       F:
95; CHECK-NEXT:    tail call void @llvm.assume(i1 [[B:%.*]])
96; CHECK-NEXT:    ret i32 0
97;
98  %V1 = load i32, ptr %P
99  br i1 %Cond, label %T, label %F
100T:
101  store i32 4, ptr %P  ;; only on the T path, which returns without reaching F
102  ret i32 42
103F:
104  tail call void @llvm.assume(i1 %b)
105  %V2 = load i32, ptr %P  ;; redundant with %V1 from the entry block
106  %Diff = sub i32 %V1, %V2
107  ret i32 %Diff
108}
109
110;; Cross block load value numbering stops when stores happen.
;; Here the store is on the path to the reload, so %V2 must not be replaced by
;; %V1. The expected output instead uses the stored constant 42 as the second
;; operand of the sub.
111define i32 @test4(ptr%P, i1 %Cond) {
112; CHECK-LABEL: @test4(
113; CHECK-NEXT:    [[V1:%.*]] = load i32, ptr [[P:%.*]], align 4
114; CHECK-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
115; CHECK:       T:
116; CHECK-NEXT:    ret i32 42
117; CHECK:       F:
118; CHECK-NEXT:    store i32 42, ptr [[P]], align 4
119; CHECK-NEXT:    [[DIFF:%.*]] = sub i32 [[V1]], 42
120; CHECK-NEXT:    ret i32 [[DIFF]]
121;
122  %V1 = load i32, ptr %P
123  br i1 %Cond, label %T, label %F
124T:
125  ret i32 42
126F:
127  ; Clobbers V1
128  store i32 42, ptr %P
129
130  %V2 = load i32, ptr %P  ;; expected to be replaced by the stored value 42
131  %Diff = sub i32 %V1, %V2
132  ret i32 %Diff
133}
134
;; External callee used by test5, test8, test9 and test10; declared readonly.
135declare i32 @func(ptr%P) readonly
136
137;; Simple call CSE'ing.
;; Two calls to the readonly @func with the same argument. They carry different
;; !prof nodes (!0 and !1) whose contents are identical. The expected output
;; keeps only the first call, still tagged !prof !0, and returns 0.
138define i32 @test5(ptr%P) {
139; CHECK-LABEL: @test5(
140; CHECK-NEXT:    [[V1:%.*]] = call i32 @func(ptr [[P:%.*]]), !prof !0
141; CHECK-NEXT:    ret i32 0
142;
143  %V1 = call i32 @func(ptr %P), !prof !0
144  %V2 = call i32 @func(ptr %P), !prof !1  ;; redundant with %V1
145  %Diff = sub i32 %V1, %V2
146  ret i32 %Diff
147}
148
;; Two separately numbered metadata nodes with identical contents; test5
;; attaches !0 to its first call and !1 to its second.
149!0 = !{!"branch_weights", i32 95}
150!1 = !{!"branch_weights", i32 95}
151
152;; Trivial Store->load forwarding
;; A load immediately follows a store to the same pointer. The expected output
;; keeps the store and returns the stored constant directly.
153define i32 @test6(ptr%P) {
154; CHECK-LABEL: @test6(
155; CHECK-NEXT:    store i32 42, ptr [[P:%.*]], align 4
156; CHECK-NEXT:    ret i32 42
157;
158  store i32 42, ptr %P
159  %V1 = load i32, ptr %P  ;; expected to be replaced by 42
160  ret i32 %V1
161}
162
;; Variant of test6 with an llvm.assume call between the store and the load.
;; The expected output keeps the assume and still returns 42.
163define i32 @test6a(ptr%P, i1 %b) {
164; CHECK-LABEL: @test6a(
165; CHECK-NEXT:    store i32 42, ptr [[P:%.*]], align 4
166; CHECK-NEXT:    tail call void @llvm.assume(i1 [[B:%.*]])
167; CHECK-NEXT:    ret i32 42
168;
169  store i32 42, ptr %P
170  tail call void @llvm.assume(i1 %b)
171  %V1 = load i32, ptr %P  ;; expected to be replaced by 42
172  ret i32 %V1
173}
174
175;; Trivial dead store elimination.
;; Two back-to-back stores to the same pointer: only the second one remains
;; in the expected output.
176define void @test7(ptr%P) {
177; CHECK-LABEL: @test7(
178; CHECK-NEXT:    store i32 45, ptr [[P:%.*]], align 4
179; CHECK-NEXT:    ret void
180;
181  store i32 42, ptr %P  ;; overwritten by the next store
182  store i32 45, ptr %P
183  ret void
184}
185
186;; Readnone functions aren't invalidated by stores.
;; Both calls are marked readnone at the call site and are separated by a
;; store to %P. The expected output keeps a single call plus the store and
;; returns 0.
187define i32 @test8(ptr%P) {
188; CHECK-LABEL: @test8(
189; CHECK-NEXT:    [[V1:%.*]] = call i32 @func(ptr [[P:%.*]]) #[[ATTR2:[0-9]+]]
190; CHECK-NEXT:    store i32 4, ptr [[P]], align 4
191; CHECK-NEXT:    ret i32 0
192;
193  %V1 = call i32 @func(ptr %P) readnone
194  store i32 4, ptr %P
195  %V2 = call i32 @func(ptr %P) readnone  ;; redundant with %V1 despite the store
196  %Diff = sub i32 %V1, %V2
197  ret i32 %Diff
198}
199
200;; Trivial DSE can't be performed across a readonly call.  The call
201;; can observe the earlier write.
;; Both stores are therefore present in the expected output.
202define i32 @test9(ptr%P) {
203; CHECK-LABEL: @test9(
204; CHECK-NEXT:    store i32 4, ptr [[P:%.*]], align 4
205; CHECK-NEXT:    [[V1:%.*]] = call i32 @func(ptr [[P]]) #[[ATTR1:[0-9]+]]
206; CHECK-NEXT:    store i32 5, ptr [[P]], align 4
207; CHECK-NEXT:    ret i32 [[V1]]
208;
209  store i32 4, ptr %P  ;; must stay: the readonly call below may read it
210  %V1 = call i32 @func(ptr %P) readonly
211  store i32 5, ptr %P
212  ret i32 %V1
213}
214
215;; Trivial DSE can be performed across a readnone call.
;; Same shape as test9 but the call is marked readnone, so the expected output
;; drops the first store.
216define i32 @test10(ptr%P) {
217; CHECK-LABEL: @test10(
218; CHECK-NEXT:    [[V1:%.*]] = call i32 @func(ptr [[P:%.*]]) #[[ATTR2]]
219; CHECK-NEXT:    store i32 5, ptr [[P]], align 4
220; CHECK-NEXT:    ret i32 [[V1]]
221;
222  store i32 4, ptr %P  ;; dead: overwritten below with only a readnone call in between
223  %V1 = call i32 @func(ptr %P) readnone
224  store i32 5, ptr %P
225  ret i32 %V1
226}
227
228;; Trivial dead store elimination - should work for an entire series of dead stores too.
;; Four consecutive stores to the same pointer: only the last one remains in
;; the expected output.
229define void @test11(ptr%P) {
230; CHECK-LABEL: @test11(
231; CHECK-NEXT:    store i32 45, ptr [[P:%.*]], align 4
232; CHECK-NEXT:    ret void
233;
234  store i32 42, ptr %P  ;; dead
235  store i32 43, ptr %P  ;; dead
236  store i32 44, ptr %P  ;; dead
237  store i32 45, ptr %P
238  ret void
239}
240
;; Two plain loads of %P1 separated by a seq_cst atomic load of %P2. The
;; expected output keeps both plain loads, i.e. the second one is not replaced
;; by the first across the seq_cst load.
241define i32 @test12(i1 %B, ptr %P1, ptr %P2) {
242; CHECK-LABEL: @test12(
243; CHECK-NEXT:    [[LOAD0:%.*]] = load i32, ptr [[P1:%.*]], align 4
244; CHECK-NEXT:    [[TMP1:%.*]] = load atomic i32, ptr [[P2:%.*]] seq_cst, align 4
245; CHECK-NEXT:    [[LOAD1:%.*]] = load i32, ptr [[P1]], align 4
246; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[B:%.*]], i32 [[LOAD0]], i32 [[LOAD1]]
247; CHECK-NEXT:    ret i32 [[SEL]]
248;
249  %load0 = load i32, ptr %P1
250  %1 = load atomic i32, ptr %P2 seq_cst, align 4  ;; result unused, but the load is kept
251  %load1 = load i32, ptr %P1  ;; must not be merged with %load0
252  %sel = select i1 %B, i32 %load0, i32 %load1
253  ret i32 %sel
254}
255
;; A value is loaded from %P and immediately stored back to %P. The expected
;; output drops the store and keeps only the load.
256define void @dse1(ptr%P) {
257; CHECK-LABEL: @dse1(
258; CHECK-NEXT:    [[V:%.*]] = load i32, ptr [[P:%.*]], align 4
259; CHECK-NEXT:    ret void
260;
261  %v = load i32, ptr %P
262  store i32 %v, ptr %P  ;; writes back the value just loaded
263  ret void
264}
265
;; Like dse1, but the load is a seq_cst atomic load and the store is a plain
;; store. The expected output drops the store and keeps the atomic load.
266define void @dse2(ptr%P) {
267; CHECK-LABEL: @dse2(
268; CHECK-NEXT:    [[V:%.*]] = load atomic i32, ptr [[P:%.*]] seq_cst, align 4
269; CHECK-NEXT:    ret void
270;
271  %v = load atomic i32, ptr %P seq_cst, align 4
272  store i32 %v, ptr %P  ;; writes back the value just loaded
273  ret void
274}
275
;; Like dse2, but the write-back is an unordered atomic store. The expected
;; output still drops the store and keeps the seq_cst atomic load.
276define void @dse3(ptr%P) {
277; CHECK-LABEL: @dse3(
278; CHECK-NEXT:    [[V:%.*]] = load atomic i32, ptr [[P:%.*]] seq_cst, align 4
279; CHECK-NEXT:    ret void
280;
281  %v = load atomic i32, ptr %P seq_cst, align 4
282  store atomic i32 %v, ptr %P unordered, align 4  ;; writes back the value just loaded
283  ret void
284}
285
;; An unordered atomic load of %P followed by an unordered atomic store of the
;; same value back to %P, bracketed by two plain loads of %Q. The expected
;; output drops the store, reuses the first load of %Q for the second, and
;; returns 0.
286define i32 @dse4(ptr%P, ptr%Q) {
287; CHECK-LABEL: @dse4(
288; CHECK-NEXT:    [[A:%.*]] = load i32, ptr [[Q:%.*]], align 4
289; CHECK-NEXT:    [[V:%.*]] = load atomic i32, ptr [[P:%.*]] unordered, align 4
290; CHECK-NEXT:    ret i32 0
291;
292  %a = load i32, ptr %Q
293  %v = load atomic i32, ptr %P unordered, align 4
294  store atomic i32 %v, ptr %P unordered, align 4  ;; writes back the value just loaded
295  %b = load i32, ptr %Q  ;; expected to be replaced by %a
296  %res = sub i32 %a, %b
297  ret i32 %res
298}
299
300; Note that in this example, %P and %Q could in fact be the same
301; pointer.  %v could be different than the value observed for %a
302; and that's okay because we're using relaxed memory ordering.
303; The only guarantee we have to provide is that each of the loads
304; has to observe some value written to that location.  We do
305; not have to respect the order in which those writes were done.
;
; Same shape as dse4, except that the loads of %Q are unordered atomic loads
; and the load of %P comes first. The expected output drops the store, keeps
; one load of each pointer, and returns 0.
306define i32 @dse5(ptr%P, ptr%Q) {
307; CHECK-LABEL: @dse5(
308; CHECK-NEXT:    [[V:%.*]] = load atomic i32, ptr [[P:%.*]] unordered, align 4
309; CHECK-NEXT:    [[A:%.*]] = load atomic i32, ptr [[Q:%.*]] unordered, align 4
310; CHECK-NEXT:    ret i32 0
311;
312  %v = load atomic i32, ptr %P unordered, align 4
313  %a = load atomic i32, ptr %Q unordered, align 4
314  store atomic i32 %v, ptr %P unordered, align 4  ;; writes back the value loaded into %v
315  %b = load atomic i32, ptr %Q unordered, align 4  ;; expected to be replaced by %a
316  %res = sub i32 %a, %b
317  ret i32 %res
318}
319
320
;; Negative counterpart of dse1: the stored value (5) is not the value that
;; was loaded, so the store stays. The unused load is absent from the expected
;; output.
321define void @dse_neg1(ptr%P) {
322; CHECK-LABEL: @dse_neg1(
323; CHECK-NEXT:    store i32 5, ptr [[P:%.*]], align 4
324; CHECK-NEXT:    ret void
325;
326  %v = load i32, ptr %P  ;; result unused
327  store i32 5, ptr %P
328  ret void
329}
330
331; Could remove the store, but only if ordering was somehow
332; encoded.
; A plain load followed by a seq_cst atomic store of the loaded value back to
; the same pointer. The expected output keeps both instructions.
333define void @dse_neg2(ptr%P) {
334; CHECK-LABEL: @dse_neg2(
335; CHECK-NEXT:    [[V:%.*]] = load i32, ptr [[P:%.*]], align 4
336; CHECK-NEXT:    store atomic i32 [[V]], ptr [[P]] seq_cst, align 4
337; CHECK-NEXT:    ret void
338;
339  %v = load i32, ptr %P
340  store atomic i32 %v, ptr %P seq_cst, align 4  ;; kept because of its seq_cst ordering
341  ret void
342}
343
;; External global and callee used by @pr28763. @reads_c is declared without
;; any memory attributes, and its i32 parameter is marked `returned`.
344@c = external global i32, align 4
345declare i32 @reads_c(i32 returned)
;; NOTE(review): the name presumably refers to LLVM bug report 28763 - confirm.
;; A store of 0 to @c, a call passing the constant 0 to @reads_c, then a store
;; of 2 to @c. The expected output keeps both stores and the call, and drops
;; only the unused initial load.
346define void @pr28763() {
347; CHECK-LABEL: @pr28763(
348; CHECK-NEXT:  entry:
349; CHECK-NEXT:    store i32 0, ptr @c, align 4
350; CHECK-NEXT:    [[CALL:%.*]] = call i32 @reads_c(i32 0)
351; CHECK-NEXT:    store i32 2, ptr @c, align 4
352; CHECK-NEXT:    ret void
353;
354entry:
355  %load = load i32, ptr @c, align 4  ;; result unused
356  store i32 0, ptr @c, align 4  ;; must stay ahead of the call in the expected output
357  %call = call i32 @reads_c(i32 0)
358  store i32 2, ptr @c, align 4
359  ret void
360}
361
;; Two freeze instructions on the same operand. The expected output keeps a
;; single freeze and returns it directly, with the `and` of the two gone.
362define i1 @cse_freeze(i1 %a) {
363; CHECK-LABEL: @cse_freeze(
364; CHECK-NEXT:  entry:
365; CHECK-NEXT:    [[B:%.*]] = freeze i1 [[A:%.*]]
366; CHECK-NEXT:    ret i1 [[B]]
367;
368entry:
369  %b = freeze i1 %a
370  %c = freeze i1 %a  ;; expected to be replaced by %b
371  %and = and i1 %b, %c  ;; both operands become %b, so this folds to %b
372  ret i1 %and
373}
374