; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt -passes=gvn -S < %s | FileCheck %s

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-macosx10.7.0"

@x = common global i32 0, align 4
@y = common global i32 0, align 4

; GVN across unordered store (allowed)
define i32 @test1() nounwind uwtable ssp {
; CHECK-LABEL: define i32 @test1
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[X:%.*]] = load i32, ptr @y, align 4
; CHECK-NEXT:    store atomic i32 [[X]], ptr @x unordered, align 4
; CHECK-NEXT:    [[Z:%.*]] = add i32 [[X]], [[X]]
; CHECK-NEXT:    ret i32 [[Z]]
;
entry:
  %x = load i32, ptr @y
  store atomic i32 %x, ptr @x unordered, align 4
  %y = load i32, ptr @y
  %z = add i32 %x, %y
  ret i32 %z
}

; GVN across unordered load (allowed)
define i32 @test3() nounwind uwtable ssp {
; CHECK-LABEL: define i32 @test3
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[X:%.*]] = load i32, ptr @y, align 4
; CHECK-NEXT:    [[Y:%.*]] = load atomic i32, ptr @x unordered, align 4
; CHECK-NEXT:    [[A:%.*]] = add i32 [[X]], [[X]]
; CHECK-NEXT:    [[B:%.*]] = add i32 [[Y]], [[A]]
; CHECK-NEXT:    ret i32 [[B]]
;
entry:
  %x = load i32, ptr @y
  %y = load atomic i32, ptr @x unordered, align 4
  %z = load i32, ptr @y
  %a = add i32 %x, %z
  %b = add i32 %y, %a
  ret i32 %b
}

; GVN load to unordered load (allowed)
define i32 @test5() nounwind uwtable ssp {
; CHECK-LABEL: define i32 @test5
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[X:%.*]] = load atomic i32, ptr @x unordered, align 4
; CHECK-NEXT:    [[Z:%.*]] = add i32 [[X]], [[X]]
; CHECK-NEXT:    ret i32 [[Z]]
;
entry:
  %x = load atomic i32, ptr @x unordered, align 4
  %y = load i32, ptr @x
  %z = add i32 %x, %y
  ret i32 %z
}

; GVN unordered load to load (unordered load must not be removed)
define i32 @test6() nounwind uwtable ssp {
; CHECK-LABEL: define i32 @test6
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[X:%.*]] = load i32, ptr @x, align 4
; CHECK-NEXT:    [[X2:%.*]] = load atomic i32, ptr @x unordered, align 4
; CHECK-NEXT:    [[X3:%.*]] = add i32 [[X]], [[X2]]
; CHECK-NEXT:    ret i32 [[X3]]
;
entry:
  %x = load i32, ptr @x
  %x2 = load atomic i32, ptr @x unordered, align 4
  %x3 = add i32 %x, %x2
  ret i32 %x3
}

; GVN across release-acquire pair (forbidden)
define i32 @test7() nounwind uwtable ssp {
; CHECK-LABEL: define i32 @test7
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[X:%.*]] = load i32, ptr @y, align 4
; CHECK-NEXT:    store atomic i32 [[X]], ptr @x release, align 4
; CHECK-NEXT:    [[W:%.*]] = load atomic i32, ptr @x acquire, align 4
; CHECK-NEXT:    [[Y:%.*]] = load i32, ptr @y, align 4
; CHECK-NEXT:    [[Z:%.*]] = add i32 [[X]], [[Y]]
; CHECK-NEXT:    ret i32 [[Z]]
;
entry:
  %x = load i32, ptr @y
  store atomic i32 %x, ptr @x release, align 4
  %w = load atomic i32, ptr @x acquire, align 4
  %y = load i32, ptr @y
  %z = add i32 %x, %y
  ret i32 %z
}

; GVN across monotonic store (allowed)
define i32 @test9() nounwind uwtable ssp {
; CHECK-LABEL: define i32 @test9
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[X:%.*]] = load i32, ptr @y, align 4
; CHECK-NEXT:    store atomic i32 [[X]], ptr @x monotonic, align 4
; CHECK-NEXT:    [[Z:%.*]] = add i32 [[X]], [[X]]
; CHECK-NEXT:    ret i32 [[Z]]
;
entry:
  %x = load i32, ptr @y
  store atomic i32 %x, ptr @x monotonic, align 4
  %y = load i32, ptr @y
  %z = add i32 %x, %y
  ret i32 %z
}

; GVN of an unordered across monotonic load (not allowed)
define i32 @test10() nounwind uwtable ssp {
; CHECK-LABEL: define i32 @test10
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[X:%.*]] = load atomic i32, ptr @y unordered, align 4
; CHECK-NEXT:    [[CLOBBER:%.*]] = load atomic i32, ptr @x monotonic, align 4
; CHECK-NEXT:    [[Y:%.*]] = load atomic i32, ptr @y monotonic, align 4
; CHECK-NEXT:    [[Z:%.*]] = add i32 [[X]], [[Y]]
; CHECK-NEXT:    ret i32 [[Z]]
;
entry:
  %x = load atomic i32, ptr @y unordered, align 4
  %clobber = load atomic i32, ptr @x monotonic, align 4
  %y = load atomic i32, ptr @y monotonic, align 4
  %z = add i32 %x, %y
  ret i32 %z
}

define i32 @PR22708(i1 %flag) {
; CHECK-LABEL: define i32 @PR22708
; CHECK-SAME: (i1 [[FLAG:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 [[FLAG]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    store i32 43, ptr @y, align 4
; CHECK-NEXT:    br label [[IF_END]]
; CHECK:       if.end:
; CHECK-NEXT:    [[TMP0:%.*]] = load atomic i32, ptr @x acquire, align 4
; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr @y, align 4
; CHECK-NEXT:    ret i32 [[LOAD]]
;
entry:
  br i1 %flag, label %if.then, label %if.end

if.then:
  store i32 43, ptr @y, align 4
  br label %if.end

if.end:
  load atomic i32, ptr @x acquire, align 4
  %load = load i32, ptr @y, align 4
  ret i32 %load
}

; Can't remove a load over an ordering barrier
define i32 @test12(i1 %B, ptr %P1, ptr %P2) {
; CHECK-LABEL: define i32 @test12
; CHECK-SAME: (i1 [[B:%.*]], ptr [[P1:%.*]], ptr [[P2:%.*]]) {
; CHECK-NEXT:    [[LOAD0:%.*]] = load i32, ptr [[P1]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = load atomic i32, ptr [[P2]] seq_cst, align 4
; CHECK-NEXT:    [[LOAD1:%.*]] = load i32, ptr [[P1]], align 4
; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[B]], i32 [[LOAD0]], i32 [[LOAD1]]
; CHECK-NEXT:    ret i32 [[SEL]]
;
  %load0 = load i32, ptr %P1
  %1 = load atomic i32, ptr %P2 seq_cst, align 4
  %load1 = load i32, ptr %P1
  %sel = select i1 %B, i32 %load0, i32 %load1
  ret i32 %sel
}

; atomic to non-atomic forwarding is legal
define i32 @test13(ptr %P1) {
; CHECK-LABEL: define i32 @test13
; CHECK-SAME: (ptr [[P1:%.*]]) {
; CHECK-NEXT:    [[A:%.*]] = load atomic i32, ptr [[P1]] seq_cst, align 4
; CHECK-NEXT:    ret i32 0
;
  %a = load atomic i32, ptr %P1 seq_cst, align 4
  %b = load i32, ptr %P1
  %res = sub i32 %a, %b
  ret i32 %res
}

define i32 @test13b(ptr %P1) {
; CHECK-LABEL: define i32 @test13b
; CHECK-SAME: (ptr [[P1:%.*]]) {
; CHECK-NEXT:    store atomic i32 0, ptr [[P1]] unordered, align 4
; CHECK-NEXT:    ret i32 0
;
  store atomic i32 0, ptr %P1 unordered, align 4
  %b = load i32, ptr %P1
  ret i32 %b
}

; atomic to unordered atomic forwarding is legal
define i32 @test14(ptr %P1) {
; CHECK-LABEL: define i32 @test14
; CHECK-SAME: (ptr [[P1:%.*]]) {
; CHECK-NEXT:    [[A:%.*]] = load atomic i32, ptr [[P1]] seq_cst, align 4
; CHECK-NEXT:    ret i32 0
;
  %a = load atomic i32, ptr %P1 seq_cst, align 4
  %b = load atomic i32, ptr %P1 unordered, align 4
  %res = sub i32 %a, %b
  ret i32 %res
}

; implementation restriction: can't forward to stronger
; than unordered
define i32 @test15(ptr %P1, ptr %P2) {
; CHECK-LABEL: define i32 @test15
; CHECK-SAME: (ptr [[P1:%.*]], ptr [[P2:%.*]]) {
; CHECK-NEXT:    [[A:%.*]] = load atomic i32, ptr [[P1]] seq_cst, align 4
; CHECK-NEXT:    [[B:%.*]] = load atomic i32, ptr [[P1]] seq_cst, align 4
; CHECK-NEXT:    [[RES:%.*]] = sub i32 [[A]], [[B]]
; CHECK-NEXT:    ret i32 [[RES]]
;
  %a = load atomic i32, ptr %P1 seq_cst, align 4
  %b = load atomic i32, ptr %P1 seq_cst, align 4
  %res = sub i32 %a, %b
  ret i32 %res
}

; forwarding non-atomic to atomic is wrong! (However,
; it would be legal to use the later value in place of the
; former in this particular example.  We just don't
; do that right now.)
define i32 @test16(ptr %P1, ptr %P2) {
; CHECK-LABEL: define i32 @test16
; CHECK-SAME: (ptr [[P1:%.*]], ptr [[P2:%.*]]) {
; CHECK-NEXT:    [[A:%.*]] = load i32, ptr [[P1]], align 4
; CHECK-NEXT:    [[B:%.*]] = load atomic i32, ptr [[P1]] unordered, align 4
; CHECK-NEXT:    [[RES:%.*]] = sub i32 [[A]], [[B]]
; CHECK-NEXT:    ret i32 [[RES]]
;
  %a = load i32, ptr %P1, align 4
  %b = load atomic i32, ptr %P1 unordered, align 4
  %res = sub i32 %a, %b
  ret i32 %res
}

define i32 @test16b(ptr %P1) {
; CHECK-LABEL: define i32 @test16b
; CHECK-SAME: (ptr [[P1:%.*]]) {
; CHECK-NEXT:    store i32 0, ptr [[P1]], align 4
; CHECK-NEXT:    [[B:%.*]] = load atomic i32, ptr [[P1]] unordered, align 4
; CHECK-NEXT:    ret i32 [[B]]
;
  store i32 0, ptr %P1
  %b = load atomic i32, ptr %P1 unordered, align 4
  ret i32 %b
}

; Can't DSE across a full fence
define void @fence_seq_cst_store(ptr %P1, ptr %P2) {
; CHECK-LABEL: define void @fence_seq_cst_store
; CHECK-SAME: (ptr [[P1:%.*]], ptr [[P2:%.*]]) {
; CHECK-NEXT:    store i32 0, ptr [[P1]], align 4
; CHECK-NEXT:    store atomic i32 0, ptr [[P2]] seq_cst, align 4
; CHECK-NEXT:    store i32 0, ptr [[P1]], align 4
; CHECK-NEXT:    ret void
;
  store i32 0, ptr %P1, align 4
  store atomic i32 0, ptr %P2 seq_cst, align 4
  store i32 0, ptr %P1, align 4
  ret void
}

; Can't DSE across a full fence
define void @fence_seq_cst(ptr %P1, ptr %P2) {
; CHECK-LABEL: define void @fence_seq_cst
; CHECK-SAME: (ptr [[P1:%.*]], ptr [[P2:%.*]]) {
; CHECK-NEXT:    store i32 0, ptr [[P1]], align 4
; CHECK-NEXT:    fence seq_cst
; CHECK-NEXT:    store i32 0, ptr [[P1]], align 4
; CHECK-NEXT:    ret void
;
  store i32 0, ptr %P1, align 4
  fence seq_cst
  store i32 0, ptr %P1, align 4
  ret void
}

; Can't DSE across a full syncscope("singlethread") fence
define void @fence_seq_cst_st(ptr %P1, ptr %P2) {
; CHECK-LABEL: define void @fence_seq_cst_st
; CHECK-SAME: (ptr [[P1:%.*]], ptr [[P2:%.*]]) {
; CHECK-NEXT:    store i32 0, ptr [[P1]], align 4
; CHECK-NEXT:    fence syncscope("singlethread") seq_cst
; CHECK-NEXT:    store i32 0, ptr [[P1]], align 4
; CHECK-NEXT:    ret void
;
  store i32 0, ptr %P1, align 4
  fence syncscope("singlethread") seq_cst
  store i32 0, ptr %P1, align 4
  ret void
}

; Can't DSE across a full fence
define void @fence_asm_sideeffect(ptr %P1, ptr %P2) {
; CHECK-LABEL: define void @fence_asm_sideeffect
; CHECK-SAME: (ptr [[P1:%.*]], ptr [[P2:%.*]]) {
; CHECK-NEXT:    store i32 0, ptr [[P1]], align 4
; CHECK-NEXT:    call void asm sideeffect "", ""()
; CHECK-NEXT:    store i32 0, ptr [[P1]], align 4
; CHECK-NEXT:    ret void
;
  store i32 0, ptr %P1, align 4
  call void asm sideeffect "", ""()
  store i32 0, ptr %P1, align 4
  ret void
}

; Can't DSE across a full fence
define void @fence_asm_memory(ptr %P1, ptr %P2) {
; CHECK-LABEL: define void @fence_asm_memory
; CHECK-SAME: (ptr [[P1:%.*]], ptr [[P2:%.*]]) {
; CHECK-NEXT:    store i32 0, ptr [[P1]], align 4
; CHECK-NEXT:    call void asm "", "~{memory}"()
; CHECK-NEXT:    store i32 0, ptr [[P1]], align 4
; CHECK-NEXT:    ret void
;
  store i32 0, ptr %P1, align 4
  call void asm "", "~{memory}"()
  store i32 0, ptr %P1, align 4
  ret void
}

; Can't remove a volatile load
define i32 @volatile_load(ptr %P1, ptr %P2) {
; CHECK-LABEL: define i32 @volatile_load
; CHECK-SAME: (ptr [[P1:%.*]], ptr [[P2:%.*]]) {
; CHECK-NEXT:    [[A:%.*]] = load i32, ptr [[P1]], align 4
; CHECK-NEXT:    [[B:%.*]] = load volatile i32, ptr [[P1]], align 4
; CHECK-NEXT:    [[RES:%.*]] = sub i32 [[A]], [[B]]
; CHECK-NEXT:    ret i32 [[RES]]
;
  %a = load i32, ptr %P1, align 4
  %b = load volatile i32, ptr %P1, align 4
  %res = sub i32 %a, %b
  ret i32 %res
}

; Can't remove redundant volatile loads
define i32 @redundant_volatile_load(ptr %P1, ptr %P2) {
; CHECK-LABEL: define i32 @redundant_volatile_load
; CHECK-SAME: (ptr [[P1:%.*]], ptr [[P2:%.*]]) {
; CHECK-NEXT:    [[A:%.*]] = load volatile i32, ptr [[P1]], align 4
; CHECK-NEXT:    [[B:%.*]] = load volatile i32, ptr [[P1]], align 4
; CHECK-NEXT:    [[RES:%.*]] = sub i32 [[A]], [[B]]
; CHECK-NEXT:    ret i32 [[RES]]
;
  %a = load volatile i32, ptr %P1, align 4
  %b = load volatile i32, ptr %P1, align 4
  %res = sub i32 %a, %b
  ret i32 %res
}

; Can't DSE a volatile store
define void @volatile_store(ptr %P1, ptr %P2) {
; CHECK-LABEL: define void @volatile_store
; CHECK-SAME: (ptr [[P1:%.*]], ptr [[P2:%.*]]) {
; CHECK-NEXT:    store volatile i32 0, ptr [[P1]], align 4
; CHECK-NEXT:    store i32 3, ptr [[P1]], align 4
; CHECK-NEXT:    ret void
;
  store volatile i32 0, ptr %P1, align 4
  store i32 3, ptr %P1, align 4
  ret void
}

; Can't DSE a redundant volatile store
define void @redundant_volatile_store(ptr %P1, ptr %P2) {
; CHECK-LABEL: define void @redundant_volatile_store
; CHECK-SAME: (ptr [[P1:%.*]], ptr [[P2:%.*]]) {
; CHECK-NEXT:    store volatile i32 0, ptr [[P1]], align 4
; CHECK-NEXT:    store volatile i32 0, ptr [[P1]], align 4
; CHECK-NEXT:    ret void
;
  store volatile i32 0, ptr %P1, align 4
  store volatile i32 0, ptr %P1, align 4
  ret void
}

; Can value forward from volatiles
define i32 @test20(ptr %P1, ptr %P2) {
; CHECK-LABEL: define i32 @test20
; CHECK-SAME: (ptr [[P1:%.*]], ptr [[P2:%.*]]) {
; CHECK-NEXT:    [[A:%.*]] = load volatile i32, ptr [[P1]], align 4
; CHECK-NEXT:    ret i32 0
;
  %a = load volatile i32, ptr %P1, align 4
  %b = load i32, ptr %P1, align 4
  %res = sub i32 %a, %b
  ret i32 %res
}

; We're currently conservative about widening
define i64 @widen1(ptr %P1) {
; CHECK-LABEL: define i64 @widen1
; CHECK-SAME: (ptr [[P1:%.*]]) {
; CHECK-NEXT:    [[A:%.*]] = load atomic i32, ptr [[P1]] unordered, align 4
; CHECK-NEXT:    [[B:%.*]] = load atomic i64, ptr [[P1]] unordered, align 4
; CHECK-NEXT:    [[A64:%.*]] = sext i32 [[A]] to i64
; CHECK-NEXT:    [[RES:%.*]] = sub i64 [[A64]], [[B]]
; CHECK-NEXT:    ret i64 [[RES]]
;
  %a = load atomic i32, ptr %P1 unordered, align 4
  %b = load atomic i64, ptr %P1 unordered, align 4
  %a64 = sext i32 %a to i64
  %res = sub i64 %a64, %b
  ret i64 %res
}

; narrowing does work
define i64 @narrow(ptr %P1) {
; CHECK-LABEL: define i64 @narrow
; CHECK-SAME: (ptr [[P1:%.*]]) {
; CHECK-NEXT:    [[A64:%.*]] = load atomic i64, ptr [[P1]] unordered, align 4
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[A64]] to i32
; CHECK-NEXT:    [[B64:%.*]] = sext i32 [[TMP1]] to i64
; CHECK-NEXT:    [[RES:%.*]] = sub i64 [[A64]], [[B64]]
; CHECK-NEXT:    ret i64 [[RES]]
;
  %a64 = load atomic i64, ptr %P1 unordered, align 4
  %b = load atomic i32, ptr %P1 unordered, align 4
  %b64 = sext i32 %b to i64
  %res = sub i64 %a64, %b64
  ret i64 %res
}

; Missed optimization, we don't yet optimize ordered loads
define i64 @narrow2(ptr %P1) {
; CHECK-LABEL: define i64 @narrow2
; CHECK-SAME: (ptr [[P1:%.*]]) {
; CHECK-NEXT:    [[A64:%.*]] = load atomic i64, ptr [[P1]] acquire, align 4
; CHECK-NEXT:    [[B:%.*]] = load atomic i32, ptr [[P1]] acquire, align 4
; CHECK-NEXT:    [[B64:%.*]] = sext i32 [[B]] to i64
; CHECK-NEXT:    [[RES:%.*]] = sub i64 [[A64]], [[B64]]
; CHECK-NEXT:    ret i64 [[RES]]
;
  %a64 = load atomic i64, ptr %P1 acquire, align 4
  %b = load atomic i32, ptr %P1 acquire, align 4
  %b64 = sext i32 %b to i64
  %res = sub i64 %a64, %b64
  ret i64 %res
}

; Note: The cross block FRE testing is deliberately light.  All of the tricky
; bits of legality are shared code with the block-local FRE above.  These
; are here only to show that we haven't obviously broken anything.

; unordered atomic to unordered atomic
define i32 @non_local_fre(ptr %P1) {
; CHECK-LABEL: define i32 @non_local_fre
; CHECK-SAME: (ptr [[P1:%.*]]) {
; CHECK-NEXT:    [[A:%.*]] = load atomic i32, ptr [[P1]] unordered, align 4
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[A]], 0
; CHECK-NEXT:    br i1 [[CMP]], label [[EARLY:%.*]], label [[NEXT:%.*]]
; CHECK:       early:
; CHECK-NEXT:    ret i32 0
; CHECK:       next:
; CHECK-NEXT:    ret i32 0
;
  %a = load atomic i32, ptr %P1 unordered, align 4
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %early, label %next
early:
  ret i32 %a
next:
  %b = load atomic i32, ptr %P1 unordered, align 4
  %res = sub i32 %a, %b
  ret i32 %res
}

; unordered atomic to non-atomic
define i32 @non_local_fre2(ptr %P1) {
; CHECK-LABEL: define i32 @non_local_fre2
; CHECK-SAME: (ptr [[P1:%.*]]) {
; CHECK-NEXT:    [[A:%.*]] = load atomic i32, ptr [[P1]] unordered, align 4
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[A]], 0
; CHECK-NEXT:    br i1 [[CMP]], label [[EARLY:%.*]], label [[NEXT:%.*]]
; CHECK:       early:
; CHECK-NEXT:    ret i32 0
; CHECK:       next:
; CHECK-NEXT:    ret i32 0
;
  %a = load atomic i32, ptr %P1 unordered, align 4
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %early, label %next
early:
  ret i32 %a
next:
  %b = load i32, ptr %P1
  %res = sub i32 %a, %b
  ret i32 %res
}

; Can't forward ordered atomics.
define i32 @non_local_fre3(ptr %P1) {
; CHECK-LABEL: define i32 @non_local_fre3
; CHECK-SAME: (ptr [[P1:%.*]]) {
; CHECK-NEXT:    [[A:%.*]] = load atomic i32, ptr [[P1]] acquire, align 4
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[A]], 0
; CHECK-NEXT:    br i1 [[CMP]], label [[EARLY:%.*]], label [[NEXT:%.*]]
; CHECK:       early:
; CHECK-NEXT:    ret i32 0
; CHECK:       next:
; CHECK-NEXT:    [[B:%.*]] = load atomic i32, ptr [[P1]] acquire, align 4
; CHECK-NEXT:    [[RES:%.*]] = sub i32 [[A]], [[B]]
; CHECK-NEXT:    ret i32 [[RES]]
;
  %a = load atomic i32, ptr %P1 acquire, align 4
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %early, label %next
early:
  ret i32 %a
next:
  %b = load atomic i32, ptr %P1 acquire, align 4
  %res = sub i32 %a, %b
  ret i32 %res
}

declare void @clobber()

; unordered atomic to unordered atomic
define i32 @non_local_pre(ptr %P1) {
; CHECK-LABEL: define i32 @non_local_pre
; CHECK-SAME: (ptr [[P1:%.*]]) {
; CHECK-NEXT:    [[A:%.*]] = load atomic i32, ptr [[P1]] unordered, align 4
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[A]], 0
; CHECK-NEXT:    br i1 [[CMP]], label [[EARLY:%.*]], label [[NEXT:%.*]]
; CHECK:       early:
; CHECK-NEXT:    call void @clobber()
; CHECK-NEXT:    [[B_PRE:%.*]] = load atomic i32, ptr [[P1]] unordered, align 4
; CHECK-NEXT:    br label [[NEXT]]
; CHECK:       next:
; CHECK-NEXT:    [[B:%.*]] = phi i32 [ [[B_PRE]], [[EARLY]] ], [ [[A]], [[TMP0:%.*]] ]
; CHECK-NEXT:    ret i32 [[B]]
;
  %a = load atomic i32, ptr %P1 unordered, align 4
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %early, label %next
early:
  call void @clobber()
  br label %next
next:
  %b = load atomic i32, ptr %P1 unordered, align 4
  ret i32 %b
}

; unordered atomic to non-atomic
define i32 @non_local_pre2(ptr %P1) {
; CHECK-LABEL: define i32 @non_local_pre2
; CHECK-SAME: (ptr [[P1:%.*]]) {
; CHECK-NEXT:    [[A:%.*]] = load atomic i32, ptr [[P1]] unordered, align 4
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[A]], 0
; CHECK-NEXT:    br i1 [[CMP]], label [[EARLY:%.*]], label [[NEXT:%.*]]
; CHECK:       early:
; CHECK-NEXT:    call void @clobber()
; CHECK-NEXT:    [[B_PRE:%.*]] = load i32, ptr [[P1]], align 4
; CHECK-NEXT:    br label [[NEXT]]
; CHECK:       next:
; CHECK-NEXT:    [[B:%.*]] = phi i32 [ [[B_PRE]], [[EARLY]] ], [ [[A]], [[TMP0:%.*]] ]
; CHECK-NEXT:    ret i32 [[B]]
;
  %a = load atomic i32, ptr %P1 unordered, align 4
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %early, label %next
early:
  call void @clobber()
  br label %next
next:
  %b = load i32, ptr %P1
  ret i32 %b
}

; non-atomic to unordered atomic - can't forward!
define i32 @non_local_pre3(ptr %P1) {
; CHECK-LABEL: define i32 @non_local_pre3
; CHECK-SAME: (ptr [[P1:%.*]]) {
; CHECK-NEXT:    [[A:%.*]] = load i32, ptr [[P1]], align 4
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[A]], 0
; CHECK-NEXT:    br i1 [[CMP]], label [[EARLY:%.*]], label [[NEXT:%.*]]
; CHECK:       early:
; CHECK-NEXT:    call void @clobber()
; CHECK-NEXT:    br label [[NEXT]]
; CHECK:       next:
; CHECK-NEXT:    [[B:%.*]] = load atomic i32, ptr [[P1]] unordered, align 4
; CHECK-NEXT:    ret i32 [[B]]
;
  %a = load i32, ptr %P1
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %early, label %next
early:
  call void @clobber()
  br label %next
next:
  %b = load atomic i32, ptr %P1 unordered, align 4
  ret i32 %b
}

; ordered atomic to ordered atomic - can't forward
define i32 @non_local_pre4(ptr %P1) {
; CHECK-LABEL: define i32 @non_local_pre4
; CHECK-SAME: (ptr [[P1:%.*]]) {
; CHECK-NEXT:    [[A:%.*]] = load atomic i32, ptr [[P1]] seq_cst, align 4
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[A]], 0
; CHECK-NEXT:    br i1 [[CMP]], label [[EARLY:%.*]], label [[NEXT:%.*]]
; CHECK:       early:
; CHECK-NEXT:    call void @clobber()
; CHECK-NEXT:    br label [[NEXT]]
; CHECK:       next:
; CHECK-NEXT:    [[B:%.*]] = load atomic i32, ptr [[P1]] seq_cst, align 4
; CHECK-NEXT:    ret i32 [[B]]
;
  %a = load atomic i32, ptr %P1 seq_cst, align 4
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %early, label %next
early:
  call void @clobber()
  br label %next
next:
  %b = load atomic i32, ptr %P1 seq_cst, align 4
  ret i32 %b
}

; can't remove volatile on any path
define i32 @non_local_pre5(ptr %P1) {
; CHECK-LABEL: define i32 @non_local_pre5
; CHECK-SAME: (ptr [[P1:%.*]]) {
; CHECK-NEXT:    [[A:%.*]] = load atomic i32, ptr [[P1]] seq_cst, align 4
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[A]], 0
; CHECK-NEXT:    br i1 [[CMP]], label [[EARLY:%.*]], label [[NEXT:%.*]]
; CHECK:       early:
; CHECK-NEXT:    call void @clobber()
; CHECK-NEXT:    br label [[NEXT]]
; CHECK:       next:
; CHECK-NEXT:    [[B:%.*]] = load volatile i32, ptr [[P1]], align 4
; CHECK-NEXT:    ret i32 [[B]]
;
  %a = load atomic i32, ptr %P1 seq_cst, align 4
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %early, label %next
early:
  call void @clobber()
  br label %next
next:
  %b = load volatile i32, ptr %P1
  ret i32 %b
}


; ordered atomic to unordered atomic
define i32 @non_local_pre6(ptr %P1) {
; CHECK-LABEL: define i32 @non_local_pre6
; CHECK-SAME: (ptr [[P1:%.*]]) {
; CHECK-NEXT:    [[A:%.*]] = load atomic i32, ptr [[P1]] seq_cst, align 4
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[A]], 0
; CHECK-NEXT:    br i1 [[CMP]], label [[EARLY:%.*]], label [[NEXT:%.*]]
; CHECK:       early:
; CHECK-NEXT:    call void @clobber()
; CHECK-NEXT:    [[B_PRE:%.*]] = load atomic i32, ptr [[P1]] unordered, align 4
; CHECK-NEXT:    br label [[NEXT]]
; CHECK:       next:
; CHECK-NEXT:    [[B:%.*]] = phi i32 [ [[B_PRE]], [[EARLY]] ], [ [[A]], [[TMP0:%.*]] ]
; CHECK-NEXT:    ret i32 [[B]]
;
  %a = load atomic i32, ptr %P1 seq_cst, align 4
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %early, label %next
early:
  call void @clobber()
  br label %next
next:
  %b = load atomic i32, ptr %P1 unordered, align 4
  ret i32 %b
}
