xref: /llvm-project/llvm/test/Transforms/LICM/hoisting.ll (revision 43436993f48b1d75d9d80796cb0889ce7d191888)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -aa-pipeline=basic-aa -passes='loop-mssa(licm)' -S | FileCheck %s
3
; Global i32 that @test1, @test2, @test3 and @test_volatile load from.
4@X = global i32 0		; <ptr> [#uses=1]
5
; External callee declared without any attributes; it is called inside the
; loops of @test1 and @test5.
6declare void @foo()
7
; Bit-reversal intrinsic; called from the loop body of @hoist_bitreverse.
8declare i32 @llvm.bitreverse.i32(i32)
9
10; This testcase tests for a problem where LICM hoists
11; potentially trapping instructions when they are not guaranteed to execute.
; @test1: the sdiv whose divisor is %A (a value loaded from @X) lives in
; %IfUnEqual, a block that is only entered when %c is false. The assertions
; below expect the load to stay in the entry block and the sdiv to stay in
; %IfUnEqual, i.e. the division is NOT moved in front of the loop.
12define i32 @test1(i1 %c) {
13; CHECK-LABEL: @test1(
14; CHECK-NEXT:    [[A:%.*]] = load i32, ptr @X, align 4
15; CHECK-NEXT:    br label [[LOOP:%.*]]
16; CHECK:       Loop:
17; CHECK-NEXT:    call void @foo()
18; CHECK-NEXT:    br i1 [[C:%.*]], label [[LOOPTAIL:%.*]], label [[IFUNEQUAL:%.*]]
19; CHECK:       IfUnEqual:
20; CHECK-NEXT:    [[B1:%.*]] = sdiv i32 4, [[A]]
21; CHECK-NEXT:    br label [[LOOPTAIL]]
22; CHECK:       LoopTail:
23; CHECK-NEXT:    [[B:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[B1]], [[IFUNEQUAL]] ]
24; CHECK-NEXT:    br i1 [[C]], label [[LOOP]], label [[OUT:%.*]]
25; CHECK:       Out:
26; CHECK-NEXT:    [[B_LCSSA:%.*]] = phi i32 [ [[B]], [[LOOPTAIL]] ]
27; CHECK-NEXT:    [[C:%.*]] = sub i32 [[A]], [[B_LCSSA]]
28; CHECK-NEXT:    ret i32 [[C]]
29;
30  %A = load i32, ptr @X		; <i32> [#uses=2]
31  br label %Loop
32Loop:		; preds = %LoopTail, %0
33  call void @foo( )
34  br i1 %c, label %LoopTail, label %IfUnEqual
35
36IfUnEqual:		; preds = %Loop
  ; Loop-invariant operands, but only conditionally executed.
37  %B1 = sdiv i32 4, %A		; <i32> [#uses=1]
38  br label %LoopTail
39
40LoopTail:		; preds = %IfUnEqual, %Loop
41  %B = phi i32 [ 0, %Loop ], [ %B1, %IfUnEqual ]		; <i32> [#uses=1]
42  br i1 %c, label %Loop, label %Out
43Out:		; preds = %LoopTail
  ; %B is used outside the loop; the expected output routes it through an
  ; LCSSA phi in this block.
44  %C = sub i32 %A, %B		; <i32> [#uses=1]
45  ret i32 %C
46}
47
48
; External callee taking one i32, declared nounwind; called inside the loops
; of @test2 and @test3.
49declare void @foo2(i32) nounwind
50
51
52;; It is ok and desirable to hoist this potentially trapping instruction.
; @test2: the sdiv sits in the loop header %Loop, ahead of the only call in
; the loop (@foo2 in %loop2). The assertions below expect the sdiv to be
; emitted in the entry block directly after the load of @X, and its result to
; reach %Out through an LCSSA phi.
53define i32 @test2(i1 %c) {
54; CHECK-LABEL: @test2(
55; CHECK-NEXT:    [[A:%.*]] = load i32, ptr @X, align 4
56; CHECK-NEXT:    [[B:%.*]] = sdiv i32 4, [[A]]
57; CHECK-NEXT:    br label [[LOOP:%.*]]
58; CHECK:       Loop:
59; CHECK-NEXT:    br label [[LOOP2:%.*]]
60; CHECK:       loop2:
61; CHECK-NEXT:    call void @foo2(i32 [[B]])
62; CHECK-NEXT:    br i1 [[C:%.*]], label [[LOOP]], label [[OUT:%.*]]
63; CHECK:       Out:
64; CHECK-NEXT:    [[B_LCSSA:%.*]] = phi i32 [ [[B]], [[LOOP2]] ]
65; CHECK-NEXT:    [[C:%.*]] = sub i32 [[A]], [[B_LCSSA]]
66; CHECK-NEXT:    ret i32 [[C]]
67;
68  %A = load i32, ptr @X
69  br label %Loop
70
71Loop:
72  ;; Should have hoisted this div!
73  %B = sdiv i32 4, %A
74  br label %loop2
75
76loop2:
  ; The hoisted quotient is passed to the call on every iteration.
77  call void @foo2( i32 %B )
78  br i1 %c, label %Loop, label %Out
79
80Out:
81  %C = sub i32 %A, %B
82  ret i32 %C
83}
84
85
86; Don't bother constant folding the add, just hoist it.
; @test3: %B is an add of two constants inside the loop. The assertions below
; expect the add to appear, still as `add i32 4, 2`, in the entry block before
; the branch into %Loop.
87define i32 @test3(i1 %c) {
88; CHECK-LABEL: @test3(
89; CHECK-NEXT:    [[A:%.*]] = load i32, ptr @X, align 4
90; CHECK-NEXT:    [[B:%.*]] = add i32 4, 2
91; CHECK-NEXT:    br label [[LOOP:%.*]]
92; CHECK:       Loop:
93; CHECK-NEXT:    call void @foo2(i32 [[B]])
94; CHECK-NEXT:    br i1 [[C:%.*]], label [[LOOP]], label [[OUT:%.*]]
95; CHECK:       Out:
96; CHECK-NEXT:    [[B_LCSSA:%.*]] = phi i32 [ [[B]], [[LOOP]] ]
97; CHECK-NEXT:    [[C:%.*]] = sub i32 [[A]], [[B_LCSSA]]
98; CHECK-NEXT:    ret i32 [[C]]
99;
100  %A = load i32, ptr @X		; <i32> [#uses=2]
101  br label %Loop
102Loop:
  ; Constant-only operands: expected to be moved, not folded.
103  %B = add i32 4, 2		; <i32> [#uses=2]
104  call void @foo2( i32 %B )
105  br i1 %c, label %Loop, label %Out
106Out:		; preds = %Loop
107  %C = sub i32 %A, %B		; <i32> [#uses=1]
108  ret i32 %C
109}
110
; @test4: `sdiv i32 %x, %y` has loop-invariant operands but follows a call to
; @foo_may_call_exit in the single loop block. The assertions below expect the
; division to remain in %for.body after the call.
; NOTE(review): presumably the point is that the callee may not return, so the
; division is not guaranteed to execute -- confirm against the LICM sources.
111define i32 @test4(i32 %x, i32 %y) nounwind uwtable ssp {
112; CHECK-LABEL: @test4(
113; CHECK-NEXT:  entry:
114; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
115; CHECK:       for.body:
116; CHECK-NEXT:    [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
117; CHECK-NEXT:    [[N_01:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
118; CHECK-NEXT:    call void @foo_may_call_exit(i32 0)
119; CHECK-NEXT:    [[DIV:%.*]] = sdiv i32 [[X:%.*]], [[Y:%.*]]
120; CHECK-NEXT:    [[ADD]] = add nsw i32 [[N_01]], [[DIV]]
121; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_02]], 1
122; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], 10000
123; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
124; CHECK:       for.end:
125; CHECK-NEXT:    [[N_0_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ]
126; CHECK-NEXT:    ret i32 [[N_0_LCSSA]]
127;
128entry:
129  br label %for.body
130
131for.body:                                         ; preds = %entry, %for.body
  ; %i.02 counts iterations (0 .. 9999); %n.01 accumulates the quotients.
132  %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
133  %n.01 = phi i32 [ 0, %entry ], [ %add, %for.body ]
134  call void @foo_may_call_exit(i32 0)
  ; Invariant operands, yet expected to stay behind the call above.
135  %div = sdiv i32 %x, %y
136  %add = add nsw i32 %n.01, %div
137  %inc = add nsw i32 %i.02, 1
138  %cmp = icmp slt i32 %inc, 10000
139  br i1 %cmp, label %for.body, label %for.end
140
141for.end:                                          ; preds = %for.body
142  %n.0.lcssa = phi i32 [ %add, %for.body ]
143  ret i32 %n.0.lcssa
144}
145
; External callee declared without attributes; called at the top of the loop
; bodies of @test4 and @test6.
146declare void @foo_may_call_exit(i32)
147
148; PR14854
; @test5: both aggregate operations in %tailrecurse work on the loop-invariant
; argument %e. The assertions below expect
;   * the extractvalue (%out, used by the compare inside the loop) to be
;     emitted in %entry, and
;   * the insertvalue (%d, used only by the return) to be emitted in %ifend,
;     after the loop.
149define { ptr, i32 } @test5(i32 %i, { ptr, i32 } %e) {
150; CHECK-LABEL: @test5(
151; CHECK-NEXT:  entry:
152; CHECK-NEXT:    [[OUT:%.*]] = extractvalue { ptr, i32 } [[E:%.*]], 1
153; CHECK-NEXT:    br label [[TAILRECURSE:%.*]]
154; CHECK:       tailrecurse:
155; CHECK-NEXT:    [[I_TR:%.*]] = phi i32 [ [[I:%.*]], [[ENTRY:%.*]] ], [ [[CMP2:%.*]], [[THEN:%.*]] ]
156; CHECK-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[OUT]], [[I_TR]]
157; CHECK-NEXT:    br i1 [[CMP1]], label [[THEN]], label [[IFEND:%.*]]
158; CHECK:       then:
159; CHECK-NEXT:    call void @foo()
160; CHECK-NEXT:    [[CMP2]] = add i32 [[I_TR]], 1
161; CHECK-NEXT:    br label [[TAILRECURSE]]
162; CHECK:       ifend:
163; CHECK-NEXT:    [[D_LE:%.*]] = insertvalue { ptr, i32 } [[E]], ptr null, 0
164; CHECK-NEXT:    ret { ptr, i32 } [[D_LE]]
165;
166entry:
167  br label %tailrecurse
168
169tailrecurse:                                      ; preds = %then, %entry
170  %i.tr = phi i32 [ %i, %entry ], [ %cmp2, %then ]
  ; Field 1 of %e is the loop bound compared against %i.tr below.
171  %out = extractvalue { ptr, i32 } %e, 1
  ; Copy of %e with field 0 replaced by null; only the return uses it.
172  %d = insertvalue { ptr, i32 } %e, ptr null, 0
173  %cmp1 = icmp sgt i32 %out, %i.tr
174  br i1 %cmp1, label %then, label %ifend
175
176then:                                             ; preds = %tailrecurse
177  call void @foo()
178  %cmp2 = add i32 %i.tr, 1
179  br label %tailrecurse
180
181ifend:                                            ; preds = %tailrecurse
182  ret { ptr, i32 } %d
183}
184
; @test6: `fneg float %f` has a loop-invariant operand and follows a call to
; @foo_may_call_exit in the loop body. Unlike the sdiv in @test4, the
; assertions below expect the fneg to be emitted in %entry, ahead of the loop.
; NOTE(review): attribute group #2 is referenced on this function but no
; `attributes #2` definition appears in the visible part of the file -- confirm
; this is intentional.
185define void @test6(float %f) #2 {
186; CHECK-LABEL: @test6(
187; CHECK-NEXT:  entry:
188; CHECK-NEXT:    [[NEG:%.*]] = fneg float [[F:%.*]]
189; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
190; CHECK:       for.body:
191; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
192; CHECK-NEXT:    call void @foo_may_call_exit(i32 0)
193; CHECK-NEXT:    call void @use(float [[NEG]])
194; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
195; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], 10000
196; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
197; CHECK:       for.end:
198; CHECK-NEXT:    ret void
199;
200entry:
201  br label %for.body
202
203for.body:                                         ; preds = %for.body, %entry
204  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
205  call void @foo_may_call_exit(i32 0)
  ; Expected to move to %entry even though it follows the call above.
206  %neg = fneg float %f
207  call void @use(float %neg)
208  %inc = add nsw i32 %i, 1
209  %cmp = icmp slt i32 %inc, 10000
210  br i1 %cmp, label %for.body, label %for.end
211
212for.end:                                          ; preds = %for.body
213  ret void
214}
215
; External consumer of a float; @test6 passes the negated value to it so the
; fneg result has a use inside the loop.
216declare void @use(float)
217
; @hoist_bitreverse: the call to llvm.bitreverse.i32 takes the function
; argument %0 and sits in %body, which is only entered while the counter %2 is
; below 1024. The assertions below expect the call to be emitted in the entry
; block, before the branch to %header.
; Numbering: %0 is the argument, %1 is the unnamed entry block (see the phi
; predecessors in %header), %2..%6 are the unnamed instructions.
218define i32 @hoist_bitreverse(i32 %0)  {
219; CHECK-LABEL: @hoist_bitreverse(
220; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.bitreverse.i32(i32 [[TMP0:%.*]])
221; CHECK-NEXT:    br label [[HEADER:%.*]]
222; CHECK:       header:
223; CHECK-NEXT:    [[SUM:%.*]] = phi i32 [ 0, [[TMP1:%.*]] ], [ [[TMP5:%.*]], [[LATCH:%.*]] ]
224; CHECK-NEXT:    [[TMP3:%.*]] = phi i32 [ 0, [[TMP1]] ], [ [[TMP6:%.*]], [[LATCH]] ]
225; CHECK-NEXT:    [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 1024
226; CHECK-NEXT:    br i1 [[TMP4]], label [[BODY:%.*]], label [[RETURN:%.*]]
227; CHECK:       body:
228; CHECK-NEXT:    [[TMP5]] = add i32 [[SUM]], [[TMP2]]
229; CHECK-NEXT:    br label [[LATCH]]
230; CHECK:       latch:
231; CHECK-NEXT:    [[TMP6]] = add nsw i32 [[TMP3]], 1
232; CHECK-NEXT:    br label [[HEADER]]
233; CHECK:       return:
234; CHECK-NEXT:    [[SUM_LCSSA:%.*]] = phi i32 [ [[SUM]], [[HEADER]] ]
235; CHECK-NEXT:    ret i32 [[SUM_LCSSA]]
236;
237  br label %header
238
239header:
  ; %sum is the running total, %2 the iteration counter.
240  %sum = phi i32 [ 0, %1 ], [ %5, %latch ]
241  %2 = phi i32 [ 0, %1 ], [ %6, %latch ]
242  %3 = icmp slt i32 %2, 1024
243  br i1 %3, label %body, label %return
244
245body:
  ; Same argument on every iteration; this is the instruction under test.
246  %4 = call i32 @llvm.bitreverse.i32(i32 %0)
247  %5 = add i32 %sum, %4
248  br label %latch
249
250latch:
251  %6 = add nsw i32 %2, 1
252  br label %header
253
254return:
255  ret i32 %sum
256}
257
258; Can neither sink nor hoist
; @test_volatile: a volatile load of @X inside a single-block loop whose result
; is returned after the loop. The assertions below expect the load to stay in
; %Loop and the value to reach the return through an LCSSA phi in %Out.
259define i32 @test_volatile(i1 %c) {
260; CHECK-LABEL: @test_volatile(
261; CHECK-NEXT:    br label [[LOOP:%.*]]
262; CHECK:       Loop:
263; CHECK-NEXT:    [[A:%.*]] = load volatile i32, ptr @X, align 4
264; CHECK-NEXT:    br i1 [[C:%.*]], label [[LOOP]], label [[OUT:%.*]]
265; CHECK:       Out:
266; CHECK-NEXT:    [[A_LCSSA:%.*]] = phi i32 [ [[A]], [[LOOP]] ]
267; CHECK-NEXT:    ret i32 [[A_LCSSA]]
268;
269  br label %Loop
270
271Loop:
  ; Volatile access: expected to be left exactly where it is.
272  %A = load volatile i32, ptr @X
273  br i1 %c, label %Loop, label %Out
274
275Out:
276  ret i32 %A
277}
278
279
; Declarations shared by the test_fence* functions below.
; invariant.start takes a size (i64) and a pointer and returns a token pointer.
280declare ptr @llvm.invariant.start.p0(i64, ptr nocapture) nounwind readonly
; invariant.end takes the token returned by invariant.start plus the same size
; and pointer; used only in @test_fence1.
281declare void @llvm.invariant.end.p0(ptr, i64, ptr nocapture) nounwind
; Opaque external callee that receives the invariant.start token in
; @test_fence2.
282declare void @escaping.invariant.start(ptr) nounwind
283; invariant.start dominates the load, and in this scope, the
284; load is invariant. So, we can hoist the `addrld` load out of the loop.
; @test_fence: %entry stores 5 to %gep (%addr + 8 bytes), issues a release
; fence and then calls invariant.start for 4 bytes at %gep. The loop reads
; %volatile, issues an acquire fence and then loads from %gep. The assertions
; below expect the %gep load (%addrld) to be emitted in %entry right after
; invariant.start, while the %volatile load and the acquire fence stay in the
; loop.
285define i32 @test_fence(ptr %addr, i32 %n, ptr %volatile) {
286; CHECK-LABEL: @test_fence(
287; CHECK-NEXT:  entry:
288; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[ADDR:%.*]], i64 8
289; CHECK-NEXT:    store atomic i32 5, ptr [[GEP]] unordered, align 8
290; CHECK-NEXT:    fence release
291; CHECK-NEXT:    [[INVST:%.*]] = call ptr @llvm.invariant.start.p0(i64 4, ptr [[GEP]])
292; CHECK-NEXT:    [[ADDRLD:%.*]] = load atomic i32, ptr [[GEP]] unordered, align 8
293; CHECK-NEXT:    br label [[LOOP:%.*]]
294; CHECK:       loop:
295; CHECK-NEXT:    [[INDVAR:%.*]] = phi i32 [ [[INDVAR_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ]
296; CHECK-NEXT:    [[SUM:%.*]] = phi i32 [ [[SUM_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
297; CHECK-NEXT:    [[VOLLOAD:%.*]] = load atomic i8, ptr [[VOLATILE:%.*]] unordered, align 8
298; CHECK-NEXT:    fence acquire
299; CHECK-NEXT:    [[VOLCHK:%.*]] = icmp eq i8 [[VOLLOAD]], 0
300; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[VOLCHK]], i32 0, i32 [[ADDRLD]]
301; CHECK-NEXT:    [[SUM_NEXT]] = add i32 [[SEL]], [[SUM]]
302; CHECK-NEXT:    [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1
303; CHECK-NEXT:    [[COND:%.*]] = icmp slt i32 [[INDVAR_NEXT]], [[N:%.*]]
304; CHECK-NEXT:    br i1 [[COND]], label [[LOOP]], label [[LOOPEXIT:%.*]]
305; CHECK:       loopexit:
306; CHECK-NEXT:    [[SUM_LCSSA:%.*]] = phi i32 [ [[SUM]], [[LOOP]] ]
307; CHECK-NEXT:    ret i32 [[SUM_LCSSA]]
308;
309entry:
310  %gep = getelementptr inbounds i8, ptr %addr, i64 8
311  store atomic i32 5, ptr %gep unordered, align 8
312  fence release
  ; Marks the 4 bytes at %gep invariant; the token %invst is never consumed.
313  %invst = call ptr @llvm.invariant.start.p0(i64 4, ptr %gep)
314  br label %loop
315
316loop:
317  %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
318  %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
319  %volload = load atomic i8, ptr %volatile unordered, align 8
320  fence acquire
321  %volchk = icmp eq i8 %volload, 0
  ; Load under test: expected to end up in %entry.
322  %addrld = load atomic i32, ptr %gep unordered, align 8
323  %sel = select i1 %volchk, i32 0, i32 %addrld
324  %sum.next = add i32 %sel, %sum
325  %indvar.next = add i32 %indvar, 1
326  %cond = icmp slt i32 %indvar.next, %n
327  br i1 %cond, label %loop, label %loopexit
328
329loopexit:
330  ret i32 %sum
331}
332
333
334
335; Same as test above, but the load is no longer invariant (presence of
336; invariant.end). We cannot hoist the addrld out of loop.
; @test_fence1: same shape as @test_fence, except that %entry closes the
; invariant region again with invariant.end before entering the loop. The
; assertions below expect the %gep load (%addrld) to stay inside the loop.
337define i32 @test_fence1(ptr %addr, i32 %n, ptr %volatile) {
338; CHECK-LABEL: @test_fence1(
339; CHECK-NEXT:  entry:
340; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[ADDR:%.*]], i64 8
341; CHECK-NEXT:    store atomic i32 5, ptr [[GEP]] unordered, align 8
342; CHECK-NEXT:    fence release
343; CHECK-NEXT:    [[INVST:%.*]] = call ptr @llvm.invariant.start.p0(i64 4, ptr [[GEP]])
344; CHECK-NEXT:    call void @llvm.invariant.end.p0(ptr [[INVST]], i64 4, ptr [[GEP]])
345; CHECK-NEXT:    br label [[LOOP:%.*]]
346; CHECK:       loop:
347; CHECK-NEXT:    [[INDVAR:%.*]] = phi i32 [ [[INDVAR_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ]
348; CHECK-NEXT:    [[SUM:%.*]] = phi i32 [ [[SUM_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
349; CHECK-NEXT:    [[VOLLOAD:%.*]] = load atomic i8, ptr [[VOLATILE:%.*]] unordered, align 8
350; CHECK-NEXT:    fence acquire
351; CHECK-NEXT:    [[VOLCHK:%.*]] = icmp eq i8 [[VOLLOAD]], 0
352; CHECK-NEXT:    [[ADDRLD:%.*]] = load atomic i32, ptr [[GEP]] unordered, align 8
353; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[VOLCHK]], i32 0, i32 [[ADDRLD]]
354; CHECK-NEXT:    [[SUM_NEXT]] = add i32 [[SEL]], [[SUM]]
355; CHECK-NEXT:    [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1
356; CHECK-NEXT:    [[COND:%.*]] = icmp slt i32 [[INDVAR_NEXT]], [[N:%.*]]
357; CHECK-NEXT:    br i1 [[COND]], label [[LOOP]], label [[LOOPEXIT:%.*]]
358; CHECK:       loopexit:
359; CHECK-NEXT:    [[SUM_LCSSA:%.*]] = phi i32 [ [[SUM]], [[LOOP]] ]
360; CHECK-NEXT:    ret i32 [[SUM_LCSSA]]
361;
362entry:
363  %gep = getelementptr inbounds i8, ptr %addr, i64 8
364  store atomic i32 5, ptr %gep unordered, align 8
365  fence release
366  %invst = call ptr @llvm.invariant.start.p0(i64 4, ptr %gep)
  ; The token is consumed here, ending the invariant region before the loop.
367  call void @llvm.invariant.end.p0(ptr %invst, i64 4, ptr %gep)
368  br label %loop
369
370loop:
371  %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
372  %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
373  %volload = load atomic i8, ptr %volatile unordered, align 8
374  fence acquire
375  %volchk = icmp eq i8 %volload, 0
  ; Load under test: expected to stay here.
376  %addrld = load atomic i32, ptr %gep unordered, align 8
377  %sel = select i1 %volchk, i32 0, i32 %addrld
378  %sum.next = add i32 %sel, %sum
379  %indvar.next = add i32 %indvar, 1
380  %cond = icmp slt i32 %indvar.next, %n
381  br i1 %cond, label %loop, label %loopexit
382
383loopexit:
384  ret i32 %sum
385}
386
387; Same as the test above, but instead of invariant.end, the result of
388; invariant.start escapes through a call. We cannot hoist the load.
; @test_fence2: same shape as @test_fence, except that the token returned by
; invariant.start is handed to the external function
; @escaping.invariant.start before the loop. The assertions below expect the
; %gep load (%addrld) to stay inside the loop.
389define i32 @test_fence2(ptr %addr, i32 %n, ptr %volatile) {
390; CHECK-LABEL: @test_fence2(
391; CHECK-NEXT:  entry:
392; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[ADDR:%.*]], i64 8
393; CHECK-NEXT:    store atomic i32 5, ptr [[GEP]] unordered, align 8
394; CHECK-NEXT:    fence release
395; CHECK-NEXT:    [[INVST:%.*]] = call ptr @llvm.invariant.start.p0(i64 4, ptr [[GEP]])
396; CHECK-NEXT:    call void @escaping.invariant.start(ptr [[INVST]])
397; CHECK-NEXT:    br label [[LOOP:%.*]]
398; CHECK:       loop:
399; CHECK-NEXT:    [[INDVAR:%.*]] = phi i32 [ [[INDVAR_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ]
400; CHECK-NEXT:    [[SUM:%.*]] = phi i32 [ [[SUM_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
401; CHECK-NEXT:    [[VOLLOAD:%.*]] = load atomic i8, ptr [[VOLATILE:%.*]] unordered, align 8
402; CHECK-NEXT:    fence acquire
403; CHECK-NEXT:    [[VOLCHK:%.*]] = icmp eq i8 [[VOLLOAD]], 0
404; CHECK-NEXT:    [[ADDRLD:%.*]] = load atomic i32, ptr [[GEP]] unordered, align 8
405; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[VOLCHK]], i32 0, i32 [[ADDRLD]]
406; CHECK-NEXT:    [[SUM_NEXT]] = add i32 [[SEL]], [[SUM]]
407; CHECK-NEXT:    [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1
408; CHECK-NEXT:    [[COND:%.*]] = icmp slt i32 [[INDVAR_NEXT]], [[N:%.*]]
409; CHECK-NEXT:    br i1 [[COND]], label [[LOOP]], label [[LOOPEXIT:%.*]]
410; CHECK:       loopexit:
411; CHECK-NEXT:    [[SUM_LCSSA:%.*]] = phi i32 [ [[SUM]], [[LOOP]] ]
412; CHECK-NEXT:    ret i32 [[SUM_LCSSA]]
413;
414entry:
415  %gep = getelementptr inbounds i8, ptr %addr, i64 8
416  store atomic i32 5, ptr %gep unordered, align 8
417  fence release
418  %invst = call ptr @llvm.invariant.start.p0(i64 4, ptr %gep)
  ; The token leaves this function through an opaque call.
419  call void @escaping.invariant.start(ptr %invst)
420  br label %loop
421
422loop:
423  %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
424  %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
425  %volload = load atomic i8, ptr %volatile unordered, align 8
426  fence acquire
427  %volchk = icmp eq i8 %volload, 0
  ; Load under test: expected to stay here.
428  %addrld = load atomic i32, ptr %gep unordered, align 8
429  %sel = select i1 %volchk, i32 0, i32 %addrld
430  %sum.next = add i32 %sel, %sum
431  %indvar.next = add i32 %indvar, 1
432  %cond = icmp slt i32 %indvar.next, %n
433  br i1 %cond, label %loop, label %loopexit
434
435loopexit:
436  ret i32 %sum
437}
438
439; Consider the load operand %addr.i being passed to invariant.start. (This
440; comment originally described a bitcast of %addr.i; the IR below has none.)
; @test_fence3: variant of @test_fence in which the address is computed with
; an i32-typed getelementptr (index 8, counted in i32 elements) and that
; pointer is passed straight to invariant.start. The assertions below expect
; the load of %addr.i (%addrld) to be emitted in %entry right after
; invariant.start.
441define i32 @test_fence3(ptr %addr, i32 %n, ptr %volatile) {
442; CHECK-LABEL: @test_fence3(
443; CHECK-NEXT:  entry:
444; CHECK-NEXT:    [[ADDR_I:%.*]] = getelementptr inbounds i32, ptr [[ADDR:%.*]], i64 8
445; CHECK-NEXT:    store atomic i32 5, ptr [[ADDR_I]] unordered, align 8
446; CHECK-NEXT:    fence release
447; CHECK-NEXT:    [[INVST:%.*]] = call ptr @llvm.invariant.start.p0(i64 4, ptr [[ADDR_I]])
448; CHECK-NEXT:    [[ADDRLD:%.*]] = load atomic i32, ptr [[ADDR_I]] unordered, align 8
449; CHECK-NEXT:    br label [[LOOP:%.*]]
450; CHECK:       loop:
451; CHECK-NEXT:    [[INDVAR:%.*]] = phi i32 [ [[INDVAR_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ]
452; CHECK-NEXT:    [[SUM:%.*]] = phi i32 [ [[SUM_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
453; CHECK-NEXT:    [[VOLLOAD:%.*]] = load atomic i8, ptr [[VOLATILE:%.*]] unordered, align 8
454; CHECK-NEXT:    fence acquire
455; CHECK-NEXT:    [[VOLCHK:%.*]] = icmp eq i8 [[VOLLOAD]], 0
456; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[VOLCHK]], i32 0, i32 [[ADDRLD]]
457; CHECK-NEXT:    [[SUM_NEXT]] = add i32 [[SEL]], [[SUM]]
458; CHECK-NEXT:    [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1
459; CHECK-NEXT:    [[COND:%.*]] = icmp slt i32 [[INDVAR_NEXT]], [[N:%.*]]
460; CHECK-NEXT:    br i1 [[COND]], label [[LOOP]], label [[LOOPEXIT:%.*]]
461; CHECK:       loopexit:
462; CHECK-NEXT:    [[SUM_LCSSA:%.*]] = phi i32 [ [[SUM]], [[LOOP]] ]
463; CHECK-NEXT:    ret i32 [[SUM_LCSSA]]
464;
465entry:
466  %addr.i = getelementptr inbounds i32, ptr %addr, i64 8
467  store atomic i32 5, ptr %addr.i unordered, align 8
468  fence release
469  %invst = call ptr @llvm.invariant.start.p0(i64 4, ptr %addr.i)
470  br label %loop
471
472loop:
473  %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
474  %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
475  %volload = load atomic i8, ptr %volatile unordered, align 8
476  fence acquire
477  %volchk = icmp eq i8 %volload, 0
  ; Load under test: expected to end up in %entry.
478  %addrld = load atomic i32, ptr %addr.i unordered, align 8
479  %sel = select i1 %volchk, i32 0, i32 %addrld
480  %sum.next = add i32 %sel, %sum
481  %indvar.next = add i32 %indvar, 1
482  %cond = icmp slt i32 %indvar.next, %n
483  br i1 %cond, label %loop, label %loopexit
484
485loopexit:
486  ret i32 %sum
487}
488
489; We should not hoist the addrld out of the loop.
; @test_fence4: here the store to %addr.i, the release fence and the
; invariant.start call are all inside the loop, so the location is written on
; every iteration before being marked invariant. The assertions below expect
; the loop body to be left as written: only the getelementptr sits in %entry
; and the load of %addr.i (%addrld) stays inside the loop.
490define i32 @test_fence4(ptr %addr, i32 %n, ptr %volatile) {
491; CHECK-LABEL: @test_fence4(
492; CHECK-NEXT:  entry:
493; CHECK-NEXT:    [[ADDR_I:%.*]] = getelementptr inbounds i32, ptr [[ADDR:%.*]], i64 8
494; CHECK-NEXT:    br label [[LOOP:%.*]]
495; CHECK:       loop:
496; CHECK-NEXT:    [[INDVAR:%.*]] = phi i32 [ [[INDVAR_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ]
497; CHECK-NEXT:    [[SUM:%.*]] = phi i32 [ [[SUM_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
498; CHECK-NEXT:    store atomic i32 5, ptr [[ADDR_I]] unordered, align 8
499; CHECK-NEXT:    fence release
500; CHECK-NEXT:    [[INVST:%.*]] = call ptr @llvm.invariant.start.p0(i64 4, ptr [[ADDR_I]])
501; CHECK-NEXT:    [[VOLLOAD:%.*]] = load atomic i8, ptr [[VOLATILE:%.*]] unordered, align 8
502; CHECK-NEXT:    fence acquire
503; CHECK-NEXT:    [[VOLCHK:%.*]] = icmp eq i8 [[VOLLOAD]], 0
504; CHECK-NEXT:    [[ADDRLD:%.*]] = load atomic i32, ptr [[ADDR_I]] unordered, align 8
505; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[VOLCHK]], i32 0, i32 [[ADDRLD]]
506; CHECK-NEXT:    [[SUM_NEXT]] = add i32 [[SEL]], [[SUM]]
507; CHECK-NEXT:    [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1
508; CHECK-NEXT:    [[COND:%.*]] = icmp slt i32 [[INDVAR_NEXT]], [[N:%.*]]
509; CHECK-NEXT:    br i1 [[COND]], label [[LOOP]], label [[LOOPEXIT:%.*]]
510; CHECK:       loopexit:
511; CHECK-NEXT:    [[SUM_LCSSA:%.*]] = phi i32 [ [[SUM]], [[LOOP]] ]
512; CHECK-NEXT:    ret i32 [[SUM_LCSSA]]
513;
514entry:
515  %addr.i = getelementptr inbounds i32, ptr %addr, i64 8
516  br label %loop
517
518loop:
519  %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
520  %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
  ; Store and invariant.start are repeated on every iteration.
521  store atomic i32 5, ptr %addr.i unordered, align 8
522  fence release
523  %invst = call ptr @llvm.invariant.start.p0(i64 4, ptr %addr.i)
524  %volload = load atomic i8, ptr %volatile unordered, align 8
525  fence acquire
526  %volchk = icmp eq i8 %volload, 0
  ; Load under test: expected to stay here.
527  %addrld = load atomic i32, ptr %addr.i unordered, align 8
528  %sel = select i1 %volchk, i32 0, i32 %addrld
529  %sum.next = add i32 %sel, %sum
530  %indvar.next = add i32 %indvar, 1
531  %cond = icmp slt i32 %indvar.next, %n
532  br i1 %cond, label %loop, label %loopexit
533
534loopexit:
535  ret i32 %sum
536}
537
538; We can't hoist the invariant load out of the loop because
539; the marker is given a variable size (-1).
; @test_fence5: same shape as @test_fence, except that invariant.start is
; called with size -1 instead of 4. The assertions below expect the %gep load
; (%addrld) to stay inside the loop.
540define i32 @test_fence5(ptr %addr, i32 %n, ptr %volatile) {
541; CHECK-LABEL: @test_fence5(
542; CHECK-NEXT:  entry:
543; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[ADDR:%.*]], i64 8
544; CHECK-NEXT:    store atomic i32 5, ptr [[GEP]] unordered, align 8
545; CHECK-NEXT:    fence release
546; CHECK-NEXT:    [[INVST:%.*]] = call ptr @llvm.invariant.start.p0(i64 -1, ptr [[GEP]])
547; CHECK-NEXT:    br label [[LOOP:%.*]]
548; CHECK:       loop:
549; CHECK-NEXT:    [[INDVAR:%.*]] = phi i32 [ [[INDVAR_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ]
550; CHECK-NEXT:    [[SUM:%.*]] = phi i32 [ [[SUM_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
551; CHECK-NEXT:    [[VOLLOAD:%.*]] = load atomic i8, ptr [[VOLATILE:%.*]] unordered, align 8
552; CHECK-NEXT:    fence acquire
553; CHECK-NEXT:    [[VOLCHK:%.*]] = icmp eq i8 [[VOLLOAD]], 0
554; CHECK-NEXT:    [[ADDRLD:%.*]] = load atomic i32, ptr [[GEP]] unordered, align 8
555; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[VOLCHK]], i32 0, i32 [[ADDRLD]]
556; CHECK-NEXT:    [[SUM_NEXT]] = add i32 [[SEL]], [[SUM]]
557; CHECK-NEXT:    [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1
558; CHECK-NEXT:    [[COND:%.*]] = icmp slt i32 [[INDVAR_NEXT]], [[N:%.*]]
559; CHECK-NEXT:    br i1 [[COND]], label [[LOOP]], label [[LOOPEXIT:%.*]]
560; CHECK:       loopexit:
561; CHECK-NEXT:    [[SUM_LCSSA:%.*]] = phi i32 [ [[SUM]], [[LOOP]] ]
562; CHECK-NEXT:    ret i32 [[SUM_LCSSA]]
563;
564entry:
565  %gep = getelementptr inbounds i8, ptr %addr, i64 8
566  store atomic i32 5, ptr %gep unordered, align 8
567  fence release
  ; Size operand is -1 rather than a concrete byte count.
568  %invst = call ptr @llvm.invariant.start.p0(i64 -1, ptr %gep)
569  br label %loop
570
571loop:
572  %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
573  %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
574  %volload = load atomic i8, ptr %volatile unordered, align 8
575  fence acquire
576  %volchk = icmp eq i8 %volload, 0
  ; Load under test: expected to stay here.
577  %addrld = load atomic i32, ptr %gep unordered, align 8
578  %sel = select i1 %volchk, i32 0, i32 %addrld
579  %sum.next = add i32 %sel, %sum
580  %indvar.next = add i32 %indvar, 1
581  %cond = icmp slt i32 %indvar.next, %n
582  br i1 %cond, label %loop, label %loopexit
583
584loopexit:
585  ret i32 %sum
586}
587
; External callee taking an i1; called in the loop of @test_fence6.
588declare void @g(i1)
589
; External i8 global that @test_fence6 marks invariant and then loads in a loop.
590@a = external global i8
591
592; FIXME: Support hoisting invariant loads of globals.
; @test_fence6: invariant.start is applied to the global @a (1 byte) in %entry,
; and the loop %f, which branches back to itself unconditionally and has no
; exit, loads @a on every iteration. The assertions below record the current
; behaviour: the load and the instructions depending on it all stay inside %f.
593define void @test_fence6() {
594; CHECK-LABEL: @test_fence6(
595; CHECK-NEXT:  entry:
596; CHECK-NEXT:    [[I:%.*]] = call ptr @llvm.invariant.start.p0(i64 1, ptr @a)
597; CHECK-NEXT:    br label [[F:%.*]]
598; CHECK:       f:
599; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr @a, align 1
600; CHECK-NEXT:    [[TMP1:%.*]] = and i8 [[TMP0]], 0
601; CHECK-NEXT:    [[T:%.*]] = icmp eq i8 [[TMP1]], 0
602; CHECK-NEXT:    tail call void @g(i1 [[T]])
603; CHECK-NEXT:    br label [[F]]
604;
605entry:
606  %i = call ptr @llvm.invariant.start.p0(i64 1, ptr @a)
607  br label %f
608
609f:
  ; Load under test: currently expected to stay in the loop (see FIXME above
  ; the function).
610  %0 = load i8, ptr @a
611  %1 = and i8 %0, 0
612  %t = icmp eq i8 %1, 0
613  tail call void @g(i1 %t)
614  br label %f
615}
616