xref: /llvm-project/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll (revision 2568e52a733a9767014e0d8ccb685553479a3031)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -mtriple=x86_64 -mattr=+cf -passes='simplifycfg<hoist-loads-stores-with-cond-faulting>' -simplifycfg-require-and-preserve-domtree=1 -S | FileCheck %s
3
4;; Basic case: check masked.load/store is generated for i16/i32/i64.
;; %if.true holds three loads and three stores and then falls through to
;; %if.false, the other successor of the conditional branch. The assertions
;; expect all six accesses as masked intrinsics in %entry, sharing a single
;; <1 x i1> mask built from %cond, with no conditional branch left.
5define void @basic(i1 %cond, ptr %b, ptr %p, ptr %q) {
6; CHECK-LABEL: @basic(
7; CHECK-NEXT:  entry:
8; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i1 [[COND:%.*]] to <1 x i1>
9; CHECK-NEXT:    [[TMP1:%.*]] = call <1 x i16> @llvm.masked.load.v1i16.p0(ptr [[P:%.*]], i32 2, <1 x i1> [[TMP0]], <1 x i16> poison)
10; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <1 x i16> [[TMP1]] to i16
11; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q:%.*]], i32 4, <1 x i1> [[TMP0]], <1 x i32> poison)
12; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <1 x i32> [[TMP3]] to i32
13; CHECK-NEXT:    [[TMP5:%.*]] = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr [[B:%.*]], i32 8, <1 x i1> [[TMP0]], <1 x i64> poison)
14; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to i64
15; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i16 [[TMP2]] to <1 x i16>
16; CHECK-NEXT:    call void @llvm.masked.store.v1i16.p0(<1 x i16> [[TMP7]], ptr [[B]], i32 2, <1 x i1> [[TMP0]])
17; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i32 [[TMP4]] to <1 x i32>
18; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP8]], ptr [[P]], i32 4, <1 x i1> [[TMP0]])
19; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i64 [[TMP6]] to <1 x i64>
20; CHECK-NEXT:    call void @llvm.masked.store.v1i64.p0(<1 x i64> [[TMP9]], ptr [[Q]], i32 8, <1 x i1> [[TMP0]])
21; CHECK-NEXT:    ret void
22;
23entry:
24  br i1 %cond, label %if.true, label %if.false
25
26if.false:
27  br label %if.end
28
29if.true:
30  %0 = load i16, ptr %p, align 2
31  %1 = load i32, ptr %q, align 4
32  %2 = load i64, ptr %b, align 8
33  store i16 %0, ptr %b, align 2
34  store i32 %1, ptr %p, align 4
35  store i64 %2, ptr %q, align 8
36  br label %if.false
37
38if.end:
39  ret void
40}
41
42;; Successor 1 branches to successor 0.
;; The guarded block (%if.then) runs when %tobool is false, so the expected
;; mask is the inverted condition (xor with true). The input load/store carry
;; no explicit alignment; the expected intrinsics use alignment 4.
43define void @succ1to0(ptr %p, ptr %q, i32 %a) {
44; CHECK-LABEL: @succ1to0(
45; CHECK-NEXT:  entry:
46; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[A:%.*]], 0
47; CHECK-NEXT:    [[TMP0:%.*]] = xor i1 [[TOBOOL]], true
48; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
49; CHECK-NEXT:    [[TMP2:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q:%.*]], i32 4, <1 x i1> [[TMP1]], <1 x i32> poison)
50; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <1 x i32> [[TMP2]] to i32
51; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32 [[TMP3]] to <1 x i32>
52; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP4]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP1]])
53; CHECK-NEXT:    ret void
54;
55entry:
56  %tobool = icmp ne i32 %a, 0
57  br i1 %tobool, label %if.end, label %if.then
58
59if.end:
60  ret void
61
62if.then:
63  %0 = load i32, ptr %q
64  store i32 %0, ptr %p
65  br label %if.end
66}
67
68;; Successor 1 branches to successor 0 and there is a phi node.
;; The phi's incoming value from %entry (0) is expected to become the masked
;; load's passthrough operand (zeroinitializer), so the loaded value can be
;; returned directly and the phi disappears.
69define i32 @succ1to0_phi(ptr %p)  {
70; CHECK-LABEL: @succ1to0_phi(
71; CHECK-NEXT:  entry:
72; CHECK-NEXT:    [[COND:%.*]] = icmp eq ptr [[P:%.*]], null
73; CHECK-NEXT:    [[TMP0:%.*]] = xor i1 [[COND]], true
74; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
75; CHECK-NEXT:    [[TMP2:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[P]], i32 4, <1 x i1> [[TMP1]], <1 x i32> zeroinitializer)
76; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <1 x i32> [[TMP2]] to i32
77; CHECK-NEXT:    ret i32 [[TMP3]]
78;
79entry:
80  %cond = icmp eq ptr %p, null
81  br i1 %cond, label %if.true, label %if.false
82
83if.false:
84  %0 = load i32, ptr %p
85  br label %if.true
86
87if.true:
88  %res = phi i32 [ %0, %if.false ], [ 0, %entry ]
89  ret i32 %res
90}
91
92;; Successor 0 branches to successor 1.
;; Only the load/store pair in %if.true is predicated on %cond. The store to %q
;; in %if.false runs on both paths and is expected to remain a plain store.
93define void @succ0to1(i32 %a, ptr %b, ptr %p, ptr %q) {
94; CHECK-LABEL: @succ0to1(
95; CHECK-NEXT:  entry:
96; CHECK-NEXT:    [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
97; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i1 [[COND]] to <1 x i1>
98; CHECK-NEXT:    [[TMP1:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[B:%.*]], i32 4, <1 x i1> [[TMP0]], <1 x i32> poison)
99; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <1 x i32> [[TMP1]] to i32
100; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32 [[TMP2]] to <1 x i32>
101; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP3]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP0]])
102; CHECK-NEXT:    store i32 1, ptr [[Q:%.*]], align 4
103; CHECK-NEXT:    ret void
104;
105entry:
106  %cond = icmp eq i32 %a, 0
107  br i1 %cond, label %if.true, label %if.false
108
109if.false:
110  store i32 1, ptr %q
111  br label %if.end
112
113if.true:
114  %0 = load i32, ptr %b
115  store i32 %0, ptr %p
116  br label %if.false
117
118if.end:
119  ret void
120}
121
122;; Load after store can be hoisted.
;; The store precedes the load in %if.true and that order is kept in the
;; expected output. %if.true ends in its own ret, so the two return values are
;; expected to be merged with a select on %cond.
123define i64 @load_after_store(i32 %a, ptr %b, ptr %p) {
124; CHECK-LABEL: @load_after_store(
125; CHECK-NEXT:  entry:
126; CHECK-NEXT:    [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
127; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i1 [[COND]] to <1 x i1>
128; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 1), ptr [[B:%.*]], i32 4, <1 x i1> [[TMP0]])
129; CHECK-NEXT:    [[TMP1:%.*]] = call <1 x i16> @llvm.masked.load.v1i16.p0(ptr [[P:%.*]], i32 2, <1 x i1> [[TMP0]], <1 x i16> poison)
130; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <1 x i16> [[TMP1]] to i16
131; CHECK-NEXT:    [[ZEXT:%.*]] = zext i16 [[TMP2]] to i64
132; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[COND]], i64 [[ZEXT]], i64 0
133; CHECK-NEXT:    ret i64 [[SPEC_SELECT]]
134;
135entry:
136  %cond = icmp eq i32 %a, 0
137  br i1 %cond, label %if.true, label %if.end
138
139if.true:
140  store i32 1, ptr %b
141  %0 = load i16, ptr %p
142  %zext = zext i16 %0 to i64
143  ret i64 %zext
144
145if.end:
146  ret i64 0
147}
148
149;; Speculatable memory read doesn't prevent the hoist.
;; @read_memory_only is declared readonly/speculatable at the end of the file.
;; The call is expected to be hoisted unpredicated into %entry, with its result
;; written through a masked store.
150define void @load_skip_speculatable_memory_read(i32 %a, ptr %p, ptr %q) {
151; CHECK-LABEL: @load_skip_speculatable_memory_read(
152; CHECK-NEXT:  entry:
153; CHECK-NEXT:    [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
154; CHECK-NEXT:    [[READ:%.*]] = call i32 @read_memory_only()
155; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i1 [[COND]] to <1 x i1>
156; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32 [[READ]] to <1 x i32>
157; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP1]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP0]])
158; CHECK-NEXT:    store i32 1, ptr [[Q:%.*]], align 4
159; CHECK-NEXT:    ret void
160;
161entry:
162  %cond = icmp eq i32 %a, 0
163  br i1 %cond, label %if.true, label %if.false
164
165if.false:
166  store i32 1, ptr %q
167  br label %if.end
168
169if.true:
170  %read = call i32 @read_memory_only()
171  store i32 %read, ptr %p
172  br label %if.false
173
174if.end:
175  ret void
176}
177
178;; Source of the load can be a GEP.
;; The GEP itself is expected to be hoisted unconditionally; only the load
;; through it is masked, with the phi's incoming 0 as passthrough.
179define i32 @load_from_gep(ptr %p)  {
180; CHECK-LABEL: @load_from_gep(
181; CHECK-NEXT:  entry:
182; CHECK-NEXT:    [[COND:%.*]] = icmp eq ptr [[P:%.*]], null
183; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 16
184; CHECK-NEXT:    [[TMP0:%.*]] = xor i1 [[COND]], true
185; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
186; CHECK-NEXT:    [[TMP2:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[ARRAYIDX]], i32 4, <1 x i1> [[TMP1]], <1 x i32> zeroinitializer)
187; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <1 x i32> [[TMP2]] to i32
188; CHECK-NEXT:    ret i32 [[TMP3]]
189;
190entry:
191  %cond = icmp eq ptr %p, null
192  br i1 %cond, label %if.true, label %if.false
193
194if.false:
195  %arrayidx = getelementptr inbounds i8, ptr %p, i64 16
196  %0 = load i32, ptr %arrayidx
197  br label %if.true
198
199if.true:
200  %res = phi i32 [ %0, %if.false ], [ 0, %entry ]
201  ret i32 %res
202}
203
204;; Non-debug metadata (!range, !annotation) is kept.
;; In the expected output !range shows up as a range(...) return attribute on
;; the masked load call, while !annotation stays attached as metadata.
;; Each masked intrinsic is expected to carry the alignment of the access it
;; replaces (note the i16 store uses align 4 and the i32 store align 2).
205define void @nondebug_metadata(i1 %cond, ptr %p, ptr %q) {
206; CHECK-LABEL: @nondebug_metadata(
207; CHECK-NEXT:  entry:
208; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i1 [[COND:%.*]] to <1 x i1>
209; CHECK-NEXT:    [[TMP1:%.*]] = call range(i16 0, 10) <1 x i16> @llvm.masked.load.v1i16.p0(ptr [[P:%.*]], i32 2, <1 x i1> [[TMP0]], <1 x i16> poison)
210; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <1 x i16> [[TMP1]] to i16
211; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q:%.*]], i32 4, <1 x i1> [[TMP0]], <1 x i32> poison), !annotation [[META5:![0-9]+]]
212; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <1 x i32> [[TMP3]] to i32
213; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i16 [[TMP2]] to <1 x i16>
214; CHECK-NEXT:    call void @llvm.masked.store.v1i16.p0(<1 x i16> [[TMP5]], ptr [[Q]], i32 4, <1 x i1> [[TMP0]]), !annotation [[META5]]
215; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i32 [[TMP4]] to <1 x i32>
216; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP6]], ptr [[P]], i32 2, <1 x i1> [[TMP0]])
217; CHECK-NEXT:    ret void
218;
219entry:
220  br i1 %cond, label %if.true, label %if.false
221
222if.false:
223  ret void
224
225if.true:
226  %0 = load i16, ptr %p, align 2, !range !{i16 0, i16 10}
227  %1 = load i32, ptr %q, align 4, !annotation !11
228  store i16 %0, ptr %q, align 4, !annotation !11
229  store i32 %1, ptr %p, align 2
230  br label %if.false
231}
232
;; A store tagged with !DIAssignID, whose ID is also referenced by an
;; llvm.dbg.assign call in the join block. The store is expected to become a
;; masked store and the phi a select. The assertions below mention neither the
;; !DIAssignID attachment nor the llvm.dbg.assign call.
;; NOTE(review): presumably the point is that this debug metadata must not
;; block the transform - confirm against the pass.
233define i16 @debug_metadata_diassign(i1 %cond, i16 %a, ptr %p) {
234; CHECK-LABEL: @debug_metadata_diassign(
235; CHECK-NEXT:  bb0:
236; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i1 [[COND:%.*]] to <1 x i1>
237; CHECK-NEXT:    call void @llvm.masked.store.v1i16.p0(<1 x i16> splat (i16 7), ptr [[P:%.*]], i32 4, <1 x i1> [[TMP0]])
238; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[COND]], i16 3, i16 2
239; CHECK-NEXT:    ret i16 [[SPEC_SELECT]]
240;
241bb0:
242  br i1 %cond, label %if.true, label %if.false
243
244if.true:
245  store i16 7, ptr %p, align 4, !DIAssignID !9
246  br label %if.false
247
248if.false:
249  %ret = phi i16 [ 2, %bb0 ], [ 3, %if.true ]
250  call void @llvm.dbg.assign(metadata i16 %ret, metadata !8, metadata !DIExpression(), metadata !9, metadata ptr %p, metadata !DIExpression()), !dbg !7
251  ret i16 %ret
252}
253
254;; Must not crash when combined with the transform controlled by simplifycfg-hoist-cond-stores.
;; %if.true holds an i32 store followed by an i1 store to the same pointer that
;; %entry already stored to. Expected: the i32 store becomes a masked store,
;; and the i1 store becomes an unconditional store of a select.
255define i32 @hoist_cond_stores(i1 %cond, ptr %p) {
256; CHECK-LABEL: @hoist_cond_stores(
257; CHECK-NEXT:  entry:
258; CHECK-NEXT:    store i1 false, ptr [[P:%.*]], align 2
259; CHECK-NEXT:    [[SPEC_STORE_SELECT:%.*]] = select i1 [[COND:%.*]], i1 false, i1 false
260; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i1 [[COND]] to <1 x i1>
261; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> zeroinitializer, ptr [[P]], i32 8, <1 x i1> [[TMP0]])
262; CHECK-NEXT:    store i1 [[SPEC_STORE_SELECT]], ptr [[P]], align 2
263; CHECK-NEXT:    ret i32 0
264;
265entry:
266  store i1 false, ptr %p, align 2
267  br i1 %cond, label %if.true, label %if.false
268
269if.true:                            ; preds = %entry
270  store i32 0, ptr %p, align 8
271  store i1 false, ptr %p, align 2
272  br label %if.false
273
274if.false:                                    ; preds = %if.true, %entry
275  ret i32 0
276}
277
278;; Both of successor 0 and successor 1 have a single predecessor.
;; Both blocks end in ret and both contain memory accesses. Expected: the store
;; from %if.end uses a mask built from %tobool, the load/store from %if.then
;; use the inverted mask, and the two return values are merged by a select.
;; Note that the expected masked store consumes the <1 x i32> load result
;; directly; the scalar bitcast of the load has no user in the asserted output.
279define i32 @single_predecessor(ptr %p, ptr %q, i32 %a) {
280; CHECK-LABEL: @single_predecessor(
281; CHECK-NEXT:  entry:
282; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[A:%.*]], 0
283; CHECK-NEXT:    [[TMP0:%.*]] = xor i1 [[TOBOOL]], true
284; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
285; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i1 [[TOBOOL]] to <1 x i1>
286; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 1), ptr [[Q:%.*]], i32 4, <1 x i1> [[TMP2]])
287; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q]], i32 4, <1 x i1> [[TMP1]], <1 x i32> poison)
288; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <1 x i32> [[TMP3]] to i32
289; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP3]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP1]])
290; CHECK-NEXT:    [[DOT:%.*]] = select i1 [[TOBOOL]], i32 2, i32 3
291; CHECK-NEXT:    ret i32 [[DOT]]
292;
293entry:
294  %tobool = icmp ne i32 %a, 0
295  br i1 %tobool, label %if.end, label %if.then
296
297if.end:
298  store i32 1, ptr %q
299  ret i32 2
300
301if.then:
302  %0 = load i32, ptr %q
303  store i32 %0, ptr %p
304  ret i32 3
305}
306
307;; Hoist 6 stores.
;; All six stores in the guarded block are expected to become masked stores
;; sharing one mask.
;; NOTE(review): 6 appears to be the largest count the pass accepts with the
;; options on the RUN line (the 7-store variant is left untouched) - confirm
;; against the pass's threshold option.
308define void @threshold_6(i1 %cond, ptr %p1, ptr %p2, ptr %p3, ptr %p4, ptr %p5, ptr %p6) {
309; CHECK-LABEL: @threshold_6(
310; CHECK-NEXT:  entry:
311; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i1 [[COND:%.*]] to <1 x i1>
312; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 1), ptr [[P1:%.*]], i32 4, <1 x i1> [[TMP0]])
313; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 2), ptr [[P2:%.*]], i32 4, <1 x i1> [[TMP0]])
314; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 3), ptr [[P3:%.*]], i32 4, <1 x i1> [[TMP0]])
315; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 4), ptr [[P4:%.*]], i32 4, <1 x i1> [[TMP0]])
316; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 5), ptr [[P5:%.*]], i32 4, <1 x i1> [[TMP0]])
317; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 6), ptr [[P6:%.*]], i32 4, <1 x i1> [[TMP0]])
318; CHECK-NEXT:    ret void
319;
320entry:
321  br i1 %cond, label %if.true, label %if.false
322
323if.true:
324  store i32 1, ptr %p1, align 4
325  store i32 2, ptr %p2, align 4
326  store i32 3, ptr %p3, align 4
327  store i32 4, ptr %p4, align 4
328  store i32 5, ptr %p5, align 4
329  store i32 6, ptr %p6, align 4
330  br label %if.false
331
332if.false:
333  ret void
334}
335
336;; Do not hoist 7 stores.
;; Same shape as the 6-store case with one more store; the expected output
;; keeps the conditional branch and all seven plain stores.
337define void @threshold_7(i1 %cond, ptr %p1, ptr %p2, ptr %p3, ptr %p4, ptr %p5, ptr %p6, ptr %p7) {
338; CHECK-LABEL: @threshold_7(
339; CHECK-NEXT:  entry:
340; CHECK-NEXT:    br i1 [[COND:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
341; CHECK:       if.true:
342; CHECK-NEXT:    store i32 1, ptr [[P1:%.*]], align 4
343; CHECK-NEXT:    store i32 2, ptr [[P2:%.*]], align 4
344; CHECK-NEXT:    store i32 3, ptr [[P3:%.*]], align 4
345; CHECK-NEXT:    store i32 4, ptr [[P4:%.*]], align 4
346; CHECK-NEXT:    store i32 5, ptr [[P5:%.*]], align 4
347; CHECK-NEXT:    store i32 6, ptr [[P6:%.*]], align 4
348; CHECK-NEXT:    store i32 7, ptr [[P7:%.*]], align 4
349; CHECK-NEXT:    br label [[IF_FALSE]]
350; CHECK:       if.false:
351; CHECK-NEXT:    ret void
352;
353entry:
354  br i1 %cond, label %if.true, label %if.false
355
356if.true:
357  store i32 1, ptr %p1, align 4
358  store i32 2, ptr %p2, align 4
359  store i32 3, ptr %p3, align 4
360  store i32 4, ptr %p4, align 4
361  store i32 5, ptr %p5, align 4
362  store i32 6, ptr %p6, align 4
363  store i32 7, ptr %p7, align 4
364  br label %if.false
365
366if.false:
367  ret void
368}
369
370;; Do not hoist if the instructions to be hoisted are too expensive.
;; %if.false contains a udiv, two muls and a select besides its two stores.
;; The only expected change is that the two returns are merged into a
;; common.ret block; all loads and stores stay plain and conditional.
371define i32 @not_cheap_to_hoist(i32 %a, ptr %b, ptr %p, ptr %q, i32 %v0, i32 %v1, i32 %v2, i1 %cc) {
372; CHECK-LABEL: @not_cheap_to_hoist(
373; CHECK-NEXT:  entry:
374; CHECK-NEXT:    [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
375; CHECK-NEXT:    br i1 [[COND]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
376; CHECK:       common.ret:
377; CHECK-NEXT:    [[COMMON_RET_OP:%.*]] = phi i32 [ [[VVVV:%.*]], [[IF_FALSE]] ], [ 0, [[IF_TRUE]] ]
378; CHECK-NEXT:    ret i32 [[COMMON_RET_OP]]
379; CHECK:       if.false:
380; CHECK-NEXT:    store i64 1, ptr [[P:%.*]], align 8
381; CHECK-NEXT:    store i16 2, ptr [[Q:%.*]], align 2
382; CHECK-NEXT:    [[V:%.*]] = udiv i32 [[A]], 12345
383; CHECK-NEXT:    [[VV:%.*]] = mul i32 [[V]], [[V0:%.*]]
384; CHECK-NEXT:    [[VVV:%.*]] = mul i32 [[VV]], [[V1:%.*]]
385; CHECK-NEXT:    [[VVVV]] = select i1 [[CC:%.*]], i32 [[V2:%.*]], i32 [[VVV]]
386; CHECK-NEXT:    br label [[COMMON_RET:%.*]]
387; CHECK:       if.true:
388; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[B:%.*]], align 4
389; CHECK-NEXT:    store i32 [[TMP0]], ptr [[P]], align 4
390; CHECK-NEXT:    br label [[COMMON_RET]]
391;
392entry:
393  %cond = icmp eq i32 %a, 0
394  br i1 %cond, label %if.true, label %if.false
395
396if.false:
397  store i64 1, ptr %p
398  store i16 2, ptr %q
399
400  %v = udiv i32 %a, 12345
401  %vv = mul i32 %v, %v0
402  %vvv = mul i32 %vv, %v1
403  %vvvv = select i1 %cc, i32 %v2, i32 %vvv
404  ret i32 %vvvv
405
406if.true:
407  %0 = load i32, ptr %b
408  store i32 %0, ptr %p
409  br label %if.end
410
411if.end:
412  ret i32 0
413}
414
415;; Do not hoist if there is more than 1 predecessor.
;; %if.end and %if.then branch to each other, so each has two predecessors
;; (%entry and the other block). The expected output is the unchanged CFG.
416define void @not_single_predecessor(ptr %p, ptr %q, i32 %a) {
417; CHECK-LABEL: @not_single_predecessor(
418; CHECK-NEXT:  entry:
419; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[A:%.*]], 0
420; CHECK-NEXT:    br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
421; CHECK:       if.end:
422; CHECK-NEXT:    br label [[IF_THEN]]
423; CHECK:       if.then:
424; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[Q:%.*]], align 4
425; CHECK-NEXT:    store i32 [[TMP0]], ptr [[P:%.*]], align 4
426; CHECK-NEXT:    br label [[IF_END]]
427;
428entry:
429  %tobool = icmp ne i32 %a, 0
430  br i1 %tobool, label %if.end, label %if.then
431
432if.end:
433  br label %if.then
434
435if.then:
436  %1 = load i32, ptr %q
437  store i32 %1, ptr %p
438  br label %if.end
439}
440
441;; Do not hoist because i8 is not supported by conditional faulting.
;; Every memory access here is i8. The expected output keeps both branches
;; and the plain load/stores.
442define void @not_supported_type(i8 %a, ptr %b, ptr %p, ptr %q) {
443; CHECK-LABEL: @not_supported_type(
444; CHECK-NEXT:  entry:
445; CHECK-NEXT:    [[COND:%.*]] = icmp eq i8 [[A:%.*]], 0
446; CHECK-NEXT:    br i1 [[COND]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
447; CHECK:       if.false:
448; CHECK-NEXT:    store i8 1, ptr [[Q:%.*]], align 1
449; CHECK-NEXT:    br label [[IF_END:%.*]]
450; CHECK:       if.true:
451; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[B:%.*]], align 1
452; CHECK-NEXT:    store i8 [[TMP0]], ptr [[P:%.*]], align 1
453; CHECK-NEXT:    br label [[IF_END]]
454; CHECK:       if.end:
455; CHECK-NEXT:    ret void
456;
457entry:
458  %cond = icmp eq i8 %a, 0
459  br i1 %cond, label %if.true, label %if.false
460
461if.false:
462  store i8 1, ptr %q
463  br label %if.end
464
465if.true:
466  %0 = load i8, ptr %b
467  store i8 %0, ptr %p
468  br label %if.end
469
470if.end:
471  ret void
472}
473
474;; Do not hoist if the terminator is not a br.
;; %entry ends in a switch rather than a conditional branch. The expected
;; output is unchanged.
475define void @not_br_terminator(i32 %a, ptr %b, ptr %p, ptr %q) {
476; CHECK-LABEL: @not_br_terminator(
477; CHECK-NEXT:  entry:
478; CHECK-NEXT:    switch i32 [[A:%.*]], label [[IF_END:%.*]] [
479; CHECK-NEXT:      i32 1, label [[IF_FALSE:%.*]]
480; CHECK-NEXT:      i32 2, label [[IF_TRUE:%.*]]
481; CHECK-NEXT:    ]
482; CHECK:       if.false:
483; CHECK-NEXT:    store i32 1, ptr [[Q:%.*]], align 4
484; CHECK-NEXT:    br label [[IF_END]]
485; CHECK:       if.true:
486; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[B:%.*]], align 4
487; CHECK-NEXT:    store i32 [[TMP0]], ptr [[P:%.*]], align 4
488; CHECK-NEXT:    br label [[IF_FALSE]]
489; CHECK:       if.end:
490; CHECK-NEXT:    ret void
491;
492entry:
493  switch i32 %a, label %if.end [
494  i32 1, label %if.false
495  i32 2, label %if.true
496  ]
497
498if.false:
499  store i32 1, ptr %q, align 4
500  br label %if.end
501
502if.true:
503  %0 = load i32, ptr %b, align 4
504  store i32 %0, ptr %p, align 4
505  br label %if.false
506
507if.end:
508  ret void
509}
510
511;; Do not hoist if the instruction to be hoisted is atomic.
;; The guarded block holds a single seq_cst atomic store; the expected output
;; is unchanged.
512define void @not_atomic(i1 %cond, ptr %p) {
513; CHECK-LABEL: @not_atomic(
514; CHECK-NEXT:  entry:
515; CHECK-NEXT:    br i1 [[COND:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
516; CHECK:       if.false:
517; CHECK-NEXT:    store atomic i32 1, ptr [[P:%.*]] seq_cst, align 4
518; CHECK-NEXT:    br label [[IF_TRUE]]
519; CHECK:       if.true:
520; CHECK-NEXT:    ret void
521;
522entry:
523  br i1 %cond, label %if.true, label %if.false
524
525if.false:
526  store atomic i32 1, ptr %p seq_cst, align 4
527  br label %if.true
528
529if.true:
530  ret void
531}
532
533;; Do not hoist if the instruction to be hoisted is volatile.
;; The guarded block holds a single volatile load whose result is unused; the
;; expected output is unchanged.
534define void @not_volatile(i1 %cond, ptr %p) {
535; CHECK-LABEL: @not_volatile(
536; CHECK-NEXT:  entry:
537; CHECK-NEXT:    br i1 [[COND:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
538; CHECK:       if.false:
539; CHECK-NEXT:    [[TMP0:%.*]] = load volatile i32, ptr [[P:%.*]], align 4
540; CHECK-NEXT:    br label [[IF_TRUE]]
541; CHECK:       if.true:
542; CHECK-NEXT:    ret void
543;
544entry:
545  br i1 %cond, label %if.true, label %if.false
546
547if.false:
548  %0 = load volatile i32, ptr %p, align 4
549  br label %if.true
550
551if.true:
552  ret void
553}
554
555;; Do not hoist if there is an instruction with side effects in the same BB.
;; The atomicrmw ahead of the otherwise hoistable store is expected to keep
;; the whole block in place.
556define void @not_hoistable_sideeffect(i1 %cond, ptr %p, ptr %q) {
557; CHECK-LABEL: @not_hoistable_sideeffect(
558; CHECK-NEXT:  entry:
559; CHECK-NEXT:    br i1 [[COND:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
560; CHECK:       if.false:
561; CHECK-NEXT:    [[RMW:%.*]] = atomicrmw xchg ptr [[Q:%.*]], double 4.000000e+00 seq_cst, align 8
562; CHECK-NEXT:    store i32 1, ptr [[P:%.*]], align 4
563; CHECK-NEXT:    br label [[IF_TRUE]]
564; CHECK:       if.true:
565; CHECK-NEXT:    ret void
566;
567entry:
568  br i1 %cond, label %if.true, label %if.false
569
570if.false:
571  %rmw= atomicrmw xchg ptr %q, double 4.0 seq_cst
572  store i32 1, ptr %p, align 4
573  br label %if.true
574
575if.true:
576  ret void
577}
578
579;; Do not hoist if the branch is predictable and the `then` BB is not likely to execute.
;; The branch carries !prof !10, i.e. branch weights 1 : 99 in successor order
;; (%if.then : %if.end). The expected output keeps the branch and its !prof.
580define void @not_likely_to_execute(ptr %p, ptr %q, i32 %a) {
581; CHECK-LABEL: @not_likely_to_execute(
582; CHECK-NEXT:  entry:
583; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[A:%.*]], 0
584; CHECK-NEXT:    br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]], !prof [[PROF6:![0-9]+]]
585; CHECK:       if.end:
586; CHECK-NEXT:    ret void
587; CHECK:       if.then:
588; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[Q:%.*]], align 4
589; CHECK-NEXT:    store i32 [[TMP0]], ptr [[P:%.*]], align 4
590; CHECK-NEXT:    br label [[IF_END]]
591;
592entry:
593  %tobool = icmp ne i32 %a, 0
594  br i1 %tobool, label %if.then, label %if.end, !prof !10
595
596if.end:
597  ret void
598
599if.then:
600  %0 = load i32, ptr %q
601  store i32 %0, ptr %p
602  br label %if.end
603}
604
605;; Currently hoist-loads-stores-with-cond-faulting runs in codegen, i.e. after
606;; SROA has optimized the allocas away, so the transform is not needed for this
607;; case. This may change if the optimization is moved before SROA in the future.
;; The guarded block loads its pointers back from allocas before the actual
;; load/store. The expected output is unchanged: every access stays plain.
608define void @not_alloca(ptr %p, ptr %q, i32 %a) {
609; CHECK-LABEL: @not_alloca(
610; CHECK-NEXT:  entry:
611; CHECK-NEXT:    [[P_ADDR:%.*]] = alloca ptr, align 8
612; CHECK-NEXT:    [[Q_ADDR:%.*]] = alloca ptr, align 8
613; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
614; CHECK-NEXT:    store ptr [[P:%.*]], ptr [[P_ADDR]], align 8
615; CHECK-NEXT:    store ptr [[Q:%.*]], ptr [[Q_ADDR]], align 8
616; CHECK-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
617; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
618; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
619; CHECK-NEXT:    br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
620; CHECK:       if.then:
621; CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[Q_ADDR]], align 8
622; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
623; CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[P_ADDR]], align 8
624; CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP3]], align 4
625; CHECK-NEXT:    br label [[IF_END]]
626; CHECK:       if.end:
627; CHECK-NEXT:    ret void
628;
629entry:
630  %p.addr = alloca ptr
631  %q.addr = alloca ptr
632  %a.addr = alloca i32
633  store ptr %p, ptr %p.addr
634  store ptr %q, ptr %q.addr
635  store i32 %a, ptr %a.addr
636  %0 = load i32, ptr %a.addr
637  %tobool = icmp ne i32 %0, 0
638  br i1 %tobool, label %if.then, label %if.end
639
640if.then:
641  %1 = load ptr, ptr %q.addr
642  %2 = load i32, ptr %1
643  %3 = load ptr, ptr %p.addr
644  store i32 %2, ptr %3
645  br label %if.end
646
647if.end:
648  ret void
649}
650
651;; Do not transform if alignment = 2^32.
;; The store uses align 4294967296; the expected output is unchanged.
;; NOTE(review): presumably this is because the masked intrinsics take the
;; alignment as an i32 operand, which cannot represent 2^32 - confirm.
652define void @not_maximum_alignment(i1 %cond, ptr %p) {
653; CHECK-LABEL: @not_maximum_alignment(
654; CHECK-NEXT:  entry:
655; CHECK-NEXT:    br i1 [[COND:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
656; CHECK:       if.true:
657; CHECK-NEXT:    store i32 0, ptr [[P:%.*]], align 4294967296
658; CHECK-NEXT:    br label [[IF_FALSE]]
659; CHECK:       if.false:
660; CHECK-NEXT:    ret void
661;
662entry:
663  br i1 %cond, label %if.true, label %if.false
664
665if.true:
666  store i32 0, ptr %p, align 4294967296
667  br label %if.false
668
669if.false:
670  ret void
671}
672
;; The phi in the join block %bb3 has three incoming values. Only the
;; %bb1 -> %bb2 -> %bb3 triangle is expected to be folded: the load becomes a
;; masked load in %bb1 on %cond2, with the phi's incoming value for %bb1 (0) as
;; passthrough, and the phi is left with two entries. The outer branch on
;; %cond1 stays.
673define i32 @succ_phi_has_3input(i1 %cond1, ptr %p, i1 %cond2) {
674; CHECK-LABEL: @succ_phi_has_3input(
675; CHECK-NEXT:  entry:
676; CHECK-NEXT:    br i1 [[COND1:%.*]], label [[BB3:%.*]], label [[BB1:%.*]]
677; CHECK:       bb1:
678; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i1 [[COND2:%.*]] to <1 x i1>
679; CHECK-NEXT:    [[TMP1:%.*]] = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr [[P:%.*]], i32 8, <1 x i1> [[TMP0]], <1 x i64> zeroinitializer)
680; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <1 x i64> [[TMP1]] to i64
681; CHECK-NEXT:    br label [[BB3]]
682; CHECK:       bb3:
683; CHECK-NEXT:    [[Y:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[TMP2]], [[BB1]] ]
684; CHECK-NEXT:    store i64 [[Y]], ptr [[P]], align 8
685; CHECK-NEXT:    ret i32 0
686;
687entry:
688  br i1 %cond1, label %bb3, label %bb1
689
690bb1:                                                ; preds = %entry
691  br i1 %cond2, label %bb2, label %bb3
692
693bb2:                                                ; preds = %bb1
694  %x = load i64, ptr %p, align 8
695  br label %bb3
696
697bb3:                                                ; preds = %bb2, %bb1, %entry
698  %y = phi i64 [ %x, %bb2 ], [ 0, %bb1 ], [ 0, %entry ]
699  store i64 %y, ptr %p, align 8
700  ret i32 0
701}
702
;; Successor 1 branches to successor 0 with a phi, and the loaded value is also
;; stored inside the guarded block. Expected: the masked load takes the phi's
;; incoming 0 as passthrough, the store becomes a masked store on the same
;; inverted mask, and the loaded value is returned directly.
703define i32 @succ1to0_phi2(ptr %p, ptr %p2) {
704; CHECK-LABEL: @succ1to0_phi2(
705; CHECK-NEXT:  entry:
706; CHECK-NEXT:    [[COND:%.*]] = icmp eq ptr [[P:%.*]], null
707; CHECK-NEXT:    [[TMP0:%.*]] = xor i1 [[COND]], true
708; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
709; CHECK-NEXT:    [[TMP2:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[P]], i32 4, <1 x i1> [[TMP1]], <1 x i32> zeroinitializer)
710; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <1 x i32> [[TMP2]] to i32
711; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32 [[TMP3]] to <1 x i32>
712; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP4]], ptr [[P2:%.*]], i32 4, <1 x i1> [[TMP1]])
713; CHECK-NEXT:    ret i32 [[TMP3]]
714;
715entry:
716  %cond = icmp eq ptr %p, null
717  br i1 %cond, label %if.true, label %if.false
718
719if.false:
720  %0 = load i32, ptr %p
721  store i32 %0, ptr %p2
722  br label %if.true
723
724if.true:
725  %res = phi i32 [ %0, %if.false ], [ 0, %entry ]
726  ret i32 %res
727}
728
;; Two phis in the join block use the same load, with different incoming values
;; from %entry (0 and %x). Expected: %x (the incoming value of %res1) becomes
;; the masked load's passthrough, so %res1 is the loaded value itself, while
;; %res0 becomes a select between 0 and the loaded value.
729define i32 @succ1to0_phi3(ptr %p, ptr %p2, i32 %x) {
730; CHECK-LABEL: @succ1to0_phi3(
731; CHECK-NEXT:  entry:
732; CHECK-NEXT:    [[COND:%.*]] = icmp eq ptr [[P:%.*]], null
733; CHECK-NEXT:    [[TMP0:%.*]] = xor i1 [[COND]], true
734; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
735; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32 [[X:%.*]] to <1 x i32>
736; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[P]], i32 4, <1 x i1> [[TMP1]], <1 x i32> [[TMP2]])
737; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <1 x i32> [[TMP3]] to i32
738; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i32 [[TMP4]] to <1 x i32>
739; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP5]], ptr [[P2:%.*]], i32 4, <1 x i1> [[TMP1]])
740; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[COND]], i32 0, i32 [[TMP4]]
741; CHECK-NEXT:    [[RES:%.*]] = add i32 [[SPEC_SELECT]], [[TMP4]]
742; CHECK-NEXT:    ret i32 [[RES]]
743;
744entry:
745  %cond = icmp eq ptr %p, null
746  br i1 %cond, label %if.true, label %if.false
747
748if.false:
749  %0 = load i32, ptr %p
750  store i32 %0, ptr %p2
751  br label %if.true
752
753if.true:
754  %res0 = phi i32 [ %0, %if.false ], [ 0, %entry ]
755  %res1 = phi i32 [ %0, %if.false ], [ %x, %entry ]
756  %res = add i32 %res0, %res1
757  ret i32 %res
758}
759
759;; Do not transform if either BB has multiple successors.
;; %entry.if ends in a switch, so it has more than one successor. The load is
;; expected to stay a plain load inside %entry.if; the only expected change is
;; that the two returns are merged into a common.ret block.
760define i32 @not_multi_successors(i1 %c1, i32 %c2, ptr %p) {
761; CHECK-LABEL: @not_multi_successors(
762; CHECK-NEXT:  entry:
763; CHECK-NEXT:    br i1 [[C1:%.*]], label [[ENTRY_IF:%.*]], label [[COMMON_RET:%.*]]
764; CHECK:       entry.if:
765; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr [[P:%.*]], align 4
766; CHECK-NEXT:    switch i32 [[C2:%.*]], label [[COMMON_RET]] [
767; CHECK-NEXT:      i32 0, label [[SW_BB:%.*]]
768; CHECK-NEXT:      i32 1, label [[SW_BB]]
769; CHECK-NEXT:    ]
770; CHECK:       common.ret:
771; CHECK-NEXT:    [[COMMON_RET_OP:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VAL]], [[ENTRY_IF]] ], [ 0, [[SW_BB]] ]
772; CHECK-NEXT:    ret i32 [[COMMON_RET_OP]]
773; CHECK:       sw.bb:
774; CHECK-NEXT:    br label [[COMMON_RET]]
775;
776entry:
777  br i1 %c1, label %entry.if, label %entry.else
778
779entry.if:                                         ; preds = %entry
780  %val = load i32, ptr %p, align 4
781  switch i32 %c2, label %return [
782  i32 0, label %sw.bb
783  i32 1, label %sw.bb
784  ]
785
786entry.else:                                       ; preds = %entry
787  ret i32 0
788
789sw.bb:                                            ; preds = %entry.if, %entry.if
790  br label %return
791
792return:                                           ; preds = %sw.bb, %entry.if
793  %ret = phi i32 [ %val, %entry.if ], [ 0, %sw.bb ]
794  ret i32 %ret
795}
797
;; External callee used by @load_skip_speculatable_memory_read.
798declare i32 @read_memory_only() readonly nounwind willreturn speculatable
799
;; Module-level debug-info scaffolding.
800!llvm.dbg.cu = !{!0}
801!llvm.module.flags = !{!2, !3}
802!llvm.ident = !{!4}
803
804!0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "clang")
805!1 = !DIFile(filename: "foo.c", directory: "/tmp")
806!2 = !{i32 2, !"Dwarf Version", i32 4}
807!3 = !{i32 2, !"Debug Info Version", i32 3}
808!4 = !{!"clang"}
;; !5-!9: type, subprogram, location, variable and assign ID behind the
;; llvm.dbg.assign call and the !DIAssignID store in @debug_metadata_diassign.
809!5 = !DIBasicType(name: "int", size: 16, encoding: DW_ATE_signed)
810!6 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 4, unit: !0)
811!7 = !DILocation(line: 5, column: 7, scope: !6)
812!8 = !DILocalVariable(name: "a", scope: !6, line: 6, type: !5)
813!9 = distinct !DIAssignID()
;; !10: branch weights attached to the branch in @not_likely_to_execute.
814!10 = !{!"branch_weights", i32 1, i32 99}
;; !11: !annotation payload used in @nondebug_metadata.
815!11 = !{ !"auto-init" }
816