xref: /llvm-project/llvm/test/Transforms/GVN/PRE/pre-load.ll (revision eb3f2be36df4924c00cbb39846b7a1109b2c81e5)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -passes=gvn -enable-load-pre -S | FileCheck %s
3; RUN: opt < %s -aa-pipeline=basic-aa -passes="gvn<load-pre>" -enable-load-pre=false -S | FileCheck %s
; Module-wide data layout: little-endian ("e"), 64-bit pointers ("p:64:64:64"),
; native integer widths of 8/16/32/64 bits ("n8:16:32:64").
4target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
5
; Basic load PRE on a diamond. The value at %p is known on the block3 path
; (the stored 0) but not on the block2 path. The expected output gains a load
; in block2 and replaces the load in block4 with a phi of that load and 0.
6define i32 @test1(ptr %p, i1 %C) {
7; CHECK-LABEL: @test1(
8; CHECK-NEXT:  block1:
9; CHECK-NEXT:    br i1 [[C:%.*]], label [[BLOCK2:%.*]], label [[BLOCK3:%.*]]
10; CHECK:       block2:
11; CHECK-NEXT:    [[PRE_PRE:%.*]] = load i32, ptr [[P:%.*]], align 4
12; CHECK-NEXT:    br label [[BLOCK4:%.*]]
13; CHECK:       block3:
14; CHECK-NEXT:    store i32 0, ptr [[P]], align 4
15; CHECK-NEXT:    br label [[BLOCK4]]
16; CHECK:       block4:
17; CHECK-NEXT:    [[PRE:%.*]] = phi i32 [ 0, [[BLOCK3]] ], [ [[PRE_PRE]], [[BLOCK2]] ]
18; CHECK-NEXT:    ret i32 [[PRE]]
19;
20block1:
21  br i1 %C, label %block2, label %block3
22
23block2:
24  br label %block4
25
26block3:
27  store i32 0, ptr %p
28  br label %block4
29
30block4:
; Partially redundant load: available from block3 only.
31  %PRE = load i32, ptr %p
32  ret i32 %PRE
33}
34
35; This is a simple phi translation case.
; The load address %P2 is a phi of %p (from block3) and %q (from block2).
; Translating it into block2 yields %q, so the expected output loads %q in
; block2; on the block3 path the stored 0 is forwarded. The pointer phi %P2
; itself is still present in the expected output.
36define i32 @test2(ptr %p, ptr %q, i1 %C) {
37; CHECK-LABEL: @test2(
38; CHECK-NEXT:  block1:
39; CHECK-NEXT:    br i1 [[C:%.*]], label [[BLOCK2:%.*]], label [[BLOCK3:%.*]]
40; CHECK:       block2:
41; CHECK-NEXT:    [[PRE_PRE:%.*]] = load i32, ptr [[Q:%.*]], align 4
42; CHECK-NEXT:    br label [[BLOCK4:%.*]]
43; CHECK:       block3:
44; CHECK-NEXT:    store i32 0, ptr [[P:%.*]], align 4
45; CHECK-NEXT:    br label [[BLOCK4]]
46; CHECK:       block4:
47; CHECK-NEXT:    [[PRE:%.*]] = phi i32 [ 0, [[BLOCK3]] ], [ [[PRE_PRE]], [[BLOCK2]] ]
48; CHECK-NEXT:    [[P2:%.*]] = phi ptr [ [[P]], [[BLOCK3]] ], [ [[Q]], [[BLOCK2]] ]
49; CHECK-NEXT:    ret i32 [[PRE]]
50;
51block1:
52  br i1 %C, label %block2, label %block3
53
54block2:
55  br label %block4
56
57block3:
58  store i32 0, ptr %p
59  br label %block4
60
61block4:
62  %P2 = phi ptr [%p, %block3], [%q, %block2]
63  %PRE = load i32, ptr %P2
64  ret i32 %PRE
65}
66
67; This is a PRE case that requires phi translation through a GEP.
; The load address is a GEP (%P3) of the pointer phi %P2. Translated into
; block2 it becomes "q + 1", which already exists as %B in block1 (block1
; dominates block2). The expected output reuses %B as the address of the
; inserted load instead of creating a new GEP.
68define i32 @test3(ptr %p, ptr %q, ptr %Hack, i1 %C) {
69; CHECK-LABEL: @test3(
70; CHECK-NEXT:  block1:
71; CHECK-NEXT:    [[B:%.*]] = getelementptr i32, ptr [[Q:%.*]], i32 1
72; CHECK-NEXT:    store ptr [[B]], ptr [[HACK:%.*]], align 8
73; CHECK-NEXT:    br i1 [[C:%.*]], label [[BLOCK2:%.*]], label [[BLOCK3:%.*]]
74; CHECK:       block2:
75; CHECK-NEXT:    [[PRE_PRE:%.*]] = load i32, ptr [[B]], align 4
76; CHECK-NEXT:    br label [[BLOCK4:%.*]]
77; CHECK:       block3:
78; CHECK-NEXT:    [[A:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 1
79; CHECK-NEXT:    store i32 0, ptr [[A]], align 4
80; CHECK-NEXT:    br label [[BLOCK4]]
81; CHECK:       block4:
82; CHECK-NEXT:    [[PRE:%.*]] = phi i32 [ 0, [[BLOCK3]] ], [ [[PRE_PRE]], [[BLOCK2]] ]
83; CHECK-NEXT:    [[P2:%.*]] = phi ptr [ [[P]], [[BLOCK3]] ], [ [[Q]], [[BLOCK2]] ]
84; CHECK-NEXT:    [[P3:%.*]] = getelementptr i32, ptr [[P2]], i32 1
85; CHECK-NEXT:    ret i32 [[PRE]]
86;
87block1:
88  %B = getelementptr i32, ptr %q, i32 1
; The store gives %B a use (presumably so it is not deleted as dead).
89  store ptr %B, ptr %Hack
90  br i1 %C, label %block2, label %block3
91
92block2:
93  br label %block4
94
95block3:
96  %A = getelementptr i32, ptr %p, i32 1
97  store i32 0, ptr %A
98  br label %block4
99
100block4:
101  %P2 = phi ptr [%p, %block3], [%q, %block2]
102  %P3 = getelementptr i32, ptr %P2, i32 1
103  %PRE = load i32, ptr %P3
104  ret i32 %PRE
105}
106
107;; Here the loaded address is available, but the computation is in 'block3'
108;; which does not dominate 'block2'.
; Unlike test3, the existing "q + 1" GEP (%B) lives in block3 and cannot be
; used from block2. The expected output therefore materialises a fresh GEP of
; %q in block2 (the P3_PHI_TRANS_INSERT value) and loads through it.
109define i32 @test4(ptr %p, ptr %q, ptr %Hack, i1 %C) {
110; CHECK-LABEL: @test4(
111; CHECK-NEXT:  block1:
112; CHECK-NEXT:    br i1 [[C:%.*]], label [[BLOCK2:%.*]], label [[BLOCK3:%.*]]
113; CHECK:       block2:
114; CHECK-NEXT:    [[P3_PHI_TRANS_INSERT:%.*]] = getelementptr i32, ptr [[Q:%.*]], i32 1
115; CHECK-NEXT:    [[PRE_PRE:%.*]] = load i32, ptr [[P3_PHI_TRANS_INSERT]], align 4
116; CHECK-NEXT:    br label [[BLOCK4:%.*]]
117; CHECK:       block3:
118; CHECK-NEXT:    [[B:%.*]] = getelementptr i32, ptr [[Q]], i32 1
119; CHECK-NEXT:    store ptr [[B]], ptr [[HACK:%.*]], align 8
120; CHECK-NEXT:    [[A:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 1
121; CHECK-NEXT:    store i32 0, ptr [[A]], align 4
122; CHECK-NEXT:    br label [[BLOCK4]]
123; CHECK:       block4:
124; CHECK-NEXT:    [[PRE:%.*]] = phi i32 [ 0, [[BLOCK3]] ], [ [[PRE_PRE]], [[BLOCK2]] ]
125; CHECK-NEXT:    [[P2:%.*]] = phi ptr [ [[P]], [[BLOCK3]] ], [ [[Q]], [[BLOCK2]] ]
126; CHECK-NEXT:    [[P3:%.*]] = getelementptr i32, ptr [[P2]], i32 1
127; CHECK-NEXT:    ret i32 [[PRE]]
128;
129block1:
130  br i1 %C, label %block2, label %block3
131
132block2:
133  br label %block4
134
135block3:
136  %B = getelementptr i32, ptr %q, i32 1
137  store ptr %B, ptr %Hack
138
139  %A = getelementptr i32, ptr %p, i32 1
140  store i32 0, ptr %A
141  br label %block4
142
143block4:
144  %P2 = phi ptr [%p, %block3], [%q, %block2]
145  %P3 = getelementptr i32, ptr %P2, i32 1
146  %PRE = load i32, ptr %P3
147  ret i32 %PRE
148}
149
150; Same as test4, with a nuw flag on the GEP.
; Only %P3 in block4 carries nuw; %A and %B in block3 do not. The expected
; output shows the GEP inserted into block2 carrying the nuw flag as well.
151define i32 @test4_nuw(ptr %p, ptr %q, ptr %Hack, i1 %C) {
152; CHECK-LABEL: @test4_nuw(
153; CHECK-NEXT:  block1:
154; CHECK-NEXT:    br i1 [[C:%.*]], label [[BLOCK2:%.*]], label [[BLOCK3:%.*]]
155; CHECK:       block2:
156; CHECK-NEXT:    [[P3_PHI_TRANS_INSERT:%.*]] = getelementptr nuw i32, ptr [[Q:%.*]], i32 1
157; CHECK-NEXT:    [[PRE_PRE:%.*]] = load i32, ptr [[P3_PHI_TRANS_INSERT]], align 4
158; CHECK-NEXT:    br label [[BLOCK4:%.*]]
159; CHECK:       block3:
160; CHECK-NEXT:    [[B:%.*]] = getelementptr i32, ptr [[Q]], i32 1
161; CHECK-NEXT:    store ptr [[B]], ptr [[HACK:%.*]], align 8
162; CHECK-NEXT:    [[A:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 1
163; CHECK-NEXT:    store i32 0, ptr [[A]], align 4
164; CHECK-NEXT:    br label [[BLOCK4]]
165; CHECK:       block4:
166; CHECK-NEXT:    [[PRE:%.*]] = phi i32 [ 0, [[BLOCK3]] ], [ [[PRE_PRE]], [[BLOCK2]] ]
167; CHECK-NEXT:    [[P2:%.*]] = phi ptr [ [[P]], [[BLOCK3]] ], [ [[Q]], [[BLOCK2]] ]
168; CHECK-NEXT:    [[P3:%.*]] = getelementptr nuw i32, ptr [[P2]], i32 1
169; CHECK-NEXT:    ret i32 [[PRE]]
170;
171block1:
172  br i1 %C, label %block2, label %block3
173
174block2:
175  br label %block4
176
177block3:
178  %B = getelementptr i32, ptr %q, i32 1
179  store ptr %B, ptr %Hack
180
181  %A = getelementptr i32, ptr %p, i32 1
182  store i32 0, ptr %A
183  br label %block4
184
185block4:
186  %P2 = phi ptr [%p, %block3], [%q, %block2]
187  %P3 = getelementptr nuw i32, ptr %P2, i32 1
188  %PRE = load i32, ptr %P3
189  ret i32 %PRE
190}
191
192;void test5(int N, double *G) {
193;  int j;
194;  for (j = 0; j < N - 1; j++)
195;    G[j] = G[j] + G[j+1];
196;}
197
; Loop PRE across the backedge. Each iteration loads G[indvar] and
; G[indvar+1] and stores their sum to G[indvar]. The G[indvar+1] value loaded
; in one iteration is the G[indvar] value of the next, so the expected output
; replaces the G[indvar] load with a phi: a load of G[0] inserted in bb.nph on
; entry, and the previous iteration's G[indvar+1] load on the backedge.
198define void @test5(i32 %N, ptr nocapture %G) nounwind ssp {
199; CHECK-LABEL: @test5(
200; CHECK-NEXT:  entry:
201; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[N:%.*]], -1
202; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[TMP0]], 0
203; CHECK-NEXT:    br i1 [[TMP1]], label [[BB_NPH:%.*]], label [[RETURN:%.*]]
204; CHECK:       bb.nph:
205; CHECK-NEXT:    [[TMP:%.*]] = zext i32 [[TMP0]] to i64
206; CHECK-NEXT:    [[DOTPRE:%.*]] = load double, ptr [[G:%.*]], align 8
207; CHECK-NEXT:    br label [[BB:%.*]]
208; CHECK:       bb:
209; CHECK-NEXT:    [[TMP2:%.*]] = phi double [ [[DOTPRE]], [[BB_NPH]] ], [ [[TMP3:%.*]], [[BB]] ]
210; CHECK-NEXT:    [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH]] ], [ [[TMP6:%.*]], [[BB]] ]
211; CHECK-NEXT:    [[TMP6]] = add i64 [[INDVAR]], 1
212; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr double, ptr [[G]], i64 [[TMP6]]
213; CHECK-NEXT:    [[SCEVGEP7:%.*]] = getelementptr double, ptr [[G]], i64 [[INDVAR]]
214; CHECK-NEXT:    [[TMP3]] = load double, ptr [[SCEVGEP]], align 8
215; CHECK-NEXT:    [[TMP4:%.*]] = fadd double [[TMP2]], [[TMP3]]
216; CHECK-NEXT:    store double [[TMP4]], ptr [[SCEVGEP7]], align 8
217; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[TMP6]], [[TMP]]
218; CHECK-NEXT:    br i1 [[EXITCOND]], label [[RETURN]], label [[BB]]
219; CHECK:       return:
220; CHECK-NEXT:    ret void
221;
222entry:
223  %0 = add i32 %N, -1
224  %1 = icmp sgt i32 %0, 0
225  br i1 %1, label %bb.nph, label %return
226
227bb.nph:
228  %tmp = zext i32 %0 to i64
229  br label %bb
230
231
232bb:
233  %indvar = phi i64 [ 0, %bb.nph ], [ %tmp6, %bb ]
234  %tmp6 = add i64 %indvar, 1
235  %scevgep = getelementptr double, ptr %G, i64 %tmp6
236  %scevgep7 = getelementptr double, ptr %G, i64 %indvar
; %2 (G[indvar]) is the load expected to be replaced by a phi.
237  %2 = load double, ptr %scevgep7, align 8
238  %3 = load double, ptr %scevgep, align 8
239  %4 = fadd double %2, %3
240  store double %4, ptr %scevgep7, align 8
241  %exitcond = icmp eq i64 %tmp6, %tmp
242  br i1 %exitcond, label %return, label %bb
243
244; Should only be one load in the loop.
245
246return:
247  ret void
248}
249
250;void test6(int N, double *G) {
251;  int j;
252;  for (j = 0; j < N - 1; j++)
253;    G[j+1] = G[j] + G[j+1];
254;}
255
; Variant of test5 where the sum is stored to G[indvar+1] instead of
; G[indvar]. The value stored in one iteration is what the next iteration
; would load from G[indvar], so the expected phi takes the fadd result on the
; backedge and a load of G[0] inserted in bb.nph on loop entry.
256define void @test6(i32 %N, ptr nocapture %G) nounwind ssp {
257; CHECK-LABEL: @test6(
258; CHECK-NEXT:  entry:
259; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[N:%.*]], -1
260; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[TMP0]], 0
261; CHECK-NEXT:    br i1 [[TMP1]], label [[BB_NPH:%.*]], label [[RETURN:%.*]]
262; CHECK:       bb.nph:
263; CHECK-NEXT:    [[TMP:%.*]] = zext i32 [[TMP0]] to i64
264; CHECK-NEXT:    [[DOTPRE:%.*]] = load double, ptr [[G:%.*]], align 8
265; CHECK-NEXT:    br label [[BB:%.*]]
266; CHECK:       bb:
267; CHECK-NEXT:    [[TMP2:%.*]] = phi double [ [[DOTPRE]], [[BB_NPH]] ], [ [[TMP4:%.*]], [[BB]] ]
268; CHECK-NEXT:    [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH]] ], [ [[TMP6:%.*]], [[BB]] ]
269; CHECK-NEXT:    [[TMP6]] = add i64 [[INDVAR]], 1
270; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr double, ptr [[G]], i64 [[TMP6]]
271; CHECK-NEXT:    [[SCEVGEP7:%.*]] = getelementptr double, ptr [[G]], i64 [[INDVAR]]
272; CHECK-NEXT:    [[TMP3:%.*]] = load double, ptr [[SCEVGEP]], align 8
273; CHECK-NEXT:    [[TMP4]] = fadd double [[TMP2]], [[TMP3]]
274; CHECK-NEXT:    store double [[TMP4]], ptr [[SCEVGEP]], align 8
275; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[TMP6]], [[TMP]]
276; CHECK-NEXT:    br i1 [[EXITCOND]], label [[RETURN]], label [[BB]]
277; CHECK:       return:
278; CHECK-NEXT:    ret void
279;
280entry:
281  %0 = add i32 %N, -1
282  %1 = icmp sgt i32 %0, 0
283  br i1 %1, label %bb.nph, label %return
284
285bb.nph:
286  %tmp = zext i32 %0 to i64
287  br label %bb
288
289
290bb:
291  %indvar = phi i64 [ 0, %bb.nph ], [ %tmp6, %bb ]
292  %tmp6 = add i64 %indvar, 1
293  %scevgep = getelementptr double, ptr %G, i64 %tmp6
294  %scevgep7 = getelementptr double, ptr %G, i64 %indvar
295  %2 = load double, ptr %scevgep7, align 8
296  %3 = load double, ptr %scevgep, align 8
297  %4 = fadd double %2, %3
298  store double %4, ptr %scevgep, align 8
299  %exitcond = icmp eq i64 %tmp6, %tmp
300  br i1 %exitcond, label %return, label %bb
301
302; Should only be one load in the loop.
303
304return:
305  ret void
306}
307
308;void test7(int N, double *G) {
309;  long j;
310;  G[1] = 1;
311;  for (j = 1; j < N - 1; j++)
312;      G[j+1] = G[j] + G[j+1];
313;}
314
315; This requires phi translation of the adds.
; The partially redundant load is G[indvar+1], whose address is a GEP of an
; add of the induction phi. On loop entry (indvar = 0) that is G[1], which
; entry has just set to 1.0, so the expected phi takes the constant 1.0 from
; bb.nph and no load is inserted there. The backedge value is the fadd result
; stored to G[indvar+2].
316define void @test7(i32 %N, ptr nocapture %G) nounwind ssp {
317; CHECK-LABEL: @test7(
318; CHECK-NEXT:  entry:
319; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds double, ptr [[G:%.*]], i64 1
320; CHECK-NEXT:    store double 1.000000e+00, ptr [[TMP0]], align 8
321; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[N:%.*]], -1
322; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt i32 [[TMP1]], 1
323; CHECK-NEXT:    br i1 [[TMP2]], label [[BB_NPH:%.*]], label [[RETURN:%.*]]
324; CHECK:       bb.nph:
325; CHECK-NEXT:    [[TMP:%.*]] = sext i32 [[TMP1]] to i64
326; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[TMP]], -1
327; CHECK-NEXT:    br label [[BB:%.*]]
328; CHECK:       bb:
329; CHECK-NEXT:    [[TMP3:%.*]] = phi double [ 1.000000e+00, [[BB_NPH]] ], [ [[TMP5:%.*]], [[BB]] ]
330; CHECK-NEXT:    [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH]] ], [ [[TMP9:%.*]], [[BB]] ]
331; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDVAR]], 2
332; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr double, ptr [[G]], i64 [[TMP8]]
333; CHECK-NEXT:    [[TMP9]] = add i64 [[INDVAR]], 1
334; CHECK-NEXT:    [[SCEVGEP10:%.*]] = getelementptr double, ptr [[G]], i64 [[TMP9]]
335; CHECK-NEXT:    [[TMP4:%.*]] = load double, ptr [[SCEVGEP]], align 8
336; CHECK-NEXT:    [[TMP5]] = fadd double [[TMP3]], [[TMP4]]
337; CHECK-NEXT:    store double [[TMP5]], ptr [[SCEVGEP]], align 8
338; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[TMP9]], [[TMP7]]
339; CHECK-NEXT:    br i1 [[EXITCOND]], label [[RETURN]], label [[BB]]
340; CHECK:       return:
341; CHECK-NEXT:    ret void
342;
343entry:
344  %0 = getelementptr inbounds double, ptr %G, i64 1
345  store double 1.000000e+00, ptr %0, align 8
346  %1 = add i32 %N, -1
347  %2 = icmp sgt i32 %1, 1
348  br i1 %2, label %bb.nph, label %return
349
350bb.nph:
351  %tmp = sext i32 %1 to i64
352  %tmp7 = add i64 %tmp, -1
353  br label %bb
354
355bb:
356  %indvar = phi i64 [ 0, %bb.nph ], [ %tmp9, %bb ]
357  %tmp8 = add i64 %indvar, 2
358  %scevgep = getelementptr double, ptr %G, i64 %tmp8
359  %tmp9 = add i64 %indvar, 1
360  %scevgep10 = getelementptr double, ptr %G, i64 %tmp9
361  %3 = load double, ptr %scevgep10, align 8
362  %4 = load double, ptr %scevgep, align 8
363  %5 = fadd double %3, %4
364  store double %5, ptr %scevgep, align 8
365  %exitcond = icmp eq i64 %tmp9, %tmp7
366  br i1 %exitcond, label %return, label %bb
367
368; Should only be one load in the loop.
369
370return:
371  ret void
372}
373
374;; Here the loaded address isn't available in 'block2' at all, requiring a new
375;; GEP to be inserted into it.
; Unlike test3 and test4, the input contains no GEP of %q anywhere (%Hack is
; unused). The expected output creates the "q + 1" GEP in block2 and loads
; through it.
376define i32 @test8(ptr %p, ptr %q, ptr %Hack, i1 %C) {
377; CHECK-LABEL: @test8(
378; CHECK-NEXT:  block1:
379; CHECK-NEXT:    br i1 [[C:%.*]], label [[BLOCK2:%.*]], label [[BLOCK3:%.*]]
380; CHECK:       block2:
381; CHECK-NEXT:    [[P3_PHI_TRANS_INSERT:%.*]] = getelementptr i32, ptr [[Q:%.*]], i32 1
382; CHECK-NEXT:    [[PRE_PRE:%.*]] = load i32, ptr [[P3_PHI_TRANS_INSERT]], align 4
383; CHECK-NEXT:    br label [[BLOCK4:%.*]]
384; CHECK:       block3:
385; CHECK-NEXT:    [[A:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 1
386; CHECK-NEXT:    store i32 0, ptr [[A]], align 4
387; CHECK-NEXT:    br label [[BLOCK4]]
388; CHECK:       block4:
389; CHECK-NEXT:    [[PRE:%.*]] = phi i32 [ 0, [[BLOCK3]] ], [ [[PRE_PRE]], [[BLOCK2]] ]
390; CHECK-NEXT:    [[P2:%.*]] = phi ptr [ [[P]], [[BLOCK3]] ], [ [[Q]], [[BLOCK2]] ]
391; CHECK-NEXT:    [[P3:%.*]] = getelementptr i32, ptr [[P2]], i32 1
392; CHECK-NEXT:    ret i32 [[PRE]]
393;
394block1:
395  br i1 %C, label %block2, label %block3
396
397block2:
398  br label %block4
399
400block3:
401  %A = getelementptr i32, ptr %p, i32 1
402  store i32 0, ptr %A
403  br label %block4
404
405block4:
406  %P2 = phi ptr [%p, %block3], [%q, %block2]
407  %P3 = getelementptr i32, ptr %P2, i32 1
408  %PRE = load i32, ptr %P3
409  ret i32 %PRE
410}
411
412;void test9(int N, double *G) {
413;  long j;
414;  for (j = 1; j < N - 1; j++)
415;      G[j+1] = G[j] + G[j+1];
416;}
417
418; This requires phi translation of the adds.
; Same loop as test7 but without the initial store to G[1]. Nothing makes
; G[1] available on loop entry, so the expected output inserts both a GEP
; (G + 1) and a load of it into bb.nph, feeding the phi that replaces the
; in-loop G[indvar+1] load.
419define void @test9(i32 %N, ptr nocapture %G) nounwind ssp {
420; CHECK-LABEL: @test9(
421; CHECK-NEXT:  entry:
422; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[N:%.*]], -1
423; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[TMP0]], 1
424; CHECK-NEXT:    br i1 [[TMP1]], label [[BB_NPH:%.*]], label [[RETURN:%.*]]
425; CHECK:       bb.nph:
426; CHECK-NEXT:    [[TMP:%.*]] = sext i32 [[TMP0]] to i64
427; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[TMP]], -1
428; CHECK-NEXT:    [[SCEVGEP10_PHI_TRANS_INSERT:%.*]] = getelementptr double, ptr [[G:%.*]], i64 1
429; CHECK-NEXT:    [[DOTPRE:%.*]] = load double, ptr [[SCEVGEP10_PHI_TRANS_INSERT]], align 8
430; CHECK-NEXT:    br label [[BB:%.*]]
431; CHECK:       bb:
432; CHECK-NEXT:    [[TMP2:%.*]] = phi double [ [[DOTPRE]], [[BB_NPH]] ], [ [[TMP4:%.*]], [[BB]] ]
433; CHECK-NEXT:    [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH]] ], [ [[TMP9:%.*]], [[BB]] ]
434; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDVAR]], 2
435; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr double, ptr [[G]], i64 [[TMP8]]
436; CHECK-NEXT:    [[TMP9]] = add i64 [[INDVAR]], 1
437; CHECK-NEXT:    [[SCEVGEP10:%.*]] = getelementptr double, ptr [[G]], i64 [[TMP9]]
438; CHECK-NEXT:    [[TMP3:%.*]] = load double, ptr [[SCEVGEP]], align 8
439; CHECK-NEXT:    [[TMP4]] = fadd double [[TMP2]], [[TMP3]]
440; CHECK-NEXT:    store double [[TMP4]], ptr [[SCEVGEP]], align 8
441; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[TMP9]], [[TMP7]]
442; CHECK-NEXT:    br i1 [[EXITCOND]], label [[RETURN]], label [[BB]]
443; CHECK:       return:
444; CHECK-NEXT:    ret void
445;
446entry:
; Unused placeholder: being unnamed it takes the implicit name %0, which lets
; the following values be numbered from %1. It is absent from the expected
; output.
447  add i32 0, 0
448  %1 = add i32 %N, -1
449  %2 = icmp sgt i32 %1, 1
450  br i1 %2, label %bb.nph, label %return
451
452bb.nph:
453  %tmp = sext i32 %1 to i64
454  %tmp7 = add i64 %tmp, -1
455  br label %bb
456
457
458bb:
459  %indvar = phi i64 [ 0, %bb.nph ], [ %tmp9, %bb ]
460  %tmp8 = add i64 %indvar, 2
461  %scevgep = getelementptr double, ptr %G, i64 %tmp8
462  %tmp9 = add i64 %indvar, 1
463  %scevgep10 = getelementptr double, ptr %G, i64 %tmp9
464  %3 = load double, ptr %scevgep10, align 8
465  %4 = load double, ptr %scevgep, align 8
466  %5 = fadd double %3, %4
467  store double %5, ptr %scevgep, align 8
468  %exitcond = icmp eq i64 %tmp9, %tmp7
469  br i1 %exitcond, label %return, label %bb
470
471; Should only be one load in the loop.
472
473return:
474  ret void
475}
476
477;void test10(int N, double *G) {
478;  long j;
479;  for (j = 1; j < N - 1; j++)
480;      G[j] = G[j] + G[j+1] + G[j-1];
481;}
482
483; PR5501
; Three loads per iteration: G[indvar], G[indvar+1] and G[indvar+2]; the sum
; is stored to G[indvar+1]. The expected output keeps only the G[indvar+2]
; load in the loop. The other two become phis whose loop-entry values are
; loads of G[1] and G[0] inserted in bb.nph, and whose backedge values are the
; previous iteration's G[indvar+2] load and stored sum respectively.
484define void @test10(i32 %N, ptr nocapture %G) nounwind ssp {
485; CHECK-LABEL: @test10(
486; CHECK-NEXT:  entry:
487; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[N:%.*]], -1
488; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[TMP0]], 1
489; CHECK-NEXT:    br i1 [[TMP1]], label [[BB_NPH:%.*]], label [[RETURN:%.*]]
490; CHECK:       bb.nph:
491; CHECK-NEXT:    [[TMP:%.*]] = sext i32 [[TMP0]] to i64
492; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[TMP]], -1
493; CHECK-NEXT:    [[SCEVGEP12_PHI_TRANS_INSERT:%.*]] = getelementptr double, ptr [[G:%.*]], i64 1
494; CHECK-NEXT:    [[DOTPRE:%.*]] = load double, ptr [[SCEVGEP12_PHI_TRANS_INSERT]], align 8
495; CHECK-NEXT:    [[DOTPRE1:%.*]] = load double, ptr [[G]], align 8
496; CHECK-NEXT:    br label [[BB:%.*]]
497; CHECK:       bb:
498; CHECK-NEXT:    [[TMP2:%.*]] = phi double [ [[DOTPRE1]], [[BB_NPH]] ], [ [[TMP6:%.*]], [[BB]] ]
499; CHECK-NEXT:    [[TMP3:%.*]] = phi double [ [[DOTPRE]], [[BB_NPH]] ], [ [[TMP4:%.*]], [[BB]] ]
500; CHECK-NEXT:    [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH]] ], [ [[TMP11:%.*]], [[BB]] ]
501; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr double, ptr [[G]], i64 [[INDVAR]]
502; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[INDVAR]], 2
503; CHECK-NEXT:    [[SCEVGEP10:%.*]] = getelementptr double, ptr [[G]], i64 [[TMP9]]
504; CHECK-NEXT:    [[TMP11]] = add i64 [[INDVAR]], 1
505; CHECK-NEXT:    [[SCEVGEP12:%.*]] = getelementptr double, ptr [[G]], i64 [[TMP11]]
506; CHECK-NEXT:    [[TMP4]] = load double, ptr [[SCEVGEP10]], align 8
507; CHECK-NEXT:    [[TMP5:%.*]] = fadd double [[TMP3]], [[TMP4]]
508; CHECK-NEXT:    [[TMP6]] = fadd double [[TMP5]], [[TMP2]]
509; CHECK-NEXT:    store double [[TMP6]], ptr [[SCEVGEP12]], align 8
510; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[TMP11]], [[TMP8]]
511; CHECK-NEXT:    br i1 [[EXITCOND]], label [[RETURN]], label [[BB]]
512; CHECK:       return:
513; CHECK-NEXT:    ret void
514;
515entry:
516  %0 = add i32 %N, -1
517  %1 = icmp sgt i32 %0, 1
518  br i1 %1, label %bb.nph, label %return
519
520bb.nph:
521  %tmp = sext i32 %0 to i64
522  %tmp8 = add i64 %tmp, -1
523  br label %bb
524
525
526bb:
527  %indvar = phi i64 [ 0, %bb.nph ], [ %tmp11, %bb ]
528  %scevgep = getelementptr double, ptr %G, i64 %indvar
529  %tmp9 = add i64 %indvar, 2
530  %scevgep10 = getelementptr double, ptr %G, i64 %tmp9
531  %tmp11 = add i64 %indvar, 1
532  %scevgep12 = getelementptr double, ptr %G, i64 %tmp11
533  %2 = load double, ptr %scevgep12, align 8
534  %3 = load double, ptr %scevgep10, align 8
535  %4 = fadd double %2, %3
536  %5 = load double, ptr %scevgep, align 8
537  %6 = fadd double %4, %5
538  store double %6, ptr %scevgep12, align 8
539  %exitcond = icmp eq i64 %tmp11, %tmp8
540  br i1 %exitcond, label %return, label %bb
541
542; Should only be one load in the loop.
543
544return:
545  ret void
546}
547
548; Test critical edge splitting.
; block2 has two successors (block4, block5) and block4 has two predecessors
; (block2, block3), so block2 -> block4 is a critical edge. The expected
; output splits it with a new block (block2.block4_crit_edge) that holds the
; inserted load, so the load does not run on the block2 -> block5 path.
549define i32 @test11(ptr %p, i1 %C, i32 %N) {
550; CHECK-LABEL: @test11(
551; CHECK-NEXT:  block1:
552; CHECK-NEXT:    br i1 [[C:%.*]], label [[BLOCK2:%.*]], label [[BLOCK3:%.*]]
553; CHECK:       block2:
554; CHECK-NEXT:    [[COND:%.*]] = icmp sgt i32 [[N:%.*]], 1
555; CHECK-NEXT:    br i1 [[COND]], label [[BLOCK2_BLOCK4_CRIT_EDGE:%.*]], label [[BLOCK5:%.*]]
556; CHECK:       block2.block4_crit_edge:
557; CHECK-NEXT:    [[PRE_PRE:%.*]] = load i32, ptr [[P:%.*]], align 4
558; CHECK-NEXT:    br label [[BLOCK4:%.*]]
559; CHECK:       block3:
560; CHECK-NEXT:    store i32 0, ptr [[P]], align 4
561; CHECK-NEXT:    br label [[BLOCK4]]
562; CHECK:       block4:
563; CHECK-NEXT:    [[PRE:%.*]] = phi i32 [ [[PRE_PRE]], [[BLOCK2_BLOCK4_CRIT_EDGE]] ], [ 0, [[BLOCK3]] ]
564; CHECK-NEXT:    br label [[BLOCK5]]
565; CHECK:       block5:
566; CHECK-NEXT:    [[RET:%.*]] = phi i32 [ 0, [[BLOCK2]] ], [ [[PRE]], [[BLOCK4]] ]
567; CHECK-NEXT:    ret i32 [[RET]]
568;
569block1:
570  br i1 %C, label %block2, label %block3
571
572block2:
573  %cond = icmp sgt i32 %N, 1
574  br i1 %cond, label %block4, label %block5
575
576block3:
577  store i32 0, ptr %p
578  br label %block4
579
580block4:
581  %PRE = load i32, ptr %p
582  br label %block5
583
584block5:
585  %ret = phi i32 [ 0, %block2 ], [ %PRE, %block4 ]
586  ret i32 %ret
587}
588
; External declarations shared by the tests below. @f is the attribute-free
; callee used as an invoke target and as the call that blocks PRE; @g consumes
; the loaded value in test12; @__CxxFrameHandler3 is the EH personality.
589declare void @f()
590declare void @g(i32)
591declare i32 @__CxxFrameHandler3(...)
592
593; Test that loads aren't PRE'd into EH pads.
; cleanup2 is reached from catch.dispatch (catchswitch unwind edge) and from
; cleanup (cleanupret), and only cleanup stores to %p. Making the load in
; cleanup2 fully redundant would need a load on the catch.dispatch path. The
; expected output leaves the load (%NOTPRE) in cleanup2 and the function
; unchanged.
594define void @test12(ptr %p) personality ptr @__CxxFrameHandler3 {
595; CHECK-LABEL: @test12(
596; CHECK-NEXT:  block1:
597; CHECK-NEXT:    invoke void @f()
598; CHECK-NEXT:            to label [[BLOCK2:%.*]] unwind label [[CATCH_DISPATCH:%.*]]
599; CHECK:       block2:
600; CHECK-NEXT:    invoke void @f()
601; CHECK-NEXT:            to label [[BLOCK3:%.*]] unwind label [[CLEANUP:%.*]]
602; CHECK:       block3:
603; CHECK-NEXT:    ret void
604; CHECK:       catch.dispatch:
605; CHECK-NEXT:    [[CS1:%.*]] = catchswitch within none [label %catch] unwind label [[CLEANUP2:%.*]]
606; CHECK:       catch:
607; CHECK-NEXT:    [[C:%.*]] = catchpad within [[CS1]] []
608; CHECK-NEXT:    catchret from [[C]] to label [[BLOCK2]]
609; CHECK:       cleanup:
610; CHECK-NEXT:    [[C1:%.*]] = cleanuppad within none []
611; CHECK-NEXT:    store i32 0, ptr [[P:%.*]], align 4
612; CHECK-NEXT:    cleanupret from [[C1]] unwind label [[CLEANUP2]]
613; CHECK:       cleanup2:
614; CHECK-NEXT:    [[C2:%.*]] = cleanuppad within none []
615; CHECK-NEXT:    [[NOTPRE:%.*]] = load i32, ptr [[P]], align 4
616; CHECK-NEXT:    call void @g(i32 [[NOTPRE]])
617; CHECK-NEXT:    cleanupret from [[C2]] unwind to caller
618;
619block1:
620  invoke void @f()
621  to label %block2 unwind label %catch.dispatch
622
623block2:
624  invoke void @f()
625  to label %block3 unwind label %cleanup
626
627block3:
628  ret void
629
630catch.dispatch:
631  %cs1 = catchswitch within none [label %catch] unwind label %cleanup2
632
633catch:
634  %c = catchpad within %cs1 []
635  catchret from %c to label %block2
636
637cleanup:
638  %c1 = cleanuppad within none []
639  store i32 0, ptr %p
640  cleanupret from %c1 unwind label %cleanup2
641
642cleanup2:
643  %c2 = cleanuppad within none []
644  %NOTPRE = load i32, ptr %p
645  call void @g(i32 %NOTPRE)
646  cleanupret from %c2 unwind to caller
647}
648
649; Don't PRE load across potentially throwing calls.
650
; %x is loaded in if.then and again in if.end after a call to @f, which is
; declared without attributes. %x carries no dereferenceable attribute here.
; The expected output keeps both loads exactly where they are.
651define i32 @test13(ptr noalias nocapture readonly %x, ptr noalias nocapture %r, i32 %a) {
652; CHECK-LABEL: @test13(
653; CHECK-NEXT:  entry:
654; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[A:%.*]], 0
655; CHECK-NEXT:    br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
656; CHECK:       if.then:
657; CHECK-NEXT:    [[UU:%.*]] = load i32, ptr [[X:%.*]], align 4
658; CHECK-NEXT:    store i32 [[UU]], ptr [[R:%.*]], align 4
659; CHECK-NEXT:    br label [[IF_END]]
660; CHECK:       if.end:
661; CHECK-NEXT:    call void @f()
662; CHECK-NEXT:    [[VV:%.*]] = load i32, ptr [[X]], align 4
663; CHECK-NEXT:    ret i32 [[VV]]
664;
665
666entry:
667  %tobool = icmp eq i32 %a, 0
668  br i1 %tobool, label %if.end, label %if.then
669
670
671if.then:
672  %uu = load i32, ptr %x, align 4
673  store i32 %uu, ptr %r, align 4
674  br label %if.end
675
676
677if.end:
678  call void @f()
679  %vv = load i32, ptr %x, align 4
680  ret i32 %vv
681}
682
683; Same as test13, but now the blocking function is not immediately in load's
684; block.
685
; The call to @f sits in if.end while the second load is two blocks later in
; follow_2. In the expected output the follow_1/follow_2 blocks no longer
; appear (their contents are in if.end) and the load still follows the call;
; no load is hoisted.
686define i32 @test14(ptr noalias nocapture readonly %x, ptr noalias nocapture %r, i32 %a) {
687; CHECK-LABEL: @test14(
688; CHECK-NEXT:  entry:
689; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[A:%.*]], 0
690; CHECK-NEXT:    br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
691; CHECK:       if.then:
692; CHECK-NEXT:    [[UU:%.*]] = load i32, ptr [[X:%.*]], align 4
693; CHECK-NEXT:    store i32 [[UU]], ptr [[R:%.*]], align 4
694; CHECK-NEXT:    br label [[IF_END]]
695; CHECK:       if.end:
696; CHECK-NEXT:    call void @f()
697; CHECK-NEXT:    [[VV:%.*]] = load i32, ptr [[X]], align 4
698; CHECK-NEXT:    ret i32 [[VV]]
699;
700
701entry:
702  %tobool = icmp eq i32 %a, 0
703  br i1 %tobool, label %if.end, label %if.then
704
705
706if.then:
707  %uu = load i32, ptr %x, align 4
708  store i32 %uu, ptr %r, align 4
709  br label %if.end
710
711
712if.end:
713  call void @f()
714  br label %follow_1
715
716follow_1:
717  br label %follow_2
718
719follow_2:
720  %vv = load i32, ptr %x, align 4
721  ret i32 %vv
722}
723
724; Same as test13, but %x here is dereferenceable. A pointer that is
725; dereferenceable can be loaded from speculatively without a risk of trapping.
726; Since it is OK to speculate, PRE is allowed.
727
; Differences from test13: %x is dereferenceable(8) align 4 and the function
; is nofree nosync. The expected output has a single load of %x placed in
; entry (VV_PRE) that feeds both the store in if.then and the return value;
; the original %uu and %vv loads are gone.
728define i32 @test15(ptr noalias nocapture readonly dereferenceable(8) align 4 %x, ptr noalias nocapture %r, i32 %a) nofree nosync {
729; CHECK-LABEL: @test15(
730; CHECK-NEXT:  entry:
731; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[A:%.*]], 0
732; CHECK-NEXT:    [[VV_PRE:%.*]] = load i32, ptr [[X:%.*]], align 4
733; CHECK-NEXT:    br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
734; CHECK:       if.then:
735; CHECK-NEXT:    store i32 [[VV_PRE]], ptr [[R:%.*]], align 4
736; CHECK-NEXT:    br label [[IF_END]]
737; CHECK:       if.end:
738; CHECK-NEXT:    call void @f()
739; CHECK-NEXT:    ret i32 [[VV_PRE]]
740;
741
742entry:
743  %tobool = icmp eq i32 %a, 0
744  br i1 %tobool, label %if.end, label %if.then
745
746
747if.then:
748  %uu = load i32, ptr %x, align 4
749  store i32 %uu, ptr %r, align 4
750  br label %if.end
751
752
753if.end:
754  call void @f()
755  %vv = load i32, ptr %x, align 4
756  ret i32 %vv
757
758
759}
760
761; Same as test14, but %x here is dereferenceable. A pointer that is
762; dereferenceable can be loaded from speculatively without a risk of trapping.
763; Since it is OK to speculate, PRE is allowed.
764
; test14's control flow with test15's attributes. The expected output matches
; test15: one load of %x in entry (VV_PRE) used by the store and the return,
; with follow_1/follow_2 no longer present as separate blocks.
765define i32 @test16(ptr noalias nocapture readonly dereferenceable(8) align 4 %x, ptr noalias nocapture %r, i32 %a) nofree nosync {
766; CHECK-LABEL: @test16(
767; CHECK-NEXT:  entry:
768; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[A:%.*]], 0
769; CHECK-NEXT:    [[VV_PRE:%.*]] = load i32, ptr [[X:%.*]], align 4
770; CHECK-NEXT:    br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
771; CHECK:       if.then:
772; CHECK-NEXT:    store i32 [[VV_PRE]], ptr [[R:%.*]], align 4
773; CHECK-NEXT:    br label [[IF_END]]
774; CHECK:       if.end:
775; CHECK-NEXT:    call void @f()
776; CHECK-NEXT:    ret i32 [[VV_PRE]]
777;
778
779entry:
780  %tobool = icmp eq i32 %a, 0
781  br i1 %tobool, label %if.end, label %if.then
782
783
784if.then:
785  %uu = load i32, ptr %x, align 4
786  store i32 %uu, ptr %r, align 4
787  br label %if.end
788
789
790if.end:
791  call void @f()
792  br label %follow_1
793
794
795follow_1:
796  br label %follow_2
797
798follow_2:
799  %vv = load i32, ptr %x, align 4
800  ret i32 %vv
801}
802
; External predicates whose i1 results drive the branches in bb100 and bb200
; of test17.
803declare i1 @foo()
804declare i1 @bar()
805
806; %v3 is partially redundant, bb3 has multiple predecessors coming through
807; critical edges. The other successors of those predecessors have same loads.
808; We can move all loads into predecessors.
809
; bb3 is reached from bb2 (which stores %v2 to %p1) and over critical edges
; from bb100 and bb200, whose other successors (bb101, bb201) each load %p1.
; The expected output places one load after the call in bb100 and one after
; the call in bb200; those loads replace %v4 and %v5 and feed a three-way phi
; that replaces %v3 in bb3. No edge is split.
810define void @test17(ptr %p1, ptr %p2, ptr %p3, ptr %p4)
811; CHECK-LABEL: @test17(
812; CHECK-NEXT:  entry:
813; CHECK-NEXT:    [[V1:%.*]] = load i64, ptr [[P1:%.*]], align 8
814; CHECK-NEXT:    [[COND1:%.*]] = icmp sgt i64 [[V1]], 200
815; CHECK-NEXT:    br i1 [[COND1]], label [[BB200:%.*]], label [[BB1:%.*]]
816; CHECK:       bb1:
817; CHECK-NEXT:    [[COND2:%.*]] = icmp sgt i64 [[V1]], 100
818; CHECK-NEXT:    br i1 [[COND2]], label [[BB100:%.*]], label [[BB2:%.*]]
819; CHECK:       bb2:
820; CHECK-NEXT:    [[V2:%.*]] = add nsw i64 [[V1]], 1
821; CHECK-NEXT:    store i64 [[V2]], ptr [[P1]], align 8
822; CHECK-NEXT:    br label [[BB3:%.*]]
823; CHECK:       bb3:
824; CHECK-NEXT:    [[V3:%.*]] = phi i64 [ [[V3_PRE:%.*]], [[BB200]] ], [ [[V3_PRE1:%.*]], [[BB100]] ], [ [[V2]], [[BB2]] ]
825; CHECK-NEXT:    store i64 [[V3]], ptr [[P2:%.*]], align 8
826; CHECK-NEXT:    ret void
827; CHECK:       bb100:
828; CHECK-NEXT:    [[COND3:%.*]] = call i1 @foo()
829; CHECK-NEXT:    [[V3_PRE1]] = load i64, ptr [[P1]], align 8
830; CHECK-NEXT:    br i1 [[COND3]], label [[BB3]], label [[BB101:%.*]]
831; CHECK:       bb101:
832; CHECK-NEXT:    store i64 [[V3_PRE1]], ptr [[P3:%.*]], align 8
833; CHECK-NEXT:    ret void
834; CHECK:       bb200:
835; CHECK-NEXT:    [[COND4:%.*]] = call i1 @bar()
836; CHECK-NEXT:    [[V3_PRE]] = load i64, ptr [[P1]], align 8
837; CHECK-NEXT:    br i1 [[COND4]], label [[BB3]], label [[BB201:%.*]]
838; CHECK:       bb201:
839; CHECK-NEXT:    store i64 [[V3_PRE]], ptr [[P4:%.*]], align 8
840; CHECK-NEXT:    ret void
841;
842{
843entry:
844  %v1 = load i64, ptr %p1, align 8
845  %cond1 = icmp sgt i64 %v1, 200
846  br i1 %cond1, label %bb200, label %bb1
847
848bb1:
849  %cond2 = icmp sgt i64 %v1, 100
850  br i1 %cond2, label %bb100, label %bb2
851
852bb2:
853  %v2 = add nsw i64 %v1, 1
854  store i64 %v2, ptr %p1, align 8
855  br label %bb3
856
857bb3:
858  %v3 = load i64, ptr %p1, align 8
859  store i64 %v3, ptr %p2, align 8
860  ret void
861
862bb100:
863  %cond3 = call i1 @foo()
864  br i1 %cond3, label %bb3, label %bb101
865
866bb101:
867  %v4 = load i64, ptr %p1, align 8
868  store i64 %v4, ptr %p3, align 8
869  ret void
870
871bb200:
872  %cond4 = call i1 @bar()
873  br i1 %cond4, label %bb3, label %bb201
874
875bb201:
876  %v5 = load i64, ptr %p1, align 8
877  store i64 %v5, ptr %p4, align 8
878  ret void
879}
880
881; The output value from %if.then block is %dec, not loaded %v1.
882; So ValuesPerBlock[%if.then] should not be replaced when the load instruction
883; is moved to %entry.
; Expected output: one load of %p1 in entry (V2_PRE) replaces %v1, and the
; load in if.end becomes a phi of %dec (from if.then) and V2_PRE (from entry).
; The if.then incoming value must remain %dec because if.then overwrites %p1
; after reading it.
884define void @test18(i1 %cond, ptr %p1, ptr %p2) {
885; CHECK-LABEL: @test18(
886; CHECK-NEXT:  entry:
887; CHECK-NEXT:    [[V2_PRE:%.*]] = load i16, ptr [[P1:%.*]], align 2
888; CHECK-NEXT:    br i1 [[COND:%.*]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
889; CHECK:       if.then:
890; CHECK-NEXT:    [[DEC:%.*]] = add i16 [[V2_PRE]], -1
891; CHECK-NEXT:    store i16 [[DEC]], ptr [[P1]], align 2
892; CHECK-NEXT:    br label [[IF_END]]
893; CHECK:       if.end:
894; CHECK-NEXT:    [[V2:%.*]] = phi i16 [ [[DEC]], [[IF_THEN]] ], [ [[V2_PRE]], [[ENTRY:%.*]] ]
895; CHECK-NEXT:    store i16 [[V2]], ptr [[P2:%.*]], align 2
896; CHECK-NEXT:    ret void
897;
898entry:
899  br i1 %cond, label %if.end, label %if.then
900
901if.then:
902  %v1 = load i16, ptr %p1
903  %dec = add i16 %v1, -1
904  store i16 %dec, ptr %p1
905  br label %if.end
906
907if.end:
908  %v2 = load i16, ptr %p1
909  store i16 %v2, ptr %p2
910  ret void
911}
912
913; PRE of load instructions should not cross exception-handling instructions.
914define void @test19(i1 %cond, ptr %p1, ptr %p2)
915; CHECK-LABEL: @test19(
916; CHECK-NEXT:  entry:
917; CHECK-NEXT:    br i1 [[COND:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]]
918; CHECK:       then:
919; CHECK-NEXT:    [[V2:%.*]] = load i64, ptr [[P2:%.*]], align 8
920; CHECK-NEXT:    [[ADD:%.*]] = add i64 [[V2]], 1
921; CHECK-NEXT:    store i64 [[ADD]], ptr [[P1:%.*]], align 8
922; CHECK-NEXT:    br label [[END:%.*]]
923; CHECK:       else:
924; CHECK-NEXT:    invoke void @f()
925; CHECK-NEXT:            to label [[ELSE_END_CRIT_EDGE:%.*]] unwind label [[LPAD:%.*]]
926; CHECK:       else.end_crit_edge:
927; CHECK-NEXT:    [[V1_PRE:%.*]] = load i64, ptr [[P1]], align 8
928; CHECK-NEXT:    br label [[END]]
929; CHECK:       end:
930; CHECK-NEXT:    [[V1:%.*]] = phi i64 [ [[V1_PRE]], [[ELSE_END_CRIT_EDGE]] ], [ [[ADD]], [[THEN]] ]
931; CHECK-NEXT:    [[AND:%.*]] = and i64 [[V1]], 100
932; CHECK-NEXT:    store i64 [[AND]], ptr [[P2]], align 8
933; CHECK-NEXT:    ret void
934; CHECK:       lpad:
935; CHECK-NEXT:    [[LP:%.*]] = landingpad { ptr, i32 }
936; CHECK-NEXT:            cleanup
937; CHECK-NEXT:    [[V3:%.*]] = load i64, ptr [[P1]], align 8
938; CHECK-NEXT:    [[OR:%.*]] = or i64 [[V3]], 200
939; CHECK-NEXT:    store i64 [[OR]], ptr [[P1]], align 8
940; CHECK-NEXT:    resume { ptr, i32 } [[LP]]
941;
942  personality ptr @__CxxFrameHandler3 {
943entry:
944  br i1 %cond, label %then, label %else
945
946then:
947  %v2 = load i64, ptr %p2
948  %add = add i64 %v2, 1
949  store i64 %add, ptr %p1        ; value of %p1 on the %then path is %add
950  br label %end
951
952else:
953  invoke void @f()
954  to label %end unwind label %lpad   ; the normal edge is expected to be split; the PRE load goes in the new block, not above the invoke
955
956end:
957  %v1 = load i64, ptr %p1        ; expected to become a phi of %add and the load on the split edge
958  %and = and i64 %v1, 100
959  store i64 %and, ptr %p2
960  ret void
961
962lpad:
963  %lp = landingpad { ptr, i32 }
964  cleanup
965  %v3 = load i64, ptr %p1        ; expected to stay in %lpad (not hoisted above the invoke)
966  %or = or i64 %v3, 200
967  store i64 %or, ptr %p1
968  resume { ptr, i32 } %lp
969}
970
971; A predecessor BB has both of its successor edges going to the same BB; for
972; simplicity we don't handle this case, so nothing should be changed.
973define void @test20(i1 %cond, i1 %cond2, ptr %p1, ptr %p2) {
974; CHECK-LABEL: @test20(
975; CHECK-NEXT:  entry:
976; CHECK-NEXT:    br i1 [[COND:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
977; CHECK:       if.then:
978; CHECK-NEXT:    [[V1:%.*]] = load i16, ptr [[P1:%.*]], align 2
979; CHECK-NEXT:    [[DEC:%.*]] = add i16 [[V1]], -1
980; CHECK-NEXT:    store i16 [[DEC]], ptr [[P1]], align 2
981; CHECK-NEXT:    br label [[IF_END:%.*]]
982; CHECK:       if.else:
983; CHECK-NEXT:    br i1 [[COND2:%.*]], label [[IF_END]], label [[IF_END]]
984; CHECK:       if.end:
985; CHECK-NEXT:    [[V2:%.*]] = load i16, ptr [[P1]], align 2
986; CHECK-NEXT:    store i16 [[V2]], ptr [[P2:%.*]], align 2
987; CHECK-NEXT:    ret void
988;
989entry:
990  br i1 %cond, label %if.then, label %if.else
991
992if.then:
993  %v1 = load i16, ptr %p1
994  %dec = add i16 %v1, -1
995  store i16 %dec, ptr %p1
996  br label %if.end
997
998if.else:
999  br i1 %cond2, label %if.end, label %if.end   ; both successors are %if.end
1000
1001if.end:
1002  %v2 = load i16, ptr %p1        ; expected to be left in place (no PRE, no phi)
1003  store i16 %v2, ptr %p2
1004  ret void
1005}
1006
1007; Several edges (here from a switch) go from the same BB to LoadBB. Nothing should be changed.
1008define void @test21(i1 %cond, i32 %code, ptr %p1, ptr %p2) {
1009; CHECK-LABEL: @test21(
1010; CHECK-NEXT:  entry:
1011; CHECK-NEXT:    br i1 [[COND:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
1012; CHECK:       if.then:
1013; CHECK-NEXT:    [[V1:%.*]] = load i16, ptr [[P1:%.*]], align 2
1014; CHECK-NEXT:    [[DEC:%.*]] = add i16 [[V1]], -1
1015; CHECK-NEXT:    store i16 [[DEC]], ptr [[P1]], align 2
1016; CHECK-NEXT:    br label [[IF_END:%.*]]
1017; CHECK:       if.else:
1018; CHECK-NEXT:    switch i32 [[CODE:%.*]], label [[IF_END]] [
1019; CHECK-NEXT:      i32 1, label [[IF_END]]
1020; CHECK-NEXT:      i32 2, label [[IF_END]]
1021; CHECK-NEXT:      i32 3, label [[IF_END]]
1022; CHECK-NEXT:    ]
1023; CHECK:       if.end:
1024; CHECK-NEXT:    [[V2:%.*]] = load i16, ptr [[P1]], align 2
1025; CHECK-NEXT:    store i16 [[V2]], ptr [[P2:%.*]], align 2
1026; CHECK-NEXT:    ret void
1027;
1028entry:
1029  br i1 %cond, label %if.then, label %if.else
1030
1031if.then:
1032  %v1 = load i16, ptr %p1
1033  %dec = add i16 %v1, -1
1034  store i16 %dec, ptr %p1
1035  br label %if.end
1036
1037if.else:
1038  switch i32 %code, label %if.end [   ; default and every case target %if.end
1039  i32 1, label %if.end
1040  i32 2, label %if.end
1041  i32 3, label %if.end
1042  ]
1043
1044if.end:
1045  %v2 = load i16, ptr %p1        ; expected to be left in place (no PRE, no phi)
1046  store i16 %v2, ptr %p2
1047  ret void
1048}
1049
1050; The call to function @maybethrow may throw an exception, so the load of %v3
1051; can't be hoisted to block %if.else.
1052define void @test22(i1 %cond, ptr %p1, ptr %p2) {
1053; CHECK-LABEL: @test22(
1054; CHECK-NEXT:  entry:
1055; CHECK-NEXT:    br i1 [[COND:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
1056; CHECK:       if.then:
1057; CHECK-NEXT:    [[V1:%.*]] = load i64, ptr [[P1:%.*]], align 8
1058; CHECK-NEXT:    [[DEC:%.*]] = add i64 [[V1]], -1
1059; CHECK-NEXT:    store i64 [[DEC]], ptr [[P1]], align 8
1060; CHECK-NEXT:    br label [[IF_END:%.*]]
1061; CHECK:       if.end:
1062; CHECK-NEXT:    [[V2:%.*]] = phi i64 [ [[V2_PRE:%.*]], [[IF_ELSE_IF_END_CRIT_EDGE:%.*]] ], [ [[DEC]], [[IF_THEN]] ]
1063; CHECK-NEXT:    store i64 [[V2]], ptr [[P2:%.*]], align 8
1064; CHECK-NEXT:    ret void
1065; CHECK:       if.else:
1066; CHECK-NEXT:    [[COND2:%.*]] = call i1 @foo()
1067; CHECK-NEXT:    br i1 [[COND2]], label [[IF_ELSE_IF_END_CRIT_EDGE]], label [[EXIT:%.*]]
1068; CHECK:       if.else.if.end_crit_edge:
1069; CHECK-NEXT:    [[V2_PRE]] = load i64, ptr [[P1]], align 8
1070; CHECK-NEXT:    br label [[IF_END]]
1071; CHECK:       exit:
1072; CHECK-NEXT:    [[_:%.*]] = call i1 @maybethrow()
1073; CHECK-NEXT:    [[V3:%.*]] = load i64, ptr [[P1]], align 8
1074; CHECK-NEXT:    store i64 [[V3]], ptr [[P2]], align 8
1075; CHECK-NEXT:    ret void
1076;
1077entry:
1078  br i1 %cond, label %if.then, label %if.else
1079
1080if.then:
1081  %v1 = load i64, ptr %p1
1082  %dec = add i64 %v1, -1
1083  store i64 %dec, ptr %p1        ; value of %p1 on the %if.then path is %dec
1084  br label %if.end
1085
1086if.end:
1087  %v2 = load i64, ptr %p1        ; expected to become a phi of %dec and a load on the split %if.else -> %if.end edge
1088  store i64 %v2, ptr %p2
1089  ret void
1090
1091if.else:
1092  %cond2 = call i1 @foo()
1093  br i1 %cond2, label %if.end, label %exit   ; the edge to %if.end is expected to be split to hold the PRE load
1094
1095exit:
1096  %_ = call i1 @maybethrow()     ; executes before the load of %v3 below
1097  %v3 = load i64, ptr %p1        ; expected to stay in %exit, after the call
1098  store i64 %v3, ptr %p2
1099  ret void
1100}
1101
1102declare void @maybethrow() readnone   ; NOTE(review): declared as returning void, but @test22 calls it as `call i1 @maybethrow()`; confirm the mismatch is intentional
1103@B = external global i64, align 8     ; external global that @test23 loads from and stores to
1104
1105; When BB in ValuesPerBlock(BB, OldLoad) is not OldLoad->getParent(), it should
1106; also be replaced by ValuesPerBlock(BB, NewLoad), so that the deleted OldLoad
1107; is not used by a later PHI instruction.
1108define void @test23(i1 %cond1, i1 %cond2) {
1109; CHECK-LABEL: @test23(
1110; CHECK-NEXT:  entry:
1111; CHECK-NEXT:    [[G:%.*]] = alloca i64, align 8
1112; CHECK-NEXT:    [[VAL1_PRE:%.*]] = load i64, ptr @B, align 8
1113; CHECK-NEXT:    br i1 [[COND2:%.*]], label [[THEN:%.*]], label [[WRONG:%.*]]
1114; CHECK:       then:
1115; CHECK-NEXT:    br i1 [[COND1:%.*]], label [[STORE:%.*]], label [[EXIT:%.*]]
1116; CHECK:       store:
1117; CHECK-NEXT:    store i64 [[VAL1_PRE]], ptr @B, align 8
1118; CHECK-NEXT:    br label [[WRONG]]
1119; CHECK:       wrong:
1120; CHECK-NEXT:    store i64 [[VAL1_PRE]], ptr [[G]], align 8
1121; CHECK-NEXT:    ret void
1122; CHECK:       exit:
1123; CHECK-NEXT:    ret void
1124;
1125entry:
1126  %G = alloca i64, align 8
1127  br i1 %cond2, label %then, label %wrong
1128
1129then:
1130  %val2 = load i64, ptr @B, align 8   ; expected to be replaced by a single load of @B placed in %entry
1131  br i1 %cond1, label %store, label %exit
1132
1133store:
1134  store i64 %val2, ptr @B, align 8    ; stores the loaded value back to @B; expected to use the %entry load
1135  br label %wrong
1136
1137wrong:
1138  %val1 = load i64, ptr @B, align 8   ; expected to be replaced by the %entry load directly, with no phi
1139  store i64 %val1, ptr %G, align 8
1140  ret void
1141
1142exit:
1143  ret void
1144}
1145