xref: /llvm-project/llvm/test/Transforms/LoopUnroll/unroll-loads-cse.ll (revision 175d2971020ceaad3e1adcf9bb92e4ebaaa449ee)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2; RUN: opt -p loop-unroll -S %s | FileCheck %s
3
4target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
5
; Positive case. The loop latch carries !1, which requests an unroll count of
; 2. Both loads index i64 elements off %src: %l.12 starts at byte offset 12 and
; %l.4 at byte offset 4, so %l.4 for index %iv + 1 reads the same address as
; %l.12 for index %iv. In the expected output the second unrolled copy has no
; load from %src.4; its multiply reuses the first copy's %l.12 instead.
; %dst is noalias here (compare @cse_load_may_be_clobbered, where it is not).
6define void @cse_matching_load_from_previous_unrolled_iteration(ptr %src, ptr noalias %dst, i64 %N) {
7; CHECK-LABEL: define void @cse_matching_load_from_previous_unrolled_iteration(
8; CHECK-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]], i64 [[N:%.*]]) {
9; CHECK-NEXT:  entry:
10; CHECK-NEXT:    [[SRC_4:%.*]] = getelementptr i8, ptr [[SRC]], i64 4
11; CHECK-NEXT:    [[SRC_12:%.*]] = getelementptr i8, ptr [[SRC]], i64 12
12; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[N]], -1
13; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[N]], 1
14; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1
15; CHECK-NEXT:    br i1 [[TMP1]], label [[EXIT_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]]
16; CHECK:       entry.new:
17; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
18; CHECK-NEXT:    br label [[LOOP:%.*]]
19; CHECK:       loop:
20; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[IV_NEXT_1:%.*]], [[LOOP]] ]
21; CHECK-NEXT:    [[NITER:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[NITER_NEXT_1:%.*]], [[LOOP]] ]
22; CHECK-NEXT:    [[GEP_SRC_12:%.*]] = getelementptr i64, ptr [[SRC_12]], i64 [[IV]]
23; CHECK-NEXT:    [[L_12:%.*]] = load i64, ptr [[GEP_SRC_12]], align 8
24; CHECK-NEXT:    [[GEP_SRC_4:%.*]] = getelementptr i64, ptr [[SRC_4]], i64 [[IV]]
25; CHECK-NEXT:    [[L_4:%.*]] = load i64, ptr [[GEP_SRC_4]], align 8
26; CHECK-NEXT:    [[MUL:%.*]] = mul i64 [[L_12]], [[L_4]]
27; CHECK-NEXT:    [[GEP_DST:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IV]]
28; CHECK-NEXT:    store i64 [[MUL]], ptr [[GEP_DST]], align 8
29; CHECK-NEXT:    [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
30; CHECK-NEXT:    [[GEP_SRC_12_1:%.*]] = getelementptr i64, ptr [[SRC_12]], i64 [[IV_NEXT]]
31; CHECK-NEXT:    [[L_12_1:%.*]] = load i64, ptr [[GEP_SRC_12_1]], align 8
32; CHECK-NEXT:    [[MUL_1:%.*]] = mul i64 [[L_12_1]], [[L_12]]
33; CHECK-NEXT:    [[GEP_DST_1:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IV_NEXT]]
34; CHECK-NEXT:    store i64 [[MUL_1]], ptr [[GEP_DST_1]], align 8
35; CHECK-NEXT:    [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2
36; CHECK-NEXT:    [[NITER_NEXT_1]] = add i64 [[NITER]], 2
37; CHECK-NEXT:    [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
38; CHECK-NEXT:    br i1 [[NITER_NCMP_1]], label [[EXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP0:![0-9]+]]
39; CHECK:       exit.unr-lcssa.loopexit:
40; CHECK-NEXT:    [[IV_UNR_PH:%.*]] = phi i64 [ [[IV_NEXT_1]], [[LOOP]] ]
41; CHECK-NEXT:    br label [[EXIT_UNR_LCSSA]]
42; CHECK:       exit.unr-lcssa:
43; CHECK-NEXT:    [[IV_UNR:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_UNR_PH]], [[EXIT_UNR_LCSSA_LOOPEXIT]] ]
44; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
45; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[LOOP_EPIL_PREHEADER:%.*]], label [[EXIT:%.*]]
46; CHECK:       loop.epil.preheader:
47; CHECK-NEXT:    br label [[LOOP_EPIL:%.*]]
48; CHECK:       loop.epil:
49; CHECK-NEXT:    [[GEP_SRC_12_EPIL:%.*]] = getelementptr i64, ptr [[SRC_12]], i64 [[IV_UNR]]
50; CHECK-NEXT:    [[L_12_EPIL:%.*]] = load i64, ptr [[GEP_SRC_12_EPIL]], align 8
51; CHECK-NEXT:    [[GEP_SRC_4_EPIL:%.*]] = getelementptr i64, ptr [[SRC_4]], i64 [[IV_UNR]]
52; CHECK-NEXT:    [[L_4_EPIL:%.*]] = load i64, ptr [[GEP_SRC_4_EPIL]], align 8
53; CHECK-NEXT:    [[MUL_EPIL:%.*]] = mul i64 [[L_12_EPIL]], [[L_4_EPIL]]
54; CHECK-NEXT:    [[GEP_DST_EPIL:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IV_UNR]]
55; CHECK-NEXT:    store i64 [[MUL_EPIL]], ptr [[GEP_DST_EPIL]], align 8
56; CHECK-NEXT:    br label [[EXIT]]
57; CHECK:       exit:
58; CHECK-NEXT:    ret void
59;
60entry:
; Two byte-offset views of %src, 8 bytes (one i64 element) apart.
61  %src.4 = getelementptr i8, ptr %src, i64 4
62  %src.12 = getelementptr i8, ptr %src, i64 12
63  br label %loop
64
65loop:
66  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
67  %gep.src.12 = getelementptr i64, ptr %src.12, i64 %iv
68  %l.12 = load i64, ptr %gep.src.12, align 8
69  %gep.src.4 = getelementptr i64, ptr %src.4, i64 %iv
; For index %iv + 1 this reads the address %l.12 read for index %iv.
70  %l.4 = load i64, ptr %gep.src.4, align 8
71  %mul = mul i64 %l.12, %l.4
72  %gep.dst = getelementptr i64, ptr %dst, i64 %iv
73  store i64 %mul, ptr %gep.dst
74  %iv.next = add nuw nsw i64 %iv, 1
75  %c = icmp eq i64 %iv.next, %N
76  br i1 %c, label %exit, label %loop, !llvm.loop !1
77
78exit:
79  ret void
80}
81
; Negative case: same address pattern as
; @cse_matching_load_from_previous_unrolled_iteration, but %l.12 is loaded as
; i32 (then zero-extended) while %l.4 is loaded as i64. In the expected output
; the second unrolled copy still contains its own i64 load from %src.4, i.e.
; the differently-typed load of the previous iteration is not reused.
82define void @cse_different_load_types(ptr %src, ptr noalias %dst, i64 %N) {
83; CHECK-LABEL: define void @cse_different_load_types(
84; CHECK-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]], i64 [[N:%.*]]) {
85; CHECK-NEXT:  entry:
86; CHECK-NEXT:    [[SRC_4:%.*]] = getelementptr i8, ptr [[SRC]], i64 4
87; CHECK-NEXT:    [[SRC_12:%.*]] = getelementptr i8, ptr [[SRC]], i64 12
88; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[N]], -1
89; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[N]], 1
90; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1
91; CHECK-NEXT:    br i1 [[TMP1]], label [[EXIT_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]]
92; CHECK:       entry.new:
93; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
94; CHECK-NEXT:    br label [[LOOP:%.*]]
95; CHECK:       loop:
96; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[IV_NEXT_1:%.*]], [[LOOP]] ]
97; CHECK-NEXT:    [[NITER:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[NITER_NEXT_1:%.*]], [[LOOP]] ]
98; CHECK-NEXT:    [[GEP_SRC_12:%.*]] = getelementptr i64, ptr [[SRC_12]], i64 [[IV]]
99; CHECK-NEXT:    [[L_12:%.*]] = load i32, ptr [[GEP_SRC_12]], align 8
100; CHECK-NEXT:    [[L_12_EXT:%.*]] = zext i32 [[L_12]] to i64
101; CHECK-NEXT:    [[GEP_SRC_4:%.*]] = getelementptr i64, ptr [[SRC_4]], i64 [[IV]]
102; CHECK-NEXT:    [[L_4:%.*]] = load i64, ptr [[GEP_SRC_4]], align 8
103; CHECK-NEXT:    [[MUL:%.*]] = mul i64 [[L_12_EXT]], [[L_4]]
104; CHECK-NEXT:    [[GEP_DST:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IV]]
105; CHECK-NEXT:    store i64 [[MUL]], ptr [[GEP_DST]], align 8
106; CHECK-NEXT:    [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
107; CHECK-NEXT:    [[GEP_SRC_12_1:%.*]] = getelementptr i64, ptr [[SRC_12]], i64 [[IV_NEXT]]
108; CHECK-NEXT:    [[L_12_1:%.*]] = load i32, ptr [[GEP_SRC_12_1]], align 8
109; CHECK-NEXT:    [[L_12_EXT_1:%.*]] = zext i32 [[L_12_1]] to i64
110; CHECK-NEXT:    [[GEP_SRC_4_1:%.*]] = getelementptr i64, ptr [[SRC_4]], i64 [[IV_NEXT]]
111; CHECK-NEXT:    [[L_4_1:%.*]] = load i64, ptr [[GEP_SRC_4_1]], align 8
112; CHECK-NEXT:    [[MUL_1:%.*]] = mul i64 [[L_12_EXT_1]], [[L_4_1]]
113; CHECK-NEXT:    [[GEP_DST_1:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IV_NEXT]]
114; CHECK-NEXT:    store i64 [[MUL_1]], ptr [[GEP_DST_1]], align 8
115; CHECK-NEXT:    [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2
116; CHECK-NEXT:    [[NITER_NEXT_1]] = add i64 [[NITER]], 2
117; CHECK-NEXT:    [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
118; CHECK-NEXT:    br i1 [[NITER_NCMP_1]], label [[EXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
119; CHECK:       exit.unr-lcssa.loopexit:
120; CHECK-NEXT:    [[IV_UNR_PH:%.*]] = phi i64 [ [[IV_NEXT_1]], [[LOOP]] ]
121; CHECK-NEXT:    br label [[EXIT_UNR_LCSSA]]
122; CHECK:       exit.unr-lcssa:
123; CHECK-NEXT:    [[IV_UNR:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_UNR_PH]], [[EXIT_UNR_LCSSA_LOOPEXIT]] ]
124; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
125; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[LOOP_EPIL_PREHEADER:%.*]], label [[EXIT:%.*]]
126; CHECK:       loop.epil.preheader:
127; CHECK-NEXT:    br label [[LOOP_EPIL:%.*]]
128; CHECK:       loop.epil:
129; CHECK-NEXT:    [[GEP_SRC_12_EPIL:%.*]] = getelementptr i64, ptr [[SRC_12]], i64 [[IV_UNR]]
130; CHECK-NEXT:    [[L_12_EPIL:%.*]] = load i32, ptr [[GEP_SRC_12_EPIL]], align 8
131; CHECK-NEXT:    [[L_12_EXT_EPIL:%.*]] = zext i32 [[L_12_EPIL]] to i64
132; CHECK-NEXT:    [[GEP_SRC_4_EPIL:%.*]] = getelementptr i64, ptr [[SRC_4]], i64 [[IV_UNR]]
133; CHECK-NEXT:    [[L_4_EPIL:%.*]] = load i64, ptr [[GEP_SRC_4_EPIL]], align 8
134; CHECK-NEXT:    [[MUL_EPIL:%.*]] = mul i64 [[L_12_EXT_EPIL]], [[L_4_EPIL]]
135; CHECK-NEXT:    [[GEP_DST_EPIL:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IV_UNR]]
136; CHECK-NEXT:    store i64 [[MUL_EPIL]], ptr [[GEP_DST_EPIL]], align 8
137; CHECK-NEXT:    br label [[EXIT]]
138; CHECK:       exit:
139; CHECK-NEXT:    ret void
140;
141entry:
142  %src.4 = getelementptr i8, ptr %src, i64 4
143  %src.12 = getelementptr i8, ptr %src, i64 12
144  br label %loop
145
146loop:
147  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
148  %gep.src.12 = getelementptr i64, ptr %src.12, i64 %iv
; Narrower (i32) access than the i64 load of %l.4 below.
149  %l.12 = load i32, ptr %gep.src.12, align 8
150  %l.12.ext = zext i32 %l.12 to i64
151  %gep.src.4 = getelementptr i64, ptr %src.4, i64 %iv
152  %l.4 = load i64, ptr %gep.src.4, align 8
153  %mul = mul i64 %l.12.ext, %l.4
154  %gep.dst = getelementptr i64, ptr %dst, i64 %iv
155  store i64 %mul, ptr %gep.dst
156  %iv.next = add nuw nsw i64 %iv, 1
157  %c = icmp eq i64 %iv.next, %N
158  br i1 %c, label %exit, label %loop, !llvm.loop !1
159
160exit:
161  ret void
162}
163
; Negative case: same address pattern as
; @cse_matching_load_from_previous_unrolled_iteration, but %l.4 is a volatile
; load. In the expected output every copy of the loop body (both unrolled
; copies and the epilogue) keeps its own volatile load from %src.4.
164define void @cse_volatile_loads(ptr %src, ptr noalias %dst, i64 %N) {
165; CHECK-LABEL: define void @cse_volatile_loads(
166; CHECK-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]], i64 [[N:%.*]]) {
167; CHECK-NEXT:  entry:
168; CHECK-NEXT:    [[SRC_4:%.*]] = getelementptr i8, ptr [[SRC]], i64 4
169; CHECK-NEXT:    [[SRC_12:%.*]] = getelementptr i8, ptr [[SRC]], i64 12
170; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[N]], -1
171; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[N]], 1
172; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1
173; CHECK-NEXT:    br i1 [[TMP1]], label [[EXIT_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]]
174; CHECK:       entry.new:
175; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
176; CHECK-NEXT:    br label [[LOOP:%.*]]
177; CHECK:       loop:
178; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[IV_NEXT_1:%.*]], [[LOOP]] ]
179; CHECK-NEXT:    [[NITER:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[NITER_NEXT_1:%.*]], [[LOOP]] ]
180; CHECK-NEXT:    [[GEP_SRC_12:%.*]] = getelementptr i64, ptr [[SRC_12]], i64 [[IV]]
181; CHECK-NEXT:    [[L_12:%.*]] = load i64, ptr [[GEP_SRC_12]], align 8
182; CHECK-NEXT:    [[GEP_SRC_4:%.*]] = getelementptr i64, ptr [[SRC_4]], i64 [[IV]]
183; CHECK-NEXT:    [[L_4:%.*]] = load volatile i64, ptr [[GEP_SRC_4]], align 8
184; CHECK-NEXT:    [[MUL:%.*]] = mul i64 [[L_12]], [[L_4]]
185; CHECK-NEXT:    [[GEP_DST:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IV]]
186; CHECK-NEXT:    store i64 [[MUL]], ptr [[GEP_DST]], align 8
187; CHECK-NEXT:    [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
188; CHECK-NEXT:    [[GEP_SRC_12_1:%.*]] = getelementptr i64, ptr [[SRC_12]], i64 [[IV_NEXT]]
189; CHECK-NEXT:    [[L_12_1:%.*]] = load i64, ptr [[GEP_SRC_12_1]], align 8
190; CHECK-NEXT:    [[GEP_SRC_4_1:%.*]] = getelementptr i64, ptr [[SRC_4]], i64 [[IV_NEXT]]
191; CHECK-NEXT:    [[L_4_1:%.*]] = load volatile i64, ptr [[GEP_SRC_4_1]], align 8
192; CHECK-NEXT:    [[MUL_1:%.*]] = mul i64 [[L_12_1]], [[L_4_1]]
193; CHECK-NEXT:    [[GEP_DST_1:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IV_NEXT]]
194; CHECK-NEXT:    store i64 [[MUL_1]], ptr [[GEP_DST_1]], align 8
195; CHECK-NEXT:    [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2
196; CHECK-NEXT:    [[NITER_NEXT_1]] = add i64 [[NITER]], 2
197; CHECK-NEXT:    [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
198; CHECK-NEXT:    br i1 [[NITER_NCMP_1]], label [[EXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP4:![0-9]+]]
199; CHECK:       exit.unr-lcssa.loopexit:
200; CHECK-NEXT:    [[IV_UNR_PH:%.*]] = phi i64 [ [[IV_NEXT_1]], [[LOOP]] ]
201; CHECK-NEXT:    br label [[EXIT_UNR_LCSSA]]
202; CHECK:       exit.unr-lcssa:
203; CHECK-NEXT:    [[IV_UNR:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_UNR_PH]], [[EXIT_UNR_LCSSA_LOOPEXIT]] ]
204; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
205; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[LOOP_EPIL_PREHEADER:%.*]], label [[EXIT:%.*]]
206; CHECK:       loop.epil.preheader:
207; CHECK-NEXT:    br label [[LOOP_EPIL:%.*]]
208; CHECK:       loop.epil:
209; CHECK-NEXT:    [[GEP_SRC_12_EPIL:%.*]] = getelementptr i64, ptr [[SRC_12]], i64 [[IV_UNR]]
210; CHECK-NEXT:    [[L_12_EPIL:%.*]] = load i64, ptr [[GEP_SRC_12_EPIL]], align 8
211; CHECK-NEXT:    [[GEP_SRC_4_EPIL:%.*]] = getelementptr i64, ptr [[SRC_4]], i64 [[IV_UNR]]
212; CHECK-NEXT:    [[L_4_EPIL:%.*]] = load volatile i64, ptr [[GEP_SRC_4_EPIL]], align 8
213; CHECK-NEXT:    [[MUL_EPIL:%.*]] = mul i64 [[L_12_EPIL]], [[L_4_EPIL]]
214; CHECK-NEXT:    [[GEP_DST_EPIL:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IV_UNR]]
215; CHECK-NEXT:    store i64 [[MUL_EPIL]], ptr [[GEP_DST_EPIL]], align 8
216; CHECK-NEXT:    br label [[EXIT]]
217; CHECK:       exit:
218; CHECK-NEXT:    ret void
219;
220entry:
221  %src.4 = getelementptr i8, ptr %src, i64 4
222  %src.12 = getelementptr i8, ptr %src, i64 12
223  br label %loop
224
225loop:
226  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
227  %gep.src.12 = getelementptr i64, ptr %src.12, i64 %iv
228  %l.12 = load i64, ptr %gep.src.12, align 8
229  %gep.src.4 = getelementptr i64, ptr %src.4, i64 %iv
; Volatile access: must still appear once per loop-body copy after unrolling.
230  %l.4 = load volatile i64, ptr %gep.src.4, align 8
231  %mul = mul i64 %l.12, %l.4
232  %gep.dst = getelementptr i64, ptr %dst, i64 %iv
233  store i64 %mul, ptr %gep.dst
234  %iv.next = add nuw nsw i64 %iv, 1
235  %c = icmp eq i64 %iv.next, %N
236  br i1 %c, label %exit, label %loop, !llvm.loop !1
237
238exit:
239  ret void
240}
241
; Negative case: same address pattern as
; @cse_matching_load_from_previous_unrolled_iteration, but %l.4 is an atomic
; (unordered) load. In the expected output every copy of the loop body keeps
; its own atomic load from %src.4.
242define void @cse_atomic_loads(ptr %src, ptr noalias %dst, i64 %N) {
243; CHECK-LABEL: define void @cse_atomic_loads(
244; CHECK-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]], i64 [[N:%.*]]) {
245; CHECK-NEXT:  entry:
246; CHECK-NEXT:    [[SRC_4:%.*]] = getelementptr i8, ptr [[SRC]], i64 4
247; CHECK-NEXT:    [[SRC_12:%.*]] = getelementptr i8, ptr [[SRC]], i64 12
248; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[N]], -1
249; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[N]], 1
250; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1
251; CHECK-NEXT:    br i1 [[TMP1]], label [[EXIT_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]]
252; CHECK:       entry.new:
253; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
254; CHECK-NEXT:    br label [[LOOP:%.*]]
255; CHECK:       loop:
256; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[IV_NEXT_1:%.*]], [[LOOP]] ]
257; CHECK-NEXT:    [[NITER:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[NITER_NEXT_1:%.*]], [[LOOP]] ]
258; CHECK-NEXT:    [[GEP_SRC_12:%.*]] = getelementptr i64, ptr [[SRC_12]], i64 [[IV]]
259; CHECK-NEXT:    [[L_12:%.*]] = load i64, ptr [[GEP_SRC_12]], align 8
260; CHECK-NEXT:    [[GEP_SRC_4:%.*]] = getelementptr i64, ptr [[SRC_4]], i64 [[IV]]
261; CHECK-NEXT:    [[L_4:%.*]] = load atomic i64, ptr [[GEP_SRC_4]] unordered, align 8
262; CHECK-NEXT:    [[MUL:%.*]] = mul i64 [[L_12]], [[L_4]]
263; CHECK-NEXT:    [[GEP_DST:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IV]]
264; CHECK-NEXT:    store i64 [[MUL]], ptr [[GEP_DST]], align 8
265; CHECK-NEXT:    [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
266; CHECK-NEXT:    [[GEP_SRC_12_1:%.*]] = getelementptr i64, ptr [[SRC_12]], i64 [[IV_NEXT]]
267; CHECK-NEXT:    [[L_12_1:%.*]] = load i64, ptr [[GEP_SRC_12_1]], align 8
268; CHECK-NEXT:    [[GEP_SRC_4_1:%.*]] = getelementptr i64, ptr [[SRC_4]], i64 [[IV_NEXT]]
269; CHECK-NEXT:    [[L_4_1:%.*]] = load atomic i64, ptr [[GEP_SRC_4_1]] unordered, align 8
270; CHECK-NEXT:    [[MUL_1:%.*]] = mul i64 [[L_12_1]], [[L_4_1]]
271; CHECK-NEXT:    [[GEP_DST_1:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IV_NEXT]]
272; CHECK-NEXT:    store i64 [[MUL_1]], ptr [[GEP_DST_1]], align 8
273; CHECK-NEXT:    [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2
274; CHECK-NEXT:    [[NITER_NEXT_1]] = add i64 [[NITER]], 2
275; CHECK-NEXT:    [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
276; CHECK-NEXT:    br i1 [[NITER_NCMP_1]], label [[EXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
277; CHECK:       exit.unr-lcssa.loopexit:
278; CHECK-NEXT:    [[IV_UNR_PH:%.*]] = phi i64 [ [[IV_NEXT_1]], [[LOOP]] ]
279; CHECK-NEXT:    br label [[EXIT_UNR_LCSSA]]
280; CHECK:       exit.unr-lcssa:
281; CHECK-NEXT:    [[IV_UNR:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_UNR_PH]], [[EXIT_UNR_LCSSA_LOOPEXIT]] ]
282; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
283; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[LOOP_EPIL_PREHEADER:%.*]], label [[EXIT:%.*]]
284; CHECK:       loop.epil.preheader:
285; CHECK-NEXT:    br label [[LOOP_EPIL:%.*]]
286; CHECK:       loop.epil:
287; CHECK-NEXT:    [[GEP_SRC_12_EPIL:%.*]] = getelementptr i64, ptr [[SRC_12]], i64 [[IV_UNR]]
288; CHECK-NEXT:    [[L_12_EPIL:%.*]] = load i64, ptr [[GEP_SRC_12_EPIL]], align 8
289; CHECK-NEXT:    [[GEP_SRC_4_EPIL:%.*]] = getelementptr i64, ptr [[SRC_4]], i64 [[IV_UNR]]
290; CHECK-NEXT:    [[L_4_EPIL:%.*]] = load atomic i64, ptr [[GEP_SRC_4_EPIL]] unordered, align 8
291; CHECK-NEXT:    [[MUL_EPIL:%.*]] = mul i64 [[L_12_EPIL]], [[L_4_EPIL]]
292; CHECK-NEXT:    [[GEP_DST_EPIL:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IV_UNR]]
293; CHECK-NEXT:    store i64 [[MUL_EPIL]], ptr [[GEP_DST_EPIL]], align 8
294; CHECK-NEXT:    br label [[EXIT]]
295; CHECK:       exit:
296; CHECK-NEXT:    ret void
297;
298entry:
299  %src.4 = getelementptr i8, ptr %src, i64 4
300  %src.12 = getelementptr i8, ptr %src, i64 12
301  br label %loop
302
303loop:
304  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
305  %gep.src.12 = getelementptr i64, ptr %src.12, i64 %iv
306  %l.12 = load i64, ptr %gep.src.12, align 8
307  %gep.src.4 = getelementptr i64, ptr %src.4, i64 %iv
; Atomic (unordered) access: expected to survive in every loop-body copy.
308  %l.4 = load atomic i64, ptr %gep.src.4 unordered, align 8
309  %mul = mul i64 %l.12, %l.4
310  %gep.dst = getelementptr i64, ptr %dst, i64 %iv
311  store i64 %mul, ptr %gep.dst
312  %iv.next = add nuw nsw i64 %iv, 1
313  %c = icmp eq i64 %iv.next, %N
314  br i1 %c, label %exit, label %loop, !llvm.loop !1
315
316exit:
317  ret void
318}
319
; Negative case: the body is identical to
; @cse_matching_load_from_previous_unrolled_iteration, except that %dst is not
; marked noalias. The store to %dst sits between the first copy's %l.12 and
; the second copy's %l.4 and may therefore write the loaded location. In the
; expected output the second unrolled copy keeps its own load from %src.4.
320define void @cse_load_may_be_clobbered(ptr %src, ptr %dst, i64 %N) {
321; CHECK-LABEL: define void @cse_load_may_be_clobbered(
322; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DST:%.*]], i64 [[N:%.*]]) {
323; CHECK-NEXT:  entry:
324; CHECK-NEXT:    [[SRC_4:%.*]] = getelementptr i8, ptr [[SRC]], i64 4
325; CHECK-NEXT:    [[SRC_12:%.*]] = getelementptr i8, ptr [[SRC]], i64 12
326; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[N]], -1
327; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[N]], 1
328; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1
329; CHECK-NEXT:    br i1 [[TMP1]], label [[EXIT_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]]
330; CHECK:       entry.new:
331; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
332; CHECK-NEXT:    br label [[LOOP:%.*]]
333; CHECK:       loop:
334; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[IV_NEXT_1:%.*]], [[LOOP]] ]
335; CHECK-NEXT:    [[NITER:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[NITER_NEXT_1:%.*]], [[LOOP]] ]
336; CHECK-NEXT:    [[GEP_SRC_12:%.*]] = getelementptr i64, ptr [[SRC_12]], i64 [[IV]]
337; CHECK-NEXT:    [[L_12:%.*]] = load i64, ptr [[GEP_SRC_12]], align 8
338; CHECK-NEXT:    [[GEP_SRC_4:%.*]] = getelementptr i64, ptr [[SRC_4]], i64 [[IV]]
339; CHECK-NEXT:    [[L_4:%.*]] = load i64, ptr [[GEP_SRC_4]], align 8
340; CHECK-NEXT:    [[MUL:%.*]] = mul i64 [[L_12]], [[L_4]]
341; CHECK-NEXT:    [[GEP_DST:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IV]]
342; CHECK-NEXT:    store i64 [[MUL]], ptr [[GEP_DST]], align 8
343; CHECK-NEXT:    [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
344; CHECK-NEXT:    [[GEP_SRC_12_1:%.*]] = getelementptr i64, ptr [[SRC_12]], i64 [[IV_NEXT]]
345; CHECK-NEXT:    [[L_12_1:%.*]] = load i64, ptr [[GEP_SRC_12_1]], align 8
346; CHECK-NEXT:    [[GEP_SRC_4_1:%.*]] = getelementptr i64, ptr [[SRC_4]], i64 [[IV_NEXT]]
347; CHECK-NEXT:    [[L_4_1:%.*]] = load i64, ptr [[GEP_SRC_4_1]], align 8
348; CHECK-NEXT:    [[MUL_1:%.*]] = mul i64 [[L_12_1]], [[L_4_1]]
349; CHECK-NEXT:    [[GEP_DST_1:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IV_NEXT]]
350; CHECK-NEXT:    store i64 [[MUL_1]], ptr [[GEP_DST_1]], align 8
351; CHECK-NEXT:    [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2
352; CHECK-NEXT:    [[NITER_NEXT_1]] = add i64 [[NITER]], 2
353; CHECK-NEXT:    [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
354; CHECK-NEXT:    br i1 [[NITER_NCMP_1]], label [[EXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP6:![0-9]+]]
355; CHECK:       exit.unr-lcssa.loopexit:
356; CHECK-NEXT:    [[IV_UNR_PH:%.*]] = phi i64 [ [[IV_NEXT_1]], [[LOOP]] ]
357; CHECK-NEXT:    br label [[EXIT_UNR_LCSSA]]
358; CHECK:       exit.unr-lcssa:
359; CHECK-NEXT:    [[IV_UNR:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_UNR_PH]], [[EXIT_UNR_LCSSA_LOOPEXIT]] ]
360; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
361; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[LOOP_EPIL_PREHEADER:%.*]], label [[EXIT:%.*]]
362; CHECK:       loop.epil.preheader:
363; CHECK-NEXT:    br label [[LOOP_EPIL:%.*]]
364; CHECK:       loop.epil:
365; CHECK-NEXT:    [[GEP_SRC_12_EPIL:%.*]] = getelementptr i64, ptr [[SRC_12]], i64 [[IV_UNR]]
366; CHECK-NEXT:    [[L_12_EPIL:%.*]] = load i64, ptr [[GEP_SRC_12_EPIL]], align 8
367; CHECK-NEXT:    [[GEP_SRC_4_EPIL:%.*]] = getelementptr i64, ptr [[SRC_4]], i64 [[IV_UNR]]
368; CHECK-NEXT:    [[L_4_EPIL:%.*]] = load i64, ptr [[GEP_SRC_4_EPIL]], align 8
369; CHECK-NEXT:    [[MUL_EPIL:%.*]] = mul i64 [[L_12_EPIL]], [[L_4_EPIL]]
370; CHECK-NEXT:    [[GEP_DST_EPIL:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IV_UNR]]
371; CHECK-NEXT:    store i64 [[MUL_EPIL]], ptr [[GEP_DST_EPIL]], align 8
372; CHECK-NEXT:    br label [[EXIT]]
373; CHECK:       exit:
374; CHECK-NEXT:    ret void
375;
376entry:
377  %src.4 = getelementptr i8, ptr %src, i64 4
378  %src.12 = getelementptr i8, ptr %src, i64 12
379  br label %loop
380
381loop:
382  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
383  %gep.src.12 = getelementptr i64, ptr %src.12, i64 %iv
384  %l.12 = load i64, ptr %gep.src.12, align 8
385  %gep.src.4 = getelementptr i64, ptr %src.4, i64 %iv
386  %l.4 = load i64, ptr %gep.src.4, align 8
387  %mul = mul i64 %l.12, %l.4
388  %gep.dst = getelementptr i64, ptr %dst, i64 %iv
; %dst carries no noalias attribute in this function, so this store may
; overlap the memory read through %src.
389  store i64 %mul, ptr %gep.dst
390  %iv.next = add nuw nsw i64 %iv, 1
391  %c = icmp eq i64 %iv.next, %N
392  br i1 %c, label %exit, label %loop, !llvm.loop !1
393
394exit:
395  ret void
396}
397
398
; External function with no body in this file; it is called from the latch
; block of @loop_body_with_dead_blocks.
399declare void @foo()
400
; Nested loop whose inner body is accompanied by %loop.bb.dead, a block that no
; branch in the function targets. The inner latch carries !1 (unroll count 2).
; In the expected output:
;  - %loop.bb.dead is still present but ends in `unreachable`;
;  - the second unrolled copy of %loop.bb has no load of its own; its compare
;    uses %l.2 loaded in the preceding latch;
;  - each latch copy still reloads %src after its call to @foo.
401define void @loop_body_with_dead_blocks(ptr %src) {
402; CHECK-LABEL: define void @loop_body_with_dead_blocks(
403; CHECK-SAME: ptr [[SRC:%.*]]) {
404; CHECK-NEXT:  entry:
405; CHECK-NEXT:    br label [[OUTER_HEADER:%.*]]
406; CHECK:       outer.header.loopexit:
407; CHECK-NEXT:    br label [[OUTER_HEADER]]
408; CHECK:       outer.header:
409; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
410; CHECK:       loop.header:
411; CHECK-NEXT:    br label [[LOOP_BB:%.*]]
412; CHECK:       loop.bb.dead:
413; CHECK-NEXT:    unreachable
414; CHECK:       loop.bb:
415; CHECK-NEXT:    [[L_1:%.*]] = load i32, ptr [[SRC]], align 8
416; CHECK-NEXT:    [[C_1:%.*]] = icmp eq i32 [[L_1]], 0
417; CHECK-NEXT:    br i1 [[C_1]], label [[OUTER_HEADER_LOOPEXIT:%.*]], label [[LOOP_LATCH:%.*]]
418; CHECK:       loop.latch:
419; CHECK-NEXT:    call void @foo()
420; CHECK-NEXT:    [[L_2:%.*]] = load i32, ptr [[SRC]], align 8
421; CHECK-NEXT:    [[C_2:%.*]] = icmp eq i32 [[L_2]], 1
422; CHECK-NEXT:    br i1 [[C_2]], label [[EXIT:%.*]], label [[LOOP_HEADER_1:%.*]], !llvm.loop [[LOOP7:![0-9]+]]
423; CHECK:       loop.header.1:
424; CHECK-NEXT:    br label [[LOOP_BB_1:%.*]]
425; CHECK:       loop.bb.1:
426; CHECK-NEXT:    [[C_1_1:%.*]] = icmp eq i32 [[L_2]], 0
427; CHECK-NEXT:    br i1 [[C_1_1]], label [[OUTER_HEADER_LOOPEXIT]], label [[LOOP_LATCH_1:%.*]]
428; CHECK:       loop.latch.1:
429; CHECK-NEXT:    call void @foo()
430; CHECK-NEXT:    [[L_2_1:%.*]] = load i32, ptr [[SRC]], align 8
431; CHECK-NEXT:    [[C_2_1:%.*]] = icmp eq i32 [[L_2_1]], 1
432; CHECK-NEXT:    br i1 [[C_2_1]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP9:![0-9]+]]
433; CHECK:       exit:
434; CHECK-NEXT:    ret void
435;
436entry:
437  br label %outer.header
438
439outer.header:
440  br label %loop.header
441
442loop.header:
443  br label %loop.bb
444
; No predecessors: nothing in this function branches here.
445loop.bb.dead:
446  br label %loop.bb
447
448loop.bb:
449  %l.1 = load i32, ptr %src, align 8
450  %c.1 = icmp eq i32 %l.1, 0
; Leaving to %outer.header exits the inner loop and re-enters it from outside.
451  br i1 %c.1, label %outer.header, label %loop.latch
452
453loop.latch:
454  call void @foo()
; Same pointer as %l.1, loaded again after the call to @foo.
455  %l.2 = load i32, ptr %src, align 8
456  %c.2 = icmp eq i32 %l.2, 1
457  br i1 %c.2, label %exit, label %loop.header, !llvm.loop !1
458
459exit:
460  ret void
461}
462
; Loop metadata shared by every loop in this file: !1 is the loop ID attached
; to each latch branch; it combines mustprogress (!0) with a requested unroll
; count of 2 (!2). The assertions after the definitions describe the loop IDs
; expected in the output: most carry llvm.loop.unroll.disable in place of the
; count, while the one bound to LOOP7 still carries the count of 2.
463!0 = !{!"llvm.loop.mustprogress"}
464!1 = distinct !{!1, !0, !2}
465!2 = !{!"llvm.loop.unroll.count", i32 2}
466;.
467; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
468; CHECK: [[META1]] = !{!"llvm.loop.mustprogress"}
469; CHECK: [[META2]] = !{!"llvm.loop.unroll.disable"}
470; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]}
471; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
472; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]}
473; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
474; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]], [[META8:![0-9]+]]}
475; CHECK: [[META8]] = !{!"llvm.loop.unroll.count", i32 2}
476; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]], [[META2]]}
477;.
478