xref: /llvm-project/llvm/test/Transforms/PhaseOrdering/X86/hoist-load-of-baseptr.ll (revision 29441e4f5fa5f5c7709f7cf180815ba97f611297)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature
2; RUN: opt -passes="default<O1>" -S < %s | FileCheck --check-prefixes=O1 %s
3; RUN: opt -passes="default<O2>" -S < %s | FileCheck --check-prefixes=O2 %s
4; RUN: opt -passes="default<O3>" -S < %s | FileCheck --check-prefixes=O3 %s
5
; Target description: x86-64 Linux, per the triple below.
6target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
7target triple = "x86_64-unknown-linux-gnu"
8
; Nested single-member wrapper structs: class.std::vector -> _Vector_base ->
; _Vector_impl -> _Vector_impl_data. The innermost struct is three pointers.
; Only field 0 is accessed in this file (it is named %_M_start at its use site).
9%"class.std::vector" = type { %"struct.std::_Vector_base" }
10%"struct.std::_Vector_base" = type { %"struct.std::_Vector_base<int, std::allocator<int>>::_Vector_impl" }
11%"struct.std::_Vector_base<int, std::allocator<int>>::_Vector_impl" = type { %"struct.std::_Vector_base<int, std::allocator<int>>::_Vector_impl_data" }
12%"struct.std::_Vector_base<int, std::allocator<int>>::_Vector_impl_data" = type { ptr, ptr, ptr }
13
; Comdat group referenced by the linkonce_odr definition of
; @_ZNSt6vectorIiSaIiEEixEm in this file.
14$_ZNSt6vectorIiSaIiEEixEm = comdat any
15
; Test input: unoptimised, alloca-based IR for a doubly nested loop.
; The mangled name demangles to
;   compute(std::vector<int, std::allocator<int>>&, unsigned long long).
; The outer loop runs i from 0 while i < 100. The inner loop runs j from 0 while
; j < %numElems and, on each iteration, calls @_ZNSt6vectorIiSaIiEEixEm(%data, j)
; and increments the i32 at the returned address.
;
; The autogenerated O1/O2/O3 assertions below all expect the load of the pointer
; stored at %data ([[TMP0]] = load ptr, ptr [[DATA]]) to appear in the entry
; block, i.e. outside both loops. The O2 and O3 assertions additionally expect a
; vectorised inner loop (two <4 x i32> accesses per iteration) plus a scalar
; remainder loop.
;
; Do not hand-edit the assertion lines; regenerate them with
; utils/update_test_checks.py (see the NOTE at the top of the file).
16define dso_local void @_Z7computeRSt6vectorIiSaIiEEy(ptr noundef nonnull align 8 dereferenceable(24) %data, i64 noundef %numElems) {
17; O1-LABEL: define {{[^@]+}}@_Z7computeRSt6vectorIiSaIiEEy
18; O1-SAME: (ptr noundef nonnull readonly align 8 captures(none) dereferenceable(24) [[DATA:%.*]], i64 noundef [[NUMELEMS:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
19; O1-NEXT:  entry:
20; O1-NEXT:    [[CMP24_NOT:%.*]] = icmp eq i64 [[NUMELEMS]], 0
21; O1-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[DATA]], align 8
22; O1-NEXT:    br label [[FOR_COND1_PREHEADER:%.*]]
23; O1:       for.cond1.preheader:
24; O1-NEXT:    [[I_06:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC7:%.*]], [[FOR_COND_CLEANUP3:%.*]] ]
25; O1-NEXT:    br i1 [[CMP24_NOT]], label [[FOR_COND_CLEANUP3]], label [[FOR_BODY4:%.*]]
26; O1:       for.cond.cleanup:
27; O1-NEXT:    ret void
28; O1:       for.cond.cleanup3:
29; O1-NEXT:    [[INC7]] = add nuw nsw i64 [[I_06]], 1
30; O1-NEXT:    [[EXITCOND7_NOT:%.*]] = icmp eq i64 [[INC7]], 100
31; O1-NEXT:    br i1 [[EXITCOND7_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER]], !llvm.loop [[LOOP0:![0-9]+]]
32; O1:       for.body4:
33; O1-NEXT:    [[J_05:%.*]] = phi i64 [ [[INC5:%.*]], [[FOR_BODY4]] ], [ 0, [[FOR_COND1_PREHEADER]] ]
34; O1-NEXT:    [[ADD_PTR_I:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[J_05]]
35; O1-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ADD_PTR_I]], align 4, !tbaa [[TBAA2:![0-9]+]]
36; O1-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP1]], 1
37; O1-NEXT:    store i32 [[INC]], ptr [[ADD_PTR_I]], align 4, !tbaa [[TBAA2]]
38; O1-NEXT:    [[INC5]] = add nuw i64 [[J_05]], 1
39; O1-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC5]], [[NUMELEMS]]
40; O1-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP3]], label [[FOR_BODY4]], !llvm.loop [[LOOP6:![0-9]+]]
41;
42; O2-LABEL: define {{[^@]+}}@_Z7computeRSt6vectorIiSaIiEEy
43; O2-SAME: (ptr noundef nonnull readonly align 8 captures(none) dereferenceable(24) [[DATA:%.*]], i64 noundef [[NUMELEMS:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
44; O2-NEXT:  entry:
45; O2-NEXT:    [[CMP24_NOT:%.*]] = icmp eq i64 [[NUMELEMS]], 0
46; O2-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[DATA]], align 8
47; O2-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[NUMELEMS]], 8
48; O2-NEXT:    [[N_VEC:%.*]] = and i64 [[NUMELEMS]], -8
49; O2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[NUMELEMS]], [[N_VEC]]
50; O2-NEXT:    br label [[FOR_COND1_PREHEADER:%.*]]
51; O2:       for.cond1.preheader:
52; O2-NEXT:    [[I_06:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC7:%.*]], [[FOR_COND_CLEANUP3:%.*]] ]
53; O2-NEXT:    br i1 [[CMP24_NOT]], label [[FOR_COND_CLEANUP3]], label [[FOR_BODY4_PREHEADER:%.*]]
54; O2:       for.body4.preheader:
55; O2-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY4_PREHEADER9:%.*]], label [[VECTOR_BODY:%.*]]
56; O2:       vector.body:
57; O2-NEXT:    [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ], [ 0, [[FOR_BODY4_PREHEADER]] ]
58; O2-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[INDEX]]
59; O2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16
60; O2-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
61; O2-NEXT:    [[WIDE_LOAD8:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4, !tbaa [[TBAA0]]
62; O2-NEXT:    [[TMP3:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], splat (i32 1)
63; O2-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_LOAD8]], splat (i32 1)
64; O2-NEXT:    store <4 x i32> [[TMP3]], ptr [[TMP1]], align 4, !tbaa [[TBAA0]]
65; O2-NEXT:    store <4 x i32> [[TMP4]], ptr [[TMP2]], align 4, !tbaa [[TBAA0]]
66; O2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
67; O2-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
68; O2-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
69; O2:       middle.block:
70; O2-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP3]], label [[FOR_BODY4_PREHEADER9]]
71; O2:       for.body4.preheader9:
72; O2-NEXT:    [[J_05_PH:%.*]] = phi i64 [ 0, [[FOR_BODY4_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
73; O2-NEXT:    br label [[FOR_BODY4:%.*]]
74; O2:       for.cond.cleanup:
75; O2-NEXT:    ret void
76; O2:       for.cond.cleanup3:
77; O2-NEXT:    [[INC7]] = add nuw nsw i64 [[I_06]], 1
78; O2-NEXT:    [[EXITCOND7_NOT:%.*]] = icmp eq i64 [[INC7]], 100
79; O2-NEXT:    br i1 [[EXITCOND7_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER]], !llvm.loop [[LOOP8:![0-9]+]]
80; O2:       for.body4:
81; O2-NEXT:    [[J_05:%.*]] = phi i64 [ [[INC5:%.*]], [[FOR_BODY4]] ], [ [[J_05_PH]], [[FOR_BODY4_PREHEADER9]] ]
82; O2-NEXT:    [[ADD_PTR_I:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[J_05]]
83; O2-NEXT:    [[TMP6:%.*]] = load i32, ptr [[ADD_PTR_I]], align 4, !tbaa [[TBAA0]]
84; O2-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP6]], 1
85; O2-NEXT:    store i32 [[INC]], ptr [[ADD_PTR_I]], align 4, !tbaa [[TBAA0]]
86; O2-NEXT:    [[INC5]] = add nuw i64 [[J_05]], 1
87; O2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC5]], [[NUMELEMS]]
88; O2-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP3]], label [[FOR_BODY4]], !llvm.loop [[LOOP9:![0-9]+]]
89;
90; O3-LABEL: define {{[^@]+}}@_Z7computeRSt6vectorIiSaIiEEy
91; O3-SAME: (ptr noundef nonnull readonly align 8 captures(none) dereferenceable(24) [[DATA:%.*]], i64 noundef [[NUMELEMS:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
92; O3-NEXT:  entry:
93; O3-NEXT:    [[CMP24_NOT:%.*]] = icmp eq i64 [[NUMELEMS]], 0
94; O3-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[DATA]], align 8
95; O3-NEXT:    br i1 [[CMP24_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER_US_PREHEADER:%.*]]
96; O3:       for.cond1.preheader.us.preheader:
97; O3-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[NUMELEMS]], 8
98; O3-NEXT:    [[N_VEC:%.*]] = and i64 [[NUMELEMS]], -8
99; O3-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[NUMELEMS]], [[N_VEC]]
100; O3-NEXT:    br label [[FOR_COND1_PREHEADER_US:%.*]]
101; O3:       for.cond1.preheader.us:
102; O3-NEXT:    [[I_06_US:%.*]] = phi i64 [ [[INC7_US:%.*]], [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US:%.*]] ], [ 0, [[FOR_COND1_PREHEADER_US_PREHEADER]] ]
103; O3-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY4_US_PREHEADER:%.*]], label [[VECTOR_BODY:%.*]]
104; O3:       vector.body:
105; O3-NEXT:    [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ], [ 0, [[FOR_COND1_PREHEADER_US]] ]
106; O3-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[INDEX]]
107; O3-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16
108; O3-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
109; O3-NEXT:    [[WIDE_LOAD9:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4, !tbaa [[TBAA0]]
110; O3-NEXT:    [[TMP3:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], splat (i32 1)
111; O3-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_LOAD9]], splat (i32 1)
112; O3-NEXT:    store <4 x i32> [[TMP3]], ptr [[TMP1]], align 4, !tbaa [[TBAA0]]
113; O3-NEXT:    store <4 x i32> [[TMP4]], ptr [[TMP2]], align 4, !tbaa [[TBAA0]]
114; O3-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
115; O3-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
116; O3-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
117; O3:       middle.block:
118; O3-NEXT:    br i1 [[CMP_N]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]], label [[FOR_BODY4_US_PREHEADER]]
119; O3:       for.body4.us.preheader:
120; O3-NEXT:    [[J_05_US_PH:%.*]] = phi i64 [ 0, [[FOR_COND1_PREHEADER_US]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
121; O3-NEXT:    br label [[FOR_BODY4_US:%.*]]
122; O3:       for.body4.us:
123; O3-NEXT:    [[J_05_US:%.*]] = phi i64 [ [[INC5_US:%.*]], [[FOR_BODY4_US]] ], [ [[J_05_US_PH]], [[FOR_BODY4_US_PREHEADER]] ]
124; O3-NEXT:    [[ADD_PTR_I_US:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[J_05_US]]
125; O3-NEXT:    [[TMP6:%.*]] = load i32, ptr [[ADD_PTR_I_US]], align 4, !tbaa [[TBAA0]]
126; O3-NEXT:    [[INC_US:%.*]] = add nsw i32 [[TMP6]], 1
127; O3-NEXT:    store i32 [[INC_US]], ptr [[ADD_PTR_I_US]], align 4, !tbaa [[TBAA0]]
128; O3-NEXT:    [[INC5_US]] = add nuw i64 [[J_05_US]], 1
129; O3-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC5_US]], [[NUMELEMS]]
130; O3-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]], label [[FOR_BODY4_US]], !llvm.loop [[LOOP8:![0-9]+]]
131; O3:       for.cond1.for.cond.cleanup3_crit_edge.us:
132; O3-NEXT:    [[INC7_US]] = add nuw nsw i64 [[I_06_US]], 1
133; O3-NEXT:    [[EXITCOND8_NOT:%.*]] = icmp eq i64 [[INC7_US]], 100
134; O3-NEXT:    [[EXITCOND8_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_COND1_PREHEADER_US]], !llvm.loop [[LOOP9:![0-9]+]]
135; O3:       for.cond.cleanup:
136; O3-NEXT:    ret void
137;
; Entry: spill both arguments to stack slots and initialise the outer counter
; %i to 0.
138entry:
139  %data.addr = alloca ptr, align 8
140  %numElems.addr = alloca i64, align 8
141  %i = alloca i64, align 8
142  %cleanup.dest.slot = alloca i32, align 4
143  %j = alloca i64, align 8
144  store ptr %data, ptr %data.addr, align 8, !tbaa !3
145  store i64 %numElems, ptr %numElems.addr, align 8, !tbaa !7
146  call void @llvm.lifetime.start.p0(i64 8, ptr %i)
147  store i64 0, ptr %i, align 8, !tbaa !7
148  br label %for.cond
149
; Outer loop header: continue while i < 100 (unsigned compare).
150for.cond:
151  %0 = load i64, ptr %i, align 8, !tbaa !7
152  %cmp = icmp ult i64 %0, 100
153  br i1 %cmp, label %for.body, label %for.cond.cleanup
154
; Outer loop exit: end the lifetime of %i and branch to the return block.
; %cleanup.dest.slot is only ever stored to in this function, never loaded.
155for.cond.cleanup:
156  store i32 2, ptr %cleanup.dest.slot, align 4
157  call void @llvm.lifetime.end.p0(i64 8, ptr %i)
158  br label %for.end8
159
; Outer loop body: start the lifetime of %j and reset it to 0.
160for.body:
161  call void @llvm.lifetime.start.p0(i64 8, ptr %j)
162  store i64 0, ptr %j, align 8, !tbaa !7
163  br label %for.cond1
164
; Inner loop header: continue while j < numElems (unsigned compare). numElems is
; reloaded from its stack slot on every iteration.
165for.cond1:
166  %1 = load i64, ptr %j, align 8, !tbaa !7
167  %2 = load i64, ptr %numElems.addr, align 8, !tbaa !7
168  %cmp2 = icmp ult i64 %1, %2
169  br i1 %cmp2, label %for.body4, label %for.cond.cleanup3
170
; Inner loop exit: end the lifetime of %j and continue with the outer increment.
171for.cond.cleanup3:
172  store i32 5, ptr %cleanup.dest.slot, align 4
173  call void @llvm.lifetime.end.p0(i64 8, ptr %j)
174  br label %for.end
175
; Inner loop body: reload %data from its stack slot, call
; @_ZNSt6vectorIiSaIiEEixEm(%data, j) to obtain the element address, then load
; the i32 there, add 1 (nsw) and store it back.
176for.body4:
177  %3 = load ptr, ptr %data.addr, align 8, !tbaa !3
178  %4 = load i64, ptr %j, align 8, !tbaa !7
179  %call = call noundef nonnull align 4 dereferenceable(4) ptr @_ZNSt6vectorIiSaIiEEixEm(ptr noundef nonnull align 8 dereferenceable(24) %3, i64 noundef %4)
180  %5 = load i32, ptr %call, align 4, !tbaa !9
181  %inc = add nsw i32 %5, 1
182  store i32 %inc, ptr %call, align 4, !tbaa !9
183  br label %for.inc
184
; Inner loop latch: j += 1. The back-edge carries loop metadata !11.
185for.inc:
186  %6 = load i64, ptr %j, align 8, !tbaa !7
187  %inc5 = add i64 %6, 1
188  store i64 %inc5, ptr %j, align 8, !tbaa !7
189  br label %for.cond1, !llvm.loop !11
190
191for.end:
192  br label %for.inc6
193
; Outer loop latch: i += 1. The back-edge carries loop metadata !13.
194for.inc6:
195  %7 = load i64, ptr %i, align 8, !tbaa !7
196  %inc7 = add i64 %7, 1
197  store i64 %inc7, ptr %i, align 8, !tbaa !7
198  br label %for.cond, !llvm.loop !13
199
; Function exit.
200for.end8:
201  ret void
202}
203
; Lifetime-start intrinsic; called on the %i and %j stack slots in
; @_Z7computeRSt6vectorIiSaIiEEy with a size argument of 8.
204declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
205
; Unoptimised element accessor; the mangled name demangles to
;   std::vector<int, std::allocator<int>>::operator[](unsigned long).
; Loads the pointer stored in field 0 of the object at %this (named %_M_start
; below) and returns that pointer advanced by %__n i32 elements. No bounds check
; is performed.
206define linkonce_odr dso_local noundef nonnull align 4 dereferenceable(4) ptr @_ZNSt6vectorIiSaIiEEixEm(ptr noundef nonnull align 8 dereferenceable(24) %this, i64 noundef %__n) comdat align 2 {
207entry:
; Spill both arguments to stack slots, then reload them.
208  %this.addr = alloca ptr, align 8
209  %__n.addr = alloca i64, align 8
210  store ptr %this, ptr %this.addr, align 8, !tbaa !3
211  store i64 %__n, ptr %__n.addr, align 8, !tbaa !14
; Note: this reload of %this carries no !tbaa tag, unlike the store above.
212  %this1 = load ptr, ptr %this.addr, align 8
; Address of field 0 of _Vector_impl_data, then the pointer stored there.
213  %_M_start = getelementptr inbounds %"struct.std::_Vector_base<int, std::allocator<int>>::_Vector_impl_data", ptr %this1, i32 0, i32 0
214  %0 = load ptr, ptr %_M_start, align 8, !tbaa !16
215  %1 = load i64, ptr %__n.addr, align 8, !tbaa !14
; Element address: loaded base pointer + %__n * sizeof(i32).
216  %add.ptr = getelementptr inbounds i32, ptr %0, i64 %1
217  ret ptr %add.ptr
218}
219
; Lifetime-end intrinsic; called on the %i and %j stack slots in
; @_Z7computeRSt6vectorIiSaIiEEy with a size argument of 8.
220declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
221
; !0-!2: module-flag-style and producer-ident nodes. No instruction or named
; metadata in the visible part of this file references them.
222!0 = !{i32 1, !"wchar_size", i32 4}
223!1 = !{i32 7, !"uwtable", i32 2}
224!2 = !{!"clang version 15.0.0 (https://github.com/llvm/llvm-project.git 69297cf639044acf48dd5d9b39b95c54dd50561d)"}
; TBAA access tag for "any pointer" accesses (!3), with its type-descriptor
; chain up to the "Simple C++ TBAA" root (!4-!6).
225!3 = !{!4, !4, i64 0}
226!4 = !{!"any pointer", !5, i64 0}
227!5 = !{!"omnipotent char", !6, i64 0}
228!6 = !{!"Simple C++ TBAA"}
; TBAA access tag and type descriptor for "long long" (the i64 loop counters and
; numElems in @_Z7computeRSt6vectorIiSaIiEEy).
229!7 = !{!8, !8, i64 0}
230!8 = !{!"long long", !5, i64 0}
; TBAA access tag and type descriptor for "int" (the element load/store).
231!9 = !{!10, !10, i64 0}
232!10 = !{!"int", !5, i64 0}
; Loop IDs for the inner (!11) and outer (!13) loop back-edges; both carry the
; llvm.loop.mustprogress attribute (!12).
233!11 = distinct !{!11, !12}
234!12 = !{!"llvm.loop.mustprogress"}
235!13 = distinct !{!13, !12}
; TBAA access tag and type descriptor for "long" (%__n in
; @_ZNSt6vectorIiSaIiEEixEm).
236!14 = !{!15, !15, i64 0}
237!15 = !{!"long", !5, i64 0}
; Struct-path TBAA tag for the load of field 0 (offset 0, "any pointer") of the
; _Vector_impl_data struct type described by !17 (three pointers at offsets 0,
; 8 and 16).
238!16 = !{!17, !4, i64 0}
239!17 = !{!"_ZTSNSt12_Vector_baseIiSaIiEE17_Vector_impl_dataE", !4, i64 0, !4, i64 8, !4, i64 16}
240