xref: /llvm-project/llvm/test/Transforms/PhaseOrdering/X86/preserve-access-group.ll (revision 29441e4f5fa5f5c7709f7cf180815ba97f611297)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2; RUN: opt -passes="default<O3>" -S %s | FileCheck %s
3
4target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
5target triple = "x86_64-unknown-linux-gnu"
6
7; Verify that !llvm.access.group is preserved through the O3 pipeline so that the loop is vectorized.
8; End-to-end test for https://github.com/llvm/llvm-project/issues/115595.
; Input loop in unoptimized form: every argument and local lives in an alloca
; and is accessed through explicit loads and stores. For each iface in
; [0, %nface) the body computes
;   il    = face_cell[iface]
;   ir    = face_cell[iface + nface]
;   y[il] = value behind the pointer returned by @max(&y[il], &x[ir])
; %ncell is only spilled to %ncell.addr and never read back.
; The memory accesses in for.cond, for.body and for.inc (including the call to
; @max) carry !llvm.access.group !12, and the back edge carries !llvm.loop !15.
9define void @test(i32 noundef %nface, i32 noundef %ncell, ptr noalias noundef %face_cell, ptr noalias noundef %x, ptr noalias noundef %y) #0 {
10; CHECK-LABEL: define void @test(
11; CHECK-SAME: i32 noundef [[NFACE:%.*]], i32 noundef [[NCELL:%.*]], ptr noalias noundef readonly captures(none) [[FACE_CELL:%.*]], ptr noalias noundef readonly captures(none) [[X:%.*]], ptr noalias noundef captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
12; CHECK-NEXT:  [[ENTRY:.*:]]
13; CHECK-NEXT:    [[CMP8:%.*]] = icmp sgt i32 [[NFACE]], 0
14; CHECK-NEXT:    br i1 [[CMP8]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_COND_CLEANUP:.*]]
15; CHECK:       [[FOR_BODY_PREHEADER]]:
16; CHECK-NEXT:    [[TMP0:%.*]] = zext nneg i32 [[NFACE]] to i64
17; CHECK-NEXT:    [[INVARIANT_GEP:%.*]] = getelementptr inbounds nuw i32, ptr [[FACE_CELL]], i64 [[TMP0]]
18; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[NFACE]], 4
19; CHECK-NEXT:    br i1 [[TMP1]], label %[[FOR_BODY_PREHEADER14:.*]], label %[[VECTOR_PH:.*]]
20; CHECK:       [[VECTOR_PH]]:
21; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = and i64 [[TMP0]], 2147483644
22; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
23; CHECK:       [[VECTOR_BODY]]:
24; CHECK-NEXT:    [[INDVARS_IV_EPIL:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
25; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw i32, ptr [[FACE_CELL]], i64 [[INDVARS_IV_EPIL]]
26; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP10]], align 4, !tbaa [[TBAA0:![0-9]+]], !llvm.access.group [[ACC_GRP4:![0-9]+]]
27; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw i32, ptr [[INVARIANT_GEP]], i64 [[INDVARS_IV_EPIL]]
28; CHECK-NEXT:    [[WIDE_LOAD12:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4, !tbaa [[TBAA0]], !llvm.access.group [[ACC_GRP4]]
29; CHECK-NEXT:    [[TMP3:%.*]] = sext <4 x i32> [[WIDE_LOAD]] to <4 x i64>
30; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds double, ptr [[Y]], <4 x i64> [[TMP3]]
31; CHECK-NEXT:    [[TMP5:%.*]] = sext <4 x i32> [[WIDE_LOAD12]] to <4 x i64>
32; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds double, ptr [[X]], <4 x i64> [[TMP5]]
33; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = tail call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> [[TMP4]], i32 8, <4 x i1> splat (i1 true), <4 x double> poison), !tbaa [[TBAA5:![0-9]+]], !llvm.access.group [[ACC_GRP4]]
34; CHECK-NEXT:    [[WIDE_MASKED_GATHER13:%.*]] = tail call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> [[TMP6]], i32 8, <4 x i1> splat (i1 true), <4 x double> poison), !tbaa [[TBAA5]], !llvm.access.group [[ACC_GRP4]]
35; CHECK-NEXT:    [[TMP7:%.*]] = fcmp fast olt <4 x double> [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_GATHER13]]
36; CHECK-NEXT:    [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x double> [[WIDE_MASKED_GATHER13]], <4 x double> [[WIDE_MASKED_GATHER]]
37; CHECK-NEXT:    tail call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> [[TMP8]], <4 x ptr> [[TMP4]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[TBAA5]], !llvm.access.group [[ACC_GRP4]]
38; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDVARS_IV_EPIL]], 4
39; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[UNROLL_ITER]]
40; CHECK-NEXT:    br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
41; CHECK:       [[MIDDLE_BLOCK]]:
42; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[UNROLL_ITER]], [[TMP0]]
43; CHECK-NEXT:    br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY_PREHEADER14]]
44; CHECK:       [[FOR_BODY_PREHEADER14]]:
45; CHECK-NEXT:    [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[UNROLL_ITER]], %[[MIDDLE_BLOCK]] ]
46; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
47; CHECK:       [[FOR_COND_CLEANUP]]:
48; CHECK-NEXT:    ret void
49; CHECK:       [[FOR_BODY]]:
50; CHECK-NEXT:    [[INDVARS_IV_NEXT_2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[INDVARS_IV_PH]], %[[FOR_BODY_PREHEADER14]] ]
51; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[FACE_CELL]], i64 [[INDVARS_IV_NEXT_2]]
52; CHECK-NEXT:    [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA0]], !llvm.access.group [[ACC_GRP4]]
53; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds nuw i32, ptr [[INVARIANT_GEP]], i64 [[INDVARS_IV_NEXT_2]]
54; CHECK-NEXT:    [[TMP23:%.*]] = load i32, ptr [[GEP]], align 4, !tbaa [[TBAA0]], !llvm.access.group [[ACC_GRP4]]
55; CHECK-NEXT:    [[IDXPROM3_3:%.*]] = sext i32 [[TMP22]] to i64
56; CHECK-NEXT:    [[ARRAYIDX4_3:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[IDXPROM3_3]]
57; CHECK-NEXT:    [[IDXPROM5_3:%.*]] = sext i32 [[TMP23]] to i64
58; CHECK-NEXT:    [[ARRAYIDX6_3:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[IDXPROM5_3]]
59; CHECK-NEXT:    [[TMP24:%.*]] = load double, ptr [[ARRAYIDX4_3]], align 8, !tbaa [[TBAA5]], !llvm.access.group [[ACC_GRP4]]
60; CHECK-NEXT:    [[TMP25:%.*]] = load double, ptr [[ARRAYIDX6_3]], align 8, !tbaa [[TBAA5]], !llvm.access.group [[ACC_GRP4]]
61; CHECK-NEXT:    [[CMP_I_3:%.*]] = fcmp fast olt double [[TMP24]], [[TMP25]]
62; CHECK-NEXT:    [[TMP26:%.*]] = select i1 [[CMP_I_3]], double [[TMP25]], double [[TMP24]]
63; CHECK-NEXT:    store double [[TMP26]], ptr [[ARRAYIDX4_3]], align 8, !tbaa [[TBAA5]], !llvm.access.group [[ACC_GRP4]]
64; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV_NEXT_2]], 1
65; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[TMP0]]
66; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
67;
; Spill all five arguments to stack slots and create the locals il, ir, iface.
68entry:
69  %nface.addr = alloca i32, align 4
70  %ncell.addr = alloca i32, align 4
71  %face_cell.addr = alloca ptr, align 8
72  %x.addr = alloca ptr, align 8
73  %y.addr = alloca ptr, align 8
74  %il = alloca i32, align 4
75  %ir = alloca i32, align 4
76  %iface = alloca i32, align 4
77  store i32 %nface, ptr %nface.addr, align 4, !tbaa !6
78  store i32 %ncell, ptr %ncell.addr, align 4, !tbaa !6
79  store ptr %face_cell, ptr %face_cell.addr, align 8, !tbaa !10
80  store ptr %x, ptr %x.addr, align 8, !tbaa !10
81  store ptr %y, ptr %y.addr, align 8, !tbaa !10
82  call void @llvm.lifetime.start.p0(i64 4, ptr %il) #3
83  call void @llvm.lifetime.start.p0(i64 4, ptr %ir) #3
84  call void @llvm.lifetime.start.p0(i64 4, ptr %iface) #3
; iface = 0
85  store i32 0, ptr %iface, align 4, !tbaa !6
86  br label %for.cond
87
; Loop header: stay in the loop while iface < nface (signed compare).
88for.cond:
89  %0 = load i32, ptr %iface, align 4, !tbaa !6, !llvm.access.group !12
90  %1 = load i32, ptr %nface.addr, align 4, !tbaa !6, !llvm.access.group !12
91  %cmp = icmp slt i32 %0, %1
92  br i1 %cmp, label %for.body, label %for.cond.cleanup
93
; Loop exit: this lifetime.end call on %iface also carries the access group.
94for.cond.cleanup:
95  call void @llvm.lifetime.end.p0(i64 4, ptr %iface) #3, !llvm.access.group !12
96  br label %for.end
97
98for.body:
; il = face_cell[iface]
99  %2 = load ptr, ptr %face_cell.addr, align 8, !tbaa !10, !llvm.access.group !12
100  %3 = load i32, ptr %iface, align 4, !tbaa !6, !llvm.access.group !12
101  %idxprom = sext i32 %3 to i64
102  %arrayidx = getelementptr inbounds i32, ptr %2, i64 %idxprom
103  %4 = load i32, ptr %arrayidx, align 4, !tbaa !6, !llvm.access.group !12
104  store i32 %4, ptr %il, align 4, !tbaa !6, !llvm.access.group !12
; ir = face_cell[iface + nface]
105  %5 = load ptr, ptr %face_cell.addr, align 8, !tbaa !10, !llvm.access.group !12
106  %6 = load i32, ptr %iface, align 4, !tbaa !6, !llvm.access.group !12
107  %7 = load i32, ptr %nface.addr, align 4, !tbaa !6, !llvm.access.group !12
108  %add = add nsw i32 %6, %7
109  %idxprom1 = sext i32 %add to i64
110  %arrayidx2 = getelementptr inbounds i32, ptr %5, i64 %idxprom1
111  %8 = load i32, ptr %arrayidx2, align 4, !tbaa !6, !llvm.access.group !12
112  store i32 %8, ptr %ir, align 4, !tbaa !6, !llvm.access.group !12
; %arrayidx4 = &y[il]
113  %9 = load ptr, ptr %y.addr, align 8, !tbaa !10, !llvm.access.group !12
114  %10 = load i32, ptr %il, align 4, !tbaa !6, !llvm.access.group !12
115  %idxprom3 = sext i32 %10 to i64
116  %arrayidx4 = getelementptr inbounds double, ptr %9, i64 %idxprom3
; %arrayidx6 = &x[ir]
117  %11 = load ptr, ptr %x.addr, align 8, !tbaa !10, !llvm.access.group !12
118  %12 = load i32, ptr %ir, align 4, !tbaa !6, !llvm.access.group !12
119  %idxprom5 = sext i32 %12 to i64
120  %arrayidx6 = getelementptr inbounds double, ptr %11, i64 %idxprom5
; Select one of the two element pointers via @max; the call site itself is
; tagged with the access group.
121  %call = call noundef nonnull align 8 dereferenceable(8) ptr @max(ptr noundef nonnull align 8 dereferenceable(8) %arrayidx4, ptr noundef nonnull align 8 dereferenceable(8) %arrayidx6), !llvm.access.group !12
122  %13 = load double, ptr %call, align 8, !tbaa !13, !llvm.access.group !12
; y[il] = the double loaded through the pointer returned by @max
123  %14 = load ptr, ptr %y.addr, align 8, !tbaa !10, !llvm.access.group !12
124  %15 = load i32, ptr %il, align 4, !tbaa !6, !llvm.access.group !12
125  %idxprom7 = sext i32 %15 to i64
126  %arrayidx8 = getelementptr inbounds double, ptr %14, i64 %idxprom7
127  store double %13, ptr %arrayidx8, align 8, !tbaa !13, !llvm.access.group !12
128  br label %for.inc
129
; ++iface; the back edge carries the loop metadata !15.
130for.inc:
131  %16 = load i32, ptr %iface, align 4, !tbaa !6, !llvm.access.group !12
132  %inc = add nsw i32 %16, 1
133  store i32 %inc, ptr %iface, align 4, !tbaa !6, !llvm.access.group !12
134  br label %for.cond, !llvm.loop !15
135
; End the lifetimes of the remaining locals and return.
136for.end:
137  call void @llvm.lifetime.end.p0(i64 4, ptr %ir) #3
138  call void @llvm.lifetime.end.p0(i64 4, ptr %il) #3
139  ret void
140}
141
; Intrinsic declaration; called in @test's entry block on %il, %ir and %iface.
142declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1
143
; Helper called from the loop body of @test. Loads the doubles behind %__a and
; %__b and returns %__b when `fcmp fast olt *%__a, *%__b` is true, otherwise
; %__a. Like @test it is written in unoptimized form: both arguments and the
; return value go through allocas.
; NOTE(review): attribute group #2 is referenced here, but no definition of it
; is visible in this file.
144define linkonce_odr noundef nonnull align 8 dereferenceable(8) ptr @max(ptr noundef nonnull align 8 dereferenceable(8) %__a, ptr noundef nonnull align 8 dereferenceable(8) %__b) #2 {
145entry:
146  %retval = alloca ptr, align 8
147  %__a.addr = alloca ptr, align 8
148  %__b.addr = alloca ptr, align 8
149  store ptr %__a, ptr %__a.addr, align 8, !tbaa !10
150  store ptr %__b, ptr %__b.addr, align 8, !tbaa !10
; Compare the two pointed-to doubles: *%__a < *%__b ?
151  %0 = load ptr, ptr %__a.addr, align 8, !tbaa !10
152  %1 = load double, ptr %0, align 8, !tbaa !13
153  %2 = load ptr, ptr %__b.addr, align 8, !tbaa !10
154  %3 = load double, ptr %2, align 8, !tbaa !13
155  %cmp = fcmp fast olt double %1, %3
156  br i1 %cmp, label %if.then, label %if.end
157
; Comparison was true: the result is %__b.
158if.then:
159  %4 = load ptr, ptr %__b.addr, align 8, !tbaa !10
160  store ptr %4, ptr %retval, align 8
161  br label %return
162
; Comparison was false: the result is %__a.
163if.end:
164  %5 = load ptr, ptr %__a.addr, align 8, !tbaa !10
165  store ptr %5, ptr %retval, align 8
166  br label %return
167
; Single exit: reload the chosen pointer from %retval and return it.
168return:
169  %6 = load ptr, ptr %retval, align 8
170  ret ptr %6
171}
172
; Intrinsic declaration; called in @test on %iface (for.cond.cleanup) and on
; %ir and %il (for.end).
173declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1
174
; Attribute groups: #0 is attached to @test, #1 to the two lifetime intrinsic
; declarations.
; NOTE(review): #2 (on @max) and #3 (on the lifetime call sites in @test) are
; referenced above, but no definitions for them are visible in this file.
175attributes #0 = { mustprogress "target-cpu" = "skylake-avx512" }
176attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
177
; TBAA access tags used above: !6 for "int", !10 for "any pointer", !13 for
; "double". All three type nodes hang off "omnipotent char" (!8) under the
; root !9.
178!6 = !{!7, !7, i64 0}
179!7 = !{!"int", !8, i64 0}
180!8 = !{!"omnipotent char", !9, i64 0}
181!9 = !{!"Simple C++ TBAA"}
182!10 = !{!11, !11, i64 0}
183!11 = !{!"any pointer", !8, i64 0}
; Access group attached to the memory accesses of the loop in @test; it is
; named by the llvm.loop.parallel_accesses entry (!17) below.
184!12 = distinct !{}
185!13 = !{!14, !14, i64 0}
186!14 = !{!"double", !8, i64 0}
; Loop metadata on the back edge in @test: mustprogress, parallel accesses for
; group !12, and vectorization explicitly enabled.
187!15 = distinct !{!15, !16, !17, !18}
188!16 = !{!"llvm.loop.mustprogress"}
189!17 = !{!"llvm.loop.parallel_accesses", !12}
190!18 = !{!"llvm.loop.vectorize.enable", i1 true}
191
192;.
193; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
194; CHECK: [[META1]] = !{!"int", [[META2:![0-9]+]], i64 0}
195; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0}
196; CHECK: [[META3]] = !{!"Simple C++ TBAA"}
197; CHECK: [[ACC_GRP4]] = distinct !{}
198; CHECK: [[TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0}
199; CHECK: [[META6]] = !{!"double", [[META2]], i64 0}
200; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META8:![0-9]+]], [[META9:![0-9]+]], [[META10:![0-9]+]], [[META11:![0-9]+]]}
201; CHECK: [[META8]] = !{!"llvm.loop.mustprogress"}
202; CHECK: [[META9]] = !{!"llvm.loop.parallel_accesses", [[ACC_GRP4]]}
203; CHECK: [[META10]] = !{!"llvm.loop.isvectorized", i32 1}
204; CHECK: [[META11]] = !{!"llvm.loop.unroll.runtime.disable"}
205; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META8]], [[META9]], [[META11]], [[META10]]}
206;.
207