; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -passes="default<O3>" -S %s | FileCheck %s

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; FIXME: !llvm.access.group should be preserved, loop should be vectorized.
; End-to-end test for https://github.com/llvm/llvm-project/issues/115595.
;
; Scalar semantics of the (deliberately unoptimized, alloca-heavy) loop below:
;   for (iface = 0; iface < nface; ++iface) {
;     il = face_cell[iface];
;     ir = face_cell[iface + nface];
;     y[il] = max(y[il], x[ir]);   // via the reference-returning @max below
;   }
; Every memory access inside the loop carries !llvm.access.group !12, and the
; loop branch carries !llvm.loop !15 (parallel_accesses(!12) + vectorize.enable),
; so the O3 pipeline is expected to vectorize as shown in the CHECK lines.
define void @test(i32 noundef %nface, i32 noundef %ncell, ptr noalias noundef %face_cell, ptr noalias noundef %x, ptr noalias noundef %y) #0 {
; CHECK-LABEL: define void @test(
; CHECK-SAME: i32 noundef [[NFACE:%.*]], i32 noundef [[NCELL:%.*]], ptr noalias noundef readonly captures(none) [[FACE_CELL:%.*]], ptr noalias noundef readonly captures(none) [[X:%.*]], ptr noalias noundef captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[NFACE]], 0
; CHECK-NEXT: br i1 [[CMP8]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_COND_CLEANUP:.*]]
; CHECK: [[FOR_BODY_PREHEADER]]:
; CHECK-NEXT: [[TMP0:%.*]] = zext nneg i32 [[NFACE]] to i64
; CHECK-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr inbounds nuw i32, ptr [[FACE_CELL]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[NFACE]], 4
; CHECK-NEXT: br i1 [[TMP1]], label %[[FOR_BODY_PREHEADER14:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[TMP0]], 2147483644
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDVARS_IV_EPIL:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw i32, ptr [[FACE_CELL]], i64 [[INDVARS_IV_EPIL]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP10]], align 4, !tbaa [[TBAA0:![0-9]+]], !llvm.access.group [[ACC_GRP4:![0-9]+]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i32, ptr [[INVARIANT_GEP]], i64 [[INDVARS_IV_EPIL]]
; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4, !tbaa [[TBAA0]], !llvm.access.group [[ACC_GRP4]]
; CHECK-NEXT: [[TMP3:%.*]] = sext <4 x i32> [[WIDE_LOAD]] to <4 x i64>
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[Y]], <4 x i64> [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = sext <4 x i32> [[WIDE_LOAD12]] to <4 x i64>
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, ptr [[X]], <4 x i64> [[TMP5]]
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = tail call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> [[TMP4]], i32 8, <4 x i1> splat (i1 true), <4 x double> poison), !tbaa [[TBAA5:![0-9]+]], !llvm.access.group [[ACC_GRP4]]
; CHECK-NEXT: [[WIDE_MASKED_GATHER13:%.*]] = tail call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> [[TMP6]], i32 8, <4 x i1> splat (i1 true), <4 x double> poison), !tbaa [[TBAA5]], !llvm.access.group [[ACC_GRP4]]
; CHECK-NEXT: [[TMP7:%.*]] = fcmp fast olt <4 x double> [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_GATHER13]]
; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x double> [[WIDE_MASKED_GATHER13]], <4 x double> [[WIDE_MASKED_GATHER]]
; CHECK-NEXT: tail call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> [[TMP8]], <4 x ptr> [[TMP4]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[TBAA5]], !llvm.access.group [[ACC_GRP4]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDVARS_IV_EPIL]], 4
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[UNROLL_ITER]]
; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UNROLL_ITER]], [[TMP0]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY_PREHEADER14]]
; CHECK: [[FOR_BODY_PREHEADER14]]:
; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[UNROLL_ITER]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_COND_CLEANUP]]:
; CHECK-NEXT: ret void
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[INDVARS_IV_PH]], %[[FOR_BODY_PREHEADER14]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[FACE_CELL]], i64 [[INDVARS_IV_NEXT_2]]
; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA0]], !llvm.access.group [[ACC_GRP4]]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw i32, ptr [[INVARIANT_GEP]], i64 [[INDVARS_IV_NEXT_2]]
; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[GEP]], align 4, !tbaa [[TBAA0]], !llvm.access.group [[ACC_GRP4]]
; CHECK-NEXT: [[IDXPROM3_3:%.*]] = sext i32 [[TMP22]] to i64
; CHECK-NEXT: [[ARRAYIDX4_3:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[IDXPROM3_3]]
; CHECK-NEXT: [[IDXPROM5_3:%.*]] = sext i32 [[TMP23]] to i64
; CHECK-NEXT: [[ARRAYIDX6_3:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[IDXPROM5_3]]
; CHECK-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX4_3]], align 8, !tbaa [[TBAA5]], !llvm.access.group [[ACC_GRP4]]
; CHECK-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX6_3]], align 8, !tbaa [[TBAA5]], !llvm.access.group [[ACC_GRP4]]
; CHECK-NEXT: [[CMP_I_3:%.*]] = fcmp fast olt double [[TMP24]], [[TMP25]]
; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[CMP_I_3]], double [[TMP25]], double [[TMP24]]
; CHECK-NEXT: store double [[TMP26]], ptr [[ARRAYIDX4_3]], align 8, !tbaa [[TBAA5]], !llvm.access.group [[ACC_GRP4]]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV_NEXT_2]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[TMP0]]
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
;
entry:
  %nface.addr = alloca i32, align 4
  %ncell.addr = alloca i32, align 4
  %face_cell.addr = alloca ptr, align 8
  %x.addr = alloca ptr, align 8
  %y.addr = alloca ptr, align 8
  %il = alloca i32, align 4
  %ir = alloca i32, align 4
  %iface = alloca i32, align 4
  store i32 %nface, ptr %nface.addr, align 4, !tbaa !6
  store i32 %ncell, ptr %ncell.addr, align 4, !tbaa !6
  store ptr %face_cell, ptr %face_cell.addr, align 8, !tbaa !10
  store ptr %x, ptr %x.addr, align 8, !tbaa !10
  store ptr %y, ptr %y.addr, align 8, !tbaa !10
  call void @llvm.lifetime.start.p0(i64 4, ptr %il) #3
  call void @llvm.lifetime.start.p0(i64 4, ptr %ir) #3
  call void @llvm.lifetime.start.p0(i64 4, ptr %iface) #3
  store i32 0, ptr %iface, align 4, !tbaa !6
  br label %for.cond

for.cond:
  ; Loop condition: iface < nface (both re-loaded each iteration, pre-mem2reg form).
  %0 = load i32, ptr %iface, align 4, !tbaa !6, !llvm.access.group !12
  %1 = load i32, ptr %nface.addr, align 4, !tbaa !6, !llvm.access.group !12
  %cmp = icmp slt i32 %0, %1
  br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup:
  call void @llvm.lifetime.end.p0(i64 4, ptr %iface) #3, !llvm.access.group !12
  br label %for.end

for.body:
  ; il = face_cell[iface]
  %2 = load ptr, ptr %face_cell.addr, align 8, !tbaa !10, !llvm.access.group !12
  %3 = load i32, ptr %iface, align 4, !tbaa !6, !llvm.access.group !12
  %idxprom = sext i32 %3 to i64
  %arrayidx = getelementptr inbounds i32, ptr %2, i64 %idxprom
  %4 = load i32, ptr %arrayidx, align 4, !tbaa !6, !llvm.access.group !12
  store i32 %4, ptr %il, align 4, !tbaa !6, !llvm.access.group !12
  ; ir = face_cell[iface + nface]
  %5 = load ptr, ptr %face_cell.addr, align 8, !tbaa !10, !llvm.access.group !12
  %6 = load i32, ptr %iface, align 4, !tbaa !6, !llvm.access.group !12
  %7 = load i32, ptr %nface.addr, align 4, !tbaa !6, !llvm.access.group !12
  %add = add nsw i32 %6, %7
  %idxprom1 = sext i32 %add to i64
  %arrayidx2 = getelementptr inbounds i32, ptr %5, i64 %idxprom1
  %8 = load i32, ptr %arrayidx2, align 4, !tbaa !6, !llvm.access.group !12
  store i32 %8, ptr %ir, align 4, !tbaa !6, !llvm.access.group !12
  ; y[il] = *max(&y[il], &x[ir])
  %9 = load ptr, ptr %y.addr, align 8, !tbaa !10, !llvm.access.group !12
  %10 = load i32, ptr %il, align 4, !tbaa !6, !llvm.access.group !12
  %idxprom3 = sext i32 %10 to i64
  %arrayidx4 = getelementptr inbounds double, ptr %9, i64 %idxprom3
  %11 = load ptr, ptr %x.addr, align 8, !tbaa !10, !llvm.access.group !12
  %12 = load i32, ptr %ir, align 4, !tbaa !6, !llvm.access.group !12
  %idxprom5 = sext i32 %12 to i64
  %arrayidx6 = getelementptr inbounds double, ptr %11, i64 %idxprom5
  %call = call noundef nonnull align 8 dereferenceable(8) ptr @max(ptr noundef nonnull align 8 dereferenceable(8) %arrayidx4, ptr noundef nonnull align 8 dereferenceable(8) %arrayidx6), !llvm.access.group !12
  %13 = load double, ptr %call, align 8, !tbaa !13, !llvm.access.group !12
  %14 = load ptr, ptr %y.addr, align 8, !tbaa !10, !llvm.access.group !12
  %15 = load i32, ptr %il, align 4, !tbaa !6, !llvm.access.group !12
  %idxprom7 = sext i32 %15 to i64
  %arrayidx8 = getelementptr inbounds double, ptr %14, i64 %idxprom7
  store double %13, ptr %arrayidx8, align 8, !tbaa !13, !llvm.access.group !12
  br label %for.inc

for.inc:
  ; ++iface; the back-edge carries the loop metadata (!15) that names !12 parallel.
  %16 = load i32, ptr %iface, align 4, !tbaa !6, !llvm.access.group !12
  %inc = add nsw i32 %16, 1
  store i32 %inc, ptr %iface, align 4, !tbaa !6, !llvm.access.group !12
  br label %for.cond, !llvm.loop !15

for.end:
  call void @llvm.lifetime.end.p0(i64 4, ptr %ir) #3
  call void @llvm.lifetime.end.p0(i64 4, ptr %il) #3
  ret void
}

declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1

; Reference-returning max over doubles: yields %__b when *%__a < *%__b
; (fcmp fast olt), otherwise %__a. Kept in unoptimized, alloca-based form;
; the O3 pipeline reduces it to the fcmp+select seen in the CHECK lines above.
define linkonce_odr noundef nonnull align 8 dereferenceable(8) ptr @max(ptr noundef nonnull align 8 dereferenceable(8) %__a, ptr noundef nonnull align 8 dereferenceable(8) %__b) #2 {
entry:
  %retval = alloca ptr, align 8
  %__a.addr = alloca ptr, align 8
  %__b.addr = alloca ptr, align 8
  store ptr %__a, ptr %__a.addr, align 8, !tbaa !10
  store ptr %__b, ptr %__b.addr, align 8, !tbaa !10
  %0 = load ptr, ptr %__a.addr, align 8, !tbaa !10
  %1 = load double, ptr %0, align 8, !tbaa !13
  %2 = load ptr, ptr %__b.addr, align 8, !tbaa !10
  %3 = load double, ptr %2, align 8, !tbaa !13
  %cmp = fcmp fast olt double %1, %3
  br i1 %cmp, label %if.then, label %if.end

if.then:
  %4 = load ptr, ptr %__b.addr, align 8, !tbaa !10
  store ptr %4, ptr %retval, align 8
  br label %return

if.end:
  %5 = load ptr, ptr %__a.addr, align 8, !tbaa !10
  store ptr %5, ptr %retval, align 8
  br label %return

return:
  %6 = load ptr, ptr %retval, align 8
  ret ptr %6
}

declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1

; NOTE(review): attribute groups #2 (on @max) and #3 (on the lifetime calls)
; are referenced above but not defined in this chunk — confirm against the
; original test that their definitions were not dropped in transit.
attributes #0 = { mustprogress "target-cpu" = "skylake-avx512" }
attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }

; Metadata legend: !6/!7 int TBAA, !10/!11 any-pointer TBAA, !13/!14 double TBAA
; (all rooted at the Simple C++ TBAA node !9); !12 is the access group named by
; every !llvm.access.group above; !15 is the loop metadata (mustprogress,
; parallel_accesses(!12), vectorize.enable).
!6 = !{!7, !7, i64 0}
!7 = !{!"int", !8, i64 0}
!8 = !{!"omnipotent char", !9, i64 0}
!9 = !{!"Simple C++ TBAA"}
!10 = !{!11, !11, i64 0}
!11 = !{!"any pointer", !8, i64 0}
!12 = distinct !{}
!13 = !{!14, !14, i64 0}
!14 = !{!"double", !8, i64 0}
!15 = distinct !{!15, !16, !17, !18}
!16 = !{!"llvm.loop.mustprogress"}
!17 = !{!"llvm.loop.parallel_accesses", !12}
!18 = !{!"llvm.loop.vectorize.enable", i1 true}

;.
; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
; CHECK: [[META1]] = !{!"int", [[META2:![0-9]+]], i64 0}
; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0}
; CHECK: [[META3]] = !{!"Simple C++ TBAA"}
; CHECK: [[ACC_GRP4]] = distinct !{}
; CHECK: [[TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0}
; CHECK: [[META6]] = !{!"double", [[META2]], i64 0}
; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META8:![0-9]+]], [[META9:![0-9]+]], [[META10:![0-9]+]], [[META11:![0-9]+]]}
; CHECK: [[META8]] = !{!"llvm.loop.mustprogress"}
; CHECK: [[META9]] = !{!"llvm.loop.parallel_accesses", [[ACC_GRP4]]}
; CHECK: [[META10]] = !{!"llvm.loop.isvectorized", i32 1}
; CHECK: [[META11]] = !{!"llvm.loop.unroll.runtime.disable"}
; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META8]], [[META9]], [[META11]], [[META10]]}
;.