; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -S -passes="default<O1>" -mtriple aarch64 -mcpu=cortex-a55 | FileCheck %s -check-prefix=CHECK-A55
; RUN: opt < %s -S -passes="default<O1>" -mtriple aarch64 | FileCheck %s -check-prefix=CHECK-GENERIC

; Testing that, while runtime unrolling is performed on in-order cores (such as the cortex-a55), it is not performed when -mcpu is not specified
;
; How to read the expectations below:
;  * The first opt invocation (cortex-a55) expects the loop body to be
;    replicated four times per iteration: the induction variable advances by 4
;    and the main loop runs for (trip count & ~3) iterations. The leftover
;    (trip count & 3) iterations are handled by up to three straight-line
;    remainder blocks (for.body6.epil, for.body6.epil.1, for.body6.epil.2).
;  * The second opt invocation (no -mcpu) expects a single copy of the loop
;    body with the induction variable advancing by 1.
; The input IR is unoptimized (arguments and the counter live in allocas); the
; O1 pipeline is expected to clean it up before the unrolling decision.
define void @runtime_unroll_generic(i32 %arg_0, ptr %arg_1, ptr %arg_2, ptr %arg_3) {
; CHECK-A55-LABEL: @runtime_unroll_generic(
; CHECK-A55-NEXT:  entry:
; CHECK-A55-NEXT:    [[CMP52_NOT:%.*]] = icmp eq i32 [[ARG_0:%.*]], 0
; CHECK-A55-NEXT:    br i1 [[CMP52_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY6_PREHEADER:%.*]]
; CHECK-A55:       for.body6.preheader:
; CHECK-A55-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[ARG_0]] to i64
; CHECK-A55-NEXT:    [[XTRAITER:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 3
; CHECK-A55-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[ARG_0]], 4
; CHECK-A55-NEXT:    br i1 [[TMP0]], label [[FOR_END_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY6_PREHEADER_NEW:%.*]]
; CHECK-A55:       for.body6.preheader.new:
; CHECK-A55-NEXT:    [[UNROLL_ITER:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 4294967292
; CHECK-A55-NEXT:    br label [[FOR_BODY6:%.*]]
; CHECK-A55:       for.body6:
; CHECK-A55-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY6_PREHEADER_NEW]] ], [ [[INDVARS_IV_NEXT_3:%.*]], [[FOR_BODY6]] ]
; CHECK-A55-NEXT:    [[NITER:%.*]] = phi i64 [ 0, [[FOR_BODY6_PREHEADER_NEW]] ], [ [[NITER_NEXT_3:%.*]], [[FOR_BODY6]] ]
; CHECK-A55-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw i16, ptr [[ARG_2:%.*]], i64 [[INDVARS_IV]]
; CHECK-A55-NEXT:    [[TMP1:%.*]] = load i16, ptr [[ARRAYIDX10]], align 2
; CHECK-A55-NEXT:    [[CONV:%.*]] = sext i16 [[TMP1]] to i32
; CHECK-A55-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds nuw i16, ptr [[ARG_3:%.*]], i64 [[INDVARS_IV]]
; CHECK-A55-NEXT:    [[TMP2:%.*]] = load i16, ptr [[ARRAYIDX14]], align 2
; CHECK-A55-NEXT:    [[CONV15:%.*]] = sext i16 [[TMP2]] to i32
; CHECK-A55-NEXT:    [[MUL16:%.*]] = mul nsw i32 [[CONV15]], [[CONV]]
; CHECK-A55-NEXT:    [[ARRAYIDX20:%.*]] = getelementptr inbounds nuw i32, ptr [[ARG_1:%.*]], i64 [[INDVARS_IV]]
; CHECK-A55-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX20]], align 4
; CHECK-A55-NEXT:    [[ADD21:%.*]] = add nsw i32 [[MUL16]], [[TMP3]]
; CHECK-A55-NEXT:    store i32 [[ADD21]], ptr [[ARRAYIDX20]], align 4
; CHECK-A55-NEXT:    [[INDVARS_IV_NEXT:%.*]] = or disjoint i64 [[INDVARS_IV]], 1
; CHECK-A55-NEXT:    [[ARRAYIDX10_1:%.*]] = getelementptr inbounds nuw i16, ptr [[ARG_2]], i64 [[INDVARS_IV_NEXT]]
; CHECK-A55-NEXT:    [[TMP4:%.*]] = load i16, ptr [[ARRAYIDX10_1]], align 2
; CHECK-A55-NEXT:    [[CONV_1:%.*]] = sext i16 [[TMP4]] to i32
; CHECK-A55-NEXT:    [[ARRAYIDX14_1:%.*]] = getelementptr inbounds nuw i16, ptr [[ARG_3]], i64 [[INDVARS_IV_NEXT]]
; CHECK-A55-NEXT:    [[TMP5:%.*]] = load i16, ptr [[ARRAYIDX14_1]], align 2
; CHECK-A55-NEXT:    [[CONV15_1:%.*]] = sext i16 [[TMP5]] to i32
; CHECK-A55-NEXT:    [[MUL16_1:%.*]] = mul nsw i32 [[CONV15_1]], [[CONV_1]]
; CHECK-A55-NEXT:    [[ARRAYIDX20_1:%.*]] = getelementptr inbounds nuw i32, ptr [[ARG_1]], i64 [[INDVARS_IV_NEXT]]
; CHECK-A55-NEXT:    [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX20_1]], align 4
; CHECK-A55-NEXT:    [[ADD21_1:%.*]] = add nsw i32 [[MUL16_1]], [[TMP6]]
; CHECK-A55-NEXT:    store i32 [[ADD21_1]], ptr [[ARRAYIDX20_1]], align 4
; CHECK-A55-NEXT:    [[INDVARS_IV_NEXT_1:%.*]] = or disjoint i64 [[INDVARS_IV]], 2
; CHECK-A55-NEXT:    [[ARRAYIDX10_2:%.*]] = getelementptr inbounds nuw i16, ptr [[ARG_2]], i64 [[INDVARS_IV_NEXT_1]]
; CHECK-A55-NEXT:    [[TMP7:%.*]] = load i16, ptr [[ARRAYIDX10_2]], align 2
; CHECK-A55-NEXT:    [[CONV_2:%.*]] = sext i16 [[TMP7]] to i32
; CHECK-A55-NEXT:    [[ARRAYIDX14_2:%.*]] = getelementptr inbounds nuw i16, ptr [[ARG_3]], i64 [[INDVARS_IV_NEXT_1]]
; CHECK-A55-NEXT:    [[TMP8:%.*]] = load i16, ptr [[ARRAYIDX14_2]], align 2
; CHECK-A55-NEXT:    [[CONV15_2:%.*]] = sext i16 [[TMP8]] to i32
; CHECK-A55-NEXT:    [[MUL16_2:%.*]] = mul nsw i32 [[CONV15_2]], [[CONV_2]]
; CHECK-A55-NEXT:    [[ARRAYIDX20_2:%.*]] = getelementptr inbounds nuw i32, ptr [[ARG_1]], i64 [[INDVARS_IV_NEXT_1]]
; CHECK-A55-NEXT:    [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX20_2]], align 4
; CHECK-A55-NEXT:    [[ADD21_2:%.*]] = add nsw i32 [[MUL16_2]], [[TMP9]]
; CHECK-A55-NEXT:    store i32 [[ADD21_2]], ptr [[ARRAYIDX20_2]], align 4
; CHECK-A55-NEXT:    [[INDVARS_IV_NEXT_2:%.*]] = or disjoint i64 [[INDVARS_IV]], 3
; CHECK-A55-NEXT:    [[ARRAYIDX10_3:%.*]] = getelementptr inbounds nuw i16, ptr [[ARG_2]], i64 [[INDVARS_IV_NEXT_2]]
; CHECK-A55-NEXT:    [[TMP10:%.*]] = load i16, ptr [[ARRAYIDX10_3]], align 2
; CHECK-A55-NEXT:    [[CONV_3:%.*]] = sext i16 [[TMP10]] to i32
; CHECK-A55-NEXT:    [[ARRAYIDX14_3:%.*]] = getelementptr inbounds nuw i16, ptr [[ARG_3]], i64 [[INDVARS_IV_NEXT_2]]
; CHECK-A55-NEXT:    [[TMP11:%.*]] = load i16, ptr [[ARRAYIDX14_3]], align 2
; CHECK-A55-NEXT:    [[CONV15_3:%.*]] = sext i16 [[TMP11]] to i32
; CHECK-A55-NEXT:    [[MUL16_3:%.*]] = mul nsw i32 [[CONV15_3]], [[CONV_3]]
; CHECK-A55-NEXT:    [[ARRAYIDX20_3:%.*]] = getelementptr inbounds nuw i32, ptr [[ARG_1]], i64 [[INDVARS_IV_NEXT_2]]
; CHECK-A55-NEXT:    [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX20_3]], align 4
; CHECK-A55-NEXT:    [[ADD21_3:%.*]] = add nsw i32 [[MUL16_3]], [[TMP12]]
; CHECK-A55-NEXT:    store i32 [[ADD21_3]], ptr [[ARRAYIDX20_3]], align 4
; CHECK-A55-NEXT:    [[INDVARS_IV_NEXT_3]] = add nuw nsw i64 [[INDVARS_IV]], 4
; CHECK-A55-NEXT:    [[NITER_NEXT_3]] = add i64 [[NITER]], 4
; CHECK-A55-NEXT:    [[NITER_NCMP_3:%.*]] = icmp eq i64 [[NITER_NEXT_3]], [[UNROLL_ITER]]
; CHECK-A55-NEXT:    br i1 [[NITER_NCMP_3]], label [[FOR_END_LOOPEXIT_UNR_LCSSA]], label [[FOR_BODY6]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK-A55:       for.end.loopexit.unr-lcssa:
; CHECK-A55-NEXT:    [[INDVARS_IV_UNR:%.*]] = phi i64 [ 0, [[FOR_BODY6_PREHEADER]] ], [ [[INDVARS_IV_NEXT_3]], [[FOR_BODY6]] ]
; CHECK-A55-NEXT:    [[LCMP_MOD_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 0
; CHECK-A55-NEXT:    br i1 [[LCMP_MOD_NOT]], label [[FOR_END]], label [[FOR_BODY6_EPIL:%.*]]
; CHECK-A55:       for.body6.epil:
; CHECK-A55-NEXT:    [[ARRAYIDX10_EPIL:%.*]] = getelementptr inbounds nuw i16, ptr [[ARG_2]], i64 [[INDVARS_IV_UNR]]
; CHECK-A55-NEXT:    [[TMP13:%.*]] = load i16, ptr [[ARRAYIDX10_EPIL]], align 2
; CHECK-A55-NEXT:    [[CONV_EPIL:%.*]] = sext i16 [[TMP13]] to i32
; CHECK-A55-NEXT:    [[ARRAYIDX14_EPIL:%.*]] = getelementptr inbounds nuw i16, ptr [[ARG_3]], i64 [[INDVARS_IV_UNR]]
; CHECK-A55-NEXT:    [[TMP14:%.*]] = load i16, ptr [[ARRAYIDX14_EPIL]], align 2
; CHECK-A55-NEXT:    [[CONV15_EPIL:%.*]] = sext i16 [[TMP14]] to i32
; CHECK-A55-NEXT:    [[MUL16_EPIL:%.*]] = mul nsw i32 [[CONV15_EPIL]], [[CONV_EPIL]]
; CHECK-A55-NEXT:    [[ARRAYIDX20_EPIL:%.*]] = getelementptr inbounds nuw i32, ptr [[ARG_1]], i64 [[INDVARS_IV_UNR]]
; CHECK-A55-NEXT:    [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX20_EPIL]], align 4
; CHECK-A55-NEXT:    [[ADD21_EPIL:%.*]] = add nsw i32 [[MUL16_EPIL]], [[TMP15]]
; CHECK-A55-NEXT:    store i32 [[ADD21_EPIL]], ptr [[ARRAYIDX20_EPIL]], align 4
; CHECK-A55-NEXT:    [[EPIL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 1
; CHECK-A55-NEXT:    br i1 [[EPIL_ITER_CMP_NOT]], label [[FOR_END]], label [[FOR_BODY6_EPIL_1:%.*]]
; CHECK-A55:       for.body6.epil.1:
; CHECK-A55-NEXT:    [[INDVARS_IV_NEXT_EPIL:%.*]] = add nuw nsw i64 [[INDVARS_IV_UNR]], 1
; CHECK-A55-NEXT:    [[ARRAYIDX10_EPIL_1:%.*]] = getelementptr inbounds nuw i16, ptr [[ARG_2]], i64 [[INDVARS_IV_NEXT_EPIL]]
; CHECK-A55-NEXT:    [[TMP16:%.*]] = load i16, ptr [[ARRAYIDX10_EPIL_1]], align 2
; CHECK-A55-NEXT:    [[CONV_EPIL_1:%.*]] = sext i16 [[TMP16]] to i32
; CHECK-A55-NEXT:    [[ARRAYIDX14_EPIL_1:%.*]] = getelementptr inbounds nuw i16, ptr [[ARG_3]], i64 [[INDVARS_IV_NEXT_EPIL]]
; CHECK-A55-NEXT:    [[TMP17:%.*]] = load i16, ptr [[ARRAYIDX14_EPIL_1]], align 2
; CHECK-A55-NEXT:    [[CONV15_EPIL_1:%.*]] = sext i16 [[TMP17]] to i32
; CHECK-A55-NEXT:    [[MUL16_EPIL_1:%.*]] = mul nsw i32 [[CONV15_EPIL_1]], [[CONV_EPIL_1]]
; CHECK-A55-NEXT:    [[ARRAYIDX20_EPIL_1:%.*]] = getelementptr inbounds nuw i32, ptr [[ARG_1]], i64 [[INDVARS_IV_NEXT_EPIL]]
; CHECK-A55-NEXT:    [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX20_EPIL_1]], align 4
; CHECK-A55-NEXT:    [[ADD21_EPIL_1:%.*]] = add nsw i32 [[MUL16_EPIL_1]], [[TMP18]]
; CHECK-A55-NEXT:    store i32 [[ADD21_EPIL_1]], ptr [[ARRAYIDX20_EPIL_1]], align 4
; CHECK-A55-NEXT:    [[EPIL_ITER_CMP_1_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 2
; CHECK-A55-NEXT:    br i1 [[EPIL_ITER_CMP_1_NOT]], label [[FOR_END]], label [[FOR_BODY6_EPIL_2:%.*]]
; CHECK-A55:       for.body6.epil.2:
; CHECK-A55-NEXT:    [[INDVARS_IV_NEXT_EPIL_1:%.*]] = add nuw nsw i64 [[INDVARS_IV_UNR]], 2
; CHECK-A55-NEXT:    [[ARRAYIDX10_EPIL_2:%.*]] = getelementptr inbounds nuw i16, ptr [[ARG_2]], i64 [[INDVARS_IV_NEXT_EPIL_1]]
; CHECK-A55-NEXT:    [[TMP19:%.*]] = load i16, ptr [[ARRAYIDX10_EPIL_2]], align 2
; CHECK-A55-NEXT:    [[CONV_EPIL_2:%.*]] = sext i16 [[TMP19]] to i32
; CHECK-A55-NEXT:    [[ARRAYIDX14_EPIL_2:%.*]] = getelementptr inbounds nuw i16, ptr [[ARG_3]], i64 [[INDVARS_IV_NEXT_EPIL_1]]
; CHECK-A55-NEXT:    [[TMP20:%.*]] = load i16, ptr [[ARRAYIDX14_EPIL_2]], align 2
; CHECK-A55-NEXT:    [[CONV15_EPIL_2:%.*]] = sext i16 [[TMP20]] to i32
; CHECK-A55-NEXT:    [[MUL16_EPIL_2:%.*]] = mul nsw i32 [[CONV15_EPIL_2]], [[CONV_EPIL_2]]
; CHECK-A55-NEXT:    [[ARRAYIDX20_EPIL_2:%.*]] = getelementptr inbounds nuw i32, ptr [[ARG_1]], i64 [[INDVARS_IV_NEXT_EPIL_1]]
; CHECK-A55-NEXT:    [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX20_EPIL_2]], align 4
; CHECK-A55-NEXT:    [[ADD21_EPIL_2:%.*]] = add nsw i32 [[MUL16_EPIL_2]], [[TMP21]]
; CHECK-A55-NEXT:    store i32 [[ADD21_EPIL_2]], ptr [[ARRAYIDX20_EPIL_2]], align 4
; CHECK-A55-NEXT:    br label [[FOR_END]]
; CHECK-A55:       for.end:
; CHECK-A55-NEXT:    ret void
;
; CHECK-GENERIC-LABEL: @runtime_unroll_generic(
; CHECK-GENERIC-NEXT:  entry:
; CHECK-GENERIC-NEXT:    [[CMP52_NOT:%.*]] = icmp eq i32 [[ARG_0:%.*]], 0
; CHECK-GENERIC-NEXT:    br i1 [[CMP52_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY6_PREHEADER:%.*]]
; CHECK-GENERIC:       for.body6.preheader:
; CHECK-GENERIC-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[ARG_0]] to i64
; CHECK-GENERIC-NEXT:    br label [[FOR_BODY6:%.*]]
; CHECK-GENERIC:       for.body6:
; CHECK-GENERIC-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY6_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY6]] ]
; CHECK-GENERIC-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw i16, ptr [[ARG_2:%.*]], i64 [[INDVARS_IV]]
; CHECK-GENERIC-NEXT:    [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX10]], align 2
; CHECK-GENERIC-NEXT:    [[CONV:%.*]] = sext i16 [[TMP0]] to i32
; CHECK-GENERIC-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds nuw i16, ptr [[ARG_3:%.*]], i64 [[INDVARS_IV]]
; CHECK-GENERIC-NEXT:    [[TMP1:%.*]] = load i16, ptr [[ARRAYIDX14]], align 2
; CHECK-GENERIC-NEXT:    [[CONV15:%.*]] = sext i16 [[TMP1]] to i32
; CHECK-GENERIC-NEXT:    [[MUL16:%.*]] = mul nsw i32 [[CONV15]], [[CONV]]
; CHECK-GENERIC-NEXT:    [[ARRAYIDX20:%.*]] = getelementptr inbounds nuw i32, ptr [[ARG_1:%.*]], i64 [[INDVARS_IV]]
; CHECK-GENERIC-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX20]], align 4
; CHECK-GENERIC-NEXT:    [[ADD21:%.*]] = add nsw i32 [[MUL16]], [[TMP2]]
; CHECK-GENERIC-NEXT:    store i32 [[ADD21]], ptr [[ARRAYIDX20]], align 4
; CHECK-GENERIC-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-GENERIC-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
; CHECK-GENERIC-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY6]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK-GENERIC:       for.end:
; CHECK-GENERIC-NEXT:    ret void
;
entry:
  ; Spill every argument into its own stack slot; %k is the slot holding the
  ; loop counter.
  %arg_0.addr = alloca i32, align 4
  %arg_1.addr = alloca ptr, align 8
  %arg_2.addr = alloca ptr, align 8
  %arg_3.addr = alloca ptr, align 8
  %k = alloca i32, align 4
  store i32 %arg_0, ptr %arg_0.addr, align 4
  store ptr %arg_1, ptr %arg_1.addr, align 8
  store ptr %arg_2, ptr %arg_2.addr, align 8
  store ptr %arg_3, ptr %arg_3.addr, align 8
  br label %for.cond

; for.cond, for.body, for.cond1 and for.body3 each have a single predecessor
; and only forward control towards the loop below.
; NOTE(review): presumably remnants of outer loops in the source this test was
; reduced from - confirm before simplifying.
for.cond:                                         ; preds = %entry
  br label %for.body

for.body:                                         ; preds = %for.cond
  br label %for.cond1

for.cond1:                                        ; preds = %for.body
  br label %for.body3

for.body3:                                        ; preds = %for.cond1
  ; k = 0
  store i32 0, ptr %k, align 4
  br label %for.cond4

; Loop header: keep iterating while k <u arg_0 (unsigned comparison).
for.cond4:                                        ; preds = %for.inc, %for.body3
  %0 = load i32, ptr %k, align 4
  %1 = load i32, ptr %arg_0.addr, align 4
  %cmp5 = icmp ult i32 %0, %1
  br i1 %cmp5, label %for.body6, label %for.end

; Loop body: arg_1[k] += sext(arg_2[k]) * sext(arg_3[k]).
; arg_2 and arg_3 are indexed as i16 arrays, arg_1 as an i32 array; the index is
; the zero-extended value of k loaded in the header.
for.body6:                                        ; preds = %for.cond4
  %2 = load ptr, ptr %arg_2.addr, align 8
  %idx.ext = zext i32 %0 to i64
  %arrayidx10 = getelementptr inbounds i16, ptr %2, i64 %idx.ext
  %3 = load i16, ptr %arrayidx10, align 2
  %conv = sext i16 %3 to i32
  %4 = load ptr, ptr %arg_3.addr, align 8
  %arrayidx14 = getelementptr inbounds i16, ptr %4, i64 %idx.ext
  %5 = load i16, ptr %arrayidx14, align 2
  %conv15 = sext i16 %5 to i32
  %mul16 = mul nsw i32 %conv, %conv15
  %6 = load ptr, ptr %arg_1.addr, align 8
  %arrayidx20 = getelementptr inbounds i32, ptr %6, i64 %idx.ext
  %7 = load i32, ptr %arrayidx20, align 4
  %add21 = add nsw i32 %7, %mul16
  store i32 %add21, ptr %arrayidx20, align 4
  br label %for.inc

; Latch: k += 1, then take the back edge, which carries loop metadata !0.
for.inc:                                          ; preds = %for.body6
  %8 = load i32, ptr %k, align 4
  %inc = add i32 %8, 1
  store i32 %inc, ptr %k, align 4
  br label %for.cond4, !llvm.loop !0

for.end:                                          ; preds = %for.cond4
  ret void
}

; !0 is the self-referential loop ID attached to the back edge above; its only
; property is llvm.loop.mustprogress (!1).
!0 = distinct !{!0, !1}
!1 = !{!"llvm.loop.mustprogress"}