; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=1 < %s | FileCheck %s

target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"

; Float pattern:
;   Check vectorization of reduction code which has an fadd instruction after
;   an fcmp instruction which compares an array element and 0.
;
; float fcmp_0_fadd_select1(float *restrict x, const int N) {
;   float sum = 0.;
;   for (int i = 0; i < N; ++i)
;     if (x[i] > (float)0.)
;       sum += x[i];
;   return sum;
; }

define float @fcmp_0_fadd_select1(ptr noalias %x, i32 %N) nounwind readonly {
; CHECK-LABEL: define float @fcmp_0_fadd_select1(
; CHECK-SAME: ptr noalias [[X:%.*]], i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    [[CMP_1:%.*]] = icmp sgt i32 [[N]], 0
; CHECK-NEXT:    br i1 [[CMP_1]], label %[[FOR_HEADER:.*]], label %[[FOR_END:.*]]
; CHECK:       [[FOR_HEADER]]:
; CHECK-NEXT:    [[ZEXT:%.*]] = zext i32 [[N]] to i64
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[ZEXT]], 4
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK:       [[VECTOR_PH]]:
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[ZEXT]], 4
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[ZEXT]], [[N_MOD_VF]]
; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
; CHECK:       [[VECTOR_BODY]]:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = fcmp fast ogt <4 x float> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT:    [[TMP4:%.*]] = fadd fast <4 x float> [[WIDE_LOAD]], [[VEC_PHI]]
; CHECK-NEXT:    [[TMP5]] = select <4 x i1> [[TMP3]], <4 x float> [[TMP4]], <4 x float> [[VEC_PHI]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK:       [[MIDDLE_BLOCK]]:
; CHECK-NEXT:    [[TMP7:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP5]])
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[ZEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; CHECK:       [[SCALAR_PH]]:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_HEADER]] ]
; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[FOR_HEADER]] ]
; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
; CHECK:       [[FOR_BODY]]:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT:    [[SUM_1:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SUM_2:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[TMP8:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[CMP_2:%.*]] = fcmp fast ogt float [[TMP8]], 0.000000e+00
; CHECK-NEXT:    [[ADD:%.*]] = fadd fast float [[TMP8]], [[SUM_1]]
; CHECK-NEXT:    [[SUM_2]] = select i1 [[CMP_2]], float [[ADD]], float [[SUM_1]]
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[ZEXT]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK:       [[FOR_END_LOOPEXIT]]:
; CHECK-NEXT:    [[SUM_2_LCSSA:%.*]] = phi float [ [[SUM_2]], %[[FOR_BODY]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    br label %[[FOR_END]]
; CHECK:       [[FOR_END]]:
; CHECK-NEXT:    [[TMP9:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[SUM_2_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
; CHECK-NEXT:    ret float [[TMP9]]
;
entry:
  ; Skip the loop entirely when N <= 0.
  %cmp.1 = icmp sgt i32 %N, 0
  br i1 %cmp.1, label %for.header, label %for.end

for.header:                                       ; preds = %entry
  %zext = zext i32 %N to i64
  br label %for.body

for.body:                                         ; preds = %for.header, %for.body
  %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
  %sum.1 = phi float [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ]
  %arrayidx = getelementptr inbounds float, ptr %x, i64 %indvars.iv
  %0 = load float, ptr %arrayidx, align 4
  %cmp.2 = fcmp fast ogt float %0, 0.000000e+00
  %add = fadd fast float %0, %sum.1
  ; If-converted form of "if (x[i] > 0) sum += x[i]": keep %sum.1 otherwise.
  %sum.2 = select i1 %cmp.2, float %add, float %sum.1
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %zext
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  %1 = phi float [ 0.000000e+00, %entry ], [ %sum.2, %for.body ]
  ret float %1
}

; Double pattern:
;   Check vectorization of reduction code which has an fadd instruction after
;   an fcmp instruction which compares an array element and 0.
;
; double fcmp_0_fadd_select2(double *restrict x, const int N) {
;   double sum = 0.;
;   for (int i = 0; i < N; ++i)
;     if (x[i] > 0.)
;       sum += x[i];
;   return sum;
; }

define double @fcmp_0_fadd_select2(ptr noalias %x, i32 %N) nounwind readonly {
; CHECK-LABEL: define double @fcmp_0_fadd_select2(
; CHECK-SAME: ptr noalias [[X:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    [[CMP_1:%.*]] = icmp sgt i32 [[N]], 0
; CHECK-NEXT:    br i1 [[CMP_1]], label %[[FOR_HEADER:.*]], label %[[FOR_END:.*]]
; CHECK:       [[FOR_HEADER]]:
; CHECK-NEXT:    [[ZEXT:%.*]] = zext i32 [[N]] to i64
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[ZEXT]], 4
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK:       [[VECTOR_PH]]:
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[ZEXT]], 4
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[ZEXT]], [[N_MOD_VF]]
; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
; CHECK:       [[VECTOR_BODY]]:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 0
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = fcmp fast ogt <4 x double> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT:    [[TMP4:%.*]] = fadd fast <4 x double> [[WIDE_LOAD]], [[VEC_PHI]]
; CHECK-NEXT:    [[TMP5]] = select <4 x i1> [[TMP3]], <4 x double> [[TMP4]], <4 x double> [[VEC_PHI]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK:       [[MIDDLE_BLOCK]]:
; CHECK-NEXT:    [[TMP7:%.*]] = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> [[TMP5]])
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[ZEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; CHECK:       [[SCALAR_PH]]:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_HEADER]] ]
; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi double [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[FOR_HEADER]] ]
; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
; CHECK:       [[FOR_BODY]]:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT:    [[SUM_1:%.*]] = phi double [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SUM_2:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[TMP8:%.*]] = load double, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[CMP_2:%.*]] = fcmp fast ogt double [[TMP8]], 0.000000e+00
; CHECK-NEXT:    [[ADD:%.*]] = fadd fast double [[TMP8]], [[SUM_1]]
; CHECK-NEXT:    [[SUM_2]] = select i1 [[CMP_2]], double [[ADD]], double [[SUM_1]]
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[ZEXT]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK:       [[FOR_END_LOOPEXIT]]:
; CHECK-NEXT:    [[SUM_2_LCSSA:%.*]] = phi double [ [[SUM_2]], %[[FOR_BODY]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    br label %[[FOR_END]]
; CHECK:       [[FOR_END]]:
; CHECK-NEXT:    [[TMP9:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[SUM_2_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
; CHECK-NEXT:    ret double [[TMP9]]
;
entry:
  ; Skip the loop entirely when N <= 0.
  %cmp.1 = icmp sgt i32 %N, 0
  br i1 %cmp.1, label %for.header, label %for.end

for.header:                                       ; preds = %entry
  %zext = zext i32 %N to i64
  br label %for.body

for.body:                                         ; preds = %for.header, %for.body
  %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
  %sum.1 = phi double [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ]
  %arrayidx = getelementptr inbounds double, ptr %x, i64 %indvars.iv
  %0 = load double, ptr %arrayidx, align 4
  %cmp.2 = fcmp fast ogt double %0, 0.000000e+00
  %add = fadd fast double %0, %sum.1
  ; If-converted form of "if (x[i] > 0) sum += x[i]": keep %sum.1 otherwise.
  %sum.2 = select i1 %cmp.2, double %add, double %sum.1
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %zext
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  %1 = phi double [ 0.000000e+00, %entry ], [ %sum.2, %for.body ]
  ret double %1
}

; Float pattern:
;   Check vectorization of reduction code which has an fadd instruction after
;   an fcmp instruction which compares an array element and a floating-point
;   value.
;
; float fcmp_val_fadd_select1(float *restrict x, float y, const int N) {
;   float sum = 0.;
;   for (int i = 0; i < N; ++i)
;     if (x[i] > y)
;       sum += x[i];
;   return sum;
; }

define float @fcmp_val_fadd_select1(ptr noalias %x, float %y, i32 %N) nounwind readonly {
; CHECK-LABEL: define float @fcmp_val_fadd_select1(
; CHECK-SAME: ptr noalias [[X:%.*]], float [[Y:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    [[CMP_1:%.*]] = icmp sgt i32 [[N]], 0
; CHECK-NEXT:    br i1 [[CMP_1]], label %[[FOR_HEADER:.*]], label %[[FOR_END:.*]]
; CHECK:       [[FOR_HEADER]]:
; CHECK-NEXT:    [[ZEXT:%.*]] = zext i32 [[N]] to i64
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[ZEXT]], 4
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK:       [[VECTOR_PH]]:
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[ZEXT]], 4
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[ZEXT]], [[N_MOD_VF]]
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[Y]], i64 0
; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
; CHECK:       [[VECTOR_BODY]]:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = fcmp fast ogt <4 x float> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; CHECK-NEXT:    [[TMP4:%.*]] = fadd fast <4 x float> [[WIDE_LOAD]], [[VEC_PHI]]
; CHECK-NEXT:    [[TMP5]] = select <4 x i1> [[TMP3]], <4 x float> [[TMP4]], <4 x float> [[VEC_PHI]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK:       [[MIDDLE_BLOCK]]:
; CHECK-NEXT:    [[TMP7:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP5]])
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[ZEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; CHECK:       [[SCALAR_PH]]:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_HEADER]] ]
; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[FOR_HEADER]] ]
; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
; CHECK:       [[FOR_BODY]]:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT:    [[SUM_1:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SUM_2:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[TMP8:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[CMP_2:%.*]] = fcmp fast ogt float [[TMP8]], [[Y]]
; CHECK-NEXT:    [[ADD:%.*]] = fadd fast float [[TMP8]], [[SUM_1]]
; CHECK-NEXT:    [[SUM_2]] = select i1 [[CMP_2]], float [[ADD]], float [[SUM_1]]
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[ZEXT]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK:       [[FOR_END_LOOPEXIT]]:
; CHECK-NEXT:    [[SUM_2_LCSSA:%.*]] = phi float [ [[SUM_2]], %[[FOR_BODY]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    br label %[[FOR_END]]
; CHECK:       [[FOR_END]]:
; CHECK-NEXT:    [[TMP9:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[SUM_2_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
; CHECK-NEXT:    ret float [[TMP9]]
;
entry:
  ; Skip the loop entirely when N <= 0.
  %cmp.1 = icmp sgt i32 %N, 0
  br i1 %cmp.1, label %for.header, label %for.end

for.header:                                       ; preds = %entry
  %zext = zext i32 %N to i64
  br label %for.body

for.body:                                         ; preds = %for.header, %for.body
  %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
  %sum.1 = phi float [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ]
  %arrayidx = getelementptr inbounds float, ptr %x, i64 %indvars.iv
  %0 = load float, ptr %arrayidx, align 4
  %cmp.2 = fcmp fast ogt float %0, %y
  %add = fadd fast float %0, %sum.1
  ; If-converted form of "if (x[i] > y) sum += x[i]": keep %sum.1 otherwise.
  %sum.2 = select i1 %cmp.2, float %add, float %sum.1
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %zext
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  %1 = phi float [ 0.000000e+00, %entry ], [ %sum.2, %for.body ]
  ret float %1
}

; Double pattern:
;   Check vectorization of reduction code which has an fadd instruction after
;   an fcmp instruction which compares an array element and a floating-point
;   value.
;
; double fcmp_val_fadd_select2(double *restrict x, double y, const int N) {
;   double sum = 0.;
;   for (int i = 0; i < N; ++i)
;     if (x[i] > y)
;       sum += x[i];
;   return sum;
; }

define double @fcmp_val_fadd_select2(ptr noalias %x, double %y, i32 %N) nounwind readonly {
; CHECK-LABEL: define double @fcmp_val_fadd_select2(
; CHECK-SAME: ptr noalias [[X:%.*]], double [[Y:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    [[CMP_1:%.*]] = icmp sgt i32 [[N]], 0
; CHECK-NEXT:    br i1 [[CMP_1]], label %[[FOR_HEADER:.*]], label %[[FOR_END:.*]]
; CHECK:       [[FOR_HEADER]]:
; CHECK-NEXT:    [[ZEXT:%.*]] = zext i32 [[N]] to i64
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[ZEXT]], 4
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK:       [[VECTOR_PH]]:
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[ZEXT]], 4
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[ZEXT]], [[N_MOD_VF]]
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[Y]], i64 0
; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
; CHECK:       [[VECTOR_BODY]]:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 0
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = fcmp fast ogt <4 x double> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; CHECK-NEXT:    [[TMP4:%.*]] = fadd fast <4 x double> [[WIDE_LOAD]], [[VEC_PHI]]
; CHECK-NEXT:    [[TMP5]] = select <4 x i1> [[TMP3]], <4 x double> [[TMP4]], <4 x double> [[VEC_PHI]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK:       [[MIDDLE_BLOCK]]:
; CHECK-NEXT:    [[TMP7:%.*]] = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> [[TMP5]])
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[ZEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; CHECK:       [[SCALAR_PH]]:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_HEADER]] ]
; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi double [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[FOR_HEADER]] ]
; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
; CHECK:       [[FOR_BODY]]:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT:    [[SUM_1:%.*]] = phi double [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SUM_2:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[TMP8:%.*]] = load double, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[CMP_2:%.*]] = fcmp fast ogt double [[TMP8]], [[Y]]
; CHECK-NEXT:    [[ADD:%.*]] = fadd fast double [[TMP8]], [[SUM_1]]
; CHECK-NEXT:    [[SUM_2]] = select i1 [[CMP_2]], double [[ADD]], double [[SUM_1]]
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[ZEXT]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK:       [[FOR_END_LOOPEXIT]]:
; CHECK-NEXT:    [[SUM_2_LCSSA:%.*]] = phi double [ [[SUM_2]], %[[FOR_BODY]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    br label %[[FOR_END]]
; CHECK:       [[FOR_END]]:
; CHECK-NEXT:    [[TMP9:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[SUM_2_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
; CHECK-NEXT:    ret double [[TMP9]]
;
entry:
  ; Skip the loop entirely when N <= 0.
  %cmp.1 = icmp sgt i32 %N, 0
  br i1 %cmp.1, label %for.header, label %for.end

for.header:                                       ; preds = %entry
  %zext = zext i32 %N to i64
  br label %for.body

for.body:                                         ; preds = %for.header, %for.body
  %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
  %sum.1 = phi double [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ]
  %arrayidx = getelementptr inbounds double, ptr %x, i64 %indvars.iv
  %0 = load double, ptr %arrayidx, align 4
  %cmp.2 = fcmp fast ogt double %0, %y
  %add = fadd fast double %0, %sum.1
  ; If-converted form of "if (x[i] > y) sum += x[i]": keep %sum.1 otherwise.
  %sum.2 = select i1 %cmp.2, double %add, double %sum.1
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %zext
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  %1 = phi double [ 0.000000e+00, %entry ], [ %sum.2, %for.body ]
  ret double %1
}

; Float pattern:
;   Check vectorization of reduction code which has an fadd instruction after
;   an fcmp instruction which compares an array element and another array
;   element.
;
; float fcmp_array_elm_fadd_select1(float *restrict x, float *restrict y,
;                                   const int N) {
;   float sum = 0.;
;   for (int i = 0; i < N; ++i)
;     if (x[i] > y[i])
;       sum += x[i];
;   return sum;
; }

define float @fcmp_array_elm_fadd_select1(ptr noalias %x, ptr noalias %y, i32 %N) nounwind readonly {
; CHECK-LABEL: define float @fcmp_array_elm_fadd_select1(
; CHECK-SAME: ptr noalias [[X:%.*]], ptr noalias [[Y:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    [[CMP_1:%.*]] = icmp sgt i32 [[N]], 0
; CHECK-NEXT:    br i1 [[CMP_1]], label %[[FOR_HEADER:.*]], label %[[FOR_END:.*]]
; CHECK:       [[FOR_HEADER]]:
; CHECK-NEXT:    [[ZEXT:%.*]] = zext i32 [[N]] to i64
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[ZEXT]], 4
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK:       [[VECTOR_PH]]:
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[ZEXT]], 4
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[ZEXT]], [[N_MOD_VF]]
; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
; CHECK:       [[VECTOR_BODY]]:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, ptr [[Y]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 0
; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x float>, ptr [[TMP4]], align 4
; CHECK-NEXT:    [[TMP5:%.*]] = fcmp fast ogt <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]]
; CHECK-NEXT:    [[TMP6:%.*]] = fadd fast <4 x float> [[WIDE_LOAD]], [[VEC_PHI]]
; CHECK-NEXT:    [[TMP7]] = select <4 x i1> [[TMP5]], <4 x float> [[TMP6]], <4 x float> [[VEC_PHI]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK:       [[MIDDLE_BLOCK]]:
; CHECK-NEXT:    [[TMP9:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP7]])
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[ZEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; CHECK:       [[SCALAR_PH]]:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_HEADER]] ]
; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP9]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[FOR_HEADER]] ]
; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
; CHECK:       [[FOR_BODY]]:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT:    [[SUM_1:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SUM_2:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[TMP10:%.*]] = load float, ptr [[ARRAYIDX_1]], align 4
; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, ptr [[Y]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[TMP11:%.*]] = load float, ptr [[ARRAYIDX_2]], align 4
; CHECK-NEXT:    [[CMP_2:%.*]] = fcmp fast ogt float [[TMP10]], [[TMP11]]
; CHECK-NEXT:    [[ADD:%.*]] = fadd fast float [[TMP10]], [[SUM_1]]
; CHECK-NEXT:    [[SUM_2]] = select i1 [[CMP_2]], float [[ADD]], float [[SUM_1]]
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[ZEXT]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; CHECK:       [[FOR_END_LOOPEXIT]]:
; CHECK-NEXT:    [[SUM_2_LCSSA:%.*]] = phi float [ [[SUM_2]], %[[FOR_BODY]] ], [ [[TMP9]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    br label %[[FOR_END]]
; CHECK:       [[FOR_END]]:
; CHECK-NEXT:    [[TMP12:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[SUM_2_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
; CHECK-NEXT:    ret float [[TMP12]]
;
entry:
  ; Skip the loop entirely when N <= 0.
  %cmp.1 = icmp sgt i32 %N, 0
  br i1 %cmp.1, label %for.header, label %for.end

for.header:                                       ; preds = %entry
  %zext = zext i32 %N to i64
  br label %for.body

for.body:                                         ; preds = %for.body, %for.header
  %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
  %sum.1 = phi float [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ]
  %arrayidx.1 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
  %0 = load float, ptr %arrayidx.1, align 4
  %arrayidx.2 = getelementptr inbounds float, ptr %y, i64 %indvars.iv
  %1 = load float, ptr %arrayidx.2, align 4
  %cmp.2 = fcmp fast ogt float %0, %1
  %add = fadd fast float %0, %sum.1
  ; If-converted form of "if (x[i] > y[i]) sum += x[i]": keep %sum.1 otherwise.
  %sum.2 = select i1 %cmp.2, float %add, float %sum.1
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %zext
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  %2 = phi float [ 0.000000e+00, %entry ], [ %sum.2, %for.body ]
  ret float %2
}

; Double pattern:
;   Check vectorization of reduction code which has an fadd instruction after
;   an fcmp instruction which compares an array element and another array
;   element.
;
; double fcmp_array_elm_fadd_select2(double *restrict x, double *restrict y,
;                                    const int N) {
;   double sum = 0.;
;   for (int i = 0; i < N; ++i)
;     if (x[i] > y[i])
;       sum += x[i];
;   return sum;
; }

define double @fcmp_array_elm_fadd_select2(ptr noalias %x, ptr noalias %y, i32 %N) nounwind readonly {
; CHECK-LABEL: define double @fcmp_array_elm_fadd_select2(
; CHECK-SAME: ptr noalias [[X:%.*]], ptr noalias [[Y:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    [[CMP_1:%.*]] = icmp sgt i32 [[N]], 0
; CHECK-NEXT:    br i1 [[CMP_1]], label %[[FOR_HEADER:.*]], label %[[FOR_END:.*]]
; CHECK:       [[FOR_HEADER]]:
; CHECK-NEXT:    [[ZEXT:%.*]] = zext i32 [[N]] to i64
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[ZEXT]], 4
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK:       [[VECTOR_PH]]:
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[ZEXT]], 4
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[ZEXT]], [[N_MOD_VF]]
; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
; CHECK:       [[VECTOR_BODY]]:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 0
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 0
; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x double>, ptr [[TMP4]], align 4
; CHECK-NEXT:    [[TMP5:%.*]] = fcmp fast ogt <4 x double> [[WIDE_LOAD]], [[WIDE_LOAD1]]
; CHECK-NEXT:    [[TMP6:%.*]] = fadd fast <4 x double> [[WIDE_LOAD]], [[VEC_PHI]]
; CHECK-NEXT:    [[TMP7]] = select <4 x i1> [[TMP5]], <4 x double> [[TMP6]], <4 x double> [[VEC_PHI]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK:       [[MIDDLE_BLOCK]]:
; CHECK-NEXT:    [[TMP9:%.*]] = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> [[TMP7]])
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[ZEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; CHECK:       [[SCALAR_PH]]:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_HEADER]] ]
; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi double [ [[TMP9]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[FOR_HEADER]] ]
; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
; CHECK:       [[FOR_BODY]]:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT:    [[SUM_1:%.*]] = phi double [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SUM_2:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[TMP10:%.*]] = load double, ptr [[ARRAYIDX_1]], align 4
; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[TMP11:%.*]] = load double, ptr [[ARRAYIDX_2]], align 4
; CHECK-NEXT:    [[CMP_2:%.*]] = fcmp fast ogt double [[TMP10]], [[TMP11]]
; CHECK-NEXT:    [[ADD:%.*]] = fadd fast double [[TMP10]], [[SUM_1]]
; CHECK-NEXT:    [[SUM_2]] = select i1 [[CMP_2]], double [[ADD]], double [[SUM_1]]
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[ZEXT]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; CHECK:       [[FOR_END_LOOPEXIT]]:
; CHECK-NEXT:    [[SUM_2_LCSSA:%.*]] = phi double [ [[SUM_2]], %[[FOR_BODY]] ], [ [[TMP9]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    br label %[[FOR_END]]
; CHECK:       [[FOR_END]]:
; CHECK-NEXT:    [[TMP12:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[SUM_2_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
; CHECK-NEXT:    ret double [[TMP12]]
;
entry:
  ; Skip the loop entirely when N <= 0.
  %cmp.1 = icmp sgt i32 %N, 0
  br i1 %cmp.1, label %for.header, label %for.end

for.header:                                       ; preds = %entry
  %zext = zext i32 %N to i64
  br label %for.body

for.body:                                         ; preds = %for.body, %for.header
  %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
  %sum.1 = phi double [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ]
  %arrayidx.1 = getelementptr inbounds double, ptr %x, i64 %indvars.iv
  %0 = load double, ptr %arrayidx.1, align 4
  %arrayidx.2 = getelementptr inbounds double, ptr %y, i64 %indvars.iv
  %1 = load double, ptr %arrayidx.2, align 4
  %cmp.2 = fcmp fast ogt double %0, %1
  %add = fadd fast double %0, %sum.1
  ; If-converted form of "if (x[i] > y[i]) sum += x[i]": keep %sum.1 otherwise.
  %sum.2 = select i1 %cmp.2, double %add, double %sum.1
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %zext
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  %2 = phi double [ 0.000000e+00, %entry ], [ %sum.2, %for.body ]
  ret double %2
}

; Float pattern:
;   Check vectorization of reduction code which has an fsub instruction after
;   an fcmp instruction which compares an array element and 0.
;
; float fcmp_0_fsub_select1(float *restrict x, const int N) {
;   float sum = 0.;
;   for (int i = 0; i < N; ++i)
;     if (x[i] > (float)0.)
;       sum -= x[i];
;   return sum;
; }

define float @fcmp_0_fsub_select1(ptr noalias %x, i32 %N) nounwind readonly {
; CHECK-LABEL: define float @fcmp_0_fsub_select1(
; CHECK-SAME: ptr noalias [[X:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    [[CMP_1:%.*]] = icmp sgt i32 [[N]], 0
; CHECK-NEXT:    br i1 [[CMP_1]], label %[[FOR_HEADER:.*]], label %[[FOR_END:.*]]
; CHECK:       [[FOR_HEADER]]:
; CHECK-NEXT:    [[ZEXT:%.*]] = zext i32 [[N]] to i64
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[ZEXT]], 4
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK:       [[VECTOR_PH]]:
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[ZEXT]], 4
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[ZEXT]], [[N_MOD_VF]]
; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
; CHECK:       [[VECTOR_BODY]]:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = fcmp fast ogt <4 x float> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT:    [[TMP4:%.*]] = fsub fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD]]
; CHECK-NEXT:    [[TMP5]] = select <4 x i1> [[TMP3]], <4 x float> [[TMP4]], <4 x float> [[VEC_PHI]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; CHECK:       [[MIDDLE_BLOCK]]:
; CHECK-NEXT:    [[TMP7:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP5]])
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[ZEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; CHECK:       [[SCALAR_PH]]:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_HEADER]] ]
; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[FOR_HEADER]] ]
; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
; CHECK:       [[FOR_BODY]]:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT:    [[SUM_1:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SUM_2:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[TMP8:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[CMP_2:%.*]] = fcmp fast ogt float [[TMP8]], 0.000000e+00
; CHECK-NEXT:    [[SUB:%.*]] = fsub fast float [[SUM_1]], [[TMP8]]
; CHECK-NEXT:    [[SUM_2]] = select i1 [[CMP_2]], float [[SUB]], float [[SUM_1]]
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[ZEXT]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
; CHECK:       [[FOR_END_LOOPEXIT]]:
; CHECK-NEXT:    [[SUM_2_LCSSA:%.*]] = phi float [ [[SUM_2]], %[[FOR_BODY]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    br label %[[FOR_END]]
; CHECK:       [[FOR_END]]:
; CHECK-NEXT:    [[TMP9:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[SUM_2_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
; CHECK-NEXT:    ret float [[TMP9]]
;
entry:
  ; Skip the loop entirely when N <= 0.
  %cmp.1 = icmp sgt i32 %N, 0
  br i1 %cmp.1, label %for.header, label %for.end

for.header:                                       ; preds = %entry
  %zext = zext i32 %N to i64
  br label %for.body

for.body:                                         ; preds = %for.body, %for.header
  %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
  %sum.1 = phi float [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ]
  %arrayidx = getelementptr inbounds float, ptr %x, i64 %indvars.iv
  %0 = load float, ptr %arrayidx, align 4
  %cmp.2 = fcmp fast ogt float %0, 0.000000e+00
  %sub = fsub fast float %sum.1, %0
  ; If-converted form of "if (x[i] > 0) sum -= x[i]": keep %sum.1 otherwise.
  %sum.2 = select i1 %cmp.2, float %sub, float %sum.1
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %zext
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  %1 = phi float [ 0.000000e+00, %entry ], [ %sum.2, %for.body ]
  ret float %1
}

; Float pattern:
;   Check that the loop is not vectorized if the fp-instruction has no
;   fast-math property.
; float fcmp_0_fsub_select1_novectorize(float *restrict x, const int N) {
;   float sum = 0.;
;   for (int i = 0; i < N; ++i)
;     if (x[i] > (float)0.)
;       sum -= x[i];
;   return sum;
; }
;
; Expected output (see the assertions below): only the original scalar loop;
; no vector preheader, vector body or middle block is matched.

define float @fcmp_0_fsub_select1_novectorize(ptr noalias %x, i32 %N) nounwind readonly {
; CHECK-LABEL: define float @fcmp_0_fsub_select1_novectorize(
; CHECK-SAME: ptr noalias [[X:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    [[CMP_1:%.*]] = icmp sgt i32 [[N]], 0
; CHECK-NEXT:    br i1 [[CMP_1]], label %[[FOR_HEADER:.*]], label %[[FOR_END:.*]]
; CHECK:       [[FOR_HEADER]]:
; CHECK-NEXT:    [[ZEXT:%.*]] = zext i32 [[N]] to i64
; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
; CHECK:       [[FOR_BODY]]:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[FOR_HEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT:    [[SUM_1:%.*]] = phi float [ 0.000000e+00, %[[FOR_HEADER]] ], [ [[SUM_2:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[CMP_2:%.*]] = fcmp ogt float [[TMP0]], 0.000000e+00
; CHECK-NEXT:    [[SUB:%.*]] = fsub float [[SUM_1]], [[TMP0]]
; CHECK-NEXT:    [[SUM_2]] = select i1 [[CMP_2]], float [[SUB]], float [[SUM_1]]
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[ZEXT]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT:.*]], label %[[FOR_BODY]]
; CHECK:       [[FOR_END_LOOPEXIT]]:
; CHECK-NEXT:    [[SUM_2_LCSSA:%.*]] = phi float [ [[SUM_2]], %[[FOR_BODY]] ]
; CHECK-NEXT:    br label %[[FOR_END]]
; CHECK:       [[FOR_END]]:
; CHECK-NEXT:    [[TMP1:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[SUM_2_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
; CHECK-NEXT:    ret float [[TMP1]]
;
entry:
  ; The loop is entered only when N > 0; otherwise for.end yields 0.0.
  %cmp.1 = icmp sgt i32 %N, 0
  br i1 %cmp.1, label %for.header, label %for.end

for.header:                                       ; preds = %entry
  %zext = zext i32 %N to i64
  br label %for.body

for.body:                                         ; preds = %for.body, %for.header
  %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
  %sum.1 = phi float [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ]
  %arrayidx = getelementptr inbounds float, ptr %x, i64 %indvars.iv
  %0 = load float, ptr %arrayidx, align 4
  ; Unlike fcmp_0_fsub_select1, the fcmp and fsub here have no fast-math
  ; flags; this is the property the test exercises.
  %cmp.2 = fcmp ogt float %0, 0.000000e+00
  %sub = fsub float %sum.1, %0
  %sum.2 = select i1 %cmp.2, float %sub, float %sum.1
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %zext
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  %1 = phi float [ 0.000000e+00, %entry ], [ %sum.2, %for.body ]
  ret float %1
}

; Double pattern:
;   Check vectorization of reduction code which has an fsub instruction after
;   an fcmp instruction which compares an array element and 0.
;
; double fcmp_0_fsub_select2(ptr restrict x, const int N) {
;   double sum = 0.
;   for (int i = 0; i < N; ++i)
;     if (x[i] > 0.)
;       sum -= x[i];
;   return sum;
; }
;
; Expected output (see the assertions below): a vector loop over <4 x double>
; in which the fsub feeds a select on the fcmp mask, and a fast
; llvm.vector.reduce.fadd of the partial results in the middle block.

define double @fcmp_0_fsub_select2(ptr noalias %x, i32 %N) nounwind readonly {
; CHECK-LABEL: define double @fcmp_0_fsub_select2(
; CHECK-SAME: ptr noalias [[X:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    [[CMP_1:%.*]] = icmp sgt i32 [[N]], 0
; CHECK-NEXT:    br i1 [[CMP_1]], label %[[FOR_HEADER:.*]], label %[[FOR_END:.*]]
; CHECK:       [[FOR_HEADER]]:
; CHECK-NEXT:    [[ZEXT:%.*]] = zext i32 [[N]] to i64
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[ZEXT]], 4
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK:       [[VECTOR_PH]]:
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[ZEXT]], 4
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[ZEXT]], [[N_MOD_VF]]
; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
; CHECK:       [[VECTOR_BODY]]:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 0
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = fcmp fast ogt <4 x double> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT:    [[TMP4:%.*]] = fsub fast <4 x double> [[VEC_PHI]], [[WIDE_LOAD]]
; CHECK-NEXT:    [[TMP5]] = select <4 x i1> [[TMP3]], <4 x double> [[TMP4]], <4 x double> [[VEC_PHI]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; CHECK:       [[MIDDLE_BLOCK]]:
; CHECK-NEXT:    [[TMP7:%.*]] = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> [[TMP5]])
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[ZEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; CHECK:       [[SCALAR_PH]]:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_HEADER]] ]
; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi double [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[FOR_HEADER]] ]
; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
; CHECK:       [[FOR_BODY]]:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT:    [[SUM_1:%.*]] = phi double [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SUM_2:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[TMP8:%.*]] = load double, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[CMP_2:%.*]] = fcmp fast ogt double [[TMP8]], 0.000000e+00
; CHECK-NEXT:    [[SUB:%.*]] = fsub fast double [[SUM_1]], [[TMP8]]
; CHECK-NEXT:    [[SUM_2]] = select i1 [[CMP_2]], double [[SUB]], double [[SUM_1]]
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[ZEXT]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
; CHECK:       [[FOR_END_LOOPEXIT]]:
; CHECK-NEXT:    [[SUM_2_LCSSA:%.*]] = phi double [ [[SUM_2]], %[[FOR_BODY]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    br label %[[FOR_END]]
; CHECK:       [[FOR_END]]:
; CHECK-NEXT:    [[TMP9:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[SUM_2_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
; CHECK-NEXT:    ret double [[TMP9]]
;
entry:
  ; The loop is entered only when N > 0; otherwise for.end yields 0.0.
  %cmp.1 = icmp sgt i32 %N, 0
  br i1 %cmp.1, label %for.header, label %for.end

for.header:                                       ; preds = %entry
  %zext = zext i32 %N to i64
  br label %for.body

for.body:                                         ; preds = %for.body, %for.header
  %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
  %sum.1 = phi double [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ]
  %arrayidx = getelementptr inbounds double, ptr %x, i64 %indvars.iv
  %0 = load double, ptr %arrayidx, align 4
  ; Both the compare and the subtract carry 'fast'. The select keeps the
  ; previous sum whenever x[i] is not greater than 0.
  %cmp.2 = fcmp fast ogt double %0, 0.000000e+00
  %sub = fsub fast double %sum.1, %0
  %sum.2 = select i1 %cmp.2, double %sub, double %sum.1
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %zext
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  %1 = phi double [ 0.000000e+00, %entry ], [ %sum.2, %for.body ]
  ret double %1
}

; Double pattern:
;   Check that the loop is not vectorized if the fp-instruction has no
;   fast-math property.
;
; double fcmp_0_fsub_select2_notvectorize(ptr restrict x, const int N) {
;   double sum = 0.
;   for (int i = 0; i < N; ++i)
;     if (x[i] > 0.)
;       sum -= x[i];
;   return sum;
; }
;
; Expected output (see the assertions below): only the original scalar loop;
; no vector preheader, vector body or middle block is matched.

define double @fcmp_0_fsub_select2_notvectorize(ptr noalias %x, i32 %N) nounwind readonly {
; CHECK-LABEL: define double @fcmp_0_fsub_select2_notvectorize(
; CHECK-SAME: ptr noalias [[X:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    [[CMP_1:%.*]] = icmp sgt i32 [[N]], 0
; CHECK-NEXT:    br i1 [[CMP_1]], label %[[FOR_HEADER:.*]], label %[[FOR_END:.*]]
; CHECK:       [[FOR_HEADER]]:
; CHECK-NEXT:    [[ZEXT:%.*]] = zext i32 [[N]] to i64
; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
; CHECK:       [[FOR_BODY]]:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[FOR_HEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT:    [[SUM_1:%.*]] = phi double [ 0.000000e+00, %[[FOR_HEADER]] ], [ [[SUM_2:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[CMP_2:%.*]] = fcmp ogt double [[TMP0]], 0.000000e+00
; CHECK-NEXT:    [[SUB:%.*]] = fsub double [[SUM_1]], [[TMP0]]
; CHECK-NEXT:    [[SUM_2]] = select i1 [[CMP_2]], double [[SUB]], double [[SUM_1]]
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[ZEXT]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT:.*]], label %[[FOR_BODY]]
; CHECK:       [[FOR_END_LOOPEXIT]]:
; CHECK-NEXT:    [[SUM_2_LCSSA:%.*]] = phi double [ [[SUM_2]], %[[FOR_BODY]] ]
; CHECK-NEXT:    br label %[[FOR_END]]
; CHECK:       [[FOR_END]]:
; CHECK-NEXT:    [[TMP1:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[SUM_2_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
; CHECK-NEXT:    ret double [[TMP1]]
;
entry:
  ; The loop is entered only when N > 0; otherwise for.end yields 0.0.
  %cmp.1 = icmp sgt i32 %N, 0
  br i1 %cmp.1, label %for.header, label %for.end

for.header:                                       ; preds = %entry
  %zext = zext i32 %N to i64
  br label %for.body

for.body:                                         ; preds = %for.body, %for.header
  %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
  %sum.1 = phi double [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ]
  %arrayidx = getelementptr inbounds double, ptr %x, i64 %indvars.iv
  %0 = load double, ptr %arrayidx, align 4
  ; Unlike fcmp_0_fsub_select2, the fcmp and fsub here have no fast-math
  ; flags; this is the property the test exercises.
  %cmp.2 = fcmp ogt double %0, 0.000000e+00
  %sub = fsub double %sum.1, %0
  %sum.2 = select i1 %cmp.2, double %sub, double %sum.1
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %zext
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  %1 = phi double [ 0.000000e+00, %entry ], [ %sum.2, %for.body ]
  ret double %1
}

; Float pattern:
;   Check vectorization of reduction code which has an fmul instruction after
;   an fcmp instruction which compares an array element and 0.
;
; float fcmp_0_fmult_select1(ptr restrict x, const int N) {
;   float sum = 0.
;   for (int i = 0; i < N; ++i)
;     if (x[i] > (float)0.)
;       sum *= x[i];
;   return sum;
; }
;
; Expected output (see the assertions below): a vector loop over <4 x float>
; whose reduction phi starts at <0.0, 1.0, 1.0, 1.0> (the scalar start value
; in lane 0 and 1.0 in the remaining lanes), and a fast
; llvm.vector.reduce.fmul with start value 1.0 in the middle block.

define float @fcmp_0_fmult_select1(ptr noalias %x, i32 %N) nounwind readonly {
; CHECK-LABEL: define float @fcmp_0_fmult_select1(
; CHECK-SAME: ptr noalias [[X:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    [[CMP_1:%.*]] = icmp sgt i32 [[N]], 0
; CHECK-NEXT:    br i1 [[CMP_1]], label %[[FOR_HEADER:.*]], label %[[FOR_END:.*]]
; CHECK:       [[FOR_HEADER]]:
; CHECK-NEXT:    [[ZEXT:%.*]] = zext i32 [[N]] to i64
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[ZEXT]], 4
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK:       [[VECTOR_PH]]:
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[ZEXT]], 4
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[ZEXT]], [[N_MOD_VF]]
; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
; CHECK:       [[VECTOR_BODY]]:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x float> [ <float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = fcmp fast ogt <4 x float> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT:    [[TMP4:%.*]] = fmul fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD]]
; CHECK-NEXT:    [[TMP5]] = select <4 x i1> [[TMP3]], <4 x float> [[TMP4]], <4 x float> [[VEC_PHI]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK:       [[MIDDLE_BLOCK]]:
; CHECK-NEXT:    [[TMP7:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[TMP5]])
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[ZEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; CHECK:       [[SCALAR_PH]]:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_HEADER]] ]
; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[FOR_HEADER]] ]
; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
; CHECK:       [[FOR_BODY]]:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT:    [[SUM_1:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SUM_2:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[TMP8:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[CMP_2:%.*]] = fcmp fast ogt float [[TMP8]], 0.000000e+00
; CHECK-NEXT:    [[MULT:%.*]] = fmul fast float [[SUM_1]], [[TMP8]]
; CHECK-NEXT:    [[SUM_2]] = select i1 [[CMP_2]], float [[MULT]], float [[SUM_1]]
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[ZEXT]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
; CHECK:       [[FOR_END_LOOPEXIT]]:
; CHECK-NEXT:    [[SUM_2_LCSSA:%.*]] = phi float [ [[SUM_2]], %[[FOR_BODY]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    br label %[[FOR_END]]
; CHECK:       [[FOR_END]]:
; CHECK-NEXT:    [[TMP9:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[SUM_2_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
; CHECK-NEXT:    ret float [[TMP9]]
;
entry:
  ; The loop is entered only when N > 0; otherwise for.end yields 0.0.
  %cmp.1 = icmp sgt i32 %N, 0
  br i1 %cmp.1, label %for.header, label %for.end

for.header:                                       ; preds = %entry
  %zext = zext i32 %N to i64
  br label %for.body

for.body:                                         ; preds = %for.body, %for.header
  %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
  ; The running product starts at 0.0, matching 'sum = 0.' in the C sketch.
  %sum.1 = phi float [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ]
  %arrayidx = getelementptr inbounds float, ptr %x, i64 %indvars.iv
  %0 = load float, ptr %arrayidx, align 4
  ; Both the compare and the multiply carry 'fast'. The select keeps the
  ; previous value whenever x[i] is not greater than 0.
  %cmp.2 = fcmp fast ogt float %0, 0.000000e+00
  %mult = fmul fast float %sum.1, %0
  %sum.2 = select i1 %cmp.2, float %mult, float %sum.1
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %zext
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  %1 = phi float [ 0.000000e+00, %entry ], [ %sum.2, %for.body ]
  ret float %1
}

; Float pattern:
;   Check that the loop is not vectorized if the fp-instruction has no
;   fast-math property.
;
; float fcmp_0_fmult_select1_notvectorize(ptr restrict x, const int N) {
;   float sum = 0.
;   for (int i = 0; i < N; ++i)
;     if (x[i] > (float)0.)
;       sum *= x[i];
;   return sum;
; }
;
; Expected output (see the assertions below): only the original scalar loop;
; no vector preheader, vector body or middle block is matched.

define float @fcmp_0_fmult_select1_notvectorize(ptr noalias %x, i32 %N) nounwind readonly {
; CHECK-LABEL: define float @fcmp_0_fmult_select1_notvectorize(
; CHECK-SAME: ptr noalias [[X:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    [[CMP_1:%.*]] = icmp sgt i32 [[N]], 0
; CHECK-NEXT:    br i1 [[CMP_1]], label %[[FOR_HEADER:.*]], label %[[FOR_END:.*]]
; CHECK:       [[FOR_HEADER]]:
; CHECK-NEXT:    [[ZEXT:%.*]] = zext i32 [[N]] to i64
; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
; CHECK:       [[FOR_BODY]]:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[FOR_HEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT:    [[SUM_1:%.*]] = phi float [ 0.000000e+00, %[[FOR_HEADER]] ], [ [[SUM_2:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[CMP_2:%.*]] = fcmp ogt float [[TMP0]], 0.000000e+00
; CHECK-NEXT:    [[MULT:%.*]] = fmul float [[SUM_1]], [[TMP0]]
; CHECK-NEXT:    [[SUM_2]] = select i1 [[CMP_2]], float [[MULT]], float [[SUM_1]]
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[ZEXT]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT:.*]], label %[[FOR_BODY]]
; CHECK:       [[FOR_END_LOOPEXIT]]:
; CHECK-NEXT:    [[SUM_2_LCSSA:%.*]] = phi float [ [[SUM_2]], %[[FOR_BODY]] ]
; CHECK-NEXT:    br label %[[FOR_END]]
; CHECK:       [[FOR_END]]:
; CHECK-NEXT:    [[TMP1:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[SUM_2_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
; CHECK-NEXT:    ret float [[TMP1]]
;
entry:
  ; The loop is entered only when N > 0; otherwise for.end yields 0.0.
  %cmp.1 = icmp sgt i32 %N, 0
  br i1 %cmp.1, label %for.header, label %for.end

for.header:                                       ; preds = %entry
  %zext = zext i32 %N to i64
  br label %for.body

for.body:                                         ; preds = %for.body, %for.header
  %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
  %sum.1 = phi float [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ]
  %arrayidx = getelementptr inbounds float, ptr %x, i64 %indvars.iv
  %0 = load float, ptr %arrayidx, align 4
  ; Unlike fcmp_0_fmult_select1, the fcmp and fmul here have no fast-math
  ; flags; this is the property the test exercises.
  %cmp.2 = fcmp ogt float %0, 0.000000e+00
  %mult = fmul float %sum.1, %0
  %sum.2 = select i1 %cmp.2, float %mult, float %sum.1
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %zext
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  %1 = phi float [ 0.000000e+00, %entry ], [ %sum.2, %for.body ]
  ret float %1
}

; Double pattern:
;   Check vectorization of reduction code which has an fmul instruction after
;   an fcmp instruction which compares an array element and 0.
;
; double fcmp_0_fmult_select2(ptr restrict x, const int N) {
;   double sum = 0.
;   for (int i = 0; i < N; ++i)
;     if (x[i] > 0.)
;       sum *= x[i];
;   return sum;
; }
;
; Expected output (see the assertions below): a vector loop over <4 x double>
; whose reduction phi starts at <0.0, 1.0, 1.0, 1.0> (the scalar start value
; in lane 0 and 1.0 in the remaining lanes), and a fast
; llvm.vector.reduce.fmul with start value 1.0 in the middle block.

define double @fcmp_0_fmult_select2(ptr noalias %x, i32 %N) nounwind readonly {
; CHECK-LABEL: define double @fcmp_0_fmult_select2(
; CHECK-SAME: ptr noalias [[X:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    [[CMP_1:%.*]] = icmp sgt i32 [[N]], 0
; CHECK-NEXT:    br i1 [[CMP_1]], label %[[FOR_HEADER:.*]], label %[[FOR_END:.*]]
; CHECK:       [[FOR_HEADER]]:
; CHECK-NEXT:    [[ZEXT:%.*]] = zext i32 [[N]] to i64
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[ZEXT]], 4
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK:       [[VECTOR_PH]]:
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[ZEXT]], 4
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[ZEXT]], [[N_MOD_VF]]
; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
; CHECK:       [[VECTOR_BODY]]:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x double> [ <double 0.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 0
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = fcmp fast ogt <4 x double> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT:    [[TMP4:%.*]] = fmul fast <4 x double> [[VEC_PHI]], [[WIDE_LOAD]]
; CHECK-NEXT:    [[TMP5]] = select <4 x i1> [[TMP3]], <4 x double> [[TMP4]], <4 x double> [[VEC_PHI]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; CHECK:       [[MIDDLE_BLOCK]]:
; CHECK-NEXT:    [[TMP7:%.*]] = call fast double @llvm.vector.reduce.fmul.v4f64(double 1.000000e+00, <4 x double> [[TMP5]])
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[ZEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; CHECK:       [[SCALAR_PH]]:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_HEADER]] ]
; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi double [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[FOR_HEADER]] ]
; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
; CHECK:       [[FOR_BODY]]:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT:    [[SUM_1:%.*]] = phi double [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SUM_2:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[TMP8:%.*]] = load double, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[CMP_2:%.*]] = fcmp fast ogt double [[TMP8]], 0.000000e+00
; CHECK-NEXT:    [[MULT:%.*]] = fmul fast double [[SUM_1]], [[TMP8]]
; CHECK-NEXT:    [[SUM_2]] = select i1 [[CMP_2]], double [[MULT]], double [[SUM_1]]
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[ZEXT]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
; CHECK:       [[FOR_END_LOOPEXIT]]:
; CHECK-NEXT:    [[SUM_2_LCSSA:%.*]] = phi double [ [[SUM_2]], %[[FOR_BODY]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    br label %[[FOR_END]]
; CHECK:       [[FOR_END]]:
; CHECK-NEXT:    [[TMP9:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[SUM_2_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
; CHECK-NEXT:    ret double [[TMP9]]
;
entry:
  ; The loop is entered only when N > 0; otherwise for.end yields 0.0.
  %cmp.1 = icmp sgt i32 %N, 0
  br i1 %cmp.1, label %for.header, label %for.end

for.header:                                       ; preds = %entry
  %zext = zext i32 %N to i64
  br label %for.body

for.body:                                         ; preds = %for.body, %for.header
  %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
  ; The running product starts at 0.0, matching 'sum = 0.' in the C sketch.
  %sum.1 = phi double [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ]
  %arrayidx = getelementptr inbounds double, ptr %x, i64 %indvars.iv
  %0 = load double, ptr %arrayidx, align 4
  ; Both the compare and the multiply carry 'fast'. The select keeps the
  ; previous value whenever x[i] is not greater than 0.
  %cmp.2 = fcmp fast ogt double %0, 0.000000e+00
  %mult = fmul fast double %sum.1, %0
  %sum.2 = select i1 %cmp.2, double %mult, double %sum.1
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %zext
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  %1 = phi double [ 0.000000e+00, %entry ], [ %sum.2, %for.body ]
  ret double %1
}

; Double pattern:
;   Check that the loop is not vectorized if the fp-instruction has no
;   fast-math property.
;
; double fcmp_0_fmult_select2_notvectorize(ptr restrict x, const int N) {
;   double sum = 0.
;   for (int i = 0; i < N; ++i)
;     if (x[i] > 0.)
;       sum *= x[i];
;   return sum;
; }
;
; Expected output (see the assertions below): only the original scalar loop;
; no vector preheader, vector body or middle block is matched.

define double @fcmp_0_fmult_select2_notvectorize(ptr noalias %x, i32 %N) nounwind readonly {
; CHECK-LABEL: define double @fcmp_0_fmult_select2_notvectorize(
; CHECK-SAME: ptr noalias [[X:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    [[CMP_1:%.*]] = icmp sgt i32 [[N]], 0
; CHECK-NEXT:    br i1 [[CMP_1]], label %[[FOR_HEADER:.*]], label %[[FOR_END:.*]]
; CHECK:       [[FOR_HEADER]]:
; CHECK-NEXT:    [[ZEXT:%.*]] = zext i32 [[N]] to i64
; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
; CHECK:       [[FOR_BODY]]:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[FOR_HEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT:    [[SUM_1:%.*]] = phi double [ 0.000000e+00, %[[FOR_HEADER]] ], [ [[SUM_2:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[CMP_2:%.*]] = fcmp ogt double [[TMP0]], 0.000000e+00
; CHECK-NEXT:    [[MULT:%.*]] = fmul double [[SUM_1]], [[TMP0]]
; CHECK-NEXT:    [[SUM_2]] = select i1 [[CMP_2]], double [[MULT]], double [[SUM_1]]
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[ZEXT]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT:.*]], label %[[FOR_BODY]]
; CHECK:       [[FOR_END_LOOPEXIT]]:
; CHECK-NEXT:    [[SUM_2_LCSSA:%.*]] = phi double [ [[SUM_2]], %[[FOR_BODY]] ]
; CHECK-NEXT:    br label %[[FOR_END]]
; CHECK:       [[FOR_END]]:
; CHECK-NEXT:    [[TMP1:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[SUM_2_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
; CHECK-NEXT:    ret double [[TMP1]]
;
entry:
  ; The loop is entered only when N > 0; otherwise for.end yields 0.0.
  %cmp.1 = icmp sgt i32 %N, 0
  br i1 %cmp.1, label %for.header, label %for.end

for.header:                                       ; preds = %entry
  %zext = zext i32 %N to i64
  br label %for.body

for.body:                                         ; preds = %for.body, %for.header
  %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
  %sum.1 = phi double [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ]
  %arrayidx = getelementptr inbounds double, ptr %x, i64 %indvars.iv
  %0 = load double, ptr %arrayidx, align 4
  ; Unlike fcmp_0_fmult_select2, the fcmp and fmul here have no fast-math
  ; flags; this is the property the test exercises.
  %cmp.2 = fcmp ogt double %0, 0.000000e+00
  %mult = fmul double %sum.1, %0
  %sum.2 = select i1 %cmp.2, double %mult, double %sum.1
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %zext
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  %1 = phi double [ 0.000000e+00, %entry ], [ %sum.2, %for.body ]
  ret double %1
}

; Float multi pattern
;   Check vectorisation of reduction code with a pair of selects to different
;   fadd patterns.
;
; float fcmp_multi(ptr a, int n) {
;   float sum=0.0;
;   for (int i=0;i<n;i++) {
;     if (a[i]>1.0)
;       sum+=a[i];
;     else if (a[i]<3.0)
;       sum+=2*a[i];
;     else
;       sum+=3*a[i];
;   }
;   return sum;
; }
;
; Expected output (see the assertions below): the three-way branch is
; flattened in the vector body into two selects (PREDPHI, PREDPHI1) that pick
; 3*a[i], 2*a[i] or a[i] per lane, feeding a single fast fadd reduction.

define float @fcmp_multi(ptr nocapture readonly %a, i32 %n) nounwind readonly {
; CHECK-LABEL: define float @fcmp_multi(
; CHECK-SAME: ptr readonly captures(none) [[A:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    [[CMP10:%.*]] = icmp sgt i32 [[N]], 0
; CHECK-NEXT:    br i1 [[CMP10]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_END:.*]]
; CHECK:       [[FOR_BODY_PREHEADER]]:
; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK:       [[VECTOR_PH]]:
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
; CHECK:       [[VECTOR_BODY]]:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP10:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], splat (float 1.000000e+00)
; CHECK-NEXT:    [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true)
; CHECK-NEXT:    [[TMP5:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00)
; CHECK-NEXT:    [[TMP6:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true)
; CHECK-NEXT:    [[TMP7:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP6]], <4 x i1> zeroinitializer
; CHECK-NEXT:    [[TMP8:%.*]] = fmul fast <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00)
; CHECK-NEXT:    [[TMP9:%.*]] = fmul fast <4 x float> [[WIDE_LOAD]], splat (float 2.000000e+00)
; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP7]], <4 x float> [[TMP8]], <4 x float> [[TMP9]]
; CHECK-NEXT:    [[PREDPHI1:%.*]] = select <4 x i1> [[TMP3]], <4 x float> [[WIDE_LOAD]], <4 x float> [[PREDPHI]]
; CHECK-NEXT:    [[TMP10]] = fadd fast <4 x float> [[PREDPHI1]], [[VEC_PHI]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
; CHECK:       [[MIDDLE_BLOCK]]:
; CHECK-NEXT:    [[TMP12:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP10]])
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; CHECK:       [[SCALAR_PH]]:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP12]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
; CHECK:       [[FOR_BODY]]:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_INC:.*]] ]
; CHECK-NEXT:    [[SUM_011:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SUM_1:%.*]], %[[FOR_INC]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[TMP13:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[CMP1:%.*]] = fcmp ogt float [[TMP13]], 1.000000e+00
; CHECK-NEXT:    br i1 [[CMP1]], label %[[FOR_INC]], label %[[IF_ELSE:.*]]
; CHECK:       [[IF_ELSE]]:
; CHECK-NEXT:    [[CMP8:%.*]] = fcmp olt float [[TMP13]], 3.000000e+00
; CHECK-NEXT:    br i1 [[CMP8]], label %[[IF_THEN10:.*]], label %[[IF_ELSE14:.*]]
; CHECK:       [[IF_THEN10]]:
; CHECK-NEXT:    [[MUL:%.*]] = fmul fast float [[TMP13]], 2.000000e+00
; CHECK-NEXT:    br label %[[FOR_INC]]
; CHECK:       [[IF_ELSE14]]:
; CHECK-NEXT:    [[MUL17:%.*]] = fmul fast float [[TMP13]], 3.000000e+00
; CHECK-NEXT:    br label %[[FOR_INC]]
; CHECK:       [[FOR_INC]]:
; CHECK-NEXT:    [[DOTPN:%.*]] = phi float [ [[MUL]], %[[IF_THEN10]] ], [ [[MUL17]], %[[IF_ELSE14]] ], [ [[TMP13]], %[[FOR_BODY]] ]
; CHECK-NEXT:    [[SUM_1]] = fadd fast float [[DOTPN]], [[SUM_011]]
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
; CHECK:       [[FOR_END_LOOPEXIT]]:
; CHECK-NEXT:    [[SUM_1_LCSSA:%.*]] = phi float [ [[SUM_1]], %[[FOR_INC]] ], [ [[TMP12]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    br label %[[FOR_END]]
; CHECK:       [[FOR_END]]:
; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[SUM_1_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
; CHECK-NEXT:    ret float [[SUM_0_LCSSA]]
;
entry:
  ; The loop is entered only when n > 0; otherwise for.end yields 0.0.
  %cmp10 = icmp sgt i32 %n, 0
  br i1 %cmp10, label %for.body.preheader, label %for.end

for.body.preheader:                               ; preds = %entry
  %wide.trip.count = zext i32 %n to i64
  br label %for.body

for.body:                                         ; preds = %for.inc, %for.body.preheader
  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.inc ]
  %sum.011 = phi float [ 0.000000e+00, %for.body.preheader ], [ %sum.1, %for.inc ]
  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
  %0 = load float, ptr %arrayidx, align 4
  ; The two compares carry no fast-math flags; only the fmul/fadd are 'fast'.
  ; a[i] > 1.0 goes straight to for.inc, so a[i] itself is accumulated.
  %cmp1 = fcmp ogt float %0, 1.000000e+00
  br i1 %cmp1, label %for.inc, label %if.else

if.else:                                          ; preds = %for.body
  %cmp8 = fcmp olt float %0, 3.000000e+00
  br i1 %cmp8, label %if.then10, label %if.else14

if.then10:                                        ; preds = %if.else
  %mul = fmul fast float %0, 2.000000e+00
  br label %for.inc

if.else14:                                        ; preds = %if.else
  %mul17 = fmul fast float %0, 3.000000e+00
  br label %for.inc

for.inc:                                          ; preds = %for.body, %if.else14, %if.then10
  ; %.pn merges the three candidate addends; a single fadd is the reduction.
  %.pn = phi float [ %mul, %if.then10 ], [ %mul17, %if.else14 ], [ %0, %for.body ]
  %sum.1 = fadd fast float %.pn, %sum.011
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.inc, %entry
  %sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %sum.1, %for.inc ]
  ret float %sum.0.lcssa
}

; Float fadd + fsub patterns
;   Check
vectorisation of reduction code with a pair of selects to different 1311; instructions { fadd, fsub } but equivalent (change in constant). 1312; 1313; float fcmp_fadd_fsub(ptr a, int n) { 1314; float sum=0.0; 1315; for (int i=0;i<n;i++) { 1316; if (a[i]>1.0) 1317; sum+=a[i]; 1318; else if (a[i]<3.0) 1319; sum-=a[i]; 1320; } 1321; return sum; 1322; } 1323 1324define float @fcmp_fadd_fsub(ptr nocapture readonly %a, i32 %n) nounwind readonly { 1325; CHECK-LABEL: define float @fcmp_fadd_fsub( 1326; CHECK-SAME: ptr readonly captures(none) [[A:%.*]], i32 [[N:%.*]]) #[[ATTR0]] { 1327; CHECK-NEXT: [[ENTRY:.*]]: 1328; CHECK-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[N]], 0 1329; CHECK-NEXT: br i1 [[CMP9]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_END:.*]] 1330; CHECK: [[FOR_BODY_PREHEADER]]: 1331; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 1332; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4 1333; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] 1334; CHECK: [[VECTOR_PH]]: 1335; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4 1336; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] 1337; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] 1338; CHECK: [[VECTOR_BODY]]: 1339; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] 1340; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[PREDPHI1:%.*]], %[[VECTOR_BODY]] ] 1341; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 1342; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]] 1343; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0 1344; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 1345; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], splat (float 1.000000e+00) 1346; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true) 1347; 
CHECK-NEXT: [[TMP5:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00) 1348; CHECK-NEXT: [[TMP6:%.*]] = fsub fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD]] 1349; CHECK-NEXT: [[TMP7:%.*]] = fadd fast <4 x float> [[WIDE_LOAD]], [[VEC_PHI]] 1350; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true) 1351; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP8]], <4 x i1> zeroinitializer 1352; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP3]], <4 x float> [[TMP7]], <4 x float> [[TMP6]] 1353; CHECK-NEXT: [[PREDPHI1]] = select <4 x i1> [[TMP9]], <4 x float> [[VEC_PHI]], <4 x float> [[PREDPHI]] 1354; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 1355; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1356; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] 1357; CHECK: [[MIDDLE_BLOCK]]: 1358; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[PREDPHI1]]) 1359; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] 1360; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[SCALAR_PH]] 1361; CHECK: [[SCALAR_PH]]: 1362; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_BODY_PREHEADER]] ] 1363; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP11]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[FOR_BODY_PREHEADER]] ] 1364; CHECK-NEXT: br label %[[FOR_BODY:.*]] 1365; CHECK: [[FOR_BODY]]: 1366; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_INC:.*]] ] 1367; CHECK-NEXT: [[SUM_010:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SUM_1:%.*]], %[[FOR_INC]] ] 1368; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]] 1369; CHECK-NEXT: [[TMP12:%.*]] = load float, ptr [[ARRAYIDX]], align 4 1370; CHECK-NEXT: 
[[CMP1:%.*]] = fcmp ogt float [[TMP12]], 1.000000e+00 1371; CHECK-NEXT: br i1 [[CMP1]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]] 1372; CHECK: [[IF_THEN]]: 1373; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP12]], [[SUM_010]] 1374; CHECK-NEXT: br label %[[FOR_INC]] 1375; CHECK: [[IF_ELSE]]: 1376; CHECK-NEXT: [[CMP8:%.*]] = fcmp olt float [[TMP12]], 3.000000e+00 1377; CHECK-NEXT: br i1 [[CMP8]], label %[[IF_THEN10:.*]], label %[[FOR_INC]] 1378; CHECK: [[IF_THEN10]]: 1379; CHECK-NEXT: [[SUB:%.*]] = fsub fast float [[SUM_010]], [[TMP12]] 1380; CHECK-NEXT: br label %[[FOR_INC]] 1381; CHECK: [[FOR_INC]]: 1382; CHECK-NEXT: [[SUM_1]] = phi float [ [[ADD]], %[[IF_THEN]] ], [ [[SUB]], %[[IF_THEN10]] ], [ [[SUM_010]], %[[IF_ELSE]] ] 1383; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 1384; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] 1385; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] 1386; CHECK: [[FOR_END_LOOPEXIT]]: 1387; CHECK-NEXT: [[SUM_1_LCSSA:%.*]] = phi float [ [[SUM_1]], %[[FOR_INC]] ], [ [[TMP11]], %[[MIDDLE_BLOCK]] ] 1388; CHECK-NEXT: br label %[[FOR_END]] 1389; CHECK: [[FOR_END]]: 1390; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[SUM_1_LCSSA]], %[[FOR_END_LOOPEXIT]] ] 1391; CHECK-NEXT: ret float [[SUM_0_LCSSA]] 1392; 1393entry: 1394 %cmp9 = icmp sgt i32 %n, 0 1395 br i1 %cmp9, label %for.body.preheader, label %for.end 1396 1397for.body.preheader: ; preds = %entry 1398 %wide.trip.count = zext i32 %n to i64 1399 br label %for.body 1400 1401for.body: ; preds = %for.inc, %for.body.preheader 1402 %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.inc ] 1403 %sum.010 = phi float [ 0.000000e+00, %for.body.preheader ], [ %sum.1, %for.inc ] 1404 %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv 1405 %0 = load float, ptr %arrayidx, align 4 1406 %cmp1 = fcmp ogt float %0, 
1.000000e+00 1407 br i1 %cmp1, label %if.then, label %if.else 1408 1409if.then: ; preds = %for.body 1410 %add = fadd fast float %0, %sum.010 1411 br label %for.inc 1412 1413if.else: ; preds = %for.body 1414 %cmp8 = fcmp olt float %0, 3.000000e+00 1415 br i1 %cmp8, label %if.then10, label %for.inc 1416 1417if.then10: ; preds = %if.else 1418 %sub = fsub fast float %sum.010, %0 1419 br label %for.inc 1420 1421for.inc: ; preds = %if.then, %if.then10, %if.else 1422 %sum.1 = phi float [ %add, %if.then ], [ %sub, %if.then10 ], [ %sum.010, %if.else ] 1423 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 1424 %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count 1425 br i1 %exitcond, label %for.end, label %for.body 1426 1427for.end: ; preds = %for.inc, %entry 1428 %sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %sum.1, %for.inc ] 1429 ret float %sum.0.lcssa 1430} 1431 1432; Float fadd + fmul patterns 1433; Check lack of vectorisation of reduction code with a pair of non-compatible 1434; instructions { fadd, fmul }. 
1435; 1436; float fcmp_fadd_fmul(ptr a, int n) { 1437; float sum=0.0; 1438; for (int i=0;i<n;i++) { 1439; if (a[i]>1.0) 1440; sum+=a[i]; 1441; else if (a[i]<3.0) 1442; sum*=a[i]; 1443; } 1444; return sum; 1445; } 1446 1447define float @fcmp_fadd_fmul(ptr nocapture readonly %a, i32 %n) nounwind readonly { 1448; CHECK-LABEL: define float @fcmp_fadd_fmul( 1449; CHECK-SAME: ptr readonly captures(none) [[A:%.*]], i32 [[N:%.*]]) #[[ATTR0]] { 1450; CHECK-NEXT: [[ENTRY:.*]]: 1451; CHECK-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[N]], 0 1452; CHECK-NEXT: br i1 [[CMP9]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_END:.*]] 1453; CHECK: [[FOR_BODY_PREHEADER]]: 1454; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 1455; CHECK-NEXT: br label %[[FOR_BODY:.*]] 1456; CHECK: [[FOR_BODY]]: 1457; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_INC:.*]] ] 1458; CHECK-NEXT: [[SUM_010:%.*]] = phi float [ 0.000000e+00, %[[FOR_BODY_PREHEADER]] ], [ [[SUM_1:%.*]], %[[FOR_INC]] ] 1459; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]] 1460; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4 1461; CHECK-NEXT: [[CMP1:%.*]] = fcmp ogt float [[TMP0]], 1.000000e+00 1462; CHECK-NEXT: br i1 [[CMP1]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]] 1463; CHECK: [[IF_THEN]]: 1464; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP0]], [[SUM_010]] 1465; CHECK-NEXT: br label %[[FOR_INC]] 1466; CHECK: [[IF_ELSE]]: 1467; CHECK-NEXT: [[CMP8:%.*]] = fcmp olt float [[TMP0]], 3.000000e+00 1468; CHECK-NEXT: br i1 [[CMP8]], label %[[IF_THEN10:.*]], label %[[FOR_INC]] 1469; CHECK: [[IF_THEN10]]: 1470; CHECK-NEXT: [[MUL:%.*]] = fmul fast float [[TMP0]], [[SUM_010]] 1471; CHECK-NEXT: br label %[[FOR_INC]] 1472; CHECK: [[FOR_INC]]: 1473; CHECK-NEXT: [[SUM_1]] = phi float [ [[ADD]], %[[IF_THEN]] ], [ [[MUL]], %[[IF_THEN10]] ], [ [[SUM_010]], %[[IF_ELSE]] ] 1474; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw 
nsw i64 [[INDVARS_IV]], 1 1475; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] 1476; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT:.*]], label %[[FOR_BODY]] 1477; CHECK: [[FOR_END_LOOPEXIT]]: 1478; CHECK-NEXT: [[SUM_1_LCSSA:%.*]] = phi float [ [[SUM_1]], %[[FOR_INC]] ] 1479; CHECK-NEXT: br label %[[FOR_END]] 1480; CHECK: [[FOR_END]]: 1481; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[SUM_1_LCSSA]], %[[FOR_END_LOOPEXIT]] ] 1482; CHECK-NEXT: ret float [[SUM_0_LCSSA]] 1483; 1484entry: 1485 %cmp9 = icmp sgt i32 %n, 0 1486 br i1 %cmp9, label %for.body.preheader, label %for.end 1487 1488for.body.preheader: ; preds = %entry 1489 %wide.trip.count = zext i32 %n to i64 1490 br label %for.body 1491 1492for.body: ; preds = %for.inc, %for.body.preheader 1493 %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.inc ] 1494 %sum.010 = phi float [ 0.000000e+00, %for.body.preheader ], [ %sum.1, %for.inc ] 1495 %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv 1496 %0 = load float, ptr %arrayidx, align 4 1497 %cmp1 = fcmp ogt float %0, 1.000000e+00 1498 br i1 %cmp1, label %if.then, label %if.else 1499 1500if.then: ; preds = %for.body 1501 %add = fadd fast float %0, %sum.010 1502 br label %for.inc 1503 1504if.else: ; preds = %for.body 1505 %cmp8 = fcmp olt float %0, 3.000000e+00 1506 br i1 %cmp8, label %if.then10, label %for.inc 1507 1508if.then10: ; preds = %if.else 1509 %mul = fmul fast float %0, %sum.010 1510 br label %for.inc 1511 1512for.inc: ; preds = %if.then, %if.then10, %if.else 1513 %sum.1 = phi float [ %add, %if.then ], [ %mul, %if.then10 ], [ %sum.010, %if.else ] 1514 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 1515 %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count 1516 br i1 %exitcond, label %for.end, label %for.body 1517 1518for.end: ; preds = %for.inc, %entry 1519 %sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %sum.1, %for.inc ] 
1520 ret float %sum.0.lcssa 1521} 1522 1523; Float fadd + store patterns 1524; Check lack of vectorisation of reduction code with a store back, given it 1525; has loop dependency on a[i]. 1526; 1527; float fcmp_store_back(float a[], int LEN) { 1528; float sum = 0.0; 1529; for (int i = 0; i < LEN; i++) { 1530; sum += a[i]; 1531; a[i] = sum; 1532; } 1533; return sum; 1534; } 1535 1536define float @fcmp_store_back(ptr nocapture %a, i32 %LEN) nounwind readonly { 1537; CHECK-LABEL: define float @fcmp_store_back( 1538; CHECK-SAME: ptr captures(none) [[A:%.*]], i32 [[LEN:%.*]]) #[[ATTR0]] { 1539; CHECK-NEXT: [[ENTRY:.*]]: 1540; CHECK-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[LEN]], 0 1541; CHECK-NEXT: br i1 [[CMP7]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_END:.*]] 1542; CHECK: [[FOR_BODY_PREHEADER]]: 1543; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[LEN]] to i64 1544; CHECK-NEXT: br label %[[FOR_BODY:.*]] 1545; CHECK: [[FOR_BODY]]: 1546; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] 1547; CHECK-NEXT: [[SUM_08:%.*]] = phi float [ 0.000000e+00, %[[FOR_BODY_PREHEADER]] ], [ [[ADD:%.*]], %[[FOR_BODY]] ] 1548; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]] 1549; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4 1550; CHECK-NEXT: [[ADD]] = fadd fast float [[TMP0]], [[SUM_08]] 1551; CHECK-NEXT: store float [[ADD]], ptr [[ARRAYIDX]], align 4 1552; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 1553; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] 1554; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT:.*]], label %[[FOR_BODY]] 1555; CHECK: [[FOR_END_LOOPEXIT]]: 1556; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], %[[FOR_BODY]] ] 1557; CHECK-NEXT: br label %[[FOR_END]] 1558; CHECK: [[FOR_END]]: 1559; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ 
[[ADD_LCSSA]], %[[FOR_END_LOOPEXIT]] ] 1560; CHECK-NEXT: ret float [[SUM_0_LCSSA]] 1561; 1562entry: 1563 %cmp7 = icmp sgt i32 %LEN, 0 1564 br i1 %cmp7, label %for.body.preheader, label %for.end 1565 1566for.body.preheader: ; preds = %entry 1567 %wide.trip.count = zext i32 %LEN to i64 1568 br label %for.body 1569 1570for.body: ; preds = %for.body, %for.body.preheader 1571 %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] 1572 %sum.08 = phi float [ 0.000000e+00, %for.body.preheader ], [ %add, %for.body ] 1573 %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv 1574 %0 = load float, ptr %arrayidx, align 4 1575 %add = fadd fast float %0, %sum.08 1576 store float %add, ptr %arrayidx, align 4 1577 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 1578 %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count 1579 br i1 %exitcond, label %for.end, label %for.body 1580 1581for.end: ; preds = %for.body, %entry 1582 %sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ] 1583 ret float %sum.0.lcssa 1584} 1585 1586define i64 @fcmp_0_add_select2(ptr noalias %x, i64 %N) nounwind readonly { 1587; CHECK-LABEL: define i64 @fcmp_0_add_select2( 1588; CHECK-SAME: ptr noalias [[X:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { 1589; CHECK-NEXT: [[ENTRY:.*]]: 1590; CHECK-NEXT: [[CMP_1:%.*]] = icmp sgt i64 [[N]], 0 1591; CHECK-NEXT: br i1 [[CMP_1]], label %[[FOR_HEADER:.*]], label %[[FOR_END:.*]] 1592; CHECK: [[FOR_HEADER]]: 1593; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 1594; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] 1595; CHECK: [[VECTOR_PH]]: 1596; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 1597; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] 1598; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] 1599; CHECK: [[VECTOR_BODY]]: 1600; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] 1601; CHECK-NEXT: [[VEC_PHI:%.*]] = 
phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] 1602; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 1603; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[TMP0]] 1604; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0 1605; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 1606; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], zeroinitializer 1607; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i64> [[VEC_PHI]], splat (i64 2) 1608; CHECK-NEXT: [[TMP5]] = select <4 x i1> [[TMP3]], <4 x i64> [[TMP4]], <4 x i64> [[VEC_PHI]] 1609; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 1610; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1611; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] 1612; CHECK: [[MIDDLE_BLOCK]]: 1613; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP5]]) 1614; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] 1615; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[SCALAR_PH]] 1616; CHECK: [[SCALAR_PH]]: 1617; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_HEADER]] ] 1618; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_HEADER]] ] 1619; CHECK-NEXT: br label %[[FOR_BODY:.*]] 1620; CHECK: [[FOR_BODY]]: 1621; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] 1622; CHECK-NEXT: [[SUM_1:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SUM_2:%.*]], %[[FOR_BODY]] ] 1623; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[INDVARS_IV]] 1624; CHECK-NEXT: [[TMP8:%.*]] = load float, ptr [[ARRAYIDX]], align 4 1625; CHECK-NEXT: [[CMP_2:%.*]] = fcmp ogt float [[TMP8]], 0.000000e+00 1626; CHECK-NEXT: [[ADD:%.*]] = add nsw 
i64 [[SUM_1]], 2 1627; CHECK-NEXT: [[SUM_2]] = select i1 [[CMP_2]], i64 [[ADD]], i64 [[SUM_1]] 1628; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 1629; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] 1630; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] 1631; CHECK: [[FOR_END_LOOPEXIT]]: 1632; CHECK-NEXT: [[SUM_2_LCSSA:%.*]] = phi i64 [ [[SUM_2]], %[[FOR_BODY]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ] 1633; CHECK-NEXT: br label %[[FOR_END]] 1634; CHECK: [[FOR_END]]: 1635; CHECK-NEXT: [[TMP9:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[SUM_2_LCSSA]], %[[FOR_END_LOOPEXIT]] ] 1636; CHECK-NEXT: ret i64 [[TMP9]] 1637; 1638entry: 1639 %cmp.1 = icmp sgt i64 %N, 0 1640 br i1 %cmp.1, label %for.header, label %for.end 1641 1642for.header: ; preds = %entry 1643 br label %for.body 1644 1645for.body: ; preds = %for.header, %for.body 1646 %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ] 1647 %sum.1 = phi i64 [ 0, %for.header ], [ %sum.2, %for.body ] 1648 %arrayidx = getelementptr inbounds float, ptr %x, i64 %indvars.iv 1649 %0 = load float, ptr %arrayidx, align 4 1650 %cmp.2 = fcmp ogt float %0, 0.000000e+00 1651 %add = add nsw i64 %sum.1, 2 1652 %sum.2 = select i1 %cmp.2, i64 %add, i64 %sum.1 1653 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 1654 %exitcond = icmp eq i64 %indvars.iv.next, %N 1655 br i1 %exitcond, label %for.end, label %for.body 1656 1657for.end: ; preds = %for.body, %entry 1658 %1 = phi i64 [ 0, %entry ], [ %sum.2, %for.body ] 1659 ret i64 %1 1660} 1661 1662; FIXME: %indvars.iv.next is poison on first iteration due to sub nuw 0, 1. 
1663define i32 @fcmp_0_sub_select1(ptr noalias %x, i32 %N) nounwind readonly { 1664; CHECK-LABEL: define i32 @fcmp_0_sub_select1( 1665; CHECK-SAME: ptr noalias [[X:%.*]], i32 [[N:%.*]]) #[[ATTR0]] { 1666; CHECK-NEXT: [[ENTRY:.*]]: 1667; CHECK-NEXT: [[CMP_1:%.*]] = icmp sgt i32 [[N]], 0 1668; CHECK-NEXT: br i1 [[CMP_1]], label %[[FOR_HEADER:.*]], label %[[FOR_END:.*]] 1669; CHECK: [[FOR_HEADER]]: 1670; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[N]] to i64 1671; CHECK-NEXT: [[TMP0:%.*]] = sub i64 0, [[ZEXT]] 1672; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] 1673; CHECK: [[VECTOR_PH]]: 1674; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4 1675; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] 1676; CHECK-NEXT: [[IND_END:%.*]] = sub i64 0, [[N_VEC]] 1677; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] 1678; CHECK: [[VECTOR_BODY]]: 1679; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] 1680; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] 1681; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 0, [[INDEX]] 1682; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 1683; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[TMP1]] 1684; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0 1685; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 -3 1686; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP4]], align 4 1687; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x float> [[WIDE_LOAD]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 1688; CHECK-NEXT: [[TMP5:%.*]] = fcmp ogt <4 x float> [[REVERSE]], zeroinitializer 1689; CHECK-NEXT: [[TMP6:%.*]] = sub <4 x i32> [[VEC_PHI]], splat (i32 2) 1690; CHECK-NEXT: [[TMP7]] = select <4 x i1> [[TMP5]], <4 x i32> [[TMP6]], <4 x i32> [[VEC_PHI]] 1691; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 
4 1692; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1693; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] 1694; CHECK: [[MIDDLE_BLOCK]]: 1695; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP7]]) 1696; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] 1697; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[SCALAR_PH]] 1698; CHECK: [[SCALAR_PH]]: 1699; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_HEADER]] ] 1700; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP9]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_HEADER]] ] 1701; CHECK-NEXT: br label %[[FOR_BODY:.*]] 1702; CHECK: [[FOR_BODY]]: 1703; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] 1704; CHECK-NEXT: [[SUM_1:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SUM_2:%.*]], %[[FOR_BODY]] ] 1705; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[INDVARS_IV]] 1706; CHECK-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX]], align 4 1707; CHECK-NEXT: [[CMP_2:%.*]] = fcmp ogt float [[TMP10]], 0.000000e+00 1708; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[SUM_1]], 2 1709; CHECK-NEXT: [[SUM_2]] = select i1 [[CMP_2]], i32 [[SUB]], i32 [[SUM_1]] 1710; CHECK-NEXT: [[INDVARS_IV_NEXT]] = sub nuw nsw i64 [[INDVARS_IV]], 1 1711; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[ZEXT]] 1712; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]] 1713; CHECK: [[FOR_END_LOOPEXIT]]: 1714; CHECK-NEXT: [[SUM_2_LCSSA:%.*]] = phi i32 [ [[SUM_2]], %[[FOR_BODY]] ], [ [[TMP9]], %[[MIDDLE_BLOCK]] ] 1715; CHECK-NEXT: br label %[[FOR_END]] 1716; CHECK: [[FOR_END]]: 1717; CHECK-NEXT: [[TMP11:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[SUM_2_LCSSA]], %[[FOR_END_LOOPEXIT]] ] 1718; 
CHECK-NEXT: ret i32 [[TMP11]] 1719; 1720entry: 1721 %cmp.1 = icmp sgt i32 %N, 0 1722 br i1 %cmp.1, label %for.header, label %for.end 1723 1724for.header: ; preds = %entry 1725 %zext = zext i32 %N to i64 1726 br label %for.body 1727 1728for.body: ; preds = %header, %for.body 1729 %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ] 1730 %sum.1 = phi i32 [ 0, %for.header ], [ %sum.2, %for.body ] 1731 %arrayidx = getelementptr inbounds float, ptr %x, i64 %indvars.iv 1732 %0 = load float, ptr %arrayidx, align 4 1733 %cmp.2 = fcmp ogt float %0, 0.000000e+00 1734 %sub = sub nsw i32 %sum.1, 2 1735 %sum.2 = select i1 %cmp.2, i32 %sub, i32 %sum.1 1736 %indvars.iv.next = sub nuw nsw i64 %indvars.iv, 1 1737 %exitcond = icmp eq i64 %indvars.iv.next, %zext 1738 br i1 %exitcond, label %for.end, label %for.body 1739 1740for.end: ; preds = %for.body, %entry 1741 %1 = phi i32 [ 0, %entry ], [ %sum.2, %for.body ] 1742 ret i32 %1 1743} 1744 1745define i32 @fcmp_0_mult_select1(ptr noalias %x, i32 %N) nounwind readonly { 1746; CHECK-LABEL: define i32 @fcmp_0_mult_select1( 1747; CHECK-SAME: ptr noalias [[X:%.*]], i32 [[N:%.*]]) #[[ATTR0]] { 1748; CHECK-NEXT: [[ENTRY:.*]]: 1749; CHECK-NEXT: [[CMP_1:%.*]] = icmp sgt i32 [[N]], 0 1750; CHECK-NEXT: br i1 [[CMP_1]], label %[[FOR_HEADER:.*]], label %[[FOR_END:.*]] 1751; CHECK: [[FOR_HEADER]]: 1752; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[N]] to i64 1753; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[ZEXT]], 4 1754; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] 1755; CHECK: [[VECTOR_PH]]: 1756; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[ZEXT]], 4 1757; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[ZEXT]], [[N_MOD_VF]] 1758; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] 1759; CHECK: [[VECTOR_BODY]]: 1760; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] 1761; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 1, i32 1>, 
%[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] 1762; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 1763; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[TMP0]] 1764; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0 1765; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 1766; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], zeroinitializer 1767; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i32> [[VEC_PHI]], splat (i32 2) 1768; CHECK-NEXT: [[TMP5]] = select <4 x i1> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> [[VEC_PHI]] 1769; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 1770; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1771; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] 1772; CHECK: [[MIDDLE_BLOCK]]: 1773; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP5]]) 1774; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[ZEXT]], [[N_VEC]] 1775; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[SCALAR_PH]] 1776; CHECK: [[SCALAR_PH]]: 1777; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_HEADER]] ] 1778; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_HEADER]] ] 1779; CHECK-NEXT: br label %[[FOR_BODY:.*]] 1780; CHECK: [[FOR_BODY]]: 1781; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] 1782; CHECK-NEXT: [[SUM_1:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SUM_2:%.*]], %[[FOR_BODY]] ] 1783; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[INDVARS_IV]] 1784; CHECK-NEXT: [[TMP8:%.*]] = load float, ptr [[ARRAYIDX]], align 4 1785; CHECK-NEXT: [[CMP_2:%.*]] = fcmp ogt float [[TMP8]], 0.000000e+00 1786; CHECK-NEXT: [[MULT:%.*]] = mul nsw i32 [[SUM_1]], 2 1787; 
CHECK-NEXT: [[SUM_2]] = select i1 [[CMP_2]], i32 [[MULT]], i32 [[SUM_1]]
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[ZEXT]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]]
; CHECK:       [[FOR_END_LOOPEXIT]]:
; CHECK-NEXT:    [[SUM_2_LCSSA:%.*]] = phi i32 [ [[SUM_2]], %[[FOR_BODY]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    br label %[[FOR_END]]
; CHECK:       [[FOR_END]]:
; CHECK-NEXT:    [[TMP9:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[SUM_2_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
; CHECK-NEXT:    ret i32 [[TMP9]]
;
entry:
  %cmp.1 = icmp sgt i32 %N, 0
  br i1 %cmp.1, label %for.header, label %for.end

for.header:                                       ; preds = %entry
  %zext = zext i32 %N to i64
  br label %for.body

for.body:                                         ; preds = %for.body, %for.header
  %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
  %sum.1 = phi i32 [ 0, %for.header ], [ %sum.2, %for.body ]
  %arrayidx = getelementptr inbounds float, ptr %x, i64 %indvars.iv
  %0 = load float, ptr %arrayidx, align 4
  %cmp.2 = fcmp ogt float %0, 0.000000e+00
  %mult = mul nsw i32 %sum.1, 2
  %sum.2 = select i1 %cmp.2, i32 %mult, i32 %sum.1
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %zext
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  %1 = phi i32 [ 0, %entry ], [ %sum.2, %for.body ]
  ret i32 %1
}

; Lookup table of 13 i16 values read by @non_reduction_index.
@table = constant [13 x i16] [i16 10, i16 35, i16 69, i16 147, i16 280, i16 472, i16 682, i16 1013, i16 1559, i16 2544, i16 4553, i16 6494, i16 10000], align 1

; Integer index-select pattern:
; The loop walks @table from index 12 down to index 1. On each iteration the
; select keeps the decremented index (%sub) when the loaded element is
; unsigned-greater than %val, and otherwise keeps the previous result (%k.04).
; The selected value is the induction variable itself rather than a value
; computed from %k.04. The expected output below contains no vector body,
; i.e. the loop is left scalar.
define i16 @non_reduction_index(i16 noundef %val) {
; CHECK-LABEL: define i16 @non_reduction_index(
; CHECK-SAME: i16 noundef [[VAL:%.*]]) {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
; CHECK:       [[FOR_COND_CLEANUP:.*]]:
; CHECK-NEXT:    [[SPEC_SELECT_LCSSA:%.*]] = phi i16 [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT:    ret i16 [[SPEC_SELECT_LCSSA]]
; CHECK:       [[FOR_BODY]]:
; CHECK-NEXT:    [[I_05:%.*]] = phi i16 [ 12, %[[ENTRY]] ], [ [[SUB:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT:    [[K_04:%.*]] = phi i16 [ 0, %[[ENTRY]] ], [ [[SPEC_SELECT]], %[[FOR_BODY]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[I_05]]
; CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT:    [[CMP1:%.*]] = icmp ugt i16 [[TMP0]], [[VAL]]
; CHECK-NEXT:    [[SUB]] = add nsw i16 [[I_05]], -1
; CHECK-NEXT:    [[SPEC_SELECT]] = select i1 [[CMP1]], i16 [[SUB]], i16 [[K_04]]
; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq i16 [[SUB]], 0
; CHECK-NEXT:    br i1 [[CMP_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]]
;
entry:
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  %spec.select.lcssa = phi i16 [ %spec.select, %for.body ]
  ret i16 %spec.select.lcssa

for.body:                                         ; preds = %entry, %for.body
  %i.05 = phi i16 [ 12, %entry ], [ %sub, %for.body ]
  %k.04 = phi i16 [ 0, %entry ], [ %spec.select, %for.body ]
  %arrayidx = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 %i.05
  %0 = load i16, ptr %arrayidx, align 1
  %cmp1 = icmp ugt i16 %0, %val
  %sub = add nsw i16 %i.05, -1
  %spec.select = select i1 %cmp1, i16 %sub, i16 %k.04
  %cmp.not = icmp eq i16 %sub, 0
  br i1 %cmp.not, label %for.cond.cleanup, label %for.body
}

; Lookup table of 13 half values read by @non_reduction_index_half.
@tablef = constant [13 x half] [half 10.0, half 35.0, half 69.0, half 147.0, half 280.0, half 472.0, half 682.0, half 1013.0, half 1559.0, half 2544.0, half 4556.0, half 6496.0, half 10000.0], align 1

; Half-precision variant of @non_reduction_index:
; Same loop shape, but the elements are loaded as half from @tablef and
; compared against %val with an unordered fcmp ugt. The expected output
; below again contains no vector body.
define i16 @non_reduction_index_half(half noundef %val) {
; CHECK-LABEL: define i16 @non_reduction_index_half(
; CHECK-SAME: half noundef [[VAL:%.*]]) {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
; CHECK:       [[FOR_COND_CLEANUP:.*]]:
; CHECK-NEXT:    [[SPEC_SELECT_LCSSA:%.*]] = phi i16 [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT:    ret i16 [[SPEC_SELECT_LCSSA]]
; CHECK:       [[FOR_BODY]]:
; CHECK-NEXT:    [[I_05:%.*]] = phi i16 [ 12, %[[ENTRY]] ], [ [[SUB:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT:    [[K_04:%.*]] = phi i16 [ 0, %[[ENTRY]] ], [ [[SPEC_SELECT]], %[[FOR_BODY]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [13 x half], ptr @tablef, i16 0, i16 [[I_05]]
; CHECK-NEXT:    [[TMP0:%.*]] = load half, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT:    [[FCMP1:%.*]] = fcmp ugt half [[TMP0]], [[VAL]]
; CHECK-NEXT:    [[SUB]] = add nsw i16 [[I_05]], -1
; CHECK-NEXT:    [[SPEC_SELECT]] = select i1 [[FCMP1]], i16 [[SUB]], i16 [[K_04]]
; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq i16 [[SUB]], 0
; CHECK-NEXT:    br i1 [[CMP_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]]
;
entry:
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  %spec.select.lcssa = phi i16 [ %spec.select, %for.body ]
  ret i16 %spec.select.lcssa

for.body:                                         ; preds = %entry, %for.body
  %i.05 = phi i16 [ 12, %entry ], [ %sub, %for.body ]
  %k.04 = phi i16 [ 0, %entry ], [ %spec.select, %for.body ]
  ; Index the half table so the element type of the GEP matches the load.
  %arrayidx = getelementptr inbounds [13 x half], ptr @tablef, i16 0, i16 %i.05
  %0 = load half, ptr %arrayidx, align 1
  %fcmp1 = fcmp ugt half %0, %val
  %sub = add nsw i16 %i.05, -1
  %spec.select = select i1 %fcmp1, i16 %sub, i16 %k.04
  %cmp.not = icmp eq i16 %sub, 0
  br i1 %cmp.not, label %for.cond.cleanup, label %for.body
}

;.
1904; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} 1905; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} 1906; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} 1907; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} 1908; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} 1909; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} 1910; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} 1911; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]} 1912; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]} 1913; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]} 1914; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]} 1915; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]} 1916; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]} 1917; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META1]]} 1918; CHECK: [[LOOP14]] = distinct !{[[LOOP14]], [[META1]], [[META2]]} 1919; CHECK: [[LOOP15]] = distinct !{[[LOOP15]], [[META2]], [[META1]]} 1920; CHECK: [[LOOP16]] = distinct !{[[LOOP16]], [[META1]], [[META2]]} 1921; CHECK: [[LOOP17]] = distinct !{[[LOOP17]], [[META2]], [[META1]]} 1922; CHECK: [[LOOP18]] = distinct !{[[LOOP18]], [[META1]], [[META2]]} 1923; CHECK: [[LOOP19]] = distinct !{[[LOOP19]], [[META2]], [[META1]]} 1924; CHECK: [[LOOP20]] = distinct !{[[LOOP20]], [[META1]], [[META2]]} 1925; CHECK: [[LOOP21]] = distinct !{[[LOOP21]], [[META2]], [[META1]]} 1926; CHECK: [[LOOP22]] = distinct !{[[LOOP22]], [[META1]], [[META2]]} 1927; CHECK: [[LOOP23]] = distinct !{[[LOOP23]], [[META2]], [[META1]]} 1928; CHECK: [[LOOP24]] = distinct !{[[LOOP24]], [[META1]], [[META2]]} 1929; CHECK: [[LOOP25]] = distinct !{[[LOOP25]], [[META2]], [[META1]]} 1930; CHECK: [[LOOP26]] = distinct !{[[LOOP26]], [[META1]], [[META2]]} 1931; CHECK: [[LOOP27]] = distinct !{[[LOOP27]], [[META2]], [[META1]]} 1932; CHECK: [[LOOP28]] = 
distinct !{[[LOOP28]], [[META1]], [[META2]]} 1933; CHECK: [[LOOP29]] = distinct !{[[LOOP29]], [[META2]], [[META1]]} 1934; CHECK: [[LOOP30]] = distinct !{[[LOOP30]], [[META1]], [[META2]]} 1935; CHECK: [[LOOP31]] = distinct !{[[LOOP31]], [[META2]], [[META1]]} 1936;. 1937