; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt < %s -passes=loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -prefer-predicate-over-epilogue=scalar-epilogue \
; RUN: -force-ordered-reductions=false -hints-allow-reordering=false -S | FileCheck %s --check-prefix=CHECK-NOT-VECTORIZED
; RUN: opt < %s -passes=loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -prefer-predicate-over-epilogue=scalar-epilogue \
; RUN: -force-ordered-reductions=false -hints-allow-reordering=true -S | FileCheck %s --check-prefix=CHECK-UNORDERED
; RUN: opt < %s -passes=loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -prefer-predicate-over-epilogue=scalar-epilogue \
; RUN: -force-ordered-reductions=true -hints-allow-reordering=false -S | FileCheck %s --check-prefix=CHECK-ORDERED
; RUN: opt < %s -passes=loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -prefer-predicate-over-epilogue=scalar-epilogue \
; RUN: -force-ordered-reductions=true -hints-allow-reordering=true -S | FileCheck %s --check-prefix=CHECK-UNORDERED
; RUN: opt < %s -passes=loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -prefer-predicate-over-epilogue=scalar-epilogue \
; RUN: -hints-allow-reordering=false -S | FileCheck %s --check-prefix=CHECK-ORDERED
; RUN: opt < %s -passes=loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue \
; RUN: -hints-allow-reordering=false -S | FileCheck %s --check-prefix=CHECK-ORDERED-TF
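
; The RUN lines above exercise the interesting combinations of
; -force-ordered-reductions and -hints-allow-reordering: an fadd reduction
; with no reassociation allowed either stays scalar (CHECK-NOT-VECTORIZED),
; is vectorized with a reassociated vector accumulator (CHECK-UNORDERED),
; is vectorized with strict in-order reductions (CHECK-ORDERED), or
; additionally has its tail folded into a predicated vector body
; (CHECK-ORDERED-TF).
;
; @fadd_strict: a plain fadd reduction over a[0..n) with no fast-math flags,
; so the sequential order of the additions must be preserved unless
; reordering is explicitly allowed.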
define float @fadd_strict(ptr noalias nocapture readonly %a, i64 %n) #0 {
; CHECK-NOT-VECTORIZED-LABEL: define float @fadd_strict
; CHECK-NOT-VECTORIZED-SAME: (ptr noalias readonly captures(none) [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NOT-VECTORIZED-NEXT: entry:
; CHECK-NOT-VECTORIZED-NEXT: br label [[FOR_BODY:%.*]]
; CHECK-NOT-VECTORIZED: for.body:
; CHECK-NOT-VECTORIZED-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NOT-VECTORIZED-NEXT: [[SUM_07:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
; CHECK-NOT-VECTORIZED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; CHECK-NOT-VECTORIZED-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-NOT-VECTORIZED-NEXT: [[ADD]] = fadd float [[TMP0]], [[SUM_07]]
; CHECK-NOT-VECTORIZED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NOT-VECTORIZED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-NOT-VECTORIZED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK-NOT-VECTORIZED: for.end:
; CHECK-NOT-VECTORIZED-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ]
; CHECK-NOT-VECTORIZED-NEXT: ret float [[ADD_LCSSA]]
;
; CHECK-UNORDERED-LABEL: define float @fadd_strict
; CHECK-UNORDERED-SAME: (ptr noalias readonly captures(none) [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-UNORDERED-NEXT: entry:
; CHECK-UNORDERED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-UNORDERED-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 8
; CHECK-UNORDERED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
; CHECK-UNORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-UNORDERED: vector.ph:
; CHECK-UNORDERED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-UNORDERED-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 8
; CHECK-UNORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; CHECK-UNORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-UNORDERED-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 8
; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-UNORDERED: vector.body:
; CHECK-UNORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x float> [ insertelement (<vscale x 8 x float> splat (float -0.000000e+00), float 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0
; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP6]]
; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0
; CHECK-UNORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x float>, ptr [[TMP8]], align 4
; CHECK-UNORDERED-NEXT: [[TMP9]] = fadd <vscale x 8 x float> [[WIDE_LOAD]], [[VEC_PHI]]
; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-UNORDERED-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK-UNORDERED: middle.block:
; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float -0.000000e+00, <vscale x 8 x float> [[TMP9]])
; CHECK-UNORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-UNORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK-UNORDERED: scalar.ph:
; CHECK-UNORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-UNORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP11]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ]
; CHECK-UNORDERED-NEXT: br label [[FOR_BODY:%.*]]
; CHECK-UNORDERED: for.body:
; CHECK-UNORDERED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-UNORDERED-NEXT: [[SUM_07:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
; CHECK-UNORDERED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-UNORDERED-NEXT: [[ADD]] = fadd float [[TMP12]], [[SUM_07]]
; CHECK-UNORDERED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-UNORDERED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-UNORDERED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK-UNORDERED: for.end:
; CHECK-UNORDERED-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ]
; CHECK-UNORDERED-NEXT: ret float [[ADD_LCSSA]]
;
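; With -force-ordered-reductions the accumulator stays a scalar float phi and
; each vector iteration folds the whole wide load into it with an in-loop,
; in-order llvm.vector.reduce.fadd, checked below.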
; CHECK-ORDERED-LABEL: define float @fadd_strict
; CHECK-ORDERED-SAME: (ptr noalias readonly captures(none) [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-ORDERED-NEXT: entry:
; CHECK-ORDERED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 8
; CHECK-ORDERED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
; CHECK-ORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-ORDERED: vector.ph:
; CHECK-ORDERED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 8
; CHECK-ORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; CHECK-ORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-ORDERED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 8
; CHECK-ORDERED-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-ORDERED: vector.body:
; CHECK-ORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0
; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP6]]
; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0
; CHECK-ORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x float>, ptr [[TMP8]], align 4
; CHECK-ORDERED-NEXT: [[TMP9]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], <vscale x 8 x float> [[WIDE_LOAD]])
; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-ORDERED-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-ORDERED-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK-ORDERED: middle.block:
; CHECK-ORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-ORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK-ORDERED: scalar.ph:
; CHECK-ORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-ORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP9]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ]
; CHECK-ORDERED-NEXT: br label [[FOR_BODY:%.*]]
; CHECK-ORDERED: for.body:
; CHECK-ORDERED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-ORDERED-NEXT: [[SUM_07:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
; CHECK-ORDERED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; CHECK-ORDERED-NEXT: [[TMP11:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-ORDERED-NEXT: [[ADD]] = fadd float [[TMP11]], [[SUM_07]]
; CHECK-ORDERED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-ORDERED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-ORDERED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK-ORDERED: for.end:
; CHECK-ORDERED-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ]
; CHECK-ORDERED-NEXT: ret float [[ADD_LCSSA]]
;
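; With tail folding the trip count is rounded up to a multiple of the VF and
; an active lane mask predicates the load; inactive lanes are replaced with
; -0.0 (the identity for fadd) before the in-order reduction, as checked
; below.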
; CHECK-ORDERED-TF-LABEL: define float @fadd_strict
; CHECK-ORDERED-TF-SAME: (ptr noalias readonly captures(none) [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-ORDERED-TF-NEXT: entry:
; CHECK-ORDERED-TF-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-ORDERED-TF: vector.ph:
; CHECK-ORDERED-TF-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 8
; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
; CHECK-ORDERED-TF-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP2]]
; CHECK-ORDERED-TF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; CHECK-ORDERED-TF-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 8
; CHECK-ORDERED-TF-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8
; CHECK-ORDERED-TF-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP6]]
; CHECK-ORDERED-TF-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[N]], [[TMP6]]
; CHECK-ORDERED-TF-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[N]])
; CHECK-ORDERED-TF-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-ORDERED-TF: vector.body:
; CHECK-ORDERED-TF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0
; CHECK-ORDERED-TF-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP10]]
; CHECK-ORDERED-TF-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i32 0
; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP12]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> poison)
; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> [[WIDE_MASKED_LOAD]], <vscale x 8 x float> splat (float -0.000000e+00)
; CHECK-ORDERED-TF-NEXT: [[TMP14]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], <vscale x 8 x float> [[TMP13]])
; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]]
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX]], i64 [[TMP9]])
; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = xor <vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = extractelement <vscale x 8 x i1> [[TMP15]], i32 0
; CHECK-ORDERED-TF-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK-ORDERED-TF: middle.block:
; CHECK-ORDERED-TF-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK-ORDERED-TF: scalar.ph:
; CHECK-ORDERED-TF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-ORDERED-TF-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP14]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ]
; CHECK-ORDERED-TF-NEXT: br label [[FOR_BODY:%.*]]
; CHECK-ORDERED-TF: for.body:
; CHECK-ORDERED-TF-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-ORDERED-TF-NEXT: [[SUM_07:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
; CHECK-ORDERED-TF-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-ORDERED-TF-NEXT: [[ADD]] = fadd float [[TMP17]], [[SUM_07]]
; CHECK-ORDERED-TF-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-ORDERED-TF-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-ORDERED-TF-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK-ORDERED-TF: for.end:
; CHECK-ORDERED-TF-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ]
; CHECK-ORDERED-TF-NEXT: ret float [[ADD_LCSSA]]
;

entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %sum.07 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ]
  %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv
  %0 = load float, ptr %arrayidx, align 4
  %add = fadd float %0, %sum.07
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %n
  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0

for.end:
  ret float %add
}
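
; @fadd_strict_unroll: the same reduction as @fadd_strict, but the checks
; below reflect an interleave count of 4 (note the vscale x 32 step): four
; concurrent partial sums in the unordered case, or a chain of four in-order
; reductions in the ordered cases.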
define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 {
; CHECK-NOT-VECTORIZED-LABEL: define float @fadd_strict_unroll
; CHECK-NOT-VECTORIZED-SAME: (ptr noalias readonly captures(none) [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; CHECK-NOT-VECTORIZED-NEXT: entry:
; CHECK-NOT-VECTORIZED-NEXT: br label [[FOR_BODY:%.*]]
; CHECK-NOT-VECTORIZED: for.body:
; CHECK-NOT-VECTORIZED-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NOT-VECTORIZED-NEXT: [[SUM_07:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
; CHECK-NOT-VECTORIZED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; CHECK-NOT-VECTORIZED-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-NOT-VECTORIZED-NEXT: [[ADD]] = fadd float [[TMP0]], [[SUM_07]]
; CHECK-NOT-VECTORIZED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NOT-VECTORIZED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-NOT-VECTORIZED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK-NOT-VECTORIZED: for.end:
; CHECK-NOT-VECTORIZED-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ]
; CHECK-NOT-VECTORIZED-NEXT: ret float [[ADD_LCSSA]]
;
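; In the unordered case each interleaved part gets its own vector
; accumulator; the four partial sums are added pairwise in the middle block
; and only then reduced to a scalar with a single llvm.vector.reduce.fadd.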
; CHECK-UNORDERED-LABEL: define float @fadd_strict_unroll
; CHECK-UNORDERED-SAME: (ptr noalias readonly captures(none) [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; CHECK-UNORDERED-NEXT: entry:
; CHECK-UNORDERED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-UNORDERED-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 32
; CHECK-UNORDERED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
; CHECK-UNORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-UNORDERED: vector.ph:
; CHECK-UNORDERED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-UNORDERED-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 32
; CHECK-UNORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; CHECK-UNORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-UNORDERED-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 32
; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-UNORDERED: vector.body:
; CHECK-UNORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x float> [ insertelement (<vscale x 8 x float> splat (float -0.000000e+00), float 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ]
; CHECK-UNORDERED-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ]
; CHECK-UNORDERED-NEXT: [[VEC_PHI2:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ]
; CHECK-UNORDERED-NEXT: [[VEC_PHI3:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ]
; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0
; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP6]]
; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0
; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 8
; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[TMP10]]
; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-UNORDERED-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 16
; CHECK-UNORDERED-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[TMP13]]
; CHECK-UNORDERED-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-UNORDERED-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 24
; CHECK-UNORDERED-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[TMP16]]
; CHECK-UNORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x float>, ptr [[TMP8]], align 4
; CHECK-UNORDERED-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 8 x float>, ptr [[TMP11]], align 4
; CHECK-UNORDERED-NEXT: [[WIDE_LOAD5:%.*]] = load <vscale x 8 x float>, ptr [[TMP14]], align 4
; CHECK-UNORDERED-NEXT: [[WIDE_LOAD6:%.*]] = load <vscale x 8 x float>, ptr [[TMP17]], align 4
; CHECK-UNORDERED-NEXT: [[TMP18]] = fadd <vscale x 8 x float> [[WIDE_LOAD]], [[VEC_PHI]]
; CHECK-UNORDERED-NEXT: [[TMP19]] = fadd <vscale x 8 x float> [[WIDE_LOAD4]], [[VEC_PHI1]]
; CHECK-UNORDERED-NEXT: [[TMP20]] = fadd <vscale x 8 x float> [[WIDE_LOAD5]], [[VEC_PHI2]]
; CHECK-UNORDERED-NEXT: [[TMP21]] = fadd <vscale x 8 x float> [[WIDE_LOAD6]], [[VEC_PHI3]]
; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-UNORDERED-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-UNORDERED-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK-UNORDERED: middle.block:
; CHECK-UNORDERED-NEXT: [[BIN_RDX:%.*]] = fadd <vscale x 8 x float> [[TMP19]], [[TMP18]]
; CHECK-UNORDERED-NEXT: [[BIN_RDX7:%.*]] = fadd <vscale x 8 x float> [[TMP20]], [[BIN_RDX]]
; CHECK-UNORDERED-NEXT: [[BIN_RDX8:%.*]] = fadd <vscale x 8 x float> [[TMP21]], [[BIN_RDX7]]
; CHECK-UNORDERED-NEXT: [[TMP23:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float -0.000000e+00, <vscale x 8 x float> [[BIN_RDX8]])
; CHECK-UNORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-UNORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK-UNORDERED: scalar.ph:
; CHECK-UNORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-UNORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP23]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ]
; CHECK-UNORDERED-NEXT: br label [[FOR_BODY:%.*]]
; CHECK-UNORDERED: for.body:
; CHECK-UNORDERED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-UNORDERED-NEXT: [[SUM_07:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
; CHECK-UNORDERED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; CHECK-UNORDERED-NEXT: [[TMP24:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-UNORDERED-NEXT: [[ADD]] = fadd float [[TMP24]], [[SUM_07]]
; CHECK-UNORDERED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-UNORDERED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-UNORDERED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK-UNORDERED: for.end:
; CHECK-UNORDERED-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP23]], [[MIDDLE_BLOCK]] ]
; CHECK-UNORDERED-NEXT: ret float [[ADD_LCSSA]]
;
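; In the ordered case there is a single scalar accumulator and the four wide
; loads are folded in with four chained llvm.vector.reduce.fadd calls,
; keeping the additions in their original order.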
; CHECK-ORDERED-LABEL: define float @fadd_strict_unroll
; CHECK-ORDERED-SAME: (ptr noalias readonly captures(none) [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; CHECK-ORDERED-NEXT: entry:
; CHECK-ORDERED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 32
; CHECK-ORDERED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
; CHECK-ORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-ORDERED: vector.ph:
; CHECK-ORDERED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 32
; CHECK-ORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; CHECK-ORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-ORDERED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 32
; CHECK-ORDERED-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-ORDERED: vector.body:
; CHECK-ORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0
; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP6]]
; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0
; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 8
; CHECK-ORDERED-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[TMP10]]
; CHECK-ORDERED-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 16
; CHECK-ORDERED-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[TMP13]]
; CHECK-ORDERED-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 24
; CHECK-ORDERED-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[TMP16]]
; CHECK-ORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x float>, ptr [[TMP8]], align 4
; CHECK-ORDERED-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 8 x float>, ptr [[TMP11]], align 4
; CHECK-ORDERED-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 8 x float>, ptr [[TMP14]], align 4
; CHECK-ORDERED-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 8 x float>, ptr [[TMP17]], align 4
; CHECK-ORDERED-NEXT: [[TMP18:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], <vscale x 8 x float> [[WIDE_LOAD]])
; CHECK-ORDERED-NEXT: [[TMP19:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP18]], <vscale x 8 x float> [[WIDE_LOAD1]])
; CHECK-ORDERED-NEXT: [[TMP20:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP19]], <vscale x 8 x float> [[WIDE_LOAD2]])
; CHECK-ORDERED-NEXT: [[TMP21]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP20]], <vscale x 8 x float> [[WIDE_LOAD3]])
; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-ORDERED-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-ORDERED-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK-ORDERED: middle.block:
; CHECK-ORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-ORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK-ORDERED: scalar.ph:
; CHECK-ORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-ORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP21]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ]
; CHECK-ORDERED-NEXT: br label [[FOR_BODY:%.*]]
; CHECK-ORDERED: for.body:
; CHECK-ORDERED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-ORDERED-NEXT: [[SUM_07:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
; CHECK-ORDERED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; CHECK-ORDERED-NEXT: [[TMP23:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-ORDERED-NEXT: [[ADD]] = fadd float [[TMP23]], [[SUM_07]]
; CHECK-ORDERED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-ORDERED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-ORDERED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK-ORDERED: for.end:
; CHECK-ORDERED-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP21]], [[MIDDLE_BLOCK]] ]
; CHECK-ORDERED-NEXT: ret float [[ADD_LCSSA]]
;
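; The tail-folded variant carries one active lane mask per interleaved part;
; each masked load selects -0.0 for its inactive lanes before the in-order
; reduction, and all four masks are advanced together at the loop bottom.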
; CHECK-ORDERED-TF-LABEL: define float @fadd_strict_unroll
; CHECK-ORDERED-TF-SAME: (ptr noalias readonly captures(none) [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; CHECK-ORDERED-TF-NEXT: entry:
; CHECK-ORDERED-TF-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-ORDERED-TF: vector.ph:
; CHECK-ORDERED-TF-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 32
; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
; CHECK-ORDERED-TF-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP2]]
; CHECK-ORDERED-TF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; CHECK-ORDERED-TF-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 32
; CHECK-ORDERED-TF-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 32
; CHECK-ORDERED-TF-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP6]]
; CHECK-ORDERED-TF-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[N]], [[TMP6]]
; CHECK-ORDERED-TF-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0
; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 8
; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT:%.*]] = add i64 0, [[TMP11]]
; CHECK-ORDERED-TF-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 16
; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT1:%.*]] = add i64 0, [[TMP13]]
; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 24
; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT2:%.*]] = add i64 0, [[TMP15]]
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[N]])
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY3:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_PART_NEXT]], i64 [[N]])
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY4:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_PART_NEXT1]], i64 [[N]])
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY5:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_PART_NEXT2]], i64 [[N]])
; CHECK-ORDERED-TF-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-ORDERED-TF: vector.body:
; CHECK-ORDERED-TF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK6:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY3]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT12:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK7:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY4]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT13:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK8:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY5]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT14:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP35:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], 0
; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP16]]
; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0
; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 8
; CHECK-ORDERED-TF-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP20]]
; CHECK-ORDERED-TF-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 16
; CHECK-ORDERED-TF-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP23]]
; CHECK-ORDERED-TF-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP26:%.*]] = mul i64 [[TMP25]], 24
; CHECK-ORDERED-TF-NEXT: [[TMP27:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP26]]
; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP18]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> poison)
; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP21]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> poison)
; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD10:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP24]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> poison)
; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP27]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> poison)
; CHECK-ORDERED-TF-NEXT: [[TMP28:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> [[WIDE_MASKED_LOAD]], <vscale x 8 x float> splat (float -0.000000e+00)
; CHECK-ORDERED-TF-NEXT: [[TMP29:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], <vscale x 8 x float> [[TMP28]])
; CHECK-ORDERED-TF-NEXT: [[TMP30:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> [[WIDE_MASKED_LOAD9]], <vscale x 8 x float> splat (float -0.000000e+00)
; CHECK-ORDERED-TF-NEXT: [[TMP31:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP29]], <vscale x 8 x float> [[TMP30]])
; CHECK-ORDERED-TF-NEXT: [[TMP32:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> [[WIDE_MASKED_LOAD10]], <vscale x 8 x float> splat (float -0.000000e+00)
; CHECK-ORDERED-TF-NEXT: [[TMP33:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP31]], <vscale x 8 x float> [[TMP32]])
; CHECK-ORDERED-TF-NEXT: [[TMP34:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> [[WIDE_MASKED_LOAD11]], <vscale x 8 x float> splat (float -0.000000e+00)
; CHECK-ORDERED-TF-NEXT: [[TMP35]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP33]], <vscale x 8 x float> [[TMP34]])
; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]]
; CHECK-ORDERED-TF-NEXT: [[TMP36:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP37:%.*]] = mul i64 [[TMP36]], 8
; CHECK-ORDERED-TF-NEXT: [[TMP38:%.*]] = add i64 [[INDEX]], [[TMP37]]
; CHECK-ORDERED-TF-NEXT: [[TMP39:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP40:%.*]] = mul i64 [[TMP39]], 16
; CHECK-ORDERED-TF-NEXT: [[TMP41:%.*]] = add i64 [[INDEX]], [[TMP40]]
; CHECK-ORDERED-TF-NEXT: [[TMP42:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP43:%.*]] = mul i64 [[TMP42]], 24
; CHECK-ORDERED-TF-NEXT: [[TMP44:%.*]] = add i64 [[INDEX]], [[TMP43]]
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX]], i64 [[TMP9]])
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT12]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP38]], i64 [[TMP9]])
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT13]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP41]], i64 [[TMP9]])
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT14]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP44]], i64 [[TMP9]])
; CHECK-ORDERED-TF-NEXT: [[TMP45:%.*]] = xor <vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
; CHECK-ORDERED-TF-NEXT: [[TMP46:%.*]] = extractelement <vscale x 8 x i1> [[TMP45]], i32 0
; CHECK-ORDERED-TF-NEXT: br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK-ORDERED-TF: middle.block:
; CHECK-ORDERED-TF-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK-ORDERED-TF: scalar.ph:
; CHECK-ORDERED-TF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-ORDERED-TF-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP35]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ]
; CHECK-ORDERED-TF-NEXT: br label [[FOR_BODY:%.*]]
; CHECK-ORDERED-TF: for.body:
; CHECK-ORDERED-TF-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-ORDERED-TF-NEXT: [[SUM_07:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
; CHECK-ORDERED-TF-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; CHECK-ORDERED-TF-NEXT: [[TMP47:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-ORDERED-TF-NEXT: [[ADD]] = fadd float [[TMP47]], [[SUM_07]]
; CHECK-ORDERED-TF-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-ORDERED-TF-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-ORDERED-TF-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK-ORDERED-TF: for.end:
; CHECK-ORDERED-TF-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP35]], [[MIDDLE_BLOCK]] ]
; CHECK-ORDERED-TF-NEXT: ret float [[ADD_LCSSA]]
;
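
; Reference loop: structurally identical to @fadd_strict apart from its
; !llvm.loop metadata (!1 instead of !0).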
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %sum.07 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ]
  %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv
  %0 = load float, ptr %arrayidx, align 4
  %add = fadd float %0, %sum.07
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %n
  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !1

for.end:
  ret float %add
}
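
; @fadd_strict_interleave: two independent strict fadd reductions advancing
; through %b with stride 2 (the odd element is addressed with `or disjoint`),
; so vectorization needs a deinterleaved, strided load and two reductions.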
define void @fadd_strict_interleave(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, i64 %n) #0 {
; CHECK-NOT-VECTORIZED-LABEL: define void @fadd_strict_interleave
; CHECK-NOT-VECTORIZED-SAME: (ptr noalias readonly captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; CHECK-NOT-VECTORIZED-NEXT: entry:
; CHECK-NOT-VECTORIZED-NEXT: [[ARRAYIDXA:%.*]] = getelementptr inbounds float, ptr [[A]], i64 1
; CHECK-NOT-VECTORIZED-NEXT: [[A1:%.*]] = load float, ptr [[A]], align 4
; CHECK-NOT-VECTORIZED-NEXT: [[A2:%.*]] = load float, ptr [[ARRAYIDXA]], align 4
; CHECK-NOT-VECTORIZED-NEXT: br label [[FOR_BODY:%.*]]
; CHECK-NOT-VECTORIZED: for.body:
; CHECK-NOT-VECTORIZED-NEXT: [[ADD_PHI1:%.*]] = phi float [ [[A2]], [[ENTRY:%.*]] ], [ [[ADD2:%.*]], [[FOR_BODY]] ]
; CHECK-NOT-VECTORIZED-NEXT: [[ADD_PHI2:%.*]] = phi float [ [[A1]], [[ENTRY]] ], [ [[ADD1:%.*]], [[FOR_BODY]] ]
; CHECK-NOT-VECTORIZED-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NOT-VECTORIZED-NEXT: [[ARRAYIDXB1:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
; CHECK-NOT-VECTORIZED-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDXB1]], align 4
; CHECK-NOT-VECTORIZED-NEXT: [[ADD1]] = fadd float [[TMP0]], [[ADD_PHI2]]
; CHECK-NOT-VECTORIZED-NEXT: [[OR:%.*]] = or disjoint i64 [[IV]], 1
; CHECK-NOT-VECTORIZED-NEXT: [[ARRAYIDXB2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[OR]]
; CHECK-NOT-VECTORIZED-NEXT: [[TMP1:%.*]] = load float, ptr [[ARRAYIDXB2]], align 4
; CHECK-NOT-VECTORIZED-NEXT: [[ADD2]] = fadd float [[TMP1]], [[ADD_PHI1]]
; CHECK-NOT-VECTORIZED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 2
; CHECK-NOT-VECTORIZED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-NOT-VECTORIZED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK-NOT-VECTORIZED: for.end:
; CHECK-NOT-VECTORIZED-NEXT: [[ADD1_LCSSA:%.*]] = phi float [ [[ADD1]], [[FOR_BODY]] ]
; CHECK-NOT-VECTORIZED-NEXT: [[ADD2_LCSSA:%.*]] = phi float [ [[ADD2]], [[FOR_BODY]] ]
; CHECK-NOT-VECTORIZED-NEXT: store float [[ADD1_LCSSA]], ptr [[A]], align 4
; CHECK-NOT-VECTORIZED-NEXT: store float [[ADD2_LCSSA]], ptr [[ARRAYIDXA]], align 4
; CHECK-NOT-VECTORIZED-NEXT: ret void
;
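; Unordered: one wide load is split with llvm.vector.deinterleave2 and each
; half feeds its own reassociated vector accumulator, seeded by inserting the
; scalar start values (the loads of a[0] and a[1]) into a -0.0 splat.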
; CHECK-UNORDERED-LABEL: define void @fadd_strict_interleave
; CHECK-UNORDERED-SAME: (ptr noalias readonly captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; CHECK-UNORDERED-NEXT: entry:
; CHECK-UNORDERED-NEXT: [[ARRAYIDXA:%.*]] = getelementptr inbounds float, ptr [[A]], i64 1
; CHECK-UNORDERED-NEXT: [[A1:%.*]] = load float, ptr [[A]], align 4
; CHECK-UNORDERED-NEXT: [[A2:%.*]] = load float, ptr [[ARRAYIDXA]], align 4
; CHECK-UNORDERED-NEXT: [[TMP0:%.*]] = add i64 [[N]], -2
; CHECK-UNORDERED-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
; CHECK-UNORDERED-NEXT: [[TMP2:%.*]] = add nuw i64 [[TMP1]], 1
; CHECK-UNORDERED-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4
; CHECK-UNORDERED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP4]]
; CHECK-UNORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-UNORDERED: vector.ph:
; CHECK-UNORDERED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4
; CHECK-UNORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], [[TMP6]]
; CHECK-UNORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4
; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = mul i64 [[N_VEC]], 2
; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = insertelement <vscale x 4 x float> splat (float -0.000000e+00), float [[A2]], i32 0
; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = insertelement <vscale x 4 x float> splat (float -0.000000e+00), float [[A1]], i32 0
; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-UNORDERED: vector.body:
; CHECK-UNORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x float> [ [[TMP10]], [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ]
; CHECK-UNORDERED-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 4 x float> [ [[TMP11]], [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ]
; CHECK-UNORDERED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-UNORDERED-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP12]]
; CHECK-UNORDERED-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x float>, ptr [[TMP13]], align 4
; CHECK-UNORDERED-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.vector.deinterleave2.nxv8f32(<vscale x 8 x float> [[WIDE_VEC]])
; CHECK-UNORDERED-NEXT: [[TMP14:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[STRIDED_VEC]], 0
; CHECK-UNORDERED-NEXT: [[TMP15:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[STRIDED_VEC]], 1
; CHECK-UNORDERED-NEXT: [[TMP16]] = fadd <vscale x 4 x float> [[TMP14]], [[VEC_PHI1]]
; CHECK-UNORDERED-NEXT: [[TMP17]] = fadd <vscale x 4 x float> [[TMP15]], [[VEC_PHI]]
; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]]
; CHECK-UNORDERED-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-UNORDERED-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK-UNORDERED: middle.block:
; CHECK-UNORDERED-NEXT: [[TMP19:%.*]] = call float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, <vscale x 4 x float> [[TMP17]])
; CHECK-UNORDERED-NEXT: [[TMP20:%.*]] = call float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, <vscale x 4 x float> [[TMP16]])
; CHECK-UNORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; CHECK-UNORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK-UNORDERED: scalar.ph:
; CHECK-UNORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP19]], [[MIDDLE_BLOCK]] ], [ [[A2]], [[ENTRY:%.*]] ]
; CHECK-UNORDERED-NEXT: [[BC_MERGE_RDX2:%.*]] = phi float [ [[TMP20]], [[MIDDLE_BLOCK]] ], [ [[A1]], [[ENTRY]] ]
; CHECK-UNORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP9]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; CHECK-UNORDERED-NEXT: br label [[FOR_BODY:%.*]]
; CHECK-UNORDERED: for.body:
; CHECK-UNORDERED-NEXT: [[ADD_PHI1:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD2:%.*]], [[FOR_BODY]] ]
; CHECK-UNORDERED-NEXT: [[ADD_PHI2:%.*]] = phi float [ [[BC_MERGE_RDX2]], [[SCALAR_PH]] ], [ [[ADD1:%.*]], [[FOR_BODY]] ]
; CHECK-UNORDERED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-UNORDERED-NEXT: [[ARRAYIDXB1:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
; CHECK-UNORDERED-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDXB1]], align 4
; CHECK-UNORDERED-NEXT: [[ADD1]] = fadd float [[TMP21]], [[ADD_PHI2]]
; CHECK-UNORDERED-NEXT: [[OR:%.*]] = or disjoint i64 [[IV]], 1
; CHECK-UNORDERED-NEXT: [[ARRAYIDXB2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[OR]]
; CHECK-UNORDERED-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDXB2]], align 4
; CHECK-UNORDERED-NEXT: [[ADD2]] = fadd float [[TMP22]], [[ADD_PHI1]]
; CHECK-UNORDERED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 2
; CHECK-UNORDERED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-UNORDERED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK-UNORDERED: for.end:
; CHECK-UNORDERED-NEXT: [[ADD1_LCSSA:%.*]] = phi float [ [[ADD1]], [[FOR_BODY]] ], [ [[TMP20]], [[MIDDLE_BLOCK]] ]
; CHECK-UNORDERED-NEXT: [[ADD2_LCSSA:%.*]] = phi float [ [[ADD2]], [[FOR_BODY]] ], [ [[TMP19]], [[MIDDLE_BLOCK]] ]
; CHECK-UNORDERED-NEXT: store float [[ADD1_LCSSA]], ptr [[A]], align 4
; CHECK-UNORDERED-NEXT: store float [[ADD2_LCSSA]], ptr [[ARRAYIDXA]], align 4
; CHECK-UNORDERED-NEXT: ret void
;
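; Ordered: the two reductions stay scalar phis and each deinterleaved half is
; folded in with its own in-order llvm.vector.reduce.fadd per iteration.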
; CHECK-ORDERED-LABEL: define void @fadd_strict_interleave
; CHECK-ORDERED-SAME: (ptr noalias readonly captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; CHECK-ORDERED-NEXT: entry:
; CHECK-ORDERED-NEXT: [[ARRAYIDXA:%.*]] = getelementptr inbounds float, ptr [[A]], i64 1
; CHECK-ORDERED-NEXT: [[A1:%.*]] = load float, ptr [[A]], align 4
; CHECK-ORDERED-NEXT: [[A2:%.*]] = load float, ptr [[ARRAYIDXA]], align 4
; CHECK-ORDERED-NEXT: [[TMP0:%.*]] = add i64 [[N]], -2
; CHECK-ORDERED-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
; CHECK-ORDERED-NEXT: [[TMP2:%.*]] = add nuw i64 [[TMP1]], 1
; CHECK-ORDERED-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4
; CHECK-ORDERED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP4]]
; CHECK-ORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-ORDERED: vector.ph:
; CHECK-ORDERED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4
; CHECK-ORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], [[TMP6]]
; CHECK-ORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4
; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = mul i64 [[N_VEC]], 2
; CHECK-ORDERED-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-ORDERED: vector.body:
; CHECK-ORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ [[A2]], [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-NEXT: [[VEC_PHI1:%.*]] = phi float [ [[A1]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
; CHECK-ORDERED-NEXT: [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-ORDERED-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP10]]
; CHECK-ORDERED-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x float>, ptr [[TMP11]], align 4
; CHECK-ORDERED-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.vector.deinterleave2.nxv8f32(<vscale x 8 x float> [[WIDE_VEC]])
; CHECK-ORDERED-NEXT: [[TMP12:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[STRIDED_VEC]], 0
; CHECK-ORDERED-NEXT: [[TMP13:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[STRIDED_VEC]], 1
; CHECK-ORDERED-NEXT: [[TMP14]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], <vscale x 4 x float> [[TMP13]])
; CHECK-ORDERED-NEXT: [[TMP15]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI1]], <vscale x 4 x float> [[TMP12]])
; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]]
; CHECK-ORDERED-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-ORDERED-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK-ORDERED: middle.block:
; CHECK-ORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; CHECK-ORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK-ORDERED: scalar.ph:
; CHECK-ORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP14]], [[MIDDLE_BLOCK]] ], [ [[A2]], [[ENTRY:%.*]] ]
; CHECK-ORDERED-NEXT: [[BC_MERGE_RDX2:%.*]] = phi float [ [[TMP15]], [[MIDDLE_BLOCK]] ], [ [[A1]], [[ENTRY]] ]
; CHECK-ORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP9]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; CHECK-ORDERED-NEXT: br label [[FOR_BODY:%.*]]
; CHECK-ORDERED: for.body:
; CHECK-ORDERED-NEXT: [[ADD_PHI1:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD2:%.*]], [[FOR_BODY]] ]
; CHECK-ORDERED-NEXT: [[ADD_PHI2:%.*]] = phi float [ [[BC_MERGE_RDX2]], [[SCALAR_PH]] ], [ [[ADD1:%.*]], [[FOR_BODY]] ]
; CHECK-ORDERED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-ORDERED-NEXT: [[ARRAYIDXB1:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
; CHECK-ORDERED-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDXB1]], align 4
; CHECK-ORDERED-NEXT: [[ADD1]] = fadd float [[TMP17]], [[ADD_PHI2]]
; CHECK-ORDERED-NEXT: [[OR:%.*]] = or disjoint i64 [[IV]], 1
; CHECK-ORDERED-NEXT: [[ARRAYIDXB2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[OR]]
; CHECK-ORDERED-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDXB2]], align 4
; CHECK-ORDERED-NEXT: [[ADD2]] = fadd float [[TMP18]], [[ADD_PHI1]]
; CHECK-ORDERED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 2
; CHECK-ORDERED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-ORDERED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK-ORDERED: for.end:
; CHECK-ORDERED-NEXT: [[ADD1_LCSSA:%.*]] = phi float [ [[ADD1]], [[FOR_BODY]] ], [ [[TMP15]], [[MIDDLE_BLOCK]] ]
; CHECK-ORDERED-NEXT: [[ADD2_LCSSA:%.*]] = phi float [ [[ADD2]], [[FOR_BODY]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ]
; CHECK-ORDERED-NEXT: store float [[ADD1_LCSSA]], ptr [[A]], align 4
; CHECK-ORDERED-NEXT: store float [[ADD2_LCSSA]], ptr [[ARRAYIDXA]], align 4
; CHECK-ORDERED-NEXT: ret void
;
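; Tail-folded: the nxv4i1 lane mask is doubled up with
; llvm.vector.interleave2 to predicate the wide nxv8f32 load, and each
; deinterleaved half is masked back to -0.0 before its in-order reduction.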
; CHECK-ORDERED-TF-LABEL: define void @fadd_strict_interleave
; CHECK-ORDERED-TF-SAME: (ptr noalias readonly captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; CHECK-ORDERED-TF-NEXT: entry:
; CHECK-ORDERED-TF-NEXT: [[ARRAYIDXA:%.*]] = getelementptr inbounds float, ptr [[A]], i64 1
; CHECK-ORDERED-TF-NEXT: [[A1:%.*]] = load float, ptr [[A]], align 4
; CHECK-ORDERED-TF-NEXT: [[A2:%.*]] = load float, ptr [[ARRAYIDXA]], align 4
; CHECK-ORDERED-TF-NEXT: [[TMP0:%.*]] = add i64 [[N]], -2
; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = add nuw i64 [[TMP1]], 1
; CHECK-ORDERED-TF-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-ORDERED-TF: vector.ph:
; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4
; CHECK-ORDERED-TF-NEXT: [[TMP5:%.*]] = sub i64 [[TMP4]], 1
; CHECK-ORDERED-TF-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP2]], [[TMP5]]
; CHECK-ORDERED-TF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP4]]
; CHECK-ORDERED-TF-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; CHECK-ORDERED-TF-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4
; CHECK-ORDERED-TF-NEXT: [[TMP8:%.*]] = mul i64 [[N_VEC]], 2
; CHECK-ORDERED-TF-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4
; CHECK-ORDERED-TF-NEXT: [[TMP11:%.*]] = sub i64 [[TMP2]], [[TMP10]]
; CHECK-ORDERED-TF-NEXT: [[TMP12:%.*]] = icmp ugt i64 [[TMP2]], [[TMP10]]
; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i64 [[TMP11]], i64 0
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[TMP2]])
; CHECK-ORDERED-TF-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-ORDERED-TF: vector.body:
; CHECK-ORDERED-TF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ [[A2]], [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-TF-NEXT: [[VEC_PHI1:%.*]] = phi float [ [[A1]], [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-TF-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP14]]
; CHECK-ORDERED-TF-NEXT: [[INTERLEAVED_MASK:%.*]] = call <vscale x 8 x i1> @llvm.vector.interleave2.nxv8i1(<vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP15]], i32 4, <vscale x 8 x i1> [[INTERLEAVED_MASK]], <vscale x 8 x float> poison)
; CHECK-ORDERED-TF-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.vector.deinterleave2.nxv8f32(<vscale x 8 x float> [[WIDE_MASKED_VEC]])
; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[STRIDED_VEC]], 0
; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[STRIDED_VEC]], 1
; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x float> [[TMP17]], <vscale x 4 x float> splat (float -0.000000e+00)
; CHECK-ORDERED-TF-NEXT: [[TMP19]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], <vscale x 4 x float> [[TMP18]])
; CHECK-ORDERED-TF-NEXT: [[TMP20:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x float> [[TMP16]], <vscale x 4 x float> splat (float -0.000000e+00)
; CHECK-ORDERED-TF-NEXT: [[TMP21]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI1]], <vscale x 4 x float> [[TMP20]])
; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP7]]
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP13]])
; CHECK-ORDERED-TF-NEXT: [[TMP22:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
; CHECK-ORDERED-TF-NEXT: [[TMP23:%.*]] = extractelement <vscale x 4 x i1> [[TMP22]], i32 0
; CHECK-ORDERED-TF-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK-ORDERED-TF: middle.block:
; CHECK-ORDERED-TF-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK-ORDERED-TF: scalar.ph:
; CHECK-ORDERED-TF-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP19]], [[MIDDLE_BLOCK]] ], [ [[A2]], [[ENTRY:%.*]] ]
; CHECK-ORDERED-TF-NEXT: [[BC_MERGE_RDX2:%.*]] = phi float [ [[TMP21]], [[MIDDLE_BLOCK]] ], [ [[A1]], [[ENTRY]] ]
; CHECK-ORDERED-TF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP8]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; CHECK-ORDERED-TF-NEXT: br label [[FOR_BODY:%.*]]
; CHECK-ORDERED-TF: for.body:
; CHECK-ORDERED-TF-NEXT: [[ADD_PHI1:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD2:%.*]], [[FOR_BODY]] ]
; CHECK-ORDERED-TF-NEXT: [[ADD_PHI2:%.*]] = phi float [ [[BC_MERGE_RDX2]], [[SCALAR_PH]] ], [ [[ADD1:%.*]], [[FOR_BODY]] ]
; CHECK-ORDERED-TF-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-ORDERED-TF-NEXT: [[ARRAYIDXB1:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
; CHECK-ORDERED-TF-NEXT: [[TMP24:%.*]] = load float, ptr [[ARRAYIDXB1]], align 4
; CHECK-ORDERED-TF-NEXT: [[ADD1]] = fadd float [[TMP24]], [[ADD_PHI2]]
; CHECK-ORDERED-TF-NEXT: [[OR:%.*]] = or disjoint i64 [[IV]], 1
; CHECK-ORDERED-TF-NEXT: [[ARRAYIDXB2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[OR]]
; CHECK-ORDERED-TF-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDXB2]], align 4
; CHECK-ORDERED-TF-NEXT: [[ADD2]] = fadd float [[TMP25]], [[ADD_PHI1]]
; CHECK-ORDERED-TF-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 2
; CHECK-ORDERED-TF-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-ORDERED-TF-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK-ORDERED-TF: for.end:
; CHECK-ORDERED-TF-NEXT: [[ADD1_LCSSA:%.*]] = phi float [ [[ADD1]], [[FOR_BODY]] ], [ [[TMP21]], [[MIDDLE_BLOCK]] ]
; CHECK-ORDERED-TF-NEXT: [[ADD2_LCSSA:%.*]] = phi float [ [[ADD2]], [[FOR_BODY]] ], [ [[TMP19]], [[MIDDLE_BLOCK]] ]
; CHECK-ORDERED-TF-NEXT: store float [[ADD1_LCSSA]], ptr [[A]], align 4
; CHECK-ORDERED-TF-NEXT: store float [[ADD2_LCSSA]], ptr [[ARRAYIDXA]], align 4
; CHECK-ORDERED-TF-NEXT: ret void
;
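
; Reference loop: %iv steps by 2, the two accumulators are initialised from
; a[0] and a[1], and the final sums are stored back to those slots in
; for.end.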
entry:
  %arrayidxa = getelementptr inbounds float, ptr %a, i64 1
  %a1 = load float, ptr %a, align 4
  %a2 = load float, ptr %arrayidxa, align 4
  br label %for.body

for.body:
  %add.phi1 = phi float [ %a2, %entry ], [ %add2, %for.body ]
  %add.phi2 = phi float [ %a1, %entry ], [ %add1, %for.body ]
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %arrayidxb1 = getelementptr inbounds float, ptr %b, i64 %iv
  %0 = load float, ptr %arrayidxb1, align 4
  %add1 = fadd float %0, %add.phi2
  %or = or disjoint i64 %iv, 1
  %arrayidxb2 = getelementptr inbounds float, ptr %b, i64 %or
  %1 = load float, ptr %arrayidxb2, align 4
  %add2 = fadd float %1, %add.phi1
  %iv.next = add nuw nsw i64 %iv, 2
  %exitcond.not = icmp eq i64 %iv.next, %n
  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !2

for.end:
  store float %add1, ptr %a, align 4
  store float %add2, ptr %arrayidxa, align 4
  ret void
}
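
; @fadd_of_sum: reduces a[i] + b[i], guarded by a compare of a[1] against
; 0.5, so the reduction loop is only reached through for.body.preheader.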
define float @fadd_of_sum(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, i64 %n) #0 {
; CHECK-NOT-VECTORIZED-LABEL: define float @fadd_of_sum
; CHECK-NOT-VECTORIZED-SAME: (ptr noalias readonly captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; CHECK-NOT-VECTORIZED-NEXT: entry:
; CHECK-NOT-VECTORIZED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 1
; CHECK-NOT-VECTORIZED-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-NOT-VECTORIZED-NEXT: [[CMP1:%.*]] = fcmp ogt float [[TMP0]], 5.000000e-01
; CHECK-NOT-VECTORIZED-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
; CHECK-NOT-VECTORIZED: for.body.preheader:
; CHECK-NOT-VECTORIZED-NEXT: br label [[FOR_BODY:%.*]]
; CHECK-NOT-VECTORIZED: for.body:
; CHECK-NOT-VECTORIZED-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NOT-VECTORIZED-NEXT: [[RES_014:%.*]] = phi float [ [[RDX:%.*]], [[FOR_BODY]] ], [ 0.000000e+00, [[FOR_BODY_PREHEADER]] ]
; CHECK-NOT-VECTORIZED-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; CHECK-NOT-VECTORIZED-NEXT: [[TMP1:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
; CHECK-NOT-VECTORIZED-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
; CHECK-NOT-VECTORIZED-NEXT: [[TMP2:%.*]] = load float, ptr [[ARRAYIDX4]], align 4
; CHECK-NOT-VECTORIZED-NEXT: [[ADD:%.*]] = fadd float [[TMP1]], [[TMP2]]
; CHECK-NOT-VECTORIZED-NEXT: [[RDX]] = fadd float [[RES_014]], [[ADD]]
; CHECK-NOT-VECTORIZED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NOT-VECTORIZED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-NOT-VECTORIZED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP6]]
; CHECK-NOT-VECTORIZED: for.end.loopexit:
; CHECK-NOT-VECTORIZED-NEXT: [[RDX_LCSSA:%.*]] = phi float [ [[RDX]], [[FOR_BODY]] ]
; CHECK-NOT-VECTORIZED-NEXT: br label [[FOR_END]]
; CHECK-NOT-VECTORIZED: for.end:
; CHECK-NOT-VECTORIZED-NEXT: [[RES:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[RDX_LCSSA]], [[FOR_END_LOOPEXIT]] ]
; CHECK-NOT-VECTORIZED-NEXT: ret float [[RES]]
;
; CHECK-UNORDERED-LABEL: define float @fadd_of_sum
; CHECK-UNORDERED-SAME: (ptr noalias readonly captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; CHECK-UNORDERED-NEXT: entry:
; CHECK-UNORDERED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 1
; CHECK-UNORDERED-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-UNORDERED-NEXT: [[CMP1:%.*]] = fcmp ogt float [[TMP0]], 5.000000e-01
; CHECK-UNORDERED-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
; CHECK-UNORDERED: for.body.preheader:
; CHECK-UNORDERED-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-UNORDERED-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4
; CHECK-UNORDERED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]]
; CHECK-UNORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-UNORDERED: vector.ph:
; CHECK-UNORDERED-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4
; CHECK-UNORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP4]]
; CHECK-UNORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-UNORDERED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4
; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-UNORDERED: vector.body:
; CHECK-UNORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x float> [ insertelement (<vscale x 4 x float> splat (float -0.000000e+00), float 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ]
; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0
; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP7]]
; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 0
; CHECK-UNORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP9]], align 4
; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP7]]
; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i32 0
; CHECK-UNORDERED-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x float>, ptr [[TMP11]], align 4
; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = fadd <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]]
; CHECK-UNORDERED-NEXT: [[TMP13]] = fadd <vscale x 4 x float> [[VEC_PHI]], [[TMP12]]
; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
; CHECK-UNORDERED-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-UNORDERED-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK-UNORDERED: middle.block:
; CHECK-UNORDERED-NEXT: [[TMP15:%.*]] = call float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, <vscale x 4 x float> [[TMP13]])
; CHECK-UNORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-UNORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; CHECK-UNORDERED: scalar.ph:
; CHECK-UNORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-UNORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP15]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[FOR_BODY_PREHEADER]] ]
; CHECK-UNORDERED-NEXT: br label [[FOR_BODY:%.*]]
; CHECK-UNORDERED: for.body:
; CHECK-UNORDERED-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-UNORDERED-NEXT: [[RES_014:%.*]] = phi float [ [[RDX:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; CHECK-UNORDERED-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; CHECK-UNORDERED-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
; CHECK-UNORDERED-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
; CHECK-UNORDERED-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX4]], align 4
; CHECK-UNORDERED-NEXT: [[ADD:%.*]] = fadd float [[TMP16]], [[TMP17]]
; CHECK-UNORDERED-NEXT: [[RDX]] = fadd float [[RES_014]], [[ADD]]
; CHECK-UNORDERED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-UNORDERED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-UNORDERED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK-UNORDERED: for.end.loopexit:
; CHECK-UNORDERED-NEXT: [[RDX_LCSSA:%.*]] = phi float [ [[RDX]], [[FOR_BODY]] ], [ [[TMP15]], [[MIDDLE_BLOCK]] ]
; CHECK-UNORDERED-NEXT: br label [[FOR_END]]
; CHECK-UNORDERED: for.end:
; CHECK-UNORDERED-NEXT: [[RES:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[RDX_LCSSA]], [[FOR_END_LOOPEXIT]] ]
; CHECK-UNORDERED-NEXT: ret float [[RES]]
;
; CHECK-ORDERED-LABEL: define float @fadd_of_sum
; CHECK-ORDERED-SAME: (ptr noalias readonly captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; CHECK-ORDERED-NEXT: entry:
; CHECK-ORDERED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 1
; CHECK-ORDERED-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-ORDERED-NEXT: [[CMP1:%.*]] = fcmp ogt float [[TMP0]], 5.000000e-01
; CHECK-ORDERED-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
; CHECK-ORDERED: for.body.preheader:
; CHECK-ORDERED-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4
; CHECK-ORDERED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]]
; CHECK-ORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-ORDERED: vector.ph:
; CHECK-ORDERED-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4
; CHECK-ORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP4]]
; CHECK-ORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-ORDERED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4
; CHECK-ORDERED-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-ORDERED: vector.body:
; CHECK-ORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0
; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP7]]
; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 0
; CHECK-ORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP9]], align 4
; CHECK-ORDERED-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP7]]
; CHECK-ORDERED-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i32 0
; CHECK-ORDERED-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x float>, ptr [[TMP11]], align 4
; CHECK-ORDERED-NEXT: [[TMP12:%.*]] = fadd <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]]
; CHECK-ORDERED-NEXT: [[TMP13]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], <vscale x 4 x float> [[TMP12]])
; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
; CHECK-ORDERED-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-ORDERED-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK-ORDERED: middle.block:
; CHECK-ORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-ORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; CHECK-ORDERED: scalar.ph:
; CHECK-ORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-ORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP13]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[FOR_BODY_PREHEADER]] ]
; CHECK-ORDERED-NEXT: br label [[FOR_BODY:%.*]]
; CHECK-ORDERED: for.body:
; CHECK-ORDERED-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-ORDERED-NEXT: [[RES_014:%.*]] = phi float [ [[RDX:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; CHECK-ORDERED-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; CHECK-ORDERED-NEXT: [[TMP15:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
; CHECK-ORDERED-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
; CHECK-ORDERED-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX4]], align 4
; CHECK-ORDERED-NEXT: [[ADD:%.*]] = fadd float [[TMP15]], [[TMP16]]
; CHECK-ORDERED-NEXT: [[RDX]] = fadd float [[RES_014]], [[ADD]]
; CHECK-ORDERED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-ORDERED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-ORDERED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK-ORDERED: for.end.loopexit:
; CHECK-ORDERED-NEXT: [[RDX_LCSSA:%.*]] = phi float [ [[RDX]], [[FOR_BODY]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ]
; CHECK-ORDERED-NEXT: br label [[FOR_END]]
; CHECK-ORDERED: for.end:
; CHECK-ORDERED-NEXT: [[RES:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[RDX_LCSSA]], [[FOR_END_LOOPEXIT]] ]
; CHECK-ORDERED-NEXT: ret float [[RES]]
;
; CHECK-ORDERED-TF-LABEL: define float @fadd_of_sum
; CHECK-ORDERED-TF-SAME: (ptr noalias readonly captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; CHECK-ORDERED-TF-NEXT: entry:
; CHECK-ORDERED-TF-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 1
; CHECK-ORDERED-TF-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-ORDERED-TF-NEXT: [[CMP1:%.*]] = fcmp ogt float [[TMP0]], 5.000000e-01
; CHECK-ORDERED-TF-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
; CHECK-ORDERED-TF: for.body.preheader:
; CHECK-ORDERED-TF-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-ORDERED-TF: vector.ph:
; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4
; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = sub i64 [[TMP2]], 1
; CHECK-ORDERED-TF-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP3]]
; CHECK-ORDERED-TF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP2]]
; CHECK-ORDERED-TF-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; CHECK-ORDERED-TF-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4
; CHECK-ORDERED-TF-NEXT: [[TMP8:%.*]] = sub i64 [[N]], [[TMP7]]
; CHECK-ORDERED-TF-NEXT: [[TMP9:%.*]] = icmp ugt i64 [[N]], [[TMP7]]
; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i64 [[TMP8]], i64 0
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[N]])
; CHECK-ORDERED-TF-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-ORDERED-TF: vector.body:
; CHECK-ORDERED-TF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-TF-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 0
; CHECK-ORDERED-TF-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP11]]
; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i32 0
; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr [[TMP13]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x float> poison)
; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP11]]
; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0
; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr [[TMP15]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x float> poison)
; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = fadd <vscale x 4 x float> [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD1]]
; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x float> [[TMP16]], <vscale x 4 x float> splat (float -0.000000e+00)
; CHECK-ORDERED-TF-NEXT: [[TMP18]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], <vscale x 4 x float> [[TMP17]])
; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP5]]
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP10]])
; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
; CHECK-ORDERED-TF-NEXT: [[TMP20:%.*]] = extractelement <vscale x 4 x i1> [[TMP19]], i32 0
; CHECK-ORDERED-TF-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK-ORDERED-TF: middle.block:
; CHECK-ORDERED-TF-NEXT: br i1 true, label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; CHECK-ORDERED-TF: scalar.ph:
; CHECK-ORDERED-TF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-ORDERED-TF-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP18]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[FOR_BODY_PREHEADER]] ]
; CHECK-ORDERED-TF-NEXT: br label [[FOR_BODY:%.*]]
; CHECK-ORDERED-TF: for.body:
; CHECK-ORDERED-TF-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-ORDERED-TF-NEXT: [[RES_014:%.*]] = phi float [ [[RDX:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; CHECK-ORDERED-TF-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; CHECK-ORDERED-TF-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
; CHECK-ORDERED-TF-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
; CHECK-ORDERED-TF-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX4]], align 4
; CHECK-ORDERED-TF-NEXT: [[ADD:%.*]] = fadd float [[TMP21]], [[TMP22]]
; CHECK-ORDERED-TF-NEXT: [[RDX]] = fadd float [[RES_014]], [[ADD]]
; CHECK-ORDERED-TF-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-ORDERED-TF-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-ORDERED-TF-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK-ORDERED-TF: for.end.loopexit:
; CHECK-ORDERED-TF-NEXT: [[RDX_LCSSA:%.*]] = phi float [ [[RDX]], [[FOR_BODY]] ], [ [[TMP18]], [[MIDDLE_BLOCK]] ]
; CHECK-ORDERED-TF-NEXT: br label [[FOR_END]]
; CHECK-ORDERED-TF: for.end:
; CHECK-ORDERED-TF-NEXT: [[RES:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[RDX_LCSSA]], [[FOR_END_LOOPEXIT]] ]
; CHECK-ORDERED-TF-NEXT: ret float [[RES]]
;
entry:
  %arrayidx = getelementptr inbounds float, ptr %a, i64 1
  %0 = load float, ptr %arrayidx, align 4
  %cmp1 = fcmp ogt float %0, 5.000000e-01
  br i1 %cmp1, label %for.body, label %for.end

for.body: ; preds = %for.body
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %res.014 = phi float [ 0.000000e+00, %entry ], [ %rdx, %for.body ]
  %arrayidx2 = getelementptr inbounds float, ptr %a, i64 %iv
  %1 = load float, ptr %arrayidx2, align 4
  %arrayidx4 = getelementptr inbounds float, ptr %b, i64 %iv
  %2 = load float, ptr %arrayidx4, align 4
  %add = fadd float %1, %2
  %rdx = fadd float %res.014, %add
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %n
  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !2

for.end: ; preds = %for.body, %entry
  %res = phi float [ 0.000000e+00, %entry ], [ %rdx, %for.body ]
  ret float %res
}
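
; The next function tests an ordered reduction with a conditional add in the
; loop body. An approximate C equivalent, added purely as an illustration
; (names and types are assumed; it is not part of the generated checks):
;
;   float fadd_conditional(float *a, float *b, long n) {
;     float res = 1.0f;
;     for (long i = 0; i < n; ++i)
;       res += (b[i] != 0.0f) ? a[i] : 3.0f;
;     return res;
;   }
;
; The vectorized forms select between the masked load of a[] and a splat of
; 3.0 before feeding the reduction.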
define float @fadd_conditional(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, i64 %n) #0 {
; CHECK-NOT-VECTORIZED-LABEL: define float @fadd_conditional
; CHECK-NOT-VECTORIZED-SAME: (ptr noalias readonly captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; CHECK-NOT-VECTORIZED-NEXT: entry:
; CHECK-NOT-VECTORIZED-NEXT: br label [[FOR_BODY:%.*]]
; CHECK-NOT-VECTORIZED: for.body:
; CHECK-NOT-VECTORIZED-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
; CHECK-NOT-VECTORIZED-NEXT: [[RES:%.*]] = phi float [ 1.000000e+00, [[ENTRY]] ], [ [[FADD:%.*]], [[FOR_INC]] ]
; CHECK-NOT-VECTORIZED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
; CHECK-NOT-VECTORIZED-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-NOT-VECTORIZED-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP0]], 0.000000e+00
; CHECK-NOT-VECTORIZED-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[FOR_INC]]
; CHECK-NOT-VECTORIZED: if.then:
; CHECK-NOT-VECTORIZED-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; CHECK-NOT-VECTORIZED-NEXT: [[TMP1:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
; CHECK-NOT-VECTORIZED-NEXT: br label [[FOR_INC]]
; CHECK-NOT-VECTORIZED: for.inc:
; CHECK-NOT-VECTORIZED-NEXT: [[PHI:%.*]] = phi float [ [[TMP1]], [[IF_THEN]] ], [ 3.000000e+00, [[FOR_BODY]] ]
; CHECK-NOT-VECTORIZED-NEXT: [[FADD]] = fadd float [[RES]], [[PHI]]
; CHECK-NOT-VECTORIZED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NOT-VECTORIZED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-NOT-VECTORIZED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP6]]
; CHECK-NOT-VECTORIZED: for.end:
; CHECK-NOT-VECTORIZED-NEXT: [[RDX:%.*]] = phi float [ [[FADD]], [[FOR_INC]] ]
; CHECK-NOT-VECTORIZED-NEXT: ret float [[RDX]]
;
; CHECK-UNORDERED-LABEL: define float @fadd_conditional
; CHECK-UNORDERED-SAME: (ptr noalias readonly captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; CHECK-UNORDERED-NEXT: entry:
; CHECK-UNORDERED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-UNORDERED-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; CHECK-UNORDERED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
; CHECK-UNORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-UNORDERED: vector.ph:
; CHECK-UNORDERED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-UNORDERED-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; CHECK-UNORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; CHECK-UNORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-UNORDERED-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-UNORDERED: vector.body:
; CHECK-UNORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x float> [ insertelement (<vscale x 4 x float> splat (float -0.000000e+00), float 1.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ]
; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0
; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP6]]
; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0
; CHECK-UNORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP8]], align 4
; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = fcmp une <vscale x 4 x float> [[WIDE_LOAD]], zeroinitializer
; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = getelementptr float, ptr [[A]], i64 [[TMP6]]
; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = getelementptr float, ptr [[TMP10]], i32 0
; CHECK-UNORDERED-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr [[TMP11]], i32 4, <vscale x 4 x i1> [[TMP9]], <vscale x 4 x float> poison)
; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = xor <vscale x 4 x i1> [[TMP9]], splat (i1 true)
; CHECK-UNORDERED-NEXT: [[PREDPHI:%.*]] = select <vscale x 4 x i1> [[TMP12]], <vscale x 4 x float> splat (float 3.000000e+00), <vscale x 4 x float> [[WIDE_MASKED_LOAD]]
; CHECK-UNORDERED-NEXT: [[TMP13]] = fadd <vscale x 4 x float> [[VEC_PHI]], [[PREDPHI]]
; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-UNORDERED-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-UNORDERED-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK-UNORDERED: middle.block:
; CHECK-UNORDERED-NEXT: [[TMP15:%.*]] = call float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, <vscale x 4 x float> [[TMP13]])
; CHECK-UNORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-UNORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK-UNORDERED: scalar.ph:
; CHECK-UNORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-UNORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP15]], [[MIDDLE_BLOCK]] ], [ 1.000000e+00, [[ENTRY]] ]
; CHECK-UNORDERED-NEXT: br label [[FOR_BODY:%.*]]
; CHECK-UNORDERED: for.body:
; CHECK-UNORDERED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
; CHECK-UNORDERED-NEXT: [[RES:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[FADD:%.*]], [[FOR_INC]] ]
; CHECK-UNORDERED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
; CHECK-UNORDERED-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-UNORDERED-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP16]], 0.000000e+00
; CHECK-UNORDERED-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[FOR_INC]]
; CHECK-UNORDERED: if.then:
; CHECK-UNORDERED-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; CHECK-UNORDERED-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
; CHECK-UNORDERED-NEXT: br label [[FOR_INC]]
; CHECK-UNORDERED: for.inc:
; CHECK-UNORDERED-NEXT: [[PHI:%.*]] = phi float [ [[TMP17]], [[IF_THEN]] ], [ 3.000000e+00, [[FOR_BODY]] ]
; CHECK-UNORDERED-NEXT: [[FADD]] = fadd float [[RES]], [[PHI]]
; CHECK-UNORDERED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-UNORDERED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-UNORDERED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; CHECK-UNORDERED: for.end:
; CHECK-UNORDERED-NEXT: [[RDX:%.*]] = phi float [ [[FADD]], [[FOR_INC]] ], [ [[TMP15]], [[MIDDLE_BLOCK]] ]
; CHECK-UNORDERED-NEXT: ret float [[RDX]]
;
; CHECK-ORDERED-LABEL: define float @fadd_conditional
; CHECK-ORDERED-SAME: (ptr noalias readonly captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; CHECK-ORDERED-NEXT: entry:
; CHECK-ORDERED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; CHECK-ORDERED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
; CHECK-ORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-ORDERED: vector.ph:
; CHECK-ORDERED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; CHECK-ORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; CHECK-ORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-ORDERED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; CHECK-ORDERED-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-ORDERED: vector.body:
; CHECK-ORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ 1.000000e+00, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0
; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP6]]
; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0
; CHECK-ORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP8]], align 4
; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = fcmp une <vscale x 4 x float> [[WIDE_LOAD]], zeroinitializer
; CHECK-ORDERED-NEXT: [[TMP10:%.*]] = getelementptr float, ptr [[A]], i64 [[TMP6]]
; CHECK-ORDERED-NEXT: [[TMP11:%.*]] = getelementptr float, ptr [[TMP10]], i32 0
; CHECK-ORDERED-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr [[TMP11]], i32 4, <vscale x 4 x i1> [[TMP9]], <vscale x 4 x float> poison)
; CHECK-ORDERED-NEXT: [[TMP12:%.*]] = xor <vscale x 4 x i1> [[TMP9]], splat (i1 true)
; CHECK-ORDERED-NEXT: [[PREDPHI:%.*]] = select <vscale x 4 x i1> [[TMP12]], <vscale x 4 x float> splat (float 3.000000e+00), <vscale x 4 x float> [[WIDE_MASKED_LOAD]]
; CHECK-ORDERED-NEXT: [[TMP13]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], <vscale x 4 x float> [[PREDPHI]])
; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-ORDERED-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-ORDERED-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK-ORDERED: middle.block:
; CHECK-ORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-ORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK-ORDERED: scalar.ph:
; CHECK-ORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-ORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP13]], [[MIDDLE_BLOCK]] ], [ 1.000000e+00, [[ENTRY]] ]
; CHECK-ORDERED-NEXT: br label [[FOR_BODY:%.*]]
; CHECK-ORDERED: for.body:
; CHECK-ORDERED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
; CHECK-ORDERED-NEXT: [[RES:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[FADD:%.*]], [[FOR_INC]] ]
; CHECK-ORDERED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
; CHECK-ORDERED-NEXT: [[TMP15:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-ORDERED-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP15]], 0.000000e+00
; CHECK-ORDERED-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[FOR_INC]]
; CHECK-ORDERED: if.then:
; CHECK-ORDERED-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; CHECK-ORDERED-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
; CHECK-ORDERED-NEXT: br label [[FOR_INC]]
; CHECK-ORDERED: for.inc:
; CHECK-ORDERED-NEXT: [[PHI:%.*]] = phi float [ [[TMP16]], [[IF_THEN]] ], [ 3.000000e+00, [[FOR_BODY]] ]
; CHECK-ORDERED-NEXT: [[FADD]] = fadd float [[RES]], [[PHI]]
; CHECK-ORDERED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-ORDERED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-ORDERED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; CHECK-ORDERED: for.end:
; CHECK-ORDERED-NEXT: [[RDX:%.*]] = phi float [ [[FADD]], [[FOR_INC]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ]
; CHECK-ORDERED-NEXT: ret float [[RDX]]
;
; CHECK-ORDERED-TF-LABEL: define float @fadd_conditional
; CHECK-ORDERED-TF-SAME: (ptr noalias readonly captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; CHECK-ORDERED-TF-NEXT: entry:
; CHECK-ORDERED-TF-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-ORDERED-TF: vector.ph:
; CHECK-ORDERED-TF-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
; CHECK-ORDERED-TF-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP2]]
; CHECK-ORDERED-TF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; CHECK-ORDERED-TF-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4
; CHECK-ORDERED-TF-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4
; CHECK-ORDERED-TF-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP6]]
; CHECK-ORDERED-TF-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[N]], [[TMP6]]
; CHECK-ORDERED-TF-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[N]])
; CHECK-ORDERED-TF-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-ORDERED-TF: vector.body:
; CHECK-ORDERED-TF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ 1.000000e+00, [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0
; CHECK-ORDERED-TF-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP10]]
; CHECK-ORDERED-TF-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i32 0
; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr [[TMP12]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x float> poison)
; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = fcmp une <vscale x 4 x float> [[WIDE_MASKED_LOAD]], zeroinitializer
; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i1> [[TMP13]], <vscale x 4 x i1> zeroinitializer
; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = getelementptr float, ptr [[A]], i64 [[TMP10]]
; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = getelementptr float, ptr [[TMP15]], i32 0
; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr [[TMP16]], i32 4, <vscale x 4 x i1> [[TMP14]], <vscale x 4 x float> poison)
; CHECK-ORDERED-TF-NEXT: [[PREDPHI:%.*]] = select <vscale x 4 x i1> [[TMP14]], <vscale x 4 x float> [[WIDE_MASKED_LOAD1]], <vscale x 4 x float> splat (float 3.000000e+00)
; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x float> [[PREDPHI]], <vscale x 4 x float> splat (float -0.000000e+00)
; CHECK-ORDERED-TF-NEXT: [[TMP18]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], <vscale x 4 x float> [[TMP17]])
; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]]
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP9]])
; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
; CHECK-ORDERED-TF-NEXT: [[TMP20:%.*]] = extractelement <vscale x 4 x i1> [[TMP19]], i32 0
; CHECK-ORDERED-TF-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK-ORDERED-TF: middle.block:
; CHECK-ORDERED-TF-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK-ORDERED-TF: scalar.ph:
; CHECK-ORDERED-TF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-ORDERED-TF-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP18]], [[MIDDLE_BLOCK]] ], [ 1.000000e+00, [[ENTRY]] ]
; CHECK-ORDERED-TF-NEXT: br label [[FOR_BODY:%.*]]
; CHECK-ORDERED-TF: for.body:
; CHECK-ORDERED-TF-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
; CHECK-ORDERED-TF-NEXT: [[RES:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[FADD:%.*]], [[FOR_INC]] ]
; CHECK-ORDERED-TF-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
; CHECK-ORDERED-TF-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-ORDERED-TF-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP21]], 0.000000e+00
; CHECK-ORDERED-TF-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[FOR_INC]]
; CHECK-ORDERED-TF: if.then:
; CHECK-ORDERED-TF-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; CHECK-ORDERED-TF-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
; CHECK-ORDERED-TF-NEXT: br label [[FOR_INC]]
; CHECK-ORDERED-TF: for.inc:
; CHECK-ORDERED-TF-NEXT: [[PHI:%.*]] = phi float [ [[TMP22]], [[IF_THEN]] ], [ 3.000000e+00, [[FOR_BODY]] ]
; CHECK-ORDERED-TF-NEXT: [[FADD]] = fadd float [[RES]], [[PHI]]
; CHECK-ORDERED-TF-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-ORDERED-TF-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-ORDERED-TF-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; CHECK-ORDERED-TF: for.end:
; CHECK-ORDERED-TF-NEXT: [[RDX:%.*]] = phi float [ [[FADD]], [[FOR_INC]] ], [ [[TMP18]], [[MIDDLE_BLOCK]] ]
; CHECK-ORDERED-TF-NEXT: ret float [[RDX]]
;
entry:
  br label %for.body

for.body: ; preds = %for.body
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ]
  %res = phi float [ 1.000000e+00, %entry ], [ %fadd, %for.inc ]
  %arrayidx = getelementptr inbounds float, ptr %b, i64 %iv
  %0 = load float, ptr %arrayidx, align 4
  %tobool = fcmp une float %0, 0.000000e+00
  br i1 %tobool, label %if.then, label %for.inc

if.then: ; preds = %for.body
  %arrayidx2 = getelementptr inbounds float, ptr %a, i64 %iv
  %1 = load float, ptr %arrayidx2, align 4
  br label %for.inc

for.inc:
  %phi = phi float [ %1, %if.then ], [ 3.000000e+00, %for.body ]
  %fadd = fadd float %res, %phi
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %n
  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !2

for.end:
  %rdx = phi float [ %fadd, %for.inc ]
  ret float %rdx
}

; Negative test - loop contains multiple fadds which we cannot safely reorder
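; An approximate C equivalent of the pattern, added purely as an illustration
; (names and types are assumed):
;
;   float fadd_multiple(float *a, float *b, long n) {
;     float sum = -0.0f;
;     for (long i = 0; i < n; ++i) {
;       sum += a[i];  // first fadd of the chain
;       sum += b[i];  // second fadd consumes the first
;     }
;     return sum;
;   }
;
; Because the two dependent fadds cannot be folded into a single in-order
; reduction without reordering FP operations, the CHECK-ORDERED and
; CHECK-ORDERED-TF runs below keep the scalar loop; only CHECK-UNORDERED
; vectorizes it.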
define float @fadd_multiple(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %n) #0 {
; CHECK-NOT-VECTORIZED-LABEL: define float @fadd_multiple
; CHECK-NOT-VECTORIZED-SAME: (ptr noalias captures(none) [[A:%.*]], ptr noalias captures(none) [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; CHECK-NOT-VECTORIZED-NEXT: entry:
; CHECK-NOT-VECTORIZED-NEXT: br label [[FOR_BODY:%.*]]
; CHECK-NOT-VECTORIZED: for.body:
; CHECK-NOT-VECTORIZED-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NOT-VECTORIZED-NEXT: [[SUM:%.*]] = phi float [ -0.000000e+00, [[ENTRY]] ], [ [[ADD3:%.*]], [[FOR_BODY]] ]
; CHECK-NOT-VECTORIZED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; CHECK-NOT-VECTORIZED-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-NOT-VECTORIZED-NEXT: [[ADD:%.*]] = fadd float [[SUM]], [[TMP0]]
; CHECK-NOT-VECTORIZED-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
; CHECK-NOT-VECTORIZED-NEXT: [[TMP1:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
; CHECK-NOT-VECTORIZED-NEXT: [[ADD3]] = fadd float [[ADD]], [[TMP1]]
; CHECK-NOT-VECTORIZED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NOT-VECTORIZED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-NOT-VECTORIZED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0]]
; CHECK-NOT-VECTORIZED: for.end:
; CHECK-NOT-VECTORIZED-NEXT: [[RDX:%.*]] = phi float [ [[ADD3]], [[FOR_BODY]] ]
; CHECK-NOT-VECTORIZED-NEXT: ret float [[RDX]]
;
; CHECK-UNORDERED-LABEL: define float @fadd_multiple
; CHECK-UNORDERED-SAME: (ptr noalias captures(none) [[A:%.*]], ptr noalias captures(none) [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; CHECK-UNORDERED-NEXT: entry:
; CHECK-UNORDERED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-UNORDERED-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 8
; CHECK-UNORDERED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
; CHECK-UNORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-UNORDERED: vector.ph:
; CHECK-UNORDERED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-UNORDERED-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 8
; CHECK-UNORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; CHECK-UNORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-UNORDERED-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 8
; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-UNORDERED: vector.body:
; CHECK-UNORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x float> [ insertelement (<vscale x 8 x float> splat (float -0.000000e+00), float -0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0
; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP6]]
; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0
; CHECK-UNORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x float>, ptr [[TMP8]], align 4
; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = fadd <vscale x 8 x float> [[VEC_PHI]], [[WIDE_LOAD]]
; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP6]]
; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i32 0
; CHECK-UNORDERED-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 8 x float>, ptr [[TMP11]], align 4
; CHECK-UNORDERED-NEXT: [[TMP12]] = fadd <vscale x 8 x float> [[TMP9]], [[WIDE_LOAD1]]
; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-UNORDERED-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-UNORDERED-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK-UNORDERED: middle.block:
; CHECK-UNORDERED-NEXT: [[TMP14:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float -0.000000e+00, <vscale x 8 x float> [[TMP12]])
; CHECK-UNORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-UNORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK-UNORDERED: scalar.ph:
; CHECK-UNORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-UNORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP14]], [[MIDDLE_BLOCK]] ], [ -0.000000e+00, [[ENTRY]] ]
; CHECK-UNORDERED-NEXT: br label [[FOR_BODY:%.*]]
; CHECK-UNORDERED: for.body:
; CHECK-UNORDERED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-UNORDERED-NEXT: [[SUM:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD3:%.*]], [[FOR_BODY]] ]
; CHECK-UNORDERED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; CHECK-UNORDERED-NEXT: [[TMP15:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-UNORDERED-NEXT: [[ADD:%.*]] = fadd float [[SUM]], [[TMP15]]
; CHECK-UNORDERED-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
; CHECK-UNORDERED-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
; CHECK-UNORDERED-NEXT: [[ADD3]] = fadd float [[ADD]], [[TMP16]]
; CHECK-UNORDERED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-UNORDERED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-UNORDERED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; CHECK-UNORDERED: for.end:
; CHECK-UNORDERED-NEXT: [[RDX:%.*]] = phi float [ [[ADD3]], [[FOR_BODY]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ]
; CHECK-UNORDERED-NEXT: ret float [[RDX]]
;
; CHECK-ORDERED-LABEL: define float @fadd_multiple
; CHECK-ORDERED-SAME: (ptr noalias captures(none) [[A:%.*]], ptr noalias captures(none) [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; CHECK-ORDERED-NEXT: entry:
; CHECK-ORDERED-NEXT: br label [[FOR_BODY:%.*]]
; CHECK-ORDERED: for.body:
; CHECK-ORDERED-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-ORDERED-NEXT: [[SUM:%.*]] = phi float [ -0.000000e+00, [[ENTRY]] ], [ [[ADD3:%.*]], [[FOR_BODY]] ]
; CHECK-ORDERED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; CHECK-ORDERED-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-ORDERED-NEXT: [[ADD:%.*]] = fadd float [[SUM]], [[TMP0]]
; CHECK-ORDERED-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
; CHECK-ORDERED-NEXT: [[TMP1:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
; CHECK-ORDERED-NEXT: [[ADD3]] = fadd float [[ADD]], [[TMP1]]
; CHECK-ORDERED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-ORDERED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-ORDERED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK-ORDERED: for.end:
; CHECK-ORDERED-NEXT: [[RDX:%.*]] = phi float [ [[ADD3]], [[FOR_BODY]] ]
; CHECK-ORDERED-NEXT: ret float [[RDX]]
;
; CHECK-ORDERED-TF-LABEL: define float @fadd_multiple
; CHECK-ORDERED-TF-SAME: (ptr noalias captures(none) [[A:%.*]], ptr noalias captures(none) [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; CHECK-ORDERED-TF-NEXT: entry:
; CHECK-ORDERED-TF-NEXT: br label [[FOR_BODY:%.*]]
; CHECK-ORDERED-TF: for.body:
; CHECK-ORDERED-TF-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-ORDERED-TF-NEXT: [[SUM:%.*]] = phi float [ -0.000000e+00, [[ENTRY]] ], [ [[ADD3:%.*]], [[FOR_BODY]] ]
; CHECK-ORDERED-TF-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; CHECK-ORDERED-TF-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-ORDERED-TF-NEXT: [[ADD:%.*]] = fadd float [[SUM]], [[TMP0]]
; CHECK-ORDERED-TF-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
; CHECK-ORDERED-TF-NEXT: [[ADD3]] = fadd float [[ADD]], [[TMP1]]
; CHECK-ORDERED-TF-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-ORDERED-TF-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-ORDERED-TF-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK-ORDERED-TF: for.end:
; CHECK-ORDERED-TF-NEXT: [[RDX:%.*]] = phi float [ [[ADD3]], [[FOR_BODY]] ]
; CHECK-ORDERED-TF-NEXT: ret float [[RDX]]
;
entry:
  br label %for.body

for.body: ; preds = %entry, %for.body
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %sum = phi float [ -0.000000e+00, %entry ], [ %add3, %for.body ]
  %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv
  %0 = load float, ptr %arrayidx, align 4
  %add = fadd float %sum, %0
  %arrayidx2 = getelementptr inbounds float, ptr %b, i64 %iv
  %1 = load float, ptr %arrayidx2, align 4
  %add3 = fadd float %add, %1
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %n
  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0

for.end: ; preds = %for.body
  %rdx = phi float [ %add3, %for.body ]
  ret float %rdx
}

; Test case where loop has a call to the llvm.fmuladd intrinsic.
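; An approximate C equivalent, added purely as an illustration (names and
; types are assumed; fmaf is the closest C analogue of llvm.fmuladd, which
; permits but does not require fusion):
;
;   float fmuladd_strict(float *a, float *b, long n) {
;     float sum = 0.0f;
;     for (long i = 0; i < n; ++i)
;       sum = fmaf(a[i], b[i], sum);
;     return sum;
;   }
;
; In the checks below the ordered runs split the intrinsic into an fmul
; followed by an in-order llvm.vector.reduce.fadd, while CHECK-UNORDERED keeps
; whole-vector llvm.fmuladd calls and reduces once after the loop.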
define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 {
; CHECK-NOT-VECTORIZED-LABEL: define float @fmuladd_strict
; CHECK-NOT-VECTORIZED-SAME: (ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; CHECK-NOT-VECTORIZED-NEXT: entry:
; CHECK-NOT-VECTORIZED-NEXT: br label [[FOR_BODY:%.*]]
; CHECK-NOT-VECTORIZED: for.body:
; CHECK-NOT-VECTORIZED-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NOT-VECTORIZED-NEXT: [[SUM_07:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[MULADD:%.*]], [[FOR_BODY]] ]
; CHECK-NOT-VECTORIZED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; CHECK-NOT-VECTORIZED-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-NOT-VECTORIZED-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
; CHECK-NOT-VECTORIZED-NEXT: [[TMP1:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
; CHECK-NOT-VECTORIZED-NEXT: [[MULADD]] = tail call float @llvm.fmuladd.f32(float [[TMP0]], float [[TMP1]], float [[SUM_07]])
; CHECK-NOT-VECTORIZED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NOT-VECTORIZED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-NOT-VECTORIZED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP4]]
; CHECK-NOT-VECTORIZED: for.end:
; CHECK-NOT-VECTORIZED-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ]
; CHECK-NOT-VECTORIZED-NEXT: ret float [[MULADD_LCSSA]]
;
; CHECK-UNORDERED-LABEL: define float @fmuladd_strict
; CHECK-UNORDERED-SAME: (ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; CHECK-UNORDERED-NEXT: entry:
; CHECK-UNORDERED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-UNORDERED-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 32
; CHECK-UNORDERED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
; CHECK-UNORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-UNORDERED: vector.ph:
; CHECK-UNORDERED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-UNORDERED-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 32
; CHECK-UNORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; CHECK-UNORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-UNORDERED-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 32
; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-UNORDERED: vector.body:
; CHECK-UNORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x float> [ insertelement (<vscale x 8 x float> splat (float -0.000000e+00), float 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP29:%.*]], [[VECTOR_BODY]] ]
; CHECK-UNORDERED-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP30:%.*]], [[VECTOR_BODY]] ]
; CHECK-UNORDERED-NEXT: [[VEC_PHI2:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP31:%.*]], [[VECTOR_BODY]] ]
; CHECK-UNORDERED-NEXT: [[VEC_PHI3:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP32:%.*]], [[VECTOR_BODY]] ]
; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0
; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP6]]
; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0
; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 8
; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[TMP10]]
; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-UNORDERED-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 16
; CHECK-UNORDERED-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[TMP13]]
; CHECK-UNORDERED-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-UNORDERED-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 24
; CHECK-UNORDERED-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[TMP16]]
; CHECK-UNORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x float>, ptr [[TMP8]], align 4
; CHECK-UNORDERED-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 8 x float>, ptr [[TMP11]], align 4
; CHECK-UNORDERED-NEXT: [[WIDE_LOAD5:%.*]] = load <vscale x 8 x float>, ptr [[TMP14]], align 4
; CHECK-UNORDERED-NEXT: [[WIDE_LOAD6:%.*]] = load <vscale x 8 x float>, ptr [[TMP17]], align 4
; CHECK-UNORDERED-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP6]]
; CHECK-UNORDERED-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i32 0
; CHECK-UNORDERED-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-UNORDERED-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 8
; CHECK-UNORDERED-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[TMP21]]
; CHECK-UNORDERED-NEXT: [[TMP23:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-UNORDERED-NEXT: [[TMP24:%.*]] = mul i64 [[TMP23]], 16
; CHECK-UNORDERED-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[TMP24]]
; CHECK-UNORDERED-NEXT: [[TMP26:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-UNORDERED-NEXT: [[TMP27:%.*]] = mul i64 [[TMP26]], 24
; CHECK-UNORDERED-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[TMP27]]
; CHECK-UNORDERED-NEXT: [[WIDE_LOAD7:%.*]] = load <vscale x 8 x float>, ptr [[TMP19]], align 4
; CHECK-UNORDERED-NEXT: [[WIDE_LOAD8:%.*]] = load <vscale x 8 x float>, ptr [[TMP22]], align 4
; CHECK-UNORDERED-NEXT: [[WIDE_LOAD9:%.*]] = load <vscale x 8 x float>, ptr [[TMP25]], align 4
; CHECK-UNORDERED-NEXT: [[WIDE_LOAD10:%.*]] = load <vscale x 8 x float>, ptr [[TMP28]], align 4
; CHECK-UNORDERED-NEXT: [[TMP29]] = call <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD]], <vscale x 8 x float> [[WIDE_LOAD7]], <vscale x 8 x float> [[VEC_PHI]])
; CHECK-UNORDERED-NEXT: [[TMP30]] = call <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD4]], <vscale x 8 x float> [[WIDE_LOAD8]], <vscale x 8 x float> [[VEC_PHI1]])
; CHECK-UNORDERED-NEXT: [[TMP31]] = call <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD5]], <vscale x 8 x float> [[WIDE_LOAD9]], <vscale x 8 x float> [[VEC_PHI2]])
; CHECK-UNORDERED-NEXT: [[TMP32]] = call <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD6]], <vscale x 8 x float> [[WIDE_LOAD10]], <vscale x 8 x float> [[VEC_PHI3]])
; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-UNORDERED-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-UNORDERED-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; CHECK-UNORDERED: middle.block:
; CHECK-UNORDERED-NEXT: [[BIN_RDX:%.*]] = fadd <vscale x 8 x float> [[TMP30]], [[TMP29]]
; CHECK-UNORDERED-NEXT: [[BIN_RDX11:%.*]] = fadd <vscale x 8 x float> [[TMP31]], [[BIN_RDX]]
; CHECK-UNORDERED-NEXT: [[BIN_RDX12:%.*]] = fadd <vscale x 8 x float> [[TMP32]], [[BIN_RDX11]]
; CHECK-UNORDERED-NEXT: [[TMP34:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float -0.000000e+00, <vscale x 8 x float> [[BIN_RDX12]])
; CHECK-UNORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-UNORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK-UNORDERED: scalar.ph:
; CHECK-UNORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-UNORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP34]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ]
; CHECK-UNORDERED-NEXT: br label [[FOR_BODY:%.*]]
; CHECK-UNORDERED: for.body:
; CHECK-UNORDERED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-UNORDERED-NEXT: [[SUM_07:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MULADD:%.*]], [[FOR_BODY]] ]
; CHECK-UNORDERED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; CHECK-UNORDERED-NEXT: [[TMP35:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-UNORDERED-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
; CHECK-UNORDERED-NEXT: [[TMP36:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
; CHECK-UNORDERED-NEXT: [[MULADD]] = tail call float @llvm.fmuladd.f32(float [[TMP35]], float [[TMP36]], float [[SUM_07]])
; CHECK-UNORDERED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-UNORDERED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-UNORDERED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
; CHECK-UNORDERED: for.end:
; CHECK-UNORDERED-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP34]], [[MIDDLE_BLOCK]] ]
; CHECK-UNORDERED-NEXT: ret float [[MULADD_LCSSA]]
;
; CHECK-ORDERED-LABEL: define float @fmuladd_strict
; CHECK-ORDERED-SAME: (ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; CHECK-ORDERED-NEXT: entry:
; CHECK-ORDERED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 32
; CHECK-ORDERED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
; CHECK-ORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-ORDERED: vector.ph:
; CHECK-ORDERED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 32
; CHECK-ORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; CHECK-ORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-ORDERED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 32
; CHECK-ORDERED-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-ORDERED: vector.body:
; CHECK-ORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP36:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0
; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP6]]
; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0
; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 8
; CHECK-ORDERED-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[TMP10]]
; CHECK-ORDERED-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 16
; CHECK-ORDERED-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[TMP13]]
; CHECK-ORDERED-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 24
; CHECK-ORDERED-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[TMP16]]
; CHECK-ORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x float>, ptr [[TMP8]], align 4
; CHECK-ORDERED-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 8 x float>, ptr [[TMP11]], align 4
; CHECK-ORDERED-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 8 x float>, ptr [[TMP14]], align 4
; CHECK-ORDERED-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 8 x float>, ptr [[TMP17]], align 4
; CHECK-ORDERED-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP6]]
; CHECK-ORDERED-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i32 0
; CHECK-ORDERED-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 8
; CHECK-ORDERED-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[TMP21]]
; CHECK-ORDERED-NEXT: [[TMP23:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-NEXT: [[TMP24:%.*]] = mul i64 [[TMP23]], 16
; CHECK-ORDERED-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[TMP24]]
; CHECK-ORDERED-NEXT: [[TMP26:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-NEXT: [[TMP27:%.*]] = mul i64 [[TMP26]], 24
; CHECK-ORDERED-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[TMP27]]
; CHECK-ORDERED-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 8 x float>, ptr [[TMP19]], align 4
; CHECK-ORDERED-NEXT: [[WIDE_LOAD5:%.*]] = load <vscale x 8 x float>, ptr [[TMP22]], align 4
; CHECK-ORDERED-NEXT: [[WIDE_LOAD6:%.*]] = load <vscale x 8 x float>, ptr [[TMP25]], align 4
; CHECK-ORDERED-NEXT: [[WIDE_LOAD7:%.*]] = load <vscale x 8 x float>, ptr [[TMP28]], align 4
; CHECK-ORDERED-NEXT: [[TMP29:%.*]] = fmul <vscale x 8 x float> [[WIDE_LOAD]], [[WIDE_LOAD4]]
; CHECK-ORDERED-NEXT: [[TMP30:%.*]] = fmul <vscale x 8 x float> [[WIDE_LOAD1]], [[WIDE_LOAD5]]
; CHECK-ORDERED-NEXT: [[TMP31:%.*]] = fmul <vscale x 8 x float> [[WIDE_LOAD2]], [[WIDE_LOAD6]]
; CHECK-ORDERED-NEXT: [[TMP32:%.*]] = fmul <vscale x 8 x float> [[WIDE_LOAD3]], [[WIDE_LOAD7]]
; CHECK-ORDERED-NEXT: [[TMP33:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], <vscale x 8 x float> [[TMP29]])
; CHECK-ORDERED-NEXT: [[TMP34:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP33]], <vscale x 8 x float> [[TMP30]])
; CHECK-ORDERED-NEXT: [[TMP35:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP34]], <vscale x 8 x float> [[TMP31]])
; CHECK-ORDERED-NEXT: [[TMP36]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP35]], <vscale x 8 x float> [[TMP32]])
; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-ORDERED-NEXT: [[TMP37:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-ORDERED-NEXT: br i1 [[TMP37]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; CHECK-ORDERED: middle.block:
; CHECK-ORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-ORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK-ORDERED: scalar.ph:
; CHECK-ORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-ORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP36]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ]
; CHECK-ORDERED-NEXT: br label [[FOR_BODY:%.*]]
; CHECK-ORDERED: for.body:
; CHECK-ORDERED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-ORDERED-NEXT: [[SUM_07:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MULADD:%.*]], [[FOR_BODY]] ]
; CHECK-ORDERED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; CHECK-ORDERED-NEXT: [[TMP38:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-ORDERED-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
; CHECK-ORDERED-NEXT: [[TMP39:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
; CHECK-ORDERED-NEXT: [[MULADD]] = tail call float @llvm.fmuladd.f32(float [[TMP38]], float [[TMP39]], float [[SUM_07]])
; CHECK-ORDERED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-ORDERED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-ORDERED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
; CHECK-ORDERED: for.end:
; CHECK-ORDERED-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP36]], [[MIDDLE_BLOCK]] ]
; CHECK-ORDERED-NEXT: ret float [[MULADD_LCSSA]]
;
; CHECK-ORDERED-TF-LABEL: define float @fmuladd_strict
; CHECK-ORDERED-TF-SAME: (ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; CHECK-ORDERED-TF-NEXT: entry:
; CHECK-ORDERED-TF-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-ORDERED-TF: vector.ph:
; CHECK-ORDERED-TF-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 32
; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
; CHECK-ORDERED-TF-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP2]]
; CHECK-ORDERED-TF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; CHECK-ORDERED-TF-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 32
; CHECK-ORDERED-TF-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 32
; CHECK-ORDERED-TF-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP6]]
; CHECK-ORDERED-TF-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[N]], [[TMP6]]
; CHECK-ORDERED-TF-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0
; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 8
; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT:%.*]] = add i64 0, [[TMP11]]
; CHECK-ORDERED-TF-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 16
; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT1:%.*]] = add i64 0, [[TMP13]]
; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 24
; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT2:%.*]] = add i64 0, [[TMP15]]
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[N]])
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY3:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_PART_NEXT]], i64 [[N]])
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY4:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_PART_NEXT1]], i64 [[N]])
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY5:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_PART_NEXT2]], i64 [[N]])
; CHECK-ORDERED-TF-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-ORDERED-TF: vector.body:
; CHECK-ORDERED-TF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK6:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY3]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT16:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK7:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY4]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT17:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK8:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY5]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT18:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP50:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], 0
; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP16]]
; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0
1610; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64() 1611; CHECK-ORDERED-TF-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 8 1612; CHECK-ORDERED-TF-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP20]] 1613; CHECK-ORDERED-TF-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64() 1614; CHECK-ORDERED-TF-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 16 1615; CHECK-ORDERED-TF-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP23]] 1616; CHECK-ORDERED-TF-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64() 1617; CHECK-ORDERED-TF-NEXT: [[TMP26:%.*]] = mul i64 [[TMP25]], 24 1618; CHECK-ORDERED-TF-NEXT: [[TMP27:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP26]] 1619; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP18]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> poison) 1620; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP21]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> poison) 1621; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD10:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP24]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> poison) 1622; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP27]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> poison) 1623; CHECK-ORDERED-TF-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP16]] 1624; CHECK-ORDERED-TF-NEXT: [[TMP29:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i32 0 1625; CHECK-ORDERED-TF-NEXT: [[TMP30:%.*]] = call i64 @llvm.vscale.i64() 1626; CHECK-ORDERED-TF-NEXT: [[TMP31:%.*]] = mul i64 [[TMP30]], 8 1627; CHECK-ORDERED-TF-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i64 [[TMP31]] 1628; CHECK-ORDERED-TF-NEXT: [[TMP33:%.*]] = call i64 @llvm.vscale.i64() 1629; CHECK-ORDERED-TF-NEXT: [[TMP34:%.*]] = mul i64 [[TMP33]], 16 1630; CHECK-ORDERED-TF-NEXT: [[TMP35:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i64 [[TMP34]] 1631; CHECK-ORDERED-TF-NEXT: [[TMP36:%.*]] = call i64 @llvm.vscale.i64() 1632; CHECK-ORDERED-TF-NEXT: [[TMP37:%.*]] = mul i64 [[TMP36]], 24 1633; CHECK-ORDERED-TF-NEXT: [[TMP38:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i64 [[TMP37]] 1634; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD12:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP29]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> poison) 1635; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD13:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP32]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> poison) 1636; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD14:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP35]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> poison) 1637; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP38]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> poison) 1638; CHECK-ORDERED-TF-NEXT: [[TMP39:%.*]] = fmul <vscale x 8 x float> [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD12]] 1639; CHECK-ORDERED-TF-NEXT: [[TMP40:%.*]] = fmul <vscale x 8 x float> [[WIDE_MASKED_LOAD9]], [[WIDE_MASKED_LOAD13]] 1640; CHECK-ORDERED-TF-NEXT: 
[[TMP41:%.*]] = fmul <vscale x 8 x float> [[WIDE_MASKED_LOAD10]], [[WIDE_MASKED_LOAD14]] 1641; CHECK-ORDERED-TF-NEXT: [[TMP42:%.*]] = fmul <vscale x 8 x float> [[WIDE_MASKED_LOAD11]], [[WIDE_MASKED_LOAD15]] 1642; CHECK-ORDERED-TF-NEXT: [[TMP43:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> [[TMP39]], <vscale x 8 x float> splat (float -0.000000e+00) 1643; CHECK-ORDERED-TF-NEXT: [[TMP44:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], <vscale x 8 x float> [[TMP43]]) 1644; CHECK-ORDERED-TF-NEXT: [[TMP45:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> [[TMP40]], <vscale x 8 x float> splat (float -0.000000e+00) 1645; CHECK-ORDERED-TF-NEXT: [[TMP46:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP44]], <vscale x 8 x float> [[TMP45]]) 1646; CHECK-ORDERED-TF-NEXT: [[TMP47:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> [[TMP41]], <vscale x 8 x float> splat (float -0.000000e+00) 1647; CHECK-ORDERED-TF-NEXT: [[TMP48:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP46]], <vscale x 8 x float> [[TMP47]]) 1648; CHECK-ORDERED-TF-NEXT: [[TMP49:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> [[TMP42]], <vscale x 8 x float> splat (float -0.000000e+00) 1649; CHECK-ORDERED-TF-NEXT: [[TMP50]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP48]], <vscale x 8 x float> [[TMP49]]) 1650; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] 1651; CHECK-ORDERED-TF-NEXT: [[TMP51:%.*]] = call i64 @llvm.vscale.i64() 1652; CHECK-ORDERED-TF-NEXT: [[TMP52:%.*]] = mul i64 [[TMP51]], 8 1653; CHECK-ORDERED-TF-NEXT: [[TMP53:%.*]] = add i64 [[INDEX]], [[TMP52]] 1654; CHECK-ORDERED-TF-NEXT: [[TMP54:%.*]] = call i64 @llvm.vscale.i64() 1655; CHECK-ORDERED-TF-NEXT: [[TMP55:%.*]] = mul i64 [[TMP54]], 16 1656; CHECK-ORDERED-TF-NEXT: [[TMP56:%.*]] = add i64 [[INDEX]], [[TMP55]] 1657; CHECK-ORDERED-TF-NEXT: [[TMP57:%.*]] = call i64 @llvm.vscale.i64() 1658; CHECK-ORDERED-TF-NEXT: [[TMP58:%.*]] = mul i64 [[TMP57]], 24 1659; CHECK-ORDERED-TF-NEXT: [[TMP59:%.*]] = add i64 [[INDEX]], [[TMP58]] 1660; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX]], i64 [[TMP9]]) 1661; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT16]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP53]], i64 [[TMP9]]) 1662; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT17]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP56]], i64 [[TMP9]]) 1663; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT18]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP59]], i64 [[TMP9]]) 1664; CHECK-ORDERED-TF-NEXT: [[TMP60:%.*]] = xor <vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) 1665; CHECK-ORDERED-TF-NEXT: [[TMP61:%.*]] = extractelement <vscale x 8 x i1> [[TMP60]], i32 0 1666; CHECK-ORDERED-TF-NEXT: br i1 [[TMP61]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] 1667; CHECK-ORDERED-TF: middle.block: 1668; CHECK-ORDERED-TF-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] 1669; CHECK-ORDERED-TF: scalar.ph: 1670; CHECK-ORDERED-TF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1671; CHECK-ORDERED-TF-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP50]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] 1672; CHECK-ORDERED-TF-NEXT: br label [[FOR_BODY:%.*]] 1673; 
CHECK-ORDERED-TF: for.body: 1674; CHECK-ORDERED-TF-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 1675; CHECK-ORDERED-TF-NEXT: [[SUM_07:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MULADD:%.*]], [[FOR_BODY]] ] 1676; CHECK-ORDERED-TF-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] 1677; CHECK-ORDERED-TF-NEXT: [[TMP62:%.*]] = load float, ptr [[ARRAYIDX]], align 4 1678; CHECK-ORDERED-TF-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]] 1679; CHECK-ORDERED-TF-NEXT: [[TMP63:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 1680; CHECK-ORDERED-TF-NEXT: [[MULADD]] = tail call float @llvm.fmuladd.f32(float [[TMP62]], float [[TMP63]], float [[SUM_07]]) 1681; CHECK-ORDERED-TF-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1682; CHECK-ORDERED-TF-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 1683; CHECK-ORDERED-TF-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] 1684; CHECK-ORDERED-TF: for.end: 1685; CHECK-ORDERED-TF-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP50]], [[MIDDLE_BLOCK]] ] 1686; CHECK-ORDERED-TF-NEXT: ret float [[MULADD_LCSSA]] 1687; 1688 1689 1690 1691 1692entry: 1693 br label %for.body 1694 1695for.body: 1696 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 1697 %sum.07 = phi float [ 0.000000e+00, %entry ], [ %muladd, %for.body ] 1698 %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv 1699 %0 = load float, ptr %arrayidx, align 4 1700 %arrayidx2 = getelementptr inbounds float, ptr %b, i64 %iv 1701 %1 = load float, ptr %arrayidx2, align 4 1702 %muladd = tail call float @llvm.fmuladd.f32(float %0, float %1, float %sum.07) 1703 %iv.next = add nuw nsw i64 %iv, 1 1704 %exitcond.not = icmp eq i64 %iv.next, %n 1705 br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !1 1706 1707for.end: 1708 ret float %muladd 1709} 1710 1711; Same as above but where the call to the llvm.fmuladd intrinsic has a fast-math flag. 
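; Note: nnan on its own is not enough to permit reassociating the reduction,
; so the CHECK-ORDERED and CHECK-ORDERED-TF expectations below still use the
; chained in-order reduction, now carrying the flag. A minimal sketch of the
; per-part pattern expected below (value names are illustrative, not the
; autogenerated ones):
;   %prod = fmul nnan <vscale x 8 x float> %va, %vb
;   %acc.next = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float %acc, <vscale x 8 x float> %prod)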
define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 {
; CHECK-NOT-VECTORIZED-LABEL: define float @fmuladd_strict_fmf
; CHECK-NOT-VECTORIZED-SAME: (ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; CHECK-NOT-VECTORIZED-NEXT: entry:
; CHECK-NOT-VECTORIZED-NEXT: br label [[FOR_BODY:%.*]]
; CHECK-NOT-VECTORIZED: for.body:
; CHECK-NOT-VECTORIZED-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NOT-VECTORIZED-NEXT: [[SUM_07:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[MULADD:%.*]], [[FOR_BODY]] ]
; CHECK-NOT-VECTORIZED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; CHECK-NOT-VECTORIZED-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-NOT-VECTORIZED-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
; CHECK-NOT-VECTORIZED-NEXT: [[TMP1:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
; CHECK-NOT-VECTORIZED-NEXT: [[MULADD]] = tail call nnan float @llvm.fmuladd.f32(float [[TMP0]], float [[TMP1]], float [[SUM_07]])
; CHECK-NOT-VECTORIZED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NOT-VECTORIZED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-NOT-VECTORIZED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP4]]
; CHECK-NOT-VECTORIZED: for.end:
; CHECK-NOT-VECTORIZED-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ]
; CHECK-NOT-VECTORIZED-NEXT: ret float [[MULADD_LCSSA]]
;
; CHECK-UNORDERED-LABEL: define float @fmuladd_strict_fmf
; CHECK-UNORDERED-SAME: (ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; CHECK-UNORDERED-NEXT: entry:
; CHECK-UNORDERED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-UNORDERED-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 32
; CHECK-UNORDERED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
; CHECK-UNORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-UNORDERED: vector.ph:
; CHECK-UNORDERED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-UNORDERED-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 32
; CHECK-UNORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; CHECK-UNORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-UNORDERED-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 32
; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-UNORDERED: vector.body:
; CHECK-UNORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x float> [ insertelement (<vscale x 8 x float> splat (float -0.000000e+00), float 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP29:%.*]], [[VECTOR_BODY]] ]
; CHECK-UNORDERED-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP30:%.*]], [[VECTOR_BODY]] ]
; CHECK-UNORDERED-NEXT: [[VEC_PHI2:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP31:%.*]], [[VECTOR_BODY]] ]
; CHECK-UNORDERED-NEXT: [[VEC_PHI3:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP32:%.*]], [[VECTOR_BODY]] ]
; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0
; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP6]]
; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0
; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 8
; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[TMP10]]
; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-UNORDERED-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 16
; CHECK-UNORDERED-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[TMP13]]
; CHECK-UNORDERED-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-UNORDERED-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 24
; CHECK-UNORDERED-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[TMP16]]
; CHECK-UNORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x float>, ptr [[TMP8]], align 4
; CHECK-UNORDERED-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 8 x float>, ptr [[TMP11]], align 4
; CHECK-UNORDERED-NEXT: [[WIDE_LOAD5:%.*]] = load <vscale x 8 x float>, ptr [[TMP14]], align 4
; CHECK-UNORDERED-NEXT: [[WIDE_LOAD6:%.*]] = load <vscale x 8 x float>, ptr [[TMP17]], align 4
; CHECK-UNORDERED-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP6]]
; CHECK-UNORDERED-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i32 0
; CHECK-UNORDERED-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-UNORDERED-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 8
; CHECK-UNORDERED-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[TMP21]]
; CHECK-UNORDERED-NEXT: [[TMP23:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-UNORDERED-NEXT: [[TMP24:%.*]] = mul i64 [[TMP23]], 16
; CHECK-UNORDERED-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[TMP24]]
; CHECK-UNORDERED-NEXT: [[TMP26:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-UNORDERED-NEXT: [[TMP27:%.*]] = mul i64 [[TMP26]], 24
; CHECK-UNORDERED-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[TMP27]]
; CHECK-UNORDERED-NEXT: [[WIDE_LOAD7:%.*]] = load <vscale x 8 x float>, ptr [[TMP19]], align 4
; CHECK-UNORDERED-NEXT: [[WIDE_LOAD8:%.*]] = load <vscale x 8 x float>, ptr [[TMP22]], align 4
; CHECK-UNORDERED-NEXT: [[WIDE_LOAD9:%.*]] = load <vscale x 8 x float>, ptr [[TMP25]], align 4
; CHECK-UNORDERED-NEXT: [[WIDE_LOAD10:%.*]] = load <vscale x 8 x float>, ptr [[TMP28]], align 4
; CHECK-UNORDERED-NEXT: [[TMP29]] = call nnan <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD]], <vscale x 8 x float> [[WIDE_LOAD7]], <vscale x 8 x float> [[VEC_PHI]])
; CHECK-UNORDERED-NEXT: [[TMP30]] = call nnan <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD4]], <vscale x 8 x float> [[WIDE_LOAD8]], <vscale x 8 x float> [[VEC_PHI1]])
; CHECK-UNORDERED-NEXT: [[TMP31]] = call nnan <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD5]], <vscale x 8 x float> [[WIDE_LOAD9]], <vscale x 8 x float> [[VEC_PHI2]])
; CHECK-UNORDERED-NEXT: [[TMP32]] = call nnan <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD6]], <vscale x 8 x float> [[WIDE_LOAD10]], <vscale x 8 x float> [[VEC_PHI3]])
; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-UNORDERED-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-UNORDERED-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; CHECK-UNORDERED: middle.block:
; CHECK-UNORDERED-NEXT: [[BIN_RDX:%.*]] = fadd nnan <vscale x 8 x float> [[TMP30]], [[TMP29]]
; CHECK-UNORDERED-NEXT: [[BIN_RDX11:%.*]] = fadd nnan <vscale x 8 x float> [[TMP31]], [[BIN_RDX]]
; CHECK-UNORDERED-NEXT: [[BIN_RDX12:%.*]] = fadd nnan <vscale x 8 x float> [[TMP32]], [[BIN_RDX11]]
; CHECK-UNORDERED-NEXT: [[TMP34:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float -0.000000e+00, <vscale x 8 x float> [[BIN_RDX12]])
; CHECK-UNORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-UNORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK-UNORDERED: scalar.ph:
; CHECK-UNORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-UNORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP34]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ]
; CHECK-UNORDERED-NEXT: br label [[FOR_BODY:%.*]]
; CHECK-UNORDERED: for.body:
; CHECK-UNORDERED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-UNORDERED-NEXT: [[SUM_07:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MULADD:%.*]], [[FOR_BODY]] ]
; CHECK-UNORDERED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; CHECK-UNORDERED-NEXT: [[TMP35:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-UNORDERED-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
; CHECK-UNORDERED-NEXT: [[TMP36:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
; CHECK-UNORDERED-NEXT: [[MULADD]] = tail call nnan float @llvm.fmuladd.f32(float [[TMP35]], float [[TMP36]], float [[SUM_07]])
; CHECK-UNORDERED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-UNORDERED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-UNORDERED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
; CHECK-UNORDERED: for.end:
; CHECK-UNORDERED-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP34]], [[MIDDLE_BLOCK]] ]
; CHECK-UNORDERED-NEXT: ret float [[MULADD_LCSSA]]
;
; CHECK-ORDERED-LABEL: define float @fmuladd_strict_fmf
; CHECK-ORDERED-SAME: (ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; CHECK-ORDERED-NEXT: entry:
; CHECK-ORDERED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 32
; CHECK-ORDERED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
; CHECK-ORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-ORDERED: vector.ph:
; CHECK-ORDERED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 32
; CHECK-ORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; CHECK-ORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-ORDERED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 32
; CHECK-ORDERED-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-ORDERED: vector.body:
; CHECK-ORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP36:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0
; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP6]]
; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0
; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 8
; CHECK-ORDERED-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[TMP10]]
; CHECK-ORDERED-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 16
; CHECK-ORDERED-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[TMP13]]
; CHECK-ORDERED-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 24
; CHECK-ORDERED-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[TMP16]]
; CHECK-ORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x float>, ptr [[TMP8]], align 4
; CHECK-ORDERED-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 8 x float>, ptr [[TMP11]], align 4
; CHECK-ORDERED-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 8 x float>, ptr [[TMP14]], align 4
; CHECK-ORDERED-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 8 x float>, ptr [[TMP17]], align 4
; CHECK-ORDERED-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP6]]
; CHECK-ORDERED-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i32 0
; CHECK-ORDERED-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 8
; CHECK-ORDERED-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[TMP21]]
; CHECK-ORDERED-NEXT: [[TMP23:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-NEXT: [[TMP24:%.*]] = mul i64 [[TMP23]], 16
; CHECK-ORDERED-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[TMP24]]
; CHECK-ORDERED-NEXT: [[TMP26:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-NEXT: [[TMP27:%.*]] = mul i64 [[TMP26]], 24
; CHECK-ORDERED-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[TMP27]]
; CHECK-ORDERED-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 8 x float>, ptr [[TMP19]], align 4
; CHECK-ORDERED-NEXT: [[WIDE_LOAD5:%.*]] = load <vscale x 8 x float>, ptr [[TMP22]], align 4
; CHECK-ORDERED-NEXT: [[WIDE_LOAD6:%.*]] = load <vscale x 8 x float>, ptr [[TMP25]], align 4
; CHECK-ORDERED-NEXT: [[WIDE_LOAD7:%.*]] = load <vscale x 8 x float>, ptr [[TMP28]], align 4
; CHECK-ORDERED-NEXT: [[TMP29:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_LOAD]], [[WIDE_LOAD4]]
; CHECK-ORDERED-NEXT: [[TMP30:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_LOAD1]], [[WIDE_LOAD5]]
; CHECK-ORDERED-NEXT: [[TMP31:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_LOAD2]], [[WIDE_LOAD6]]
; CHECK-ORDERED-NEXT: [[TMP32:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_LOAD3]], [[WIDE_LOAD7]]
; CHECK-ORDERED-NEXT: [[TMP33:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], <vscale x 8 x float> [[TMP29]])
; CHECK-ORDERED-NEXT: [[TMP34:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP33]], <vscale x 8 x float> [[TMP30]])
; CHECK-ORDERED-NEXT: [[TMP35:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP34]], <vscale x 8 x float> [[TMP31]])
; CHECK-ORDERED-NEXT: [[TMP36]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP35]], <vscale x 8 x float> [[TMP32]])
; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-ORDERED-NEXT: [[TMP37:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-ORDERED-NEXT: br i1 [[TMP37]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK-ORDERED: middle.block:
; CHECK-ORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-ORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK-ORDERED: scalar.ph:
; CHECK-ORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-ORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP36]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ]
; CHECK-ORDERED-NEXT: br label [[FOR_BODY:%.*]]
; CHECK-ORDERED: for.body:
; CHECK-ORDERED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-ORDERED-NEXT: [[SUM_07:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MULADD:%.*]], [[FOR_BODY]] ]
; CHECK-ORDERED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; CHECK-ORDERED-NEXT: [[TMP38:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-ORDERED-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
; CHECK-ORDERED-NEXT: [[TMP39:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
; CHECK-ORDERED-NEXT: [[MULADD]] = tail call nnan float @llvm.fmuladd.f32(float [[TMP38]], float [[TMP39]], float [[SUM_07]])
; CHECK-ORDERED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-ORDERED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-ORDERED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
; CHECK-ORDERED: for.end:
; CHECK-ORDERED-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP36]], [[MIDDLE_BLOCK]] ]
; CHECK-ORDERED-NEXT: ret float [[MULADD_LCSSA]]
;
; CHECK-ORDERED-TF-LABEL: define float @fmuladd_strict_fmf
; CHECK-ORDERED-TF-SAME: (ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; CHECK-ORDERED-TF-NEXT: entry:
; CHECK-ORDERED-TF-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-ORDERED-TF: vector.ph:
; CHECK-ORDERED-TF-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 32
; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
; CHECK-ORDERED-TF-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP2]]
; CHECK-ORDERED-TF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; CHECK-ORDERED-TF-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 32
; CHECK-ORDERED-TF-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 32
; CHECK-ORDERED-TF-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP6]]
; CHECK-ORDERED-TF-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[N]], [[TMP6]]
; CHECK-ORDERED-TF-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0
; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 8
; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT:%.*]] = add i64 0, [[TMP11]]
; CHECK-ORDERED-TF-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 16
; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT1:%.*]] = add i64 0, [[TMP13]]
; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 24
; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT2:%.*]] = add i64 0, [[TMP15]]
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[N]])
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY3:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_PART_NEXT]], i64 [[N]])
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY4:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_PART_NEXT1]], i64 [[N]])
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY5:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_PART_NEXT2]], i64 [[N]])
; CHECK-ORDERED-TF-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-ORDERED-TF: vector.body:
; CHECK-ORDERED-TF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK6:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY3]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT16:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK7:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY4]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT17:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK8:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY5]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT18:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP50:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], 0
; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP16]]
; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0
; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 8
; CHECK-ORDERED-TF-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP20]]
; CHECK-ORDERED-TF-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 16
; CHECK-ORDERED-TF-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP23]]
; CHECK-ORDERED-TF-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP26:%.*]] = mul i64 [[TMP25]], 24
; CHECK-ORDERED-TF-NEXT: [[TMP27:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP26]]
; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP18]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> poison)
; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP21]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> poison)
; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD10:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP24]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> poison)
; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP27]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> poison)
; CHECK-ORDERED-TF-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP16]]
; CHECK-ORDERED-TF-NEXT: [[TMP29:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i32 0
; CHECK-ORDERED-TF-NEXT: [[TMP30:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP31:%.*]] = mul i64 [[TMP30]], 8
; CHECK-ORDERED-TF-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i64 [[TMP31]]
; CHECK-ORDERED-TF-NEXT: [[TMP33:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP34:%.*]] = mul i64 [[TMP33]], 16
; CHECK-ORDERED-TF-NEXT: [[TMP35:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i64 [[TMP34]]
; CHECK-ORDERED-TF-NEXT: [[TMP36:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP37:%.*]] = mul i64 [[TMP36]], 24
; CHECK-ORDERED-TF-NEXT: [[TMP38:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i64 [[TMP37]]
; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD12:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP29]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> poison)
; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD13:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP32]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> poison)
; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD14:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP35]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> poison)
; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP38]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> poison)
; CHECK-ORDERED-TF-NEXT: [[TMP39:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD12]]
; CHECK-ORDERED-TF-NEXT: [[TMP40:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_MASKED_LOAD9]], [[WIDE_MASKED_LOAD13]]
; CHECK-ORDERED-TF-NEXT: [[TMP41:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_MASKED_LOAD10]], [[WIDE_MASKED_LOAD14]]
; CHECK-ORDERED-TF-NEXT: [[TMP42:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_MASKED_LOAD11]], [[WIDE_MASKED_LOAD15]]
; CHECK-ORDERED-TF-NEXT: [[TMP43:%.*]] = select nnan <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> [[TMP39]], <vscale x 8 x float> splat (float -0.000000e+00)
; CHECK-ORDERED-TF-NEXT: [[TMP44:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], <vscale x 8 x float> [[TMP43]])
; CHECK-ORDERED-TF-NEXT: [[TMP45:%.*]] = select nnan <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> [[TMP40]], <vscale x 8 x float> splat (float -0.000000e+00)
; CHECK-ORDERED-TF-NEXT: [[TMP46:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP44]], <vscale x 8 x float> [[TMP45]])
; CHECK-ORDERED-TF-NEXT: [[TMP47:%.*]] = select nnan <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> [[TMP41]], <vscale x 8 x float> splat (float -0.000000e+00)
; CHECK-ORDERED-TF-NEXT: [[TMP48:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP46]], <vscale x 8 x float> [[TMP47]])
; CHECK-ORDERED-TF-NEXT: [[TMP49:%.*]] = select nnan <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> [[TMP42]], <vscale x 8 x float> splat (float -0.000000e+00)
; CHECK-ORDERED-TF-NEXT: [[TMP50]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP48]], <vscale x 8 x float> [[TMP49]])
; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]]
; CHECK-ORDERED-TF-NEXT: [[TMP51:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP52:%.*]] = mul i64 [[TMP51]], 8
; CHECK-ORDERED-TF-NEXT: [[TMP53:%.*]] = add i64 [[INDEX]], [[TMP52]]
; CHECK-ORDERED-TF-NEXT: [[TMP54:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP55:%.*]] = mul i64 [[TMP54]], 16
; CHECK-ORDERED-TF-NEXT: [[TMP56:%.*]] = add i64 [[INDEX]], [[TMP55]]
; CHECK-ORDERED-TF-NEXT: [[TMP57:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP58:%.*]] = mul i64 [[TMP57]], 24
; CHECK-ORDERED-TF-NEXT: [[TMP59:%.*]] = add i64 [[INDEX]], [[TMP58]]
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX]], i64 [[TMP9]])
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT16]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP53]], i64 [[TMP9]])
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT17]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP56]], i64 [[TMP9]])
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT18]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP59]], i64 [[TMP9]])
; CHECK-ORDERED-TF-NEXT: [[TMP60:%.*]] = xor <vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
; CHECK-ORDERED-TF-NEXT: [[TMP61:%.*]] = extractelement <vscale x 8 x i1> [[TMP60]], i32 0
; CHECK-ORDERED-TF-NEXT: br i1 [[TMP61]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK-ORDERED-TF: middle.block:
; CHECK-ORDERED-TF-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK-ORDERED-TF: scalar.ph:
; CHECK-ORDERED-TF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-ORDERED-TF-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP50]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ]
; CHECK-ORDERED-TF-NEXT: br label [[FOR_BODY:%.*]]
; CHECK-ORDERED-TF: for.body:
; CHECK-ORDERED-TF-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-ORDERED-TF-NEXT: [[SUM_07:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MULADD:%.*]], [[FOR_BODY]] ]
; CHECK-ORDERED-TF-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; CHECK-ORDERED-TF-NEXT: [[TMP62:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-ORDERED-TF-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
; CHECK-ORDERED-TF-NEXT: [[TMP63:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
; CHECK-ORDERED-TF-NEXT: [[MULADD]] = tail call nnan float @llvm.fmuladd.f32(float [[TMP62]], float [[TMP63]], float [[SUM_07]])
; CHECK-ORDERED-TF-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-ORDERED-TF-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-ORDERED-TF-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
; CHECK-ORDERED-TF: for.end:
; CHECK-ORDERED-TF-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP50]], [[MIDDLE_BLOCK]] ]
; CHECK-ORDERED-TF-NEXT: ret float [[MULADD_LCSSA]]
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %sum.07 = phi float [ 0.000000e+00, %entry ], [ %muladd, %for.body ]
  %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv
  %0 = load float, ptr %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds float, ptr %b, i64 %iv
  %1 = load float, ptr %arrayidx2, align 4
  %muladd = tail call nnan float @llvm.fmuladd.f32(float %0, float %1, float %sum.07)
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %n
  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !1

for.end:
  ret float %muladd
}

declare float @llvm.fmuladd.f32(float, float, float)

attributes #0 = { vscale_range(1, 16) }

!0 = distinct !{!0, !3, !6, !8}
!1 = distinct !{!1, !3, !7, !8}
!2 = distinct !{!2, !4, !6, !8}
!3 = !{!"llvm.loop.vectorize.width", i32 8}
!4 = !{!"llvm.loop.vectorize.width", i32 4}
!5 = !{!"llvm.loop.vectorize.width", i32 2}
!6 = !{!"llvm.loop.interleave.count", i32 1}
!7 = !{!"llvm.loop.interleave.count", i32 4}
!8 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
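; The !llvm.loop metadata above drives the shapes checked in this file: the
; fmuladd loops use !1, which requests a scalable vectorization width of 8 and
; an interleave count of 4, matching the vector bodies above that step the
; induction variable by vscale * 32 and keep four <vscale x 8 x float> parts
; in flight per iteration.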