1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 2; RUN: opt -passes=loop-vectorize \ 3; RUN: -prefer-inloop-reductions \ 4; RUN: -force-tail-folding-style=data-with-evl \ 5; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \ 6; RUN: -mtriple=riscv64 -mattr=+v,+f -S < %s| FileCheck %s --check-prefix=IF-EVL 7 8; RUN: opt -passes=loop-vectorize \ 9; RUN: -prefer-inloop-reductions \ 10; RUN: -force-tail-folding-style=none \ 11; RUN: -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue \ 12; RUN: -mtriple=riscv64 -mattr=+v,+f -S < %s| FileCheck %s --check-prefix=NO-VP 13 14define i32 @add(ptr %a, i64 %n, i32 %start) { 15; IF-EVL-LABEL: @add( 16; IF-EVL-NEXT: entry: 17; IF-EVL-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N:%.*]] 18; IF-EVL-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() 19; IF-EVL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4 20; IF-EVL-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]] 21; IF-EVL-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 22; IF-EVL: vector.ph: 23; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 24; IF-EVL-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 25; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 26; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]] 27; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] 28; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] 29; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() 30; IF-EVL-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 31; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] 32; IF-EVL: vector.body: 33; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 34; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] 35; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] 36; IF-EVL-NEXT: [[TMP9:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]] 37; IF-EVL-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP9]], i32 4, i1 true) 38; IF-EVL-NEXT: [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0 39; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP11]] 40; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0 41; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) 42; IF-EVL-NEXT: [[TMP14:%.*]] = call i32 @llvm.vp.reduce.add.nxv4i32(i32 0, <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) 43; IF-EVL-NEXT: [[TMP15]] = add i32 [[TMP14]], [[VEC_PHI]] 44; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP10]] to i64 45; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] 46; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] 47; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 48; IF-EVL-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] 49; IF-EVL: middle.block: 50; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] 51; IF-EVL: scalar.ph: 52; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 53; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP15]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 54; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] 55; IF-EVL: for.body: 56; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 57; IF-EVL-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 58; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 59; IF-EVL-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 60; IF-EVL-NEXT: [[ADD]] = add nsw i32 [[TMP18]], [[RDX]] 61; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 62; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 63; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] 64; IF-EVL: for.end: 65; IF-EVL-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP15]], [[MIDDLE_BLOCK]] ] 66; IF-EVL-NEXT: ret i32 [[ADD_LCSSA]] 67; 68; NO-VP-LABEL: @add( 69; NO-VP-NEXT: entry: 70; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 71; NO-VP-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 72; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] 73; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 74; NO-VP: vector.ph: 75; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 76; NO-VP-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 77; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] 78; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] 79; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 80; NO-VP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 81; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] 82; NO-VP: vector.body: 83; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 84; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] 85; NO-VP-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 86; NO-VP-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP6]] 87; NO-VP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 88; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP8]], align 4 89; NO-VP-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[WIDE_LOAD]]) 90; NO-VP-NEXT: [[TMP10]] = add i32 [[TMP9]], [[VEC_PHI]] 91; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] 92; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 93; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] 94; NO-VP: middle.block: 95; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] 96; NO-VP-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 97; NO-VP: scalar.ph: 98; NO-VP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 99; NO-VP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 100; NO-VP-NEXT: br label [[FOR_BODY:%.*]] 101; NO-VP: for.body: 102; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 103; NO-VP-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 104; NO-VP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 105; NO-VP-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 106; NO-VP-NEXT: [[ADD]] = add nsw i32 [[TMP12]], [[RDX]] 107; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 108; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 109; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] 110; NO-VP: for.end: 111; NO-VP-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] 112; NO-VP-NEXT: ret i32 [[ADD_LCSSA]] 113; 114entry: 115 br label %for.body 116 117for.body: 118 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 119 %rdx = phi i32 [ %start, %entry ], [ %add, %for.body ] 120 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv 121 %0 = load i32, ptr %arrayidx, align 4 122 %add = add nsw i32 %0, %rdx 123 %iv.next = add nuw nsw i64 %iv, 1 124 %exitcond.not = icmp eq i64 %iv.next, %n 125 br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 126 127for.end: 128 ret i32 %add 129} 130 131; not support mul reduction for scalable vector 132define i32 @mul(ptr %a, i64 %n, i32 %start) { 133; IF-EVL-LABEL: @mul( 134; IF-EVL-NEXT: entry: 135; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] 136; IF-EVL: for.body: 137; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 138; IF-EVL-NEXT: [[RDX:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY]] ], [ [[MUL:%.*]], [[FOR_BODY]] ] 139; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IV]] 140; IF-EVL-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 141; IF-EVL-NEXT: [[MUL]] = mul nsw i32 [[TMP0]], [[RDX]] 142; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 143; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N:%.*]] 144; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] 145; IF-EVL: for.end: 146; IF-EVL-NEXT: [[MUL_LCSSA:%.*]] = phi i32 [ [[MUL]], [[FOR_BODY]] ] 147; IF-EVL-NEXT: ret i32 [[MUL_LCSSA]] 148; 149; NO-VP-LABEL: @mul( 150; NO-VP-NEXT: entry: 151; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 8 152; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 153; NO-VP: vector.ph: 154; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 8 155; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] 156; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] 157; NO-VP: vector.body: 158; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 159; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] 160; NO-VP-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 1, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] 161; NO-VP-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 162; NO-VP-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]] 163; NO-VP-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0 164; NO-VP-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 4 165; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4 166; NO-VP-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4 167; NO-VP-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[WIDE_LOAD]]) 168; NO-VP-NEXT: [[TMP7]] = mul i32 [[TMP6]], [[VEC_PHI]] 169; NO-VP-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[WIDE_LOAD2]]) 170; NO-VP-NEXT: [[TMP9]] = mul i32 [[TMP8]], [[VEC_PHI1]] 171; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 172; NO-VP-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 173; NO-VP-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] 174; NO-VP: middle.block: 175; NO-VP-NEXT: [[BIN_RDX:%.*]] = mul i32 [[TMP9]], [[TMP7]] 176; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] 177; NO-VP-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 178; NO-VP: scalar.ph: 179; NO-VP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 180; NO-VP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 181; NO-VP-NEXT: br label [[FOR_BODY:%.*]] 182; NO-VP: for.body: 183; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 184; NO-VP-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MUL:%.*]], [[FOR_BODY]] ] 185; NO-VP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 186; NO-VP-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 187; NO-VP-NEXT: [[MUL]] = mul nsw i32 [[TMP11]], [[RDX]] 188; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 189; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 190; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] 191; NO-VP: for.end: 192; NO-VP-NEXT: [[MUL_LCSSA:%.*]] = phi i32 [ [[MUL]], [[FOR_BODY]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ] 193; NO-VP-NEXT: ret i32 [[MUL_LCSSA]] 194; 195entry: 196 br label %for.body 197 198for.body: 199 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 200 %rdx = phi i32 [ %start, %entry ], [ %mul, %for.body ] 201 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv 202 %0 = load i32, ptr %arrayidx, align 4 203 %mul = mul nsw i32 %0, %rdx 204 %iv.next = add nuw nsw i64 %iv, 1 205 %exitcond.not = icmp eq i64 %iv.next, %n 206 br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 207 208for.end: 209 ret i32 %mul 210} 211 212define i32 @or(ptr %a, i64 %n, i32 %start) { 213; IF-EVL-LABEL: @or( 214; IF-EVL-NEXT: entry: 215; IF-EVL-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N:%.*]] 216; IF-EVL-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() 217; IF-EVL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4 218; IF-EVL-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]] 219; IF-EVL-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 220; IF-EVL: vector.ph: 221; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 222; IF-EVL-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 223; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 224; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]] 225; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] 226; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] 227; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() 228; IF-EVL-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 229; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] 230; IF-EVL: vector.body: 231; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 232; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] 233; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] 234; IF-EVL-NEXT: [[TMP9:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]] 235; IF-EVL-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP9]], i32 4, i1 true) 236; IF-EVL-NEXT: [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0 237; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP11]] 238; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0 239; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) 240; IF-EVL-NEXT: [[TMP14:%.*]] = call i32 @llvm.vp.reduce.or.nxv4i32(i32 0, <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) 241; IF-EVL-NEXT: [[TMP15]] = or i32 [[TMP14]], [[VEC_PHI]] 242; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP10]] to i64 243; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] 244; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] 245; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 246; IF-EVL-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] 247; IF-EVL: middle.block: 248; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] 249; IF-EVL: scalar.ph: 250; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 251; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP15]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 252; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] 253; IF-EVL: for.body: 254; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 255; IF-EVL-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[OR:%.*]], [[FOR_BODY]] ] 256; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 257; IF-EVL-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 258; IF-EVL-NEXT: [[OR]] = or i32 [[TMP18]], [[RDX]] 259; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 260; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 261; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] 262; IF-EVL: for.end: 263; IF-EVL-NEXT: [[OR_LCSSA:%.*]] = phi i32 [ [[OR]], [[FOR_BODY]] ], [ [[TMP15]], [[MIDDLE_BLOCK]] ] 264; IF-EVL-NEXT: ret i32 [[OR_LCSSA]] 265; 266; NO-VP-LABEL: @or( 267; NO-VP-NEXT: entry: 268; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 269; NO-VP-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 270; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] 271; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 272; NO-VP: vector.ph: 273; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 274; NO-VP-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 275; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] 276; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] 277; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 278; NO-VP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 279; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] 280; NO-VP: vector.body: 281; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 282; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] 283; NO-VP-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 284; NO-VP-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP6]] 285; NO-VP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 286; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP8]], align 4 287; NO-VP-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> [[WIDE_LOAD]]) 288; NO-VP-NEXT: [[TMP10]] = or i32 [[TMP9]], [[VEC_PHI]] 289; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] 290; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 291; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] 292; NO-VP: middle.block: 293; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] 294; NO-VP-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 295; NO-VP: scalar.ph: 296; NO-VP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 297; NO-VP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 298; NO-VP-NEXT: br label [[FOR_BODY:%.*]] 299; NO-VP: for.body: 300; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 301; NO-VP-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[OR:%.*]], [[FOR_BODY]] ] 302; NO-VP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 303; NO-VP-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 304; NO-VP-NEXT: [[OR]] = or i32 [[TMP12]], [[RDX]] 305; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 306; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 307; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] 308; NO-VP: for.end: 309; NO-VP-NEXT: [[OR_LCSSA:%.*]] = phi i32 [ [[OR]], [[FOR_BODY]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] 310; NO-VP-NEXT: ret i32 [[OR_LCSSA]] 311; 312entry: 313 br label %for.body 314 315for.body: 316 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 317 %rdx = phi i32 [ %start, %entry ], [ %or, %for.body ] 318 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv 319 %0 = load i32, ptr %arrayidx, align 4 320 %or = or i32 %0, %rdx 321 %iv.next = add nuw nsw i64 %iv, 1 322 %exitcond.not = icmp eq i64 %iv.next, %n 323 br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 324 325for.end: 326 ret i32 %or 327} 328 329define i32 @and(ptr %a, i64 %n, i32 %start) { 330; IF-EVL-LABEL: @and( 331; IF-EVL-NEXT: entry: 332; IF-EVL-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N:%.*]] 333; IF-EVL-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() 334; IF-EVL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4 335; IF-EVL-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]] 336; IF-EVL-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 337; IF-EVL: vector.ph: 338; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 339; IF-EVL-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 340; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 341; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]] 342; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] 343; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] 344; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() 345; IF-EVL-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 346; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] 347; IF-EVL: vector.body: 348; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 349; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] 350; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] 351; IF-EVL-NEXT: [[TMP9:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]] 352; IF-EVL-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP9]], i32 4, i1 true) 353; IF-EVL-NEXT: [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0 354; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP11]] 355; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0 356; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) 357; IF-EVL-NEXT: [[TMP14:%.*]] = call i32 @llvm.vp.reduce.and.nxv4i32(i32 -1, <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) 358; IF-EVL-NEXT: [[TMP15]] = and i32 [[TMP14]], [[VEC_PHI]] 359; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP10]] to i64 360; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] 361; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] 362; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 363; IF-EVL-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] 364; IF-EVL: middle.block: 365; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] 366; IF-EVL: scalar.ph: 367; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 368; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP15]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 369; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] 370; IF-EVL: for.body: 371; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 372; IF-EVL-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[AND:%.*]], [[FOR_BODY]] ] 373; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 374; IF-EVL-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 375; IF-EVL-NEXT: [[AND]] = and i32 [[TMP18]], [[RDX]] 376; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 377; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 378; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] 379; IF-EVL: for.end: 380; IF-EVL-NEXT: [[AND_LCSSA:%.*]] = phi i32 [ [[AND]], [[FOR_BODY]] ], [ [[TMP15]], [[MIDDLE_BLOCK]] ] 381; IF-EVL-NEXT: ret i32 [[AND_LCSSA]] 382; 383; NO-VP-LABEL: @and( 384; NO-VP-NEXT: entry: 385; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 386; NO-VP-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 387; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] 388; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 389; NO-VP: vector.ph: 390; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 391; NO-VP-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 392; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] 393; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] 394; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 395; NO-VP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 396; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] 397; NO-VP: vector.body: 398; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 399; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] 400; NO-VP-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 401; NO-VP-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP6]] 402; NO-VP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 403; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP8]], align 4 404; NO-VP-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> [[WIDE_LOAD]]) 405; NO-VP-NEXT: [[TMP10]] = and i32 [[TMP9]], [[VEC_PHI]] 406; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] 407; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 408; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] 409; NO-VP: middle.block: 410; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] 411; NO-VP-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 412; NO-VP: scalar.ph: 413; NO-VP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 414; NO-VP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 415; NO-VP-NEXT: br label [[FOR_BODY:%.*]] 416; NO-VP: for.body: 417; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 418; NO-VP-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[AND:%.*]], [[FOR_BODY]] ] 419; NO-VP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 420; NO-VP-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 421; NO-VP-NEXT: [[AND]] = and i32 [[TMP12]], [[RDX]] 422; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 423; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 424; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] 425; NO-VP: for.end: 426; NO-VP-NEXT: [[AND_LCSSA:%.*]] = phi i32 [ [[AND]], [[FOR_BODY]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] 427; NO-VP-NEXT: ret i32 [[AND_LCSSA]] 428; 429entry: 430 br label %for.body 431 432for.body: 433 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 434 %rdx = phi i32 [ %start, %entry ], [ %and, %for.body ] 435 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv 436 %0 = load i32, ptr %arrayidx, align 4 437 %and = and i32 %0, %rdx 438 %iv.next = add nuw nsw i64 %iv, 1 439 %exitcond.not = icmp eq i64 %iv.next, %n 440 br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 441 442for.end: 443 ret i32 %and 444} 445 446define i32 @xor(ptr %a, i64 %n, i32 %start) { 447; IF-EVL-LABEL: @xor( 448; IF-EVL-NEXT: entry: 449; IF-EVL-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N:%.*]] 450; IF-EVL-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() 451; IF-EVL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4 452; IF-EVL-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]] 453; IF-EVL-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 454; IF-EVL: vector.ph: 455; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 456; IF-EVL-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 457; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 458; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]] 459; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] 460; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] 461; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() 462; IF-EVL-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 463; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] 464; IF-EVL: vector.body: 465; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 466; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] 467; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] 468; IF-EVL-NEXT: [[TMP9:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]] 469; IF-EVL-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP9]], i32 4, i1 true) 470; IF-EVL-NEXT: [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0 471; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP11]] 472; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0 473; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) 474; IF-EVL-NEXT: [[TMP14:%.*]] = call i32 @llvm.vp.reduce.xor.nxv4i32(i32 0, <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) 475; IF-EVL-NEXT: [[TMP15]] = xor i32 [[TMP14]], [[VEC_PHI]] 476; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP10]] to i64 477; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] 478; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] 479; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 480; IF-EVL-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] 481; IF-EVL: middle.block: 482; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] 483; IF-EVL: scalar.ph: 484; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 485; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP15]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 486; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] 487; IF-EVL: for.body: 488; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 489; IF-EVL-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[XOR:%.*]], [[FOR_BODY]] ] 490; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 491; IF-EVL-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 492; IF-EVL-NEXT: [[XOR]] = xor i32 [[TMP18]], [[RDX]] 493; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 494; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 495; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] 496; IF-EVL: for.end: 497; IF-EVL-NEXT: [[XOR_LCSSA:%.*]] = phi i32 [ [[XOR]], [[FOR_BODY]] ], [ [[TMP15]], [[MIDDLE_BLOCK]] ] 498; IF-EVL-NEXT: ret i32 [[XOR_LCSSA]] 499; 500; NO-VP-LABEL: @xor( 501; NO-VP-NEXT: entry: 502; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 503; NO-VP-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 504; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] 505; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 506; NO-VP: vector.ph: 507; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 508; NO-VP-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 509; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] 510; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] 511; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 512; NO-VP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 513; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] 514; NO-VP: vector.body: 515; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 516; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] 517; NO-VP-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 518; NO-VP-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP6]] 519; NO-VP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 520; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP8]], align 4 521; NO-VP-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> [[WIDE_LOAD]]) 522; NO-VP-NEXT: [[TMP10]] = xor i32 [[TMP9]], [[VEC_PHI]] 523; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] 524; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 525; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] 526; NO-VP: middle.block: 527; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] 528; NO-VP-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 529; NO-VP: scalar.ph: 530; NO-VP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 531; NO-VP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 532; NO-VP-NEXT: br label [[FOR_BODY:%.*]] 533; NO-VP: for.body: 534; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 535; NO-VP-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[XOR:%.*]], [[FOR_BODY]] ] 536; NO-VP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 537; NO-VP-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 538; NO-VP-NEXT: [[XOR]] = xor i32 [[TMP12]], [[RDX]] 539; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 540; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 541; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] 542; NO-VP: for.end: 543; NO-VP-NEXT: [[XOR_LCSSA:%.*]] = phi i32 [ [[XOR]], [[FOR_BODY]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] 544; NO-VP-NEXT: ret i32 [[XOR_LCSSA]] 545; 546entry: 547 br label %for.body 548 549for.body: 550 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 551 %rdx = phi i32 [ %start, %entry ], [ %xor, %for.body ] 552 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv 553 %0 = load i32, ptr %arrayidx, align 4 554 %xor = xor i32 %0, %rdx 555 %iv.next = add nuw nsw i64 %iv, 1 556 %exitcond.not = icmp eq i64 %iv.next, %n 557 br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 558 559for.end: 560 ret i32 %xor 561} 562 563define i32 @smin(ptr %a, i64 %n, i32 %start) { 564; IF-EVL-LABEL: @smin( 565; IF-EVL-NEXT: entry: 566; IF-EVL-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N:%.*]] 567; IF-EVL-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() 568; IF-EVL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4 569; IF-EVL-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]] 570; IF-EVL-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 571; IF-EVL: vector.ph: 572; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 573; IF-EVL-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 574; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 575; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]] 576; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] 577; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] 578; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() 579; IF-EVL-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 580; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] 581; IF-EVL: vector.body: 582; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 583; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] 584; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[VECTOR_BODY]] ] 585; IF-EVL-NEXT: [[TMP9:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]] 586; IF-EVL-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP9]], i32 4, i1 true) 587; IF-EVL-NEXT: [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0 588; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP11]] 589; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0 590; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) 591; IF-EVL-NEXT: [[TMP14:%.*]] = call i32 @llvm.vp.reduce.smin.nxv4i32(i32 2147483647, <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) 592; IF-EVL-NEXT: [[RDX_MINMAX]] = call i32 @llvm.smin.i32(i32 [[TMP14]], i32 [[VEC_PHI]]) 593; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64 594; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]] 595; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] 596; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 597; IF-EVL-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] 598; IF-EVL: middle.block: 599; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] 600; IF-EVL: scalar.ph: 601; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 602; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 603; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] 604; IF-EVL: for.body: 605; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 606; IF-EVL-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SMIN:%.*]], [[FOR_BODY]] ] 607; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 608; IF-EVL-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 609; IF-EVL-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[TMP17]], [[RDX]] 610; IF-EVL-NEXT: [[SMIN]] = select i1 [[CMP_I]], i32 [[TMP17]], i32 [[RDX]] 611; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 612; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 613; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] 614; IF-EVL: for.end: 615; IF-EVL-NEXT: [[SMIN_LCSSA:%.*]] = phi i32 [ [[SMIN]], [[FOR_BODY]] ], [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ] 616; IF-EVL-NEXT: ret i32 [[SMIN_LCSSA]] 617; 618; NO-VP-LABEL: @smin( 619; NO-VP-NEXT: entry: 620; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 621; NO-VP-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 622; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] 623; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 624; NO-VP: vector.ph: 625; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 626; NO-VP-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 627; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] 628; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] 629; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 630; NO-VP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 631; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] 632; NO-VP: vector.body: 633; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 634; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[VECTOR_BODY]] ] 635; NO-VP-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 636; NO-VP-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP6]] 637; NO-VP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 638; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP8]], align 4 639; NO-VP-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> [[WIDE_LOAD]]) 640; NO-VP-NEXT: [[RDX_MINMAX]] = call i32 @llvm.smin.i32(i32 [[TMP9]], i32 [[VEC_PHI]]) 641; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] 642; NO-VP-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 643; NO-VP-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] 644; NO-VP: middle.block: 645; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] 646; NO-VP-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 647; NO-VP: scalar.ph: 648; NO-VP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 649; NO-VP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 650; NO-VP-NEXT: br label [[FOR_BODY:%.*]] 651; NO-VP: for.body: 652; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 653; NO-VP-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SMIN:%.*]], [[FOR_BODY]] ] 654; NO-VP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 655; NO-VP-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 656; NO-VP-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[TMP11]], [[RDX]] 657; NO-VP-NEXT: [[SMIN]] = select i1 [[CMP_I]], i32 [[TMP11]], i32 [[RDX]] 658; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 659; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 660; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] 661; NO-VP: for.end: 662; NO-VP-NEXT: [[SMIN_LCSSA:%.*]] = phi i32 [ [[SMIN]], [[FOR_BODY]] ], [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ] 663; NO-VP-NEXT: ret i32 [[SMIN_LCSSA]] 664; 665entry: 666 br label %for.body 667 668for.body: 669 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 670 %rdx = phi i32 [ %start, %entry ], [ %smin, %for.body ] 671 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv 672 %0 = load i32, ptr %arrayidx, align 4 673 %cmp.i = icmp slt i32 %0, %rdx 674 %smin = select i1 %cmp.i, i32 %0, i32 %rdx 675 %iv.next = add nuw nsw i64 %iv, 1 676 %exitcond.not = icmp eq i64 %iv.next, %n 677 br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 678 679for.end: 680 ret i32 %smin 681} 682 683define i32 @smax(ptr %a, i64 %n, i32 %start) { 684; IF-EVL-LABEL: @smax( 685; IF-EVL-NEXT: entry: 686; IF-EVL-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N:%.*]] 687; IF-EVL-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() 688; IF-EVL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4 689; IF-EVL-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]] 690; IF-EVL-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 691; IF-EVL: vector.ph: 692; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 693; IF-EVL-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 694; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 695; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]] 696; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] 697; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] 698; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() 699; IF-EVL-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 700; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] 701; IF-EVL: vector.body: 702; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 703; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] 704; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[VECTOR_BODY]] ] 705; IF-EVL-NEXT: [[TMP9:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]] 706; IF-EVL-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP9]], i32 4, i1 true) 707; IF-EVL-NEXT: [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0 708; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP11]] 709; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0 710; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) 711; IF-EVL-NEXT: [[TMP14:%.*]] = call i32 @llvm.vp.reduce.smax.nxv4i32(i32 -2147483648, <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) 712; IF-EVL-NEXT: [[RDX_MINMAX]] = call i32 @llvm.smax.i32(i32 [[TMP14]], i32 [[VEC_PHI]]) 713; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64 714; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]] 715; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] 716; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 717; IF-EVL-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] 718; IF-EVL: middle.block: 719; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] 720; IF-EVL: scalar.ph: 721; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 722; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 723; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] 724; IF-EVL: for.body: 725; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 726; IF-EVL-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SMAX:%.*]], [[FOR_BODY]] ] 727; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 728; IF-EVL-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 729; IF-EVL-NEXT: [[CMP_I:%.*]] = icmp sgt i32 [[TMP17]], [[RDX]] 730; IF-EVL-NEXT: [[SMAX]] = select i1 [[CMP_I]], i32 [[TMP17]], i32 [[RDX]] 731; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 732; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 733; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] 734; IF-EVL: for.end: 735; IF-EVL-NEXT: [[SMAX_LCSSA:%.*]] = phi i32 [ [[SMAX]], [[FOR_BODY]] ], [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ] 736; IF-EVL-NEXT: ret i32 [[SMAX_LCSSA]] 737; 738; NO-VP-LABEL: @smax( 739; NO-VP-NEXT: entry: 740; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 741; NO-VP-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 742; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] 743; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 744; NO-VP: vector.ph: 745; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 746; NO-VP-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 747; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] 748; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] 749; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 750; NO-VP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 751; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] 752; NO-VP: vector.body: 753; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 754; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[VECTOR_BODY]] ] 755; NO-VP-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 756; NO-VP-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP6]] 757; NO-VP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 758; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP8]], align 4 759; NO-VP-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> [[WIDE_LOAD]]) 760; NO-VP-NEXT: [[RDX_MINMAX]] = call i32 @llvm.smax.i32(i32 [[TMP9]], i32 [[VEC_PHI]]) 761; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] 762; NO-VP-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 763; NO-VP-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] 764; NO-VP: middle.block: 765; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] 766; NO-VP-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 767; NO-VP: scalar.ph: 768; NO-VP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 769; NO-VP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 770; NO-VP-NEXT: br label [[FOR_BODY:%.*]] 771; NO-VP: for.body: 772; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 773; NO-VP-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SMAX:%.*]], [[FOR_BODY]] ] 774; NO-VP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 775; NO-VP-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 776; NO-VP-NEXT: [[CMP_I:%.*]] = icmp sgt i32 [[TMP11]], [[RDX]] 777; NO-VP-NEXT: [[SMAX]] = select i1 [[CMP_I]], i32 [[TMP11]], i32 [[RDX]] 778; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 779; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 780; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] 781; NO-VP: for.end: 782; NO-VP-NEXT: [[SMAX_LCSSA:%.*]] = phi i32 [ [[SMAX]], [[FOR_BODY]] ], [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ] 783; NO-VP-NEXT: ret i32 [[SMAX_LCSSA]] 784; 785entry: 786 br label %for.body 787 788for.body: 789 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 790 %rdx = phi i32 [ %start, %entry ], [ %smax, %for.body ] 791 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv 792 %0 = load i32, ptr %arrayidx, align 4 793 %cmp.i = icmp sgt i32 %0, %rdx 794 %smax = select i1 %cmp.i, i32 %0, i32 %rdx 795 %iv.next = add nuw nsw i64 %iv, 1 796 %exitcond.not = icmp eq i64 %iv.next, %n 797 br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 798 799for.end: 800 ret i32 %smax 801} 802 803define i32 @umin(ptr %a, i64 %n, i32 %start) { 804; IF-EVL-LABEL: @umin( 805; IF-EVL-NEXT: entry: 806; IF-EVL-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N:%.*]] 807; IF-EVL-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() 808; IF-EVL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4 809; IF-EVL-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]] 810; IF-EVL-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 811; IF-EVL: vector.ph: 812; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 813; IF-EVL-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 814; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 815; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]] 816; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] 817; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] 818; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() 819; IF-EVL-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 820; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] 821; IF-EVL: vector.body: 822; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 823; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] 824; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[VECTOR_BODY]] ] 825; IF-EVL-NEXT: [[TMP9:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]] 826; IF-EVL-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP9]], i32 4, i1 true) 827; IF-EVL-NEXT: [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0 828; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP11]] 829; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0 830; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) 831; IF-EVL-NEXT: [[TMP14:%.*]] = call i32 @llvm.vp.reduce.umin.nxv4i32(i32 -1, <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) 832; IF-EVL-NEXT: [[RDX_MINMAX]] = call i32 @llvm.umin.i32(i32 [[TMP14]], i32 [[VEC_PHI]]) 833; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64 834; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]] 835; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] 836; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 837; IF-EVL-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] 838; IF-EVL: middle.block: 839; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] 840; IF-EVL: scalar.ph: 841; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 842; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 843; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] 844; IF-EVL: for.body: 845; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 846; IF-EVL-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[UMIN:%.*]], [[FOR_BODY]] ] 847; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 848; IF-EVL-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 849; IF-EVL-NEXT: [[CMP_I:%.*]] = icmp ult i32 [[TMP17]], [[RDX]] 850; IF-EVL-NEXT: [[UMIN]] = select i1 [[CMP_I]], i32 [[TMP17]], i32 [[RDX]] 851; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 852; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 853; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] 854; IF-EVL: for.end: 855; IF-EVL-NEXT: [[UMIN_LCSSA:%.*]] = phi i32 [ [[UMIN]], [[FOR_BODY]] ], [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ] 856; IF-EVL-NEXT: ret i32 [[UMIN_LCSSA]] 857; 858; NO-VP-LABEL: @umin( 859; NO-VP-NEXT: entry: 860; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 861; NO-VP-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 862; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] 863; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 864; NO-VP: vector.ph: 865; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 866; NO-VP-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 867; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] 868; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] 869; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 870; NO-VP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 871; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] 872; NO-VP: vector.body: 873; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 874; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[VECTOR_BODY]] ] 875; NO-VP-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 876; NO-VP-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP6]] 877; NO-VP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 878; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP8]], align 4 879; NO-VP-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> [[WIDE_LOAD]]) 880; NO-VP-NEXT: [[RDX_MINMAX]] = call i32 @llvm.umin.i32(i32 [[TMP9]], i32 [[VEC_PHI]]) 881; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] 882; NO-VP-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 883; NO-VP-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] 884; NO-VP: middle.block: 885; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] 886; NO-VP-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 887; NO-VP: scalar.ph: 888; NO-VP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 889; NO-VP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 890; NO-VP-NEXT: br label [[FOR_BODY:%.*]] 891; NO-VP: for.body: 892; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 893; NO-VP-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[UMIN:%.*]], [[FOR_BODY]] ] 894; NO-VP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 895; NO-VP-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 896; NO-VP-NEXT: [[CMP_I:%.*]] = icmp ult i32 [[TMP11]], [[RDX]] 897; NO-VP-NEXT: [[UMIN]] = select i1 [[CMP_I]], i32 [[TMP11]], i32 [[RDX]] 898; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 899; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 900; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] 901; NO-VP: for.end: 902; NO-VP-NEXT: [[UMIN_LCSSA:%.*]] = phi i32 [ [[UMIN]], [[FOR_BODY]] ], [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ] 903; NO-VP-NEXT: ret i32 [[UMIN_LCSSA]] 904; 905entry: 906 br label %for.body 907 908for.body: 909 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 910 %rdx = phi i32 [ %start, %entry ], [ %umin, %for.body ] 911 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv 912 %0 = load i32, ptr %arrayidx, align 4 913 %cmp.i = icmp ult i32 %0, %rdx 914 %umin = select i1 %cmp.i, i32 %0, i32 %rdx 915 %iv.next = add nuw nsw i64 %iv, 1 916 %exitcond.not = icmp eq i64 %iv.next, %n 917 br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 918 919for.end: 920 ret i32 %umin 921} 922 923define i32 @umax(ptr %a, i64 %n, i32 %start) { 924; IF-EVL-LABEL: @umax( 925; IF-EVL-NEXT: entry: 926; IF-EVL-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N:%.*]] 927; IF-EVL-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() 928; IF-EVL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4 929; IF-EVL-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]] 930; IF-EVL-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 931; IF-EVL: vector.ph: 932; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 933; IF-EVL-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 934; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 935; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]] 936; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] 937; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] 938; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() 939; IF-EVL-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 940; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] 941; IF-EVL: vector.body: 942; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 943; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] 944; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[VECTOR_BODY]] ] 945; IF-EVL-NEXT: [[TMP9:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]] 946; IF-EVL-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP9]], i32 4, i1 true) 947; IF-EVL-NEXT: [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0 948; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP11]] 949; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0 950; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) 951; IF-EVL-NEXT: [[TMP14:%.*]] = call i32 @llvm.vp.reduce.umax.nxv4i32(i32 0, <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) 952; IF-EVL-NEXT: [[RDX_MINMAX]] = call i32 @llvm.umax.i32(i32 [[TMP14]], i32 [[VEC_PHI]]) 953; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64 954; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]] 955; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] 956; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 957; IF-EVL-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] 958; IF-EVL: middle.block: 959; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] 960; IF-EVL: scalar.ph: 961; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 962; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 963; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] 964; IF-EVL: for.body: 965; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 966; IF-EVL-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[UMAX:%.*]], [[FOR_BODY]] ] 967; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 968; IF-EVL-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 969; IF-EVL-NEXT: [[CMP_I:%.*]] = icmp ugt i32 [[TMP17]], [[RDX]] 970; IF-EVL-NEXT: [[UMAX]] = select i1 [[CMP_I]], i32 [[TMP17]], i32 [[RDX]] 971; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 972; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 973; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] 974; IF-EVL: for.end: 975; IF-EVL-NEXT: [[UMAX_LCSSA:%.*]] = phi i32 [ [[UMAX]], [[FOR_BODY]] ], [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ] 976; IF-EVL-NEXT: ret i32 [[UMAX_LCSSA]] 977; 978; NO-VP-LABEL: @umax( 979; NO-VP-NEXT: entry: 980; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 981; NO-VP-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 982; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] 983; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 984; NO-VP: vector.ph: 985; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 986; NO-VP-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 987; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] 988; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] 989; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 990; NO-VP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 991; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] 992; NO-VP: vector.body: 993; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 994; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[VECTOR_BODY]] ] 995; NO-VP-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 996; NO-VP-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP6]] 997; NO-VP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 998; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP8]], align 4 999; NO-VP-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> [[WIDE_LOAD]]) 1000; NO-VP-NEXT: [[RDX_MINMAX]] = call i32 @llvm.umax.i32(i32 [[TMP9]], i32 [[VEC_PHI]]) 1001; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] 1002; NO-VP-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1003; NO-VP-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] 1004; NO-VP: middle.block: 1005; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] 1006; NO-VP-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 1007; NO-VP: scalar.ph: 1008; NO-VP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1009; NO-VP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 1010; NO-VP-NEXT: br label [[FOR_BODY:%.*]] 1011; NO-VP: for.body: 1012; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 1013; NO-VP-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[UMAX:%.*]], [[FOR_BODY]] ] 1014; NO-VP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 1015; NO-VP-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 1016; NO-VP-NEXT: [[CMP_I:%.*]] = icmp ugt i32 [[TMP11]], [[RDX]] 1017; NO-VP-NEXT: [[UMAX]] = select i1 [[CMP_I]], i32 [[TMP11]], i32 [[RDX]] 1018; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1019; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 1020; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] 1021; NO-VP: for.end: 1022; NO-VP-NEXT: [[UMAX_LCSSA:%.*]] = phi i32 [ [[UMAX]], [[FOR_BODY]] ], [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ] 1023; NO-VP-NEXT: ret i32 [[UMAX_LCSSA]] 1024; 1025entry: 1026 br label %for.body 1027 1028for.body: 1029 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 1030 %rdx = phi i32 [ %start, %entry ], [ %umax, %for.body ] 1031 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv 1032 %0 = load i32, ptr %arrayidx, align 4 1033 %cmp.i = icmp ugt i32 %0, %rdx 1034 %umax = select i1 %cmp.i, i32 %0, i32 %rdx 1035 %iv.next = add nuw nsw i64 %iv, 1 1036 %exitcond.not = icmp eq i64 %iv.next, %n 1037 br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 1038 1039for.end: 1040 ret i32 %umax 1041} 1042 1043define float @fadd(ptr %a, i64 %n, float %start) { 1044; IF-EVL-LABEL: @fadd( 1045; IF-EVL-NEXT: entry: 1046; IF-EVL-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N:%.*]] 1047; IF-EVL-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() 1048; IF-EVL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4 1049; IF-EVL-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]] 1050; IF-EVL-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1051; IF-EVL: vector.ph: 1052; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 1053; IF-EVL-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 1054; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 1055; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]] 1056; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] 1057; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] 1058; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() 1059; IF-EVL-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 1060; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] 1061; IF-EVL: vector.body: 1062; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1063; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] 1064; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi float [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] 1065; IF-EVL-NEXT: [[TMP9:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]] 1066; IF-EVL-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP9]], i32 4, i1 true) 1067; IF-EVL-NEXT: [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0 1068; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP11]] 1069; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i32 0 1070; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) 1071; IF-EVL-NEXT: [[TMP14:%.*]] = call reassoc float @llvm.vp.reduce.fadd.nxv4f32(float -0.000000e+00, <vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) 1072; IF-EVL-NEXT: [[TMP15]] = fadd reassoc float [[TMP14]], [[VEC_PHI]] 1073; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP10]] to i64 1074; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] 1075; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] 1076; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1077; IF-EVL-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] 1078; IF-EVL: middle.block: 1079; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] 1080; IF-EVL: scalar.ph: 1081; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1082; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP15]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 1083; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] 1084; IF-EVL: for.body: 1085; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 1086; IF-EVL-NEXT: [[RDX:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 1087; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] 1088; IF-EVL-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX]], align 4 1089; IF-EVL-NEXT: [[ADD]] = fadd reassoc float [[TMP18]], [[RDX]] 1090; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1091; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 1092; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] 1093; IF-EVL: for.end: 1094; IF-EVL-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP15]], [[MIDDLE_BLOCK]] ] 1095; IF-EVL-NEXT: ret float [[ADD_LCSSA]] 1096; 1097; NO-VP-LABEL: @fadd( 1098; NO-VP-NEXT: entry: 1099; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 1100; NO-VP-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 1101; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] 1102; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1103; NO-VP: vector.ph: 1104; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 1105; NO-VP-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 1106; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] 1107; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] 1108; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 1109; NO-VP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 1110; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] 1111; NO-VP: vector.body: 1112; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1113; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi float [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] 1114; NO-VP-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 1115; NO-VP-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP6]] 1116; NO-VP-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0 1117; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP8]], align 4 1118; NO-VP-NEXT: [[TMP9:%.*]] = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, <vscale x 4 x float> [[WIDE_LOAD]]) 1119; NO-VP-NEXT: [[TMP10]] = fadd reassoc float [[TMP9]], [[VEC_PHI]] 1120; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] 1121; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1122; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] 1123; NO-VP: middle.block: 1124; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] 1125; NO-VP-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 1126; NO-VP: scalar.ph: 1127; NO-VP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1128; NO-VP-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 1129; NO-VP-NEXT: br label [[FOR_BODY:%.*]] 1130; NO-VP: for.body: 1131; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 1132; NO-VP-NEXT: [[RDX:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 1133; NO-VP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] 1134; NO-VP-NEXT: [[TMP12:%.*]] = load float, ptr [[ARRAYIDX]], align 4 1135; NO-VP-NEXT: [[ADD]] = fadd reassoc float [[TMP12]], [[RDX]] 1136; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1137; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 1138; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] 1139; NO-VP: for.end: 1140; NO-VP-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] 1141; NO-VP-NEXT: ret float [[ADD_LCSSA]] 1142; 1143entry: 1144 br label %for.body 1145 1146for.body: 1147 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 1148 %rdx = phi float [ %start, %entry ], [ %add, %for.body ] 1149 %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv 1150 %0 = load float, ptr %arrayidx, align 4 1151 %add = fadd reassoc float %0, %rdx 1152 %iv.next = add nuw nsw i64 %iv, 1 1153 %exitcond.not = icmp eq i64 %iv.next, %n 1154 br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 1155 1156for.end: 1157 ret float %add 1158} 1159 1160; not support fmul reduction for scalable vector 1161define float @fmul(ptr %a, i64 %n, float %start) { 1162; IF-EVL-LABEL: @fmul( 1163; IF-EVL-NEXT: entry: 1164; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] 1165; IF-EVL: for.body: 1166; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 1167; IF-EVL-NEXT: [[RDX:%.*]] = phi float [ [[START:%.*]], [[ENTRY]] ], [ [[MUL:%.*]], [[FOR_BODY]] ] 1168; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[IV]] 1169; IF-EVL-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4 1170; IF-EVL-NEXT: [[MUL]] = fmul reassoc float [[TMP0]], [[RDX]] 1171; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1172; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N:%.*]] 1173; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP4]] 1174; IF-EVL: for.end: 1175; IF-EVL-NEXT: [[MUL_LCSSA:%.*]] = phi float [ [[MUL]], [[FOR_BODY]] ] 1176; IF-EVL-NEXT: ret float [[MUL_LCSSA]] 1177; 1178; NO-VP-LABEL: @fmul( 1179; NO-VP-NEXT: entry: 1180; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 8 1181; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1182; NO-VP: vector.ph: 1183; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 8 1184; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] 1185; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] 1186; NO-VP: vector.body: 1187; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1188; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi float [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] 1189; NO-VP-NEXT: [[VEC_PHI1:%.*]] = phi float [ 1.000000e+00, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] 1190; NO-VP-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 1191; NO-VP-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP0]] 1192; NO-VP-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0 1193; NO-VP-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 4 1194; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP4]], align 4 1195; NO-VP-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP5]], align 4 1196; NO-VP-NEXT: [[TMP6:%.*]] = call reassoc float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[WIDE_LOAD]]) 1197; NO-VP-NEXT: [[TMP7]] = fmul reassoc float [[TMP6]], [[VEC_PHI]] 1198; NO-VP-NEXT: [[TMP8:%.*]] = call reassoc float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[WIDE_LOAD2]]) 1199; NO-VP-NEXT: [[TMP9]] = fmul reassoc float [[TMP8]], [[VEC_PHI1]] 1200; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 1201; NO-VP-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1202; NO-VP-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] 1203; NO-VP: middle.block: 1204; NO-VP-NEXT: [[BIN_RDX:%.*]] = fmul reassoc float [[TMP9]], [[TMP7]] 1205; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] 1206; NO-VP-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 1207; NO-VP: scalar.ph: 1208; NO-VP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1209; NO-VP-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 1210; NO-VP-NEXT: br label [[FOR_BODY:%.*]] 1211; NO-VP: for.body: 1212; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 1213; NO-VP-NEXT: [[RDX:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MUL:%.*]], [[FOR_BODY]] ] 1214; NO-VP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] 1215; NO-VP-NEXT: [[TMP11:%.*]] = load float, ptr [[ARRAYIDX]], align 4 1216; NO-VP-NEXT: [[MUL]] = fmul reassoc float [[TMP11]], [[RDX]] 1217; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1218; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 1219; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] 1220; NO-VP: for.end: 1221; NO-VP-NEXT: [[MUL_LCSSA:%.*]] = phi float [ [[MUL]], [[FOR_BODY]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ] 1222; NO-VP-NEXT: ret float [[MUL_LCSSA]] 1223; 1224entry: 1225 br label %for.body 1226 1227for.body: 1228 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 1229 %rdx = phi float [ %start, %entry ], [ %mul, %for.body ] 1230 %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv 1231 %0 = load float, ptr %arrayidx, align 4 1232 %mul = fmul reassoc float %0, %rdx 1233 %iv.next = add nuw nsw i64 %iv, 1 1234 %exitcond.not = icmp eq i64 %iv.next, %n 1235 br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 1236 1237for.end: 1238 ret float %mul 1239} 1240 1241define float @fmin(ptr %a, i64 %n, float %start) #0 { 1242; IF-EVL-LABEL: @fmin( 1243; IF-EVL-NEXT: entry: 1244; IF-EVL-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N:%.*]] 1245; IF-EVL-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() 1246; IF-EVL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4 1247; IF-EVL-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]] 1248; IF-EVL-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1249; IF-EVL: vector.ph: 1250; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 1251; IF-EVL-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 1252; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 1253; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]] 1254; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] 1255; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] 1256; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() 1257; IF-EVL-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 1258; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] 1259; IF-EVL: vector.body: 1260; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1261; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] 1262; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi float [ [[START:%.*]], [[VECTOR_PH]] ], [ [[RDX_MINMAX_SELECT:%.*]], [[VECTOR_BODY]] ] 1263; IF-EVL-NEXT: [[TMP9:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]] 1264; IF-EVL-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP9]], i32 4, i1 true) 1265; IF-EVL-NEXT: [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0 1266; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP11]] 1267; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i32 0 1268; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) 1269; IF-EVL-NEXT: [[TMP14:%.*]] = call fast float @llvm.vp.reduce.fmin.nxv4f32(float 0x47EFFFFFE0000000, <vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) 1270; IF-EVL-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast olt float [[TMP14]], [[VEC_PHI]] 1271; IF-EVL-NEXT: [[RDX_MINMAX_SELECT]] = select fast i1 [[RDX_MINMAX_CMP]], float [[TMP14]], float [[VEC_PHI]] 1272; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64 1273; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]] 1274; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] 1275; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1276; IF-EVL-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] 1277; IF-EVL: middle.block: 1278; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] 1279; IF-EVL: scalar.ph: 1280; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1281; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[RDX_MINMAX_SELECT]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 1282; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] 1283; IF-EVL: for.body: 1284; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 1285; IF-EVL-NEXT: [[RDX:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MIN:%.*]], [[FOR_BODY]] ] 1286; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] 1287; IF-EVL-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX]], align 4 1288; IF-EVL-NEXT: [[CMP:%.*]] = fcmp fast olt float [[TMP17]], [[RDX]] 1289; IF-EVL-NEXT: [[MIN]] = select i1 [[CMP]], float [[TMP17]], float [[RDX]] 1290; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1291; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 1292; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] 1293; IF-EVL: for.end: 1294; IF-EVL-NEXT: [[MIN_LCSSA:%.*]] = phi float [ [[MIN]], [[FOR_BODY]] ], [ [[RDX_MINMAX_SELECT]], [[MIDDLE_BLOCK]] ] 1295; IF-EVL-NEXT: ret float [[MIN_LCSSA]] 1296; 1297; NO-VP-LABEL: @fmin( 1298; NO-VP-NEXT: entry: 1299; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 1300; NO-VP-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 1301; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] 1302; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1303; NO-VP: vector.ph: 1304; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 1305; NO-VP-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 1306; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] 1307; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] 1308; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 1309; NO-VP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 1310; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] 1311; NO-VP: vector.body: 1312; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1313; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi float [ [[START:%.*]], [[VECTOR_PH]] ], [ [[RDX_MINMAX_SELECT:%.*]], [[VECTOR_BODY]] ] 1314; NO-VP-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 1315; NO-VP-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP6]] 1316; NO-VP-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0 1317; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP8]], align 4 1318; NO-VP-NEXT: [[TMP9:%.*]] = call fast float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> [[WIDE_LOAD]]) 1319; NO-VP-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast olt float [[TMP9]], [[VEC_PHI]] 1320; NO-VP-NEXT: [[RDX_MINMAX_SELECT]] = select fast i1 [[RDX_MINMAX_CMP]], float [[TMP9]], float [[VEC_PHI]] 1321; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] 1322; NO-VP-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1323; NO-VP-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] 1324; NO-VP: middle.block: 1325; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] 1326; NO-VP-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 1327; NO-VP: scalar.ph: 1328; NO-VP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1329; NO-VP-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[RDX_MINMAX_SELECT]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 1330; NO-VP-NEXT: br label [[FOR_BODY:%.*]] 1331; NO-VP: for.body: 1332; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 1333; NO-VP-NEXT: [[RDX:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MIN:%.*]], [[FOR_BODY]] ] 1334; NO-VP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] 1335; NO-VP-NEXT: [[TMP11:%.*]] = load float, ptr [[ARRAYIDX]], align 4 1336; NO-VP-NEXT: [[CMP:%.*]] = fcmp fast olt float [[TMP11]], [[RDX]] 1337; NO-VP-NEXT: [[MIN]] = select i1 [[CMP]], float [[TMP11]], float [[RDX]] 1338; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1339; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 1340; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] 1341; NO-VP: for.end: 1342; NO-VP-NEXT: [[MIN_LCSSA:%.*]] = phi float [ [[MIN]], [[FOR_BODY]] ], [ [[RDX_MINMAX_SELECT]], [[MIDDLE_BLOCK]] ] 1343; NO-VP-NEXT: ret float [[MIN_LCSSA]] 1344; 1345entry: 1346 br label %for.body 1347 1348for.body: 1349 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 1350 %rdx = phi float [ %start, %entry ], [ %min, %for.body ] 1351 %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv 1352 %0 = load float, ptr %arrayidx, align 4 1353 %cmp = fcmp fast olt float %0, %rdx 1354 %min = select i1 %cmp, float %0, float %rdx 1355 %iv.next = add nuw nsw i64 %iv, 1 1356 %exitcond.not = icmp eq i64 %iv.next, %n 1357 br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 1358 1359for.end: 1360 ret float %min 1361} 1362 1363define float @fmax(ptr %a, i64 %n, float %start) #0 { 1364; IF-EVL-LABEL: @fmax( 1365; IF-EVL-NEXT: entry: 1366; IF-EVL-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N:%.*]] 1367; IF-EVL-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() 1368; IF-EVL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4 1369; IF-EVL-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]] 1370; IF-EVL-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1371; IF-EVL: vector.ph: 1372; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 1373; IF-EVL-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 1374; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 1375; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]] 1376; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] 1377; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] 1378; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() 1379; IF-EVL-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 1380; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] 1381; IF-EVL: vector.body: 1382; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1383; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] 1384; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi float [ [[START:%.*]], [[VECTOR_PH]] ], [ [[RDX_MINMAX_SELECT:%.*]], [[VECTOR_BODY]] ] 1385; IF-EVL-NEXT: [[TMP9:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]] 1386; IF-EVL-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP9]], i32 4, i1 true) 1387; IF-EVL-NEXT: [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0 1388; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP11]] 1389; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i32 0 1390; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) 1391; IF-EVL-NEXT: [[TMP14:%.*]] = call fast float @llvm.vp.reduce.fmax.nxv4f32(float 0xC7EFFFFFE0000000, <vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) 1392; IF-EVL-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt float [[TMP14]], [[VEC_PHI]] 1393; IF-EVL-NEXT: [[RDX_MINMAX_SELECT]] = select fast i1 [[RDX_MINMAX_CMP]], float [[TMP14]], float [[VEC_PHI]] 1394; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64 1395; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]] 1396; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] 1397; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1398; IF-EVL-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] 1399; IF-EVL: middle.block: 1400; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] 1401; IF-EVL: scalar.ph: 1402; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1403; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[RDX_MINMAX_SELECT]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 1404; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] 1405; IF-EVL: for.body: 1406; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 1407; IF-EVL-NEXT: [[RDX:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MAX:%.*]], [[FOR_BODY]] ] 1408; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] 1409; IF-EVL-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX]], align 4 1410; IF-EVL-NEXT: [[CMP:%.*]] = fcmp fast ogt float [[TMP17]], [[RDX]] 1411; IF-EVL-NEXT: [[MAX]] = select i1 [[CMP]], float [[TMP17]], float [[RDX]] 1412; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1413; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 1414; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] 1415; IF-EVL: for.end: 1416; IF-EVL-NEXT: [[MAX_LCSSA:%.*]] = phi float [ [[MAX]], [[FOR_BODY]] ], [ [[RDX_MINMAX_SELECT]], [[MIDDLE_BLOCK]] ] 1417; IF-EVL-NEXT: ret float [[MAX_LCSSA]] 1418; 1419; NO-VP-LABEL: @fmax( 1420; NO-VP-NEXT: entry: 1421; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 1422; NO-VP-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 1423; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] 1424; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1425; NO-VP: vector.ph: 1426; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 1427; NO-VP-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 1428; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] 1429; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] 1430; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 1431; NO-VP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 1432; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] 1433; NO-VP: vector.body: 1434; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1435; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi float [ [[START:%.*]], [[VECTOR_PH]] ], [ [[RDX_MINMAX_SELECT:%.*]], [[VECTOR_BODY]] ] 1436; NO-VP-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 1437; NO-VP-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP6]] 1438; NO-VP-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0 1439; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP8]], align 4 1440; NO-VP-NEXT: [[TMP9:%.*]] = call fast float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> [[WIDE_LOAD]]) 1441; NO-VP-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt float [[TMP9]], [[VEC_PHI]] 1442; NO-VP-NEXT: [[RDX_MINMAX_SELECT]] = select fast i1 [[RDX_MINMAX_CMP]], float [[TMP9]], float [[VEC_PHI]] 1443; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] 1444; NO-VP-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1445; NO-VP-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] 1446; NO-VP: middle.block: 1447; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] 1448; NO-VP-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 1449; NO-VP: scalar.ph: 1450; NO-VP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1451; NO-VP-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[RDX_MINMAX_SELECT]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 1452; NO-VP-NEXT: br label [[FOR_BODY:%.*]] 1453; NO-VP: for.body: 1454; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 1455; NO-VP-NEXT: [[RDX:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MAX:%.*]], [[FOR_BODY]] ] 1456; NO-VP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] 1457; NO-VP-NEXT: [[TMP11:%.*]] = load float, ptr [[ARRAYIDX]], align 4 1458; NO-VP-NEXT: [[CMP:%.*]] = fcmp fast ogt float [[TMP11]], [[RDX]] 1459; NO-VP-NEXT: [[MAX]] = select i1 [[CMP]], float [[TMP11]], float [[RDX]] 1460; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1461; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 1462; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] 1463; NO-VP: for.end: 1464; NO-VP-NEXT: [[MAX_LCSSA:%.*]] = phi float [ [[MAX]], [[FOR_BODY]] ], [ [[RDX_MINMAX_SELECT]], [[MIDDLE_BLOCK]] ] 1465; NO-VP-NEXT: ret float [[MAX_LCSSA]] 1466; 1467entry: 1468 br label %for.body 1469 1470for.body: 1471 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 1472 %rdx = phi float [ %start, %entry ], [ %max, %for.body ] 1473 %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv 1474 %0 = load float, ptr %arrayidx, align 4 1475 %cmp = fcmp fast ogt float %0, %rdx 1476 %max = select i1 %cmp, float %0, float %rdx 1477 %iv.next = add nuw nsw i64 %iv, 1 1478 %exitcond.not = icmp eq i64 %iv.next, %n 1479 br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 1480 1481for.end: 1482 ret float %max 1483} 1484 1485define float @fminimum(ptr %a, i64 %n, float %start) { 1486; IF-EVL-LABEL: @fminimum( 1487; IF-EVL-NEXT: entry: 1488; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] 1489; IF-EVL: for.body: 1490; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 1491; IF-EVL-NEXT: [[RDX:%.*]] = phi float [ [[START:%.*]], [[ENTRY]] ], [ [[MIN:%.*]], [[FOR_BODY]] ] 1492; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[IV]] 1493; IF-EVL-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4 1494; IF-EVL-NEXT: [[MIN]] = tail call float @llvm.minimum.f32(float [[RDX]], float [[TMP0]]) 1495; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1496; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N:%.*]] 1497; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP4]] 1498; IF-EVL: for.end: 1499; IF-EVL-NEXT: [[MIN_LCSSA:%.*]] = phi float [ [[MIN]], [[FOR_BODY]] ] 1500; IF-EVL-NEXT: ret float [[MIN_LCSSA]] 1501; 1502; NO-VP-LABEL: @fminimum( 1503; NO-VP-NEXT: entry: 1504; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 16 1505; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1506; NO-VP: vector.ph: 1507; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16 1508; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] 1509; NO-VP-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <8 x float> poison, float [[START:%.*]], i64 0 1510; NO-VP-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <8 x float> [[MINMAX_IDENT_SPLATINSERT]], <8 x float> poison, <8 x i32> zeroinitializer 1511; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] 1512; NO-VP: vector.body: 1513; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1514; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi <8 x float> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] 1515; NO-VP-NEXT: [[VEC_PHI1:%.*]] = phi <8 x float> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] 1516; NO-VP-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 1517; NO-VP-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP0]] 1518; NO-VP-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0 1519; NO-VP-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 8 1520; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <8 x float>, ptr [[TMP4]], align 4 1521; NO-VP-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x float>, ptr [[TMP5]], align 4 1522; NO-VP-NEXT: [[TMP6]] = call <8 x float> @llvm.minimum.v8f32(<8 x float> [[VEC_PHI]], <8 x float> [[WIDE_LOAD]]) 1523; NO-VP-NEXT: [[TMP7]] = call <8 x float> @llvm.minimum.v8f32(<8 x float> [[VEC_PHI1]], <8 x float> [[WIDE_LOAD2]]) 1524; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 1525; NO-VP-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1526; NO-VP-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] 1527; NO-VP: middle.block: 1528; NO-VP-NEXT: [[RDX_MINMAX:%.*]] = call <8 x float> @llvm.minimum.v8f32(<8 x float> [[TMP6]], <8 x float> [[TMP7]]) 1529; NO-VP-NEXT: [[TMP9:%.*]] = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> [[RDX_MINMAX]]) 1530; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] 1531; NO-VP-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 1532; NO-VP: scalar.ph: 1533; NO-VP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1534; NO-VP-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP9]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 1535; NO-VP-NEXT: br label [[FOR_BODY:%.*]] 1536; NO-VP: for.body: 1537; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 1538; NO-VP-NEXT: [[RDX:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MIN:%.*]], [[FOR_BODY]] ] 1539; NO-VP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] 1540; NO-VP-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX]], align 4 1541; NO-VP-NEXT: [[MIN]] = tail call float @llvm.minimum.f32(float [[RDX]], float [[TMP10]]) 1542; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1543; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 1544; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]] 1545; NO-VP: for.end: 1546; NO-VP-NEXT: [[MIN_LCSSA:%.*]] = phi float [ [[MIN]], [[FOR_BODY]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ] 1547; NO-VP-NEXT: ret float [[MIN_LCSSA]] 1548; 1549entry: 1550 br label %for.body 1551 1552for.body: 1553 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 1554 %rdx = phi float [ %start, %entry ], [ %min, %for.body ] 1555 %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv 1556 %0 = load float, ptr %arrayidx, align 4 1557 %min = tail call float @llvm.minimum.f32(float %rdx, float %0) 1558 %iv.next = add nuw nsw i64 %iv, 1 1559 %exitcond.not = icmp eq i64 %iv.next, %n 1560 br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 1561 1562for.end: 1563 ret float %min 1564} 1565 1566define float @fmaximum(ptr %a, i64 %n, float %start) { 1567; IF-EVL-LABEL: @fmaximum( 1568; IF-EVL-NEXT: entry: 1569; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] 1570; IF-EVL: for.body: 1571; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 1572; IF-EVL-NEXT: [[RDX:%.*]] = phi float [ [[START:%.*]], [[ENTRY]] ], [ [[MAX:%.*]], [[FOR_BODY]] ] 1573; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[IV]] 1574; IF-EVL-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4 1575; IF-EVL-NEXT: [[MAX]] = tail call float @llvm.maximum.f32(float [[RDX]], float [[TMP0]]) 1576; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1577; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N:%.*]] 1578; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP4]] 1579; IF-EVL: for.end: 1580; IF-EVL-NEXT: [[MAX_LCSSA:%.*]] = phi float [ [[MAX]], [[FOR_BODY]] ] 1581; IF-EVL-NEXT: ret float [[MAX_LCSSA]] 1582; 1583; NO-VP-LABEL: @fmaximum( 1584; NO-VP-NEXT: entry: 1585; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 16 1586; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1587; NO-VP: vector.ph: 1588; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16 1589; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] 1590; NO-VP-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <8 x float> poison, float [[START:%.*]], i64 0 1591; NO-VP-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <8 x float> [[MINMAX_IDENT_SPLATINSERT]], <8 x float> poison, <8 x i32> zeroinitializer 1592; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] 1593; NO-VP: vector.body: 1594; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1595; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi <8 x float> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] 1596; NO-VP-NEXT: [[VEC_PHI1:%.*]] = phi <8 x float> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] 1597; NO-VP-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 1598; NO-VP-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP0]] 1599; NO-VP-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0 1600; NO-VP-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 8 1601; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <8 x float>, ptr [[TMP4]], align 4 1602; NO-VP-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x float>, ptr [[TMP5]], align 4 1603; NO-VP-NEXT: [[TMP6]] = call <8 x float> @llvm.maximum.v8f32(<8 x float> [[VEC_PHI]], <8 x float> [[WIDE_LOAD]]) 1604; NO-VP-NEXT: [[TMP7]] = call <8 x float> @llvm.maximum.v8f32(<8 x float> [[VEC_PHI1]], <8 x float> [[WIDE_LOAD2]]) 1605; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 1606; NO-VP-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1607; NO-VP-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] 1608; NO-VP: middle.block: 1609; NO-VP-NEXT: [[RDX_MINMAX:%.*]] = call <8 x float> @llvm.maximum.v8f32(<8 x float> [[TMP6]], <8 x float> [[TMP7]]) 1610; NO-VP-NEXT: [[TMP9:%.*]] = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> [[RDX_MINMAX]]) 1611; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] 1612; NO-VP-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 1613; NO-VP: scalar.ph: 1614; NO-VP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1615; NO-VP-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP9]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 1616; NO-VP-NEXT: br label [[FOR_BODY:%.*]] 1617; NO-VP: for.body: 1618; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 1619; NO-VP-NEXT: [[RDX:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MAX:%.*]], [[FOR_BODY]] ] 1620; NO-VP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] 1621; NO-VP-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX]], align 4 1622; NO-VP-NEXT: [[MAX]] = tail call float @llvm.maximum.f32(float [[RDX]], float [[TMP10]]) 1623; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1624; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 1625; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]] 1626; NO-VP: for.end: 1627; NO-VP-NEXT: [[MAX_LCSSA:%.*]] = phi float [ [[MAX]], [[FOR_BODY]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ] 1628; NO-VP-NEXT: ret float [[MAX_LCSSA]] 1629; 1630entry: 1631 br label %for.body 1632 1633for.body: 1634 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 1635 %rdx = phi float [ %start, %entry ], [ %max, %for.body ] 1636 %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv 1637 %0 = load float, ptr %arrayidx, align 4 1638 %max = tail call float @llvm.maximum.f32(float %rdx, float %0) 1639 %iv.next = add nuw nsw i64 %iv, 1 1640 %exitcond.not = icmp eq i64 %iv.next, %n 1641 br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 1642 1643for.end: 1644 ret float %max 1645} 1646 1647define float @fmuladd(ptr %a, ptr %b, i64 %n, float %start) { 1648; IF-EVL-LABEL: @fmuladd( 1649; IF-EVL-NEXT: entry: 1650; IF-EVL-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N:%.*]] 1651; IF-EVL-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() 1652; IF-EVL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4 1653; IF-EVL-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]] 1654; IF-EVL-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1655; IF-EVL: vector.ph: 1656; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 1657; IF-EVL-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 1658; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 1659; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]] 1660; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] 1661; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] 1662; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() 1663; IF-EVL-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 1664; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] 1665; IF-EVL: vector.body: 1666; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1667; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] 1668; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi float [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ] 1669; IF-EVL-NEXT: [[TMP9:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]] 1670; IF-EVL-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP9]], i32 4, i1 true) 1671; IF-EVL-NEXT: [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0 1672; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP11]] 1673; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i32 0 1674; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) 1675; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[TMP11]] 1676; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0 1677; IF-EVL-NEXT: [[VP_OP_LOAD1:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP15]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) 1678; IF-EVL-NEXT: [[TMP16:%.*]] = fmul reassoc <vscale x 4 x float> [[VP_OP_LOAD]], [[VP_OP_LOAD1]] 1679; IF-EVL-NEXT: [[TMP17:%.*]] = call reassoc float @llvm.vp.reduce.fadd.nxv4f32(float -0.000000e+00, <vscale x 4 x float> [[TMP16]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) 1680; IF-EVL-NEXT: [[TMP18]] = fadd reassoc float [[TMP17]], [[VEC_PHI]] 1681; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP10]] to i64 1682; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP19]], [[EVL_BASED_IV]] 1683; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] 1684; IF-EVL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1685; IF-EVL-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] 1686; IF-EVL: middle.block: 1687; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] 1688; IF-EVL: scalar.ph: 1689; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1690; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP18]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 1691; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] 1692; IF-EVL: for.body: 1693; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 1694; IF-EVL-NEXT: [[RDX:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MULADD:%.*]], [[FOR_BODY]] ] 1695; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] 1696; IF-EVL-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX]], align 4 1697; IF-EVL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]] 1698; IF-EVL-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 1699; IF-EVL-NEXT: [[MULADD]] = tail call reassoc float @llvm.fmuladd.f32(float [[TMP21]], float [[TMP22]], float [[RDX]]) 1700; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1701; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 1702; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] 1703; IF-EVL: for.end: 1704; IF-EVL-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP18]], [[MIDDLE_BLOCK]] ] 1705; IF-EVL-NEXT: ret float [[MULADD_LCSSA]] 1706; 1707; NO-VP-LABEL: @fmuladd( 1708; NO-VP-NEXT: entry: 1709; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 1710; NO-VP-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 1711; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] 1712; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1713; NO-VP: vector.ph: 1714; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 1715; NO-VP-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 1716; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] 1717; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] 1718; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 1719; NO-VP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 1720; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] 1721; NO-VP: vector.body: 1722; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1723; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi float [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ] 1724; NO-VP-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 1725; NO-VP-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP6]] 1726; NO-VP-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0 1727; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP8]], align 4 1728; NO-VP-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[TMP6]] 1729; NO-VP-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP9]], i32 0 1730; NO-VP-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x float>, ptr [[TMP10]], align 4 1731; NO-VP-NEXT: [[TMP11:%.*]] = fmul reassoc <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]] 1732; NO-VP-NEXT: [[TMP12:%.*]] = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, <vscale x 4 x float> [[TMP11]]) 1733; NO-VP-NEXT: [[TMP13]] = fadd reassoc float [[TMP12]], [[VEC_PHI]] 1734; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] 1735; NO-VP-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1736; NO-VP-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] 1737; NO-VP: middle.block: 1738; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] 1739; NO-VP-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 1740; NO-VP: scalar.ph: 1741; NO-VP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1742; NO-VP-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP13]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 1743; NO-VP-NEXT: br label [[FOR_BODY:%.*]] 1744; NO-VP: for.body: 1745; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 1746; NO-VP-NEXT: [[RDX:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MULADD:%.*]], [[FOR_BODY]] ] 1747; NO-VP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] 1748; NO-VP-NEXT: [[TMP15:%.*]] = load float, ptr [[ARRAYIDX]], align 4 1749; NO-VP-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]] 1750; NO-VP-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 1751; NO-VP-NEXT: [[MULADD]] = tail call reassoc float @llvm.fmuladd.f32(float [[TMP15]], float [[TMP16]], float [[RDX]]) 1752; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1753; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 1754; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP33:![0-9]+]] 1755; NO-VP: for.end: 1756; NO-VP-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ] 1757; NO-VP-NEXT: ret float [[MULADD_LCSSA]] 1758; 1759entry: 1760 br label %for.body 1761 1762for.body: 1763 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 1764 %rdx = phi float [ %start, %entry ], [ %muladd, %for.body ] 1765 %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv 1766 %0 = load float, ptr %arrayidx, align 4 1767 %arrayidx2 = getelementptr inbounds float, ptr %b, i64 %iv 1768 %1 = load float, ptr %arrayidx2, align 4 1769 %muladd = tail call reassoc float @llvm.fmuladd.f32(float %0, float %1, float %rdx) 1770 %iv.next = add nuw nsw i64 %iv, 1 1771 %exitcond.not = icmp eq i64 %iv.next, %n 1772 br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 1773 1774for.end: 1775 ret float %muladd 1776} 1777 1778define i32 @anyof_icmp(ptr %a, i64 %n, i32 %start, i32 %inv) { 1779; IF-EVL-LABEL: @anyof_icmp( 1780; IF-EVL-NEXT: entry: 1781; IF-EVL-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N:%.*]] 1782; IF-EVL-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() 1783; IF-EVL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4 1784; IF-EVL-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]] 1785; IF-EVL-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1786; IF-EVL: vector.ph: 1787; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 1788; IF-EVL-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 1789; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 1790; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]] 1791; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] 1792; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] 1793; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() 1794; IF-EVL-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 1795; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] 1796; IF-EVL: vector.body: 1797; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1798; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] 1799; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ] 1800; IF-EVL-NEXT: [[TMP9:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]] 1801; IF-EVL-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP9]], i32 4, i1 true) 1802; IF-EVL-NEXT: [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0 1803; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP11]] 1804; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0 1805; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) 1806; IF-EVL-NEXT: [[TMP14:%.*]] = icmp slt <vscale x 4 x i32> [[VP_OP_LOAD]], splat (i32 3) 1807; IF-EVL-NEXT: [[TMP15:%.*]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP14]] 1808; IF-EVL-NEXT: [[TMP16]] = call <vscale x 4 x i1> @llvm.vp.merge.nxv4i1(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i1> [[TMP15]], <vscale x 4 x i1> [[VEC_PHI]], i32 [[TMP10]]) 1809; IF-EVL-NEXT: [[TMP17:%.*]] = zext i32 [[TMP10]] to i64 1810; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP17]], [[EVL_BASED_IV]] 1811; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] 1812; IF-EVL-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1813; IF-EVL-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] 1814; IF-EVL: middle.block: 1815; IF-EVL-NEXT: [[TMP19:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP16]]) 1816; IF-EVL-NEXT: [[TMP20:%.*]] = freeze i1 [[TMP19]] 1817; IF-EVL-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP20]], i32 [[INV:%.*]], i32 [[START:%.*]] 1818; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] 1819; IF-EVL: scalar.ph: 1820; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1821; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 1822; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] 1823; IF-EVL: for.body: 1824; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 1825; IF-EVL-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ANYOF:%.*]], [[FOR_BODY]] ] 1826; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 1827; IF-EVL-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 1828; IF-EVL-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[TMP21]], 3 1829; IF-EVL-NEXT: [[ANYOF]] = select i1 [[CMP_I]], i32 [[INV]], i32 [[RDX]] 1830; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1831; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 1832; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]] 1833; IF-EVL: for.end: 1834; IF-EVL-NEXT: [[ANYOF_LCSSA:%.*]] = phi i32 [ [[ANYOF]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] 1835; IF-EVL-NEXT: ret i32 [[ANYOF_LCSSA]] 1836; 1837; NO-VP-LABEL: @anyof_icmp( 1838; NO-VP-NEXT: entry: 1839; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 1840; NO-VP-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 1841; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] 1842; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1843; NO-VP: vector.ph: 1844; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 1845; NO-VP-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 1846; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] 1847; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] 1848; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 1849; NO-VP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 1850; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] 1851; NO-VP: vector.body: 1852; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1853; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] 1854; NO-VP-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 1855; NO-VP-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP6]] 1856; NO-VP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 1857; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP8]], align 4 1858; NO-VP-NEXT: [[TMP9:%.*]] = icmp slt <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 3) 1859; NO-VP-NEXT: [[TMP10]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP9]] 1860; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] 1861; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1862; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] 1863; NO-VP: middle.block: 1864; NO-VP-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP10]]) 1865; NO-VP-NEXT: [[TMP13:%.*]] = freeze i1 [[TMP12]] 1866; NO-VP-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP13]], i32 [[INV:%.*]], i32 [[START:%.*]] 1867; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] 1868; NO-VP-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 1869; NO-VP: scalar.ph: 1870; NO-VP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1871; NO-VP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 1872; NO-VP-NEXT: br label [[FOR_BODY:%.*]] 1873; NO-VP: for.body: 1874; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 1875; NO-VP-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ANYOF:%.*]], [[FOR_BODY]] ] 1876; NO-VP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 1877; NO-VP-NEXT: [[TMP14:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 1878; NO-VP-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[TMP14]], 3 1879; NO-VP-NEXT: [[ANYOF]] = select i1 [[CMP_I]], i32 [[INV]], i32 [[RDX]] 1880; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1881; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 1882; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP35:![0-9]+]] 1883; NO-VP: for.end: 1884; NO-VP-NEXT: [[ANYOF_LCSSA:%.*]] = phi i32 [ [[ANYOF]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] 1885; NO-VP-NEXT: ret i32 [[ANYOF_LCSSA]] 1886; 1887entry: 1888 br label %for.body 1889 1890for.body: 1891 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 1892 %rdx = phi i32 [ %start, %entry ], [ %anyof, %for.body ] 1893 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv 1894 %0 = load i32, ptr %arrayidx, align 4 1895 %cmp.i = icmp slt i32 %0, 3 1896 %anyof = select i1 %cmp.i, i32 %inv, i32 %rdx 1897 %iv.next = add nuw nsw i64 %iv, 1 1898 %exitcond.not = icmp eq i64 %iv.next, %n 1899 br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 1900 1901for.end: 1902 ret i32 %anyof 1903} 1904 1905define i32 @anyof_fcmp(ptr %a, i64 %n, i32 %start, i32 %inv) { 1906; IF-EVL-LABEL: @anyof_fcmp( 1907; IF-EVL-NEXT: entry: 1908; IF-EVL-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N:%.*]] 1909; IF-EVL-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() 1910; IF-EVL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4 1911; IF-EVL-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]] 1912; IF-EVL-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1913; IF-EVL: vector.ph: 1914; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 1915; IF-EVL-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 1916; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 1917; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]] 1918; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] 1919; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] 1920; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() 1921; IF-EVL-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 1922; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] 1923; IF-EVL: vector.body: 1924; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1925; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] 1926; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ] 1927; IF-EVL-NEXT: [[TMP9:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]] 1928; IF-EVL-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP9]], i32 4, i1 true) 1929; IF-EVL-NEXT: [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0 1930; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP11]] 1931; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i32 0 1932; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) 1933; IF-EVL-NEXT: [[TMP14:%.*]] = fcmp fast olt <vscale x 4 x float> [[VP_OP_LOAD]], splat (float 3.000000e+00) 1934; IF-EVL-NEXT: [[TMP15:%.*]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP14]] 1935; IF-EVL-NEXT: [[TMP16]] = call <vscale x 4 x i1> @llvm.vp.merge.nxv4i1(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i1> [[TMP15]], <vscale x 4 x i1> [[VEC_PHI]], i32 [[TMP10]]) 1936; IF-EVL-NEXT: [[TMP17:%.*]] = zext i32 [[TMP10]] to i64 1937; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP17]], [[EVL_BASED_IV]] 1938; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] 1939; IF-EVL-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1940; IF-EVL-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] 1941; IF-EVL: middle.block: 1942; IF-EVL-NEXT: [[TMP19:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP16]]) 1943; IF-EVL-NEXT: [[TMP20:%.*]] = freeze i1 [[TMP19]] 1944; IF-EVL-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP20]], i32 [[INV:%.*]], i32 [[START:%.*]] 1945; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] 1946; IF-EVL: scalar.ph: 1947; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1948; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 1949; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] 1950; IF-EVL: for.body: 1951; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 1952; IF-EVL-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ANYOF:%.*]], [[FOR_BODY]] ] 1953; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 1954; IF-EVL-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX]], align 4 1955; IF-EVL-NEXT: [[CMP_I:%.*]] = fcmp fast olt float [[TMP21]], 3.000000e+00 1956; IF-EVL-NEXT: [[ANYOF]] = select i1 [[CMP_I]], i32 [[INV]], i32 [[RDX]] 1957; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1958; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 1959; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]] 1960; IF-EVL: for.end: 1961; IF-EVL-NEXT: [[ANYOF_LCSSA:%.*]] = phi i32 [ [[ANYOF]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] 1962; IF-EVL-NEXT: ret i32 [[ANYOF_LCSSA]] 1963; 1964; NO-VP-LABEL: @anyof_fcmp( 1965; NO-VP-NEXT: entry: 1966; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 1967; NO-VP-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 1968; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] 1969; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1970; NO-VP: vector.ph: 1971; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 1972; NO-VP-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 1973; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] 1974; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] 1975; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 1976; NO-VP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 1977; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] 1978; NO-VP: vector.body: 1979; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1980; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] 1981; NO-VP-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 1982; NO-VP-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP6]] 1983; NO-VP-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0 1984; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP8]], align 4 1985; NO-VP-NEXT: [[TMP9:%.*]] = fcmp fast olt <vscale x 4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00) 1986; NO-VP-NEXT: [[TMP10]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP9]] 1987; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] 1988; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1989; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] 1990; NO-VP: middle.block: 1991; NO-VP-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP10]]) 1992; NO-VP-NEXT: [[TMP13:%.*]] = freeze i1 [[TMP12]] 1993; NO-VP-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP13]], i32 [[INV:%.*]], i32 [[START:%.*]] 1994; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] 1995; NO-VP-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 1996; NO-VP: scalar.ph: 1997; NO-VP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1998; NO-VP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 1999; NO-VP-NEXT: br label [[FOR_BODY:%.*]] 2000; NO-VP: for.body: 2001; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 2002; NO-VP-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ANYOF:%.*]], [[FOR_BODY]] ] 2003; NO-VP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 2004; NO-VP-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4 2005; NO-VP-NEXT: [[CMP_I:%.*]] = fcmp fast olt float [[TMP14]], 3.000000e+00 2006; NO-VP-NEXT: [[ANYOF]] = select i1 [[CMP_I]], i32 [[INV]], i32 [[RDX]] 2007; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 2008; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 2009; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP37:![0-9]+]] 2010; NO-VP: for.end: 2011; NO-VP-NEXT: [[ANYOF_LCSSA:%.*]] = phi i32 [ [[ANYOF]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] 2012; NO-VP-NEXT: ret i32 [[ANYOF_LCSSA]] 2013; 2014entry: 2015 br label %for.body 2016 2017for.body: 2018 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 2019 %rdx = phi i32 [ %start, %entry ], [ %anyof, %for.body ] 2020 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv 2021 %0 = load float, ptr %arrayidx, align 4 2022 %cmp.i = fcmp fast olt float %0, 3.0 2023 %anyof = select i1 %cmp.i, i32 %inv, i32 %rdx 2024 %iv.next = add nuw nsw i64 %iv, 1 2025 %exitcond.not = icmp eq i64 %iv.next, %n 2026 br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 2027 2028for.end: 2029 ret i32 %anyof 2030} 2031 2032declare float @llvm.minimum.f32(float, float) 2033declare float @llvm.maximum.f32(float, float) 2034declare float @llvm.fmuladd.f32(float, float, float) 2035 2036attributes #0 = { "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" } 2037 2038!0 = distinct !{!0, !1} 2039!1 = !{!"llvm.loop.vectorize.enable", i1 true} 2040