1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 2; RUN: opt -passes=loop-vectorize \ 3; RUN: -force-tail-folding-style=data-with-evl \ 4; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \ 5; RUN: -mtriple=riscv64 -mattr=+v,+f -S < %s| FileCheck %s --check-prefix=IF-EVL 6 7; RUN: opt -passes=loop-vectorize \ 8; RUN: -force-tail-folding-style=none \ 9; RUN: -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue \ 10; RUN: -mtriple=riscv64 -mattr=+v,+f -S < %s| FileCheck %s --check-prefix=NO-VP 11 12define i32 @add(ptr %a, i64 %n, i32 %start) { 13; IF-EVL-LABEL: @add( 14; IF-EVL-NEXT: entry: 15; IF-EVL-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N:%.*]] 16; IF-EVL-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() 17; IF-EVL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4 18; IF-EVL-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]] 19; IF-EVL-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 20; IF-EVL: vector.ph: 21; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 22; IF-EVL-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 23; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 24; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]] 25; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] 26; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] 27; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() 28; IF-EVL-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 29; IF-EVL-NEXT: [[TMP9:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START:%.*]], i32 0 30; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] 31; IF-EVL: vector.body: 32; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 33; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] 34; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[TMP9]], [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ] 35; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]] 36; IF-EVL-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) 37; IF-EVL-NEXT: [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0 38; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP11]] 39; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0 40; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) 41; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) 42; IF-EVL-NEXT: [[TMP14]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[VP_OP]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP10]]) 43; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64 44; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]] 45; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] 46; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 47; IF-EVL-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] 48; IF-EVL: middle.block: 49; IF-EVL-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP14]]) 50; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] 51; IF-EVL: scalar.ph: 52; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 53; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP17]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 54; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] 55; IF-EVL: for.body: 56; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 57; IF-EVL-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 58; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 59; IF-EVL-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 60; IF-EVL-NEXT: [[ADD]] = add nsw i32 [[TMP18]], [[RDX]] 61; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 62; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 63; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] 64; IF-EVL: for.end: 65; IF-EVL-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ] 66; IF-EVL-NEXT: ret i32 [[ADD_LCSSA]] 67; 68; NO-VP-LABEL: @add( 69; NO-VP-NEXT: entry: 70; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 71; NO-VP-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 72; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] 73; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 74; NO-VP: vector.ph: 75; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 76; NO-VP-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 77; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] 78; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] 79; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 80; NO-VP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 81; NO-VP-NEXT: [[TMP6:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START:%.*]], i32 0 82; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] 83; NO-VP: vector.body: 84; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 85; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[TMP6]], [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] 86; NO-VP-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0 87; NO-VP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP7]] 88; NO-VP-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 0 89; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP9]], align 4 90; NO-VP-NEXT: [[TMP10]] = add <vscale x 4 x i32> [[WIDE_LOAD]], [[VEC_PHI]] 91; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] 92; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 93; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] 94; NO-VP: middle.block: 95; NO-VP-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP10]]) 96; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] 97; NO-VP-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 98; NO-VP: scalar.ph: 99; NO-VP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 100; NO-VP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP12]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 101; NO-VP-NEXT: br label [[FOR_BODY:%.*]] 102; NO-VP: for.body: 103; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 104; NO-VP-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 105; NO-VP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 106; NO-VP-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 107; NO-VP-NEXT: [[ADD]] = add nsw i32 [[TMP13]], [[RDX]] 108; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 109; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 110; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] 111; NO-VP: for.end: 112; NO-VP-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] 113; NO-VP-NEXT: ret i32 [[ADD_LCSSA]] 114; 115entry: 116 br label %for.body 117 118for.body: 119 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 120 %rdx = phi i32 [ %start, %entry ], [ %add, %for.body ] 121 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv 122 %0 = load i32, ptr %arrayidx, align 4 123 %add = add nsw i32 %0, %rdx 124 %iv.next = add nuw nsw i64 %iv, 1 125 %exitcond.not = icmp eq i64 %iv.next, %n 126 br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 127 128for.end: 129 ret i32 %add 130} 131 132; not support mul reduction for scalable vector 133define i32 @mul(ptr %a, i64 %n, i32 %start) { 134; IF-EVL-LABEL: @mul( 135; IF-EVL-NEXT: entry: 136; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] 137; IF-EVL: for.body: 138; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 139; IF-EVL-NEXT: [[RDX:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY]] ], [ [[MUL:%.*]], [[FOR_BODY]] ] 140; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IV]] 141; IF-EVL-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 142; IF-EVL-NEXT: [[MUL]] = mul nsw i32 [[TMP0]], [[RDX]] 143; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 144; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N:%.*]] 145; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] 146; IF-EVL: for.end: 147; IF-EVL-NEXT: [[MUL_LCSSA:%.*]] = phi i32 [ [[MUL]], [[FOR_BODY]] ] 148; IF-EVL-NEXT: ret i32 [[MUL_LCSSA]] 149; 150; NO-VP-LABEL: @mul( 151; NO-VP-NEXT: entry: 152; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 16 153; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 154; NO-VP: vector.ph: 155; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16 156; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] 157; NO-VP-NEXT: [[TMP0:%.*]] = insertelement <8 x i32> splat (i32 1), i32 [[START:%.*]], i32 0 158; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] 159; NO-VP: vector.body: 160; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 161; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ [[TMP0]], [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] 162; NO-VP-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i32> [ splat (i32 1), [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] 163; NO-VP-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 164; NO-VP-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP1]] 165; NO-VP-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0 166; NO-VP-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 8 167; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP3]], align 4 168; NO-VP-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i32>, ptr [[TMP4]], align 4 169; NO-VP-NEXT: [[TMP5]] = mul <8 x i32> [[WIDE_LOAD]], [[VEC_PHI]] 170; NO-VP-NEXT: [[TMP6]] = mul <8 x i32> [[WIDE_LOAD2]], [[VEC_PHI1]] 171; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 172; NO-VP-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 173; NO-VP-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] 174; NO-VP: middle.block: 175; NO-VP-NEXT: [[BIN_RDX:%.*]] = mul <8 x i32> [[TMP6]], [[TMP5]] 176; NO-VP-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> [[BIN_RDX]]) 177; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] 178; NO-VP-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 179; NO-VP: scalar.ph: 180; NO-VP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 181; NO-VP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP8]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 182; NO-VP-NEXT: br label [[FOR_BODY:%.*]] 183; NO-VP: for.body: 184; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 185; NO-VP-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MUL:%.*]], [[FOR_BODY]] ] 186; NO-VP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 187; NO-VP-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 188; NO-VP-NEXT: [[MUL]] = mul nsw i32 [[TMP9]], [[RDX]] 189; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 190; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 191; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] 192; NO-VP: for.end: 193; NO-VP-NEXT: [[MUL_LCSSA:%.*]] = phi i32 [ [[MUL]], [[FOR_BODY]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ] 194; NO-VP-NEXT: ret i32 [[MUL_LCSSA]] 195; 196entry: 197 br label %for.body 198 199for.body: 200 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 201 %rdx = phi i32 [ %start, %entry ], [ %mul, %for.body ] 202 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv 203 %0 = load i32, ptr %arrayidx, align 4 204 %mul = mul nsw i32 %0, %rdx 205 %iv.next = add nuw nsw i64 %iv, 1 206 %exitcond.not = icmp eq i64 %iv.next, %n 207 br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 208 209for.end: 210 ret i32 %mul 211} 212 213define i32 @or(ptr %a, i64 %n, i32 %start) { 214; IF-EVL-LABEL: @or( 215; IF-EVL-NEXT: entry: 216; IF-EVL-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N:%.*]] 217; IF-EVL-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() 218; IF-EVL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4 219; IF-EVL-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]] 220; IF-EVL-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 221; IF-EVL: vector.ph: 222; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 223; IF-EVL-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 224; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 225; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]] 226; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] 227; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] 228; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() 229; IF-EVL-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 230; IF-EVL-NEXT: [[TMP9:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START:%.*]], i32 0 231; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] 232; IF-EVL: vector.body: 233; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 234; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] 235; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[TMP9]], [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ] 236; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]] 237; IF-EVL-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) 238; IF-EVL-NEXT: [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0 239; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP11]] 240; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0 241; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) 242; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 4 x i32> @llvm.vp.or.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) 243; IF-EVL-NEXT: [[TMP14]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[VP_OP]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP10]]) 244; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64 245; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]] 246; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] 247; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 248; IF-EVL-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] 249; IF-EVL: middle.block: 250; IF-EVL-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> [[TMP14]]) 251; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] 252; IF-EVL: scalar.ph: 253; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 254; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP17]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 255; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] 256; IF-EVL: for.body: 257; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 258; IF-EVL-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[OR:%.*]], [[FOR_BODY]] ] 259; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 260; IF-EVL-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 261; IF-EVL-NEXT: [[OR]] = or i32 [[TMP18]], [[RDX]] 262; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 263; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 264; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] 265; IF-EVL: for.end: 266; IF-EVL-NEXT: [[OR_LCSSA:%.*]] = phi i32 [ [[OR]], [[FOR_BODY]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ] 267; IF-EVL-NEXT: ret i32 [[OR_LCSSA]] 268; 269; NO-VP-LABEL: @or( 270; NO-VP-NEXT: entry: 271; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 272; NO-VP-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 273; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] 274; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 275; NO-VP: vector.ph: 276; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 277; NO-VP-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 278; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] 279; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] 280; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 281; NO-VP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 282; NO-VP-NEXT: [[TMP6:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START:%.*]], i32 0 283; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] 284; NO-VP: vector.body: 285; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 286; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[TMP6]], [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] 287; NO-VP-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0 288; NO-VP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP7]] 289; NO-VP-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 0 290; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP9]], align 4 291; NO-VP-NEXT: [[TMP10]] = or <vscale x 4 x i32> [[WIDE_LOAD]], [[VEC_PHI]] 292; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] 293; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 294; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] 295; NO-VP: middle.block: 296; NO-VP-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> [[TMP10]]) 297; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] 298; NO-VP-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 299; NO-VP: scalar.ph: 300; NO-VP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 301; NO-VP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP12]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 302; NO-VP-NEXT: br label [[FOR_BODY:%.*]] 303; NO-VP: for.body: 304; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 305; NO-VP-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[OR:%.*]], [[FOR_BODY]] ] 306; NO-VP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 307; NO-VP-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 308; NO-VP-NEXT: [[OR]] = or i32 [[TMP13]], [[RDX]] 309; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 310; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 311; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] 312; NO-VP: for.end: 313; NO-VP-NEXT: [[OR_LCSSA:%.*]] = phi i32 [ [[OR]], [[FOR_BODY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] 314; NO-VP-NEXT: ret i32 [[OR_LCSSA]] 315; 316entry: 317 br label %for.body 318 319for.body: 320 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 321 %rdx = phi i32 [ %start, %entry ], [ %or, %for.body ] 322 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv 323 %0 = load i32, ptr %arrayidx, align 4 324 %or = or i32 %0, %rdx 325 %iv.next = add nuw nsw i64 %iv, 1 326 %exitcond.not = icmp eq i64 %iv.next, %n 327 br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 328 329for.end: 330 ret i32 %or 331} 332 333define i32 @and(ptr %a, i64 %n, i32 %start) { 334; IF-EVL-LABEL: @and( 335; IF-EVL-NEXT: entry: 336; IF-EVL-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N:%.*]] 337; IF-EVL-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() 338; IF-EVL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4 339; IF-EVL-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]] 340; IF-EVL-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 341; IF-EVL: vector.ph: 342; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 343; IF-EVL-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 344; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 345; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]] 346; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] 347; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] 348; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() 349; IF-EVL-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 350; IF-EVL-NEXT: [[TMP9:%.*]] = insertelement <vscale x 4 x i32> splat (i32 -1), i32 [[START:%.*]], i32 0 351; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] 352; IF-EVL: vector.body: 353; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 354; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] 355; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[TMP9]], [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ] 356; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]] 357; IF-EVL-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) 358; IF-EVL-NEXT: [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0 359; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP11]] 360; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0 361; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) 362; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 4 x i32> @llvm.vp.and.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) 363; IF-EVL-NEXT: [[TMP14]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[VP_OP]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP10]]) 364; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64 365; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]] 366; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] 367; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 368; IF-EVL-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] 369; IF-EVL: middle.block: 370; IF-EVL-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> [[TMP14]]) 371; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] 372; IF-EVL: scalar.ph: 373; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 374; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP17]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 375; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] 376; IF-EVL: for.body: 377; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 378; IF-EVL-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[AND:%.*]], [[FOR_BODY]] ] 379; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 380; IF-EVL-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 381; IF-EVL-NEXT: [[AND]] = and i32 [[TMP18]], [[RDX]] 382; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 383; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 384; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] 385; IF-EVL: for.end: 386; IF-EVL-NEXT: [[AND_LCSSA:%.*]] = phi i32 [ [[AND]], [[FOR_BODY]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ] 387; IF-EVL-NEXT: ret i32 [[AND_LCSSA]] 388; 389; NO-VP-LABEL: @and( 390; NO-VP-NEXT: entry: 391; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 392; NO-VP-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 393; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] 394; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 395; NO-VP: vector.ph: 396; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 397; NO-VP-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 398; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] 399; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] 400; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 401; NO-VP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 402; NO-VP-NEXT: [[TMP6:%.*]] = insertelement <vscale x 4 x i32> splat (i32 -1), i32 [[START:%.*]], i32 0 403; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] 404; NO-VP: vector.body: 405; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 406; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[TMP6]], [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] 407; NO-VP-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0 408; NO-VP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP7]] 409; NO-VP-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 0 410; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP9]], align 4 411; NO-VP-NEXT: [[TMP10]] = and <vscale x 4 x i32> [[WIDE_LOAD]], [[VEC_PHI]] 412; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] 413; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 414; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] 415; NO-VP: middle.block: 416; NO-VP-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> [[TMP10]]) 417; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] 418; NO-VP-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 419; NO-VP: scalar.ph: 420; NO-VP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 421; NO-VP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP12]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 422; NO-VP-NEXT: br label [[FOR_BODY:%.*]] 423; NO-VP: for.body: 424; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 425; NO-VP-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[AND:%.*]], [[FOR_BODY]] ] 426; NO-VP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 427; NO-VP-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 428; NO-VP-NEXT: [[AND]] = and i32 [[TMP13]], [[RDX]] 429; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 430; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 431; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] 432; NO-VP: for.end: 433; NO-VP-NEXT: [[AND_LCSSA:%.*]] = phi i32 [ [[AND]], [[FOR_BODY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] 434; NO-VP-NEXT: ret i32 [[AND_LCSSA]] 435; 436entry: 437 br label %for.body 438 439for.body: 440 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 441 %rdx = phi i32 [ %start, %entry ], [ %and, %for.body ] 442 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv 443 %0 = load i32, ptr %arrayidx, align 4 444 %and = and i32 %0, %rdx 445 %iv.next = add nuw nsw i64 %iv, 1 446 %exitcond.not = icmp eq i64 %iv.next, %n 447 br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 448 449for.end: 450 ret i32 %and 451} 452 453define i32 @xor(ptr %a, i64 %n, i32 %start) { 454; IF-EVL-LABEL: @xor( 455; IF-EVL-NEXT: entry: 456; IF-EVL-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N:%.*]] 457; IF-EVL-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() 458; IF-EVL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4 459; IF-EVL-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]] 460; IF-EVL-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 461; IF-EVL: vector.ph: 462; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 463; IF-EVL-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 464; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 465; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]] 466; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] 467; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] 468; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() 469; IF-EVL-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 470; IF-EVL-NEXT: [[TMP9:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START:%.*]], i32 0 471; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] 472; IF-EVL: vector.body: 473; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 474; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] 475; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[TMP9]], [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ] 476; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]] 477; IF-EVL-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) 478; IF-EVL-NEXT: [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0 479; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP11]] 480; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0 481; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) 482; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 4 x i32> @llvm.vp.xor.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) 483; IF-EVL-NEXT: [[TMP14]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[VP_OP]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP10]]) 484; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64 485; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]] 486; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] 487; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 488; IF-EVL-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] 489; IF-EVL: middle.block: 490; IF-EVL-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> [[TMP14]]) 491; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] 492; IF-EVL: scalar.ph: 493; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 494; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP17]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 495; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] 496; IF-EVL: for.body: 497; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 498; IF-EVL-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[XOR:%.*]], [[FOR_BODY]] ] 499; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 500; IF-EVL-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 501; IF-EVL-NEXT: [[XOR]] = xor i32 [[TMP18]], [[RDX]] 502; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 503; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 504; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] 505; IF-EVL: for.end: 506; IF-EVL-NEXT: [[XOR_LCSSA:%.*]] = phi i32 [ [[XOR]], [[FOR_BODY]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ] 507; IF-EVL-NEXT: ret i32 [[XOR_LCSSA]] 508; 509; NO-VP-LABEL: @xor( 510; NO-VP-NEXT: entry: 511; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 512; NO-VP-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 513; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] 514; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 515; NO-VP: vector.ph: 516; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 517; NO-VP-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 518; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] 519; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] 520; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 521; NO-VP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 522; NO-VP-NEXT: [[TMP6:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START:%.*]], i32 0 523; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] 524; NO-VP: vector.body: 525; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 526; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[TMP6]], [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] 527; NO-VP-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0 528; NO-VP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP7]] 529; NO-VP-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 0 530; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP9]], align 4 531; NO-VP-NEXT: [[TMP10]] = xor <vscale x 4 x i32> [[WIDE_LOAD]], [[VEC_PHI]] 532; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] 533; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 534; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] 535; NO-VP: middle.block: 536; NO-VP-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> [[TMP10]]) 537; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] 538; NO-VP-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 539; NO-VP: scalar.ph: 540; NO-VP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 541; NO-VP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP12]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 542; NO-VP-NEXT: br label [[FOR_BODY:%.*]] 543; NO-VP: for.body: 544; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 545; NO-VP-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[XOR:%.*]], [[FOR_BODY]] ] 546; NO-VP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 547; NO-VP-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 548; NO-VP-NEXT: [[XOR]] = xor i32 [[TMP13]], [[RDX]] 549; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 550; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 551; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] 552; NO-VP: for.end: 553; NO-VP-NEXT: [[XOR_LCSSA:%.*]] = phi i32 [ [[XOR]], [[FOR_BODY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] 554; NO-VP-NEXT: ret i32 [[XOR_LCSSA]] 555; 556entry: 557 br label %for.body 558 559for.body: 560 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 561 %rdx = phi i32 [ %start, %entry ], [ %xor, %for.body ] 562 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv 563 %0 = load i32, ptr %arrayidx, align 4 564 %xor = xor i32 %0, %rdx 565 %iv.next = add nuw nsw i64 %iv, 1 566 %exitcond.not = icmp eq i64 %iv.next, %n 567 br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 568 569for.end: 570 ret i32 %xor 571} 572 573define i32 @smin(ptr %a, i64 %n, i32 %start) { 574; IF-EVL-LABEL: @smin( 575; IF-EVL-NEXT: entry: 576; IF-EVL-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N:%.*]] 577; IF-EVL-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() 578; IF-EVL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4 579; IF-EVL-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]] 580; IF-EVL-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 581; IF-EVL: vector.ph: 582; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 583; IF-EVL-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 584; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 585; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]] 586; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] 587; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] 588; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() 589; IF-EVL-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 590; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[START:%.*]], i64 0 591; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer 592; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] 593; IF-EVL: vector.body: 594; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 595; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] 596; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[BROADCAST_SPLAT]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] 597; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]] 598; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) 599; IF-EVL-NEXT: [[TMP10:%.*]] = add i64 [[EVL_BASED_IV]], 0 600; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP10]] 601; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 0 602; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]]) 603; IF-EVL-NEXT: [[TMP13:%.*]] = icmp slt <vscale x 4 x i32> [[VP_OP_LOAD]], [[VEC_PHI]] 604; IF-EVL-NEXT: [[TMP14:%.*]] = call <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1> [[TMP13]], <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP9]]) 605; IF-EVL-NEXT: [[TMP15]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP14]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP9]]) 606; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64 607; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] 608; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] 609; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 610; IF-EVL-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] 611; IF-EVL: middle.block: 612; IF-EVL-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> [[TMP15]]) 613; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] 614; IF-EVL: scalar.ph: 615; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 616; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP18]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 617; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] 618; IF-EVL: for.body: 619; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 620; IF-EVL-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SMIN:%.*]], [[FOR_BODY]] ] 621; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 622; IF-EVL-NEXT: [[TMP19:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 623; IF-EVL-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[TMP19]], [[RDX]] 624; IF-EVL-NEXT: [[SMIN]] = select i1 [[CMP_I]], i32 [[TMP19]], i32 [[RDX]] 625; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 626; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 627; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] 628; IF-EVL: for.end: 629; IF-EVL-NEXT: [[SMIN_LCSSA:%.*]] = phi i32 [ [[SMIN]], [[FOR_BODY]] ], [ [[TMP18]], [[MIDDLE_BLOCK]] ] 630; IF-EVL-NEXT: ret i32 [[SMIN_LCSSA]] 631; 632; NO-VP-LABEL: @smin( 633; NO-VP-NEXT: entry: 634; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 635; NO-VP-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 636; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] 637; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 638; NO-VP: vector.ph: 639; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 640; NO-VP-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 641; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] 642; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] 643; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 644; NO-VP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 645; NO-VP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[START:%.*]], i64 0 646; NO-VP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer 647; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] 648; NO-VP: vector.body: 649; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 650; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[BROADCAST_SPLAT]], [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] 651; NO-VP-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 652; NO-VP-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP6]] 653; NO-VP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 654; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP8]], align 4 655; NO-VP-NEXT: [[TMP9:%.*]] = icmp slt <vscale x 4 x i32> [[WIDE_LOAD]], [[VEC_PHI]] 656; NO-VP-NEXT: [[TMP10]] = select <vscale x 4 x i1> [[TMP9]], <vscale x 4 x i32> [[WIDE_LOAD]], <vscale x 4 x i32> [[VEC_PHI]] 657; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] 658; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 659; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] 660; NO-VP: middle.block: 661; NO-VP-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> [[TMP10]]) 662; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] 663; NO-VP-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 664; NO-VP: scalar.ph: 665; NO-VP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 666; NO-VP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP12]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 667; NO-VP-NEXT: br label [[FOR_BODY:%.*]] 668; NO-VP: for.body: 669; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 670; NO-VP-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SMIN:%.*]], [[FOR_BODY]] ] 671; NO-VP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 672; NO-VP-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 673; NO-VP-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[TMP13]], [[RDX]] 674; NO-VP-NEXT: [[SMIN]] = select i1 [[CMP_I]], i32 [[TMP13]], i32 [[RDX]] 675; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 676; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 677; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] 678; NO-VP: for.end: 679; NO-VP-NEXT: [[SMIN_LCSSA:%.*]] = phi i32 [ [[SMIN]], [[FOR_BODY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] 680; NO-VP-NEXT: ret i32 [[SMIN_LCSSA]] 681; 682entry: 683 br label %for.body 684 685for.body: 686 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 687 %rdx = phi i32 [ %start, %entry ], [ %smin, %for.body ] 688 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv 689 %0 = load i32, ptr %arrayidx, align 4 690 %cmp.i = icmp slt i32 %0, %rdx 691 %smin = select i1 %cmp.i, i32 %0, i32 %rdx 692 %iv.next = add nuw nsw i64 %iv, 1 693 %exitcond.not = icmp eq i64 %iv.next, %n 694 br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 695 696for.end: 697 ret i32 %smin 698} 699 700define i32 @smax(ptr %a, i64 %n, i32 %start) { 701; IF-EVL-LABEL: @smax( 702; IF-EVL-NEXT: entry: 703; IF-EVL-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N:%.*]] 704; IF-EVL-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() 705; IF-EVL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4 706; IF-EVL-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]] 707; IF-EVL-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 708; IF-EVL: vector.ph: 709; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 710; IF-EVL-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 711; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 712; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]] 713; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] 714; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] 715; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() 716; IF-EVL-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 717; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[START:%.*]], i64 0 718; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer 719; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] 720; IF-EVL: vector.body: 721; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 722; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] 723; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[BROADCAST_SPLAT]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] 724; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]] 725; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) 726; IF-EVL-NEXT: [[TMP10:%.*]] = add i64 [[EVL_BASED_IV]], 0 727; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP10]] 728; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 0 729; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]]) 730; IF-EVL-NEXT: [[TMP13:%.*]] = icmp sgt <vscale x 4 x i32> [[VP_OP_LOAD]], [[VEC_PHI]] 731; IF-EVL-NEXT: [[TMP14:%.*]] = call <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1> [[TMP13]], <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP9]]) 732; IF-EVL-NEXT: [[TMP15]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP14]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP9]]) 733; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64 734; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] 735; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] 736; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 737; IF-EVL-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] 738; IF-EVL: middle.block: 739; IF-EVL-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> [[TMP15]]) 740; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] 741; IF-EVL: scalar.ph: 742; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 743; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP18]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 744; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] 745; IF-EVL: for.body: 746; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 747; IF-EVL-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SMAX:%.*]], [[FOR_BODY]] ] 748; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 749; IF-EVL-NEXT: [[TMP19:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 750; IF-EVL-NEXT: [[CMP_I:%.*]] = icmp sgt i32 [[TMP19]], [[RDX]] 751; IF-EVL-NEXT: [[SMAX]] = select i1 [[CMP_I]], i32 [[TMP19]], i32 [[RDX]] 752; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 753; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 754; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] 755; IF-EVL: for.end: 756; IF-EVL-NEXT: [[SMAX_LCSSA:%.*]] = phi i32 [ [[SMAX]], [[FOR_BODY]] ], [ [[TMP18]], [[MIDDLE_BLOCK]] ] 757; IF-EVL-NEXT: ret i32 [[SMAX_LCSSA]] 758; 759; NO-VP-LABEL: @smax( 760; NO-VP-NEXT: entry: 761; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 762; NO-VP-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 763; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] 764; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 765; NO-VP: vector.ph: 766; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 767; NO-VP-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 768; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] 769; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] 770; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 771; NO-VP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 772; NO-VP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[START:%.*]], i64 0 773; NO-VP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer 774; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] 775; NO-VP: vector.body: 776; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 777; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[BROADCAST_SPLAT]], [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] 778; NO-VP-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 779; NO-VP-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP6]] 780; NO-VP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 781; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP8]], align 4 782; NO-VP-NEXT: [[TMP9:%.*]] = icmp sgt <vscale x 4 x i32> [[WIDE_LOAD]], [[VEC_PHI]] 783; NO-VP-NEXT: [[TMP10]] = select <vscale x 4 x i1> [[TMP9]], <vscale x 4 x i32> [[WIDE_LOAD]], <vscale x 4 x i32> [[VEC_PHI]] 784; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] 785; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 786; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] 787; NO-VP: middle.block: 788; NO-VP-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> [[TMP10]]) 789; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] 790; NO-VP-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 791; NO-VP: scalar.ph: 792; NO-VP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 793; NO-VP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP12]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 794; NO-VP-NEXT: br label [[FOR_BODY:%.*]] 795; NO-VP: for.body: 796; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 797; NO-VP-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SMAX:%.*]], [[FOR_BODY]] ] 798; NO-VP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 799; NO-VP-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 800; NO-VP-NEXT: [[CMP_I:%.*]] = icmp sgt i32 [[TMP13]], [[RDX]] 801; NO-VP-NEXT: [[SMAX]] = select i1 [[CMP_I]], i32 [[TMP13]], i32 [[RDX]] 802; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 803; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 804; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] 805; NO-VP: for.end: 806; NO-VP-NEXT: [[SMAX_LCSSA:%.*]] = phi i32 [ [[SMAX]], [[FOR_BODY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] 807; NO-VP-NEXT: ret i32 [[SMAX_LCSSA]] 808; 809entry: 810 br label %for.body 811 812for.body: 813 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 814 %rdx = phi i32 [ %start, %entry ], [ %smax, %for.body ] 815 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv 816 %0 = load i32, ptr %arrayidx, align 4 817 %cmp.i = icmp sgt i32 %0, %rdx 818 %smax = select i1 %cmp.i, i32 %0, i32 %rdx 819 %iv.next = add nuw nsw i64 %iv, 1 820 %exitcond.not = icmp eq i64 %iv.next, %n 821 br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 822 823for.end: 824 ret i32 %smax 825} 826 827define i32 @umin(ptr %a, i64 %n, i32 %start) { 828; IF-EVL-LABEL: @umin( 829; IF-EVL-NEXT: entry: 830; IF-EVL-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N:%.*]] 831; IF-EVL-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() 832; IF-EVL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4 833; IF-EVL-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]] 834; IF-EVL-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 835; IF-EVL: vector.ph: 836; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 837; IF-EVL-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 838; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 839; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]] 840; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] 841; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] 842; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() 843; IF-EVL-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 844; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[START:%.*]], i64 0 845; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer 846; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] 847; IF-EVL: vector.body: 848; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 849; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] 850; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[BROADCAST_SPLAT]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] 851; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]] 852; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) 853; IF-EVL-NEXT: [[TMP10:%.*]] = add i64 [[EVL_BASED_IV]], 0 854; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP10]] 855; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 0 856; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]]) 857; IF-EVL-NEXT: [[TMP13:%.*]] = icmp ult <vscale x 4 x i32> [[VP_OP_LOAD]], [[VEC_PHI]] 858; IF-EVL-NEXT: [[TMP14:%.*]] = call <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1> [[TMP13]], <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP9]]) 859; IF-EVL-NEXT: [[TMP15]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP14]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP9]]) 860; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64 861; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] 862; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] 863; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 864; IF-EVL-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] 865; IF-EVL: middle.block: 866; IF-EVL-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> [[TMP15]]) 867; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] 868; IF-EVL: scalar.ph: 869; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 870; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP18]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 871; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] 872; IF-EVL: for.body: 873; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 874; IF-EVL-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[UMIN:%.*]], [[FOR_BODY]] ] 875; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 876; IF-EVL-NEXT: [[TMP19:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 877; IF-EVL-NEXT: [[CMP_I:%.*]] = icmp ult i32 [[TMP19]], [[RDX]] 878; IF-EVL-NEXT: [[UMIN]] = select i1 [[CMP_I]], i32 [[TMP19]], i32 [[RDX]] 879; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 880; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 881; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] 882; IF-EVL: for.end: 883; IF-EVL-NEXT: [[UMIN_LCSSA:%.*]] = phi i32 [ [[UMIN]], [[FOR_BODY]] ], [ [[TMP18]], [[MIDDLE_BLOCK]] ] 884; IF-EVL-NEXT: ret i32 [[UMIN_LCSSA]] 885; 886; NO-VP-LABEL: @umin( 887; NO-VP-NEXT: entry: 888; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 889; NO-VP-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 890; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] 891; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 892; NO-VP: vector.ph: 893; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 894; NO-VP-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 895; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] 896; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] 897; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 898; NO-VP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 899; NO-VP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[START:%.*]], i64 0 900; NO-VP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer 901; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] 902; NO-VP: vector.body: 903; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 904; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[BROADCAST_SPLAT]], [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] 905; NO-VP-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 906; NO-VP-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP6]] 907; NO-VP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 908; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP8]], align 4 909; NO-VP-NEXT: [[TMP9:%.*]] = icmp ult <vscale x 4 x i32> [[WIDE_LOAD]], [[VEC_PHI]] 910; NO-VP-NEXT: [[TMP10]] = select <vscale x 4 x i1> [[TMP9]], <vscale x 4 x i32> [[WIDE_LOAD]], <vscale x 4 x i32> [[VEC_PHI]] 911; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] 912; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 913; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] 914; NO-VP: middle.block: 915; NO-VP-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> [[TMP10]]) 916; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] 917; NO-VP-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 918; NO-VP: scalar.ph: 919; NO-VP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 920; NO-VP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP12]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 921; NO-VP-NEXT: br label [[FOR_BODY:%.*]] 922; NO-VP: for.body: 923; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 924; NO-VP-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[UMIN:%.*]], [[FOR_BODY]] ] 925; NO-VP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 926; NO-VP-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 927; NO-VP-NEXT: [[CMP_I:%.*]] = icmp ult i32 [[TMP13]], [[RDX]] 928; NO-VP-NEXT: [[UMIN]] = select i1 [[CMP_I]], i32 [[TMP13]], i32 [[RDX]] 929; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 930; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 931; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] 932; NO-VP: for.end: 933; NO-VP-NEXT: [[UMIN_LCSSA:%.*]] = phi i32 [ [[UMIN]], [[FOR_BODY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] 934; NO-VP-NEXT: ret i32 [[UMIN_LCSSA]] 935; 936entry: 937 br label %for.body 938 939for.body: 940 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 941 %rdx = phi i32 [ %start, %entry ], [ %umin, %for.body ] 942 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv 943 %0 = load i32, ptr %arrayidx, align 4 944 %cmp.i = icmp ult i32 %0, %rdx 945 %umin = select i1 %cmp.i, i32 %0, i32 %rdx 946 %iv.next = add nuw nsw i64 %iv, 1 947 %exitcond.not = icmp eq i64 %iv.next, %n 948 br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 949 950for.end: 951 ret i32 %umin 952} 953 954define i32 @umax(ptr %a, i64 %n, i32 %start) { 955; IF-EVL-LABEL: @umax( 956; IF-EVL-NEXT: entry: 957; IF-EVL-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N:%.*]] 958; IF-EVL-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() 959; IF-EVL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4 960; IF-EVL-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]] 961; IF-EVL-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 962; IF-EVL: vector.ph: 963; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 964; IF-EVL-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 965; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 966; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]] 967; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] 968; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] 969; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() 970; IF-EVL-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 971; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[START:%.*]], i64 0 972; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer 973; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] 974; IF-EVL: vector.body: 975; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 976; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] 977; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[BROADCAST_SPLAT]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] 978; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]] 979; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) 980; IF-EVL-NEXT: [[TMP10:%.*]] = add i64 [[EVL_BASED_IV]], 0 981; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP10]] 982; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 0 983; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]]) 984; IF-EVL-NEXT: [[TMP13:%.*]] = icmp ugt <vscale x 4 x i32> [[VP_OP_LOAD]], [[VEC_PHI]] 985; IF-EVL-NEXT: [[TMP14:%.*]] = call <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1> [[TMP13]], <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP9]]) 986; IF-EVL-NEXT: [[TMP15]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP14]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP9]]) 987; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64 988; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] 989; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] 990; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 991; IF-EVL-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] 992; IF-EVL: middle.block: 993; IF-EVL-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> [[TMP15]]) 994; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] 995; IF-EVL: scalar.ph: 996; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 997; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP18]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 998; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] 999; IF-EVL: for.body: 1000; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 1001; IF-EVL-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[UMAX:%.*]], [[FOR_BODY]] ] 1002; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 1003; IF-EVL-NEXT: [[TMP19:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 1004; IF-EVL-NEXT: [[CMP_I:%.*]] = icmp ugt i32 [[TMP19]], [[RDX]] 1005; IF-EVL-NEXT: [[UMAX]] = select i1 [[CMP_I]], i32 [[TMP19]], i32 [[RDX]] 1006; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1007; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 1008; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] 1009; IF-EVL: for.end: 1010; IF-EVL-NEXT: [[UMAX_LCSSA:%.*]] = phi i32 [ [[UMAX]], [[FOR_BODY]] ], [ [[TMP18]], [[MIDDLE_BLOCK]] ] 1011; IF-EVL-NEXT: ret i32 [[UMAX_LCSSA]] 1012; 1013; NO-VP-LABEL: @umax( 1014; NO-VP-NEXT: entry: 1015; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 1016; NO-VP-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 1017; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] 1018; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1019; NO-VP: vector.ph: 1020; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 1021; NO-VP-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 1022; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] 1023; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] 1024; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 1025; NO-VP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 1026; NO-VP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[START:%.*]], i64 0 1027; NO-VP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer 1028; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] 1029; NO-VP: vector.body: 1030; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1031; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[BROADCAST_SPLAT]], [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] 1032; NO-VP-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 1033; NO-VP-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP6]] 1034; NO-VP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 1035; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP8]], align 4 1036; NO-VP-NEXT: [[TMP9:%.*]] = icmp ugt <vscale x 4 x i32> [[WIDE_LOAD]], [[VEC_PHI]] 1037; NO-VP-NEXT: [[TMP10]] = select <vscale x 4 x i1> [[TMP9]], <vscale x 4 x i32> [[WIDE_LOAD]], <vscale x 4 x i32> [[VEC_PHI]] 1038; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] 1039; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1040; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] 1041; NO-VP: middle.block: 1042; NO-VP-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> [[TMP10]]) 1043; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] 1044; NO-VP-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 1045; NO-VP: scalar.ph: 1046; NO-VP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1047; NO-VP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP12]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 1048; NO-VP-NEXT: br label [[FOR_BODY:%.*]] 1049; NO-VP: for.body: 1050; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 1051; NO-VP-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[UMAX:%.*]], [[FOR_BODY]] ] 1052; NO-VP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 1053; NO-VP-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 1054; NO-VP-NEXT: [[CMP_I:%.*]] = icmp ugt i32 [[TMP13]], [[RDX]] 1055; NO-VP-NEXT: [[UMAX]] = select i1 [[CMP_I]], i32 [[TMP13]], i32 [[RDX]] 1056; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1057; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 1058; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] 1059; NO-VP: for.end: 1060; NO-VP-NEXT: [[UMAX_LCSSA:%.*]] = phi i32 [ [[UMAX]], [[FOR_BODY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] 1061; NO-VP-NEXT: ret i32 [[UMAX_LCSSA]] 1062; 1063entry: 1064 br label %for.body 1065 1066for.body: 1067 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 1068 %rdx = phi i32 [ %start, %entry ], [ %umax, %for.body ] 1069 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv 1070 %0 = load i32, ptr %arrayidx, align 4 1071 %cmp.i = icmp ugt i32 %0, %rdx 1072 %umax = select i1 %cmp.i, i32 %0, i32 %rdx 1073 %iv.next = add nuw nsw i64 %iv, 1 1074 %exitcond.not = icmp eq i64 %iv.next, %n 1075 br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 1076 1077for.end: 1078 ret i32 %umax 1079} 1080 1081define float @fadd(ptr %a, i64 %n, float %start) { 1082; IF-EVL-LABEL: @fadd( 1083; IF-EVL-NEXT: entry: 1084; IF-EVL-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N:%.*]] 1085; IF-EVL-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() 1086; IF-EVL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4 1087; IF-EVL-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]] 1088; IF-EVL-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1089; IF-EVL: vector.ph: 1090; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 1091; IF-EVL-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 1092; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 1093; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]] 1094; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] 1095; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] 1096; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() 1097; IF-EVL-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 1098; IF-EVL-NEXT: [[TMP9:%.*]] = insertelement <vscale x 4 x float> splat (float -0.000000e+00), float [[START:%.*]], i32 0 1099; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] 1100; IF-EVL: vector.body: 1101; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1102; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] 1103; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x float> [ [[TMP9]], [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ] 1104; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]] 1105; IF-EVL-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) 1106; IF-EVL-NEXT: [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0 1107; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP11]] 1108; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i32 0 1109; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) 1110; IF-EVL-NEXT: [[VP_OP:%.*]] = call reassoc <vscale x 4 x float> @llvm.vp.fadd.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x float> [[VEC_PHI]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) 1111; IF-EVL-NEXT: [[TMP14]] = call <vscale x 4 x float> @llvm.vp.merge.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[VP_OP]], <vscale x 4 x float> [[VEC_PHI]], i32 [[TMP10]]) 1112; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64 1113; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]] 1114; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] 1115; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1116; IF-EVL-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] 1117; IF-EVL: middle.block: 1118; IF-EVL-NEXT: [[TMP17:%.*]] = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, <vscale x 4 x float> [[TMP14]]) 1119; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] 1120; IF-EVL: scalar.ph: 1121; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1122; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP17]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 1123; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] 1124; IF-EVL: for.body: 1125; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 1126; IF-EVL-NEXT: [[RDX:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 1127; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] 1128; IF-EVL-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX]], align 4 1129; IF-EVL-NEXT: [[ADD]] = fadd reassoc float [[TMP18]], [[RDX]] 1130; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1131; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 1132; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] 1133; IF-EVL: for.end: 1134; IF-EVL-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ] 1135; IF-EVL-NEXT: ret float [[ADD_LCSSA]] 1136; 1137; NO-VP-LABEL: @fadd( 1138; NO-VP-NEXT: entry: 1139; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 1140; NO-VP-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 1141; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] 1142; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1143; NO-VP: vector.ph: 1144; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 1145; NO-VP-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 1146; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] 1147; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] 1148; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 1149; NO-VP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 1150; NO-VP-NEXT: [[TMP6:%.*]] = insertelement <vscale x 4 x float> splat (float -0.000000e+00), float [[START:%.*]], i32 0 1151; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] 1152; NO-VP: vector.body: 1153; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1154; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x float> [ [[TMP6]], [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] 1155; NO-VP-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0 1156; NO-VP-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP7]] 1157; NO-VP-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 0 1158; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP9]], align 4 1159; NO-VP-NEXT: [[TMP10]] = fadd reassoc <vscale x 4 x float> [[WIDE_LOAD]], [[VEC_PHI]] 1160; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] 1161; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1162; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] 1163; NO-VP: middle.block: 1164; NO-VP-NEXT: [[TMP12:%.*]] = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, <vscale x 4 x float> [[TMP10]]) 1165; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] 1166; NO-VP-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 1167; NO-VP: scalar.ph: 1168; NO-VP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1169; NO-VP-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP12]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 1170; NO-VP-NEXT: br label [[FOR_BODY:%.*]] 1171; NO-VP: for.body: 1172; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 1173; NO-VP-NEXT: [[RDX:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 1174; NO-VP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] 1175; NO-VP-NEXT: [[TMP13:%.*]] = load float, ptr [[ARRAYIDX]], align 4 1176; NO-VP-NEXT: [[ADD]] = fadd reassoc float [[TMP13]], [[RDX]] 1177; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1178; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 1179; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] 1180; NO-VP: for.end: 1181; NO-VP-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] 1182; NO-VP-NEXT: ret float [[ADD_LCSSA]] 1183; 1184entry: 1185 br label %for.body 1186 1187for.body: 1188 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 1189 %rdx = phi float [ %start, %entry ], [ %add, %for.body ] 1190 %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv 1191 %0 = load float, ptr %arrayidx, align 4 1192 %add = fadd reassoc float %0, %rdx 1193 %iv.next = add nuw nsw i64 %iv, 1 1194 %exitcond.not = icmp eq i64 %iv.next, %n 1195 br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 1196 1197for.end: 1198 ret float %add 1199} 1200 1201; not support fmul reduction for scalable vector 1202define float @fmul(ptr %a, i64 %n, float %start) { 1203; IF-EVL-LABEL: @fmul( 1204; IF-EVL-NEXT: entry: 1205; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] 1206; IF-EVL: for.body: 1207; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 1208; IF-EVL-NEXT: [[RDX:%.*]] = phi float [ [[START:%.*]], [[ENTRY]] ], [ [[MUL:%.*]], [[FOR_BODY]] ] 1209; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[IV]] 1210; IF-EVL-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4 1211; IF-EVL-NEXT: [[MUL]] = fmul reassoc float [[TMP0]], [[RDX]] 1212; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1213; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N:%.*]] 1214; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP4]] 1215; IF-EVL: for.end: 1216; IF-EVL-NEXT: [[MUL_LCSSA:%.*]] = phi float [ [[MUL]], [[FOR_BODY]] ] 1217; IF-EVL-NEXT: ret float [[MUL_LCSSA]] 1218; 1219; NO-VP-LABEL: @fmul( 1220; NO-VP-NEXT: entry: 1221; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 16 1222; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1223; NO-VP: vector.ph: 1224; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16 1225; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] 1226; NO-VP-NEXT: [[TMP0:%.*]] = insertelement <8 x float> splat (float 1.000000e+00), float [[START:%.*]], i32 0 1227; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] 1228; NO-VP: vector.body: 1229; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1230; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi <8 x float> [ [[TMP0]], [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] 1231; NO-VP-NEXT: [[VEC_PHI1:%.*]] = phi <8 x float> [ splat (float 1.000000e+00), [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] 1232; NO-VP-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 1233; NO-VP-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP1]] 1234; NO-VP-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0 1235; NO-VP-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 8 1236; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <8 x float>, ptr [[TMP3]], align 4 1237; NO-VP-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x float>, ptr [[TMP4]], align 4 1238; NO-VP-NEXT: [[TMP5]] = fmul reassoc <8 x float> [[WIDE_LOAD]], [[VEC_PHI]] 1239; NO-VP-NEXT: [[TMP6]] = fmul reassoc <8 x float> [[WIDE_LOAD2]], [[VEC_PHI1]] 1240; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 1241; NO-VP-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1242; NO-VP-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] 1243; NO-VP: middle.block: 1244; NO-VP-NEXT: [[BIN_RDX:%.*]] = fmul reassoc <8 x float> [[TMP6]], [[TMP5]] 1245; NO-VP-NEXT: [[TMP8:%.*]] = call reassoc float @llvm.vector.reduce.fmul.v8f32(float 1.000000e+00, <8 x float> [[BIN_RDX]]) 1246; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] 1247; NO-VP-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 1248; NO-VP: scalar.ph: 1249; NO-VP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1250; NO-VP-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP8]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 1251; NO-VP-NEXT: br label [[FOR_BODY:%.*]] 1252; NO-VP: for.body: 1253; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 1254; NO-VP-NEXT: [[RDX:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MUL:%.*]], [[FOR_BODY]] ] 1255; NO-VP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] 1256; NO-VP-NEXT: [[TMP9:%.*]] = load float, ptr [[ARRAYIDX]], align 4 1257; NO-VP-NEXT: [[MUL]] = fmul reassoc float [[TMP9]], [[RDX]] 1258; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1259; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 1260; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] 1261; NO-VP: for.end: 1262; NO-VP-NEXT: [[MUL_LCSSA:%.*]] = phi float [ [[MUL]], [[FOR_BODY]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ] 1263; NO-VP-NEXT: ret float [[MUL_LCSSA]] 1264; 1265entry: 1266 br label %for.body 1267 1268for.body: 1269 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 1270 %rdx = phi float [ %start, %entry ], [ %mul, %for.body ] 1271 %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv 1272 %0 = load float, ptr %arrayidx, align 4 1273 %mul = fmul reassoc float %0, %rdx 1274 %iv.next = add nuw nsw i64 %iv, 1 1275 %exitcond.not = icmp eq i64 %iv.next, %n 1276 br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 1277 1278for.end: 1279 ret float %mul 1280} 1281 1282define float @fmin(ptr %a, i64 %n, float %start) #0 { 1283; IF-EVL-LABEL: @fmin( 1284; IF-EVL-NEXT: entry: 1285; IF-EVL-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N:%.*]] 1286; IF-EVL-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() 1287; IF-EVL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4 1288; IF-EVL-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]] 1289; IF-EVL-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1290; IF-EVL: vector.ph: 1291; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 1292; IF-EVL-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 1293; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 1294; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]] 1295; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] 1296; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] 1297; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() 1298; IF-EVL-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 1299; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[START:%.*]], i64 0 1300; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x float> [[BROADCAST_SPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer 1301; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] 1302; IF-EVL: vector.body: 1303; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1304; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] 1305; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x float> [ [[BROADCAST_SPLAT]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] 1306; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]] 1307; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) 1308; IF-EVL-NEXT: [[TMP10:%.*]] = add i64 [[EVL_BASED_IV]], 0 1309; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP10]] 1310; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i32 0 1311; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]]) 1312; IF-EVL-NEXT: [[TMP13:%.*]] = fcmp fast olt <vscale x 4 x float> [[VP_OP_LOAD]], [[VEC_PHI]] 1313; IF-EVL-NEXT: [[TMP14:%.*]] = call <vscale x 4 x float> @llvm.vp.select.nxv4f32(<vscale x 4 x i1> [[TMP13]], <vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x float> [[VEC_PHI]], i32 [[TMP9]]) 1314; IF-EVL-NEXT: [[TMP15]] = call <vscale x 4 x float> @llvm.vp.merge.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[TMP14]], <vscale x 4 x float> [[VEC_PHI]], i32 [[TMP9]]) 1315; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64 1316; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] 1317; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] 1318; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1319; IF-EVL-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] 1320; IF-EVL: middle.block: 1321; IF-EVL-NEXT: [[TMP18:%.*]] = call fast float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> [[TMP15]]) 1322; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] 1323; IF-EVL: scalar.ph: 1324; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1325; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP18]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 1326; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] 1327; IF-EVL: for.body: 1328; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 1329; IF-EVL-NEXT: [[RDX:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MIN:%.*]], [[FOR_BODY]] ] 1330; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] 1331; IF-EVL-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4 1332; IF-EVL-NEXT: [[CMP:%.*]] = fcmp fast olt float [[TMP19]], [[RDX]] 1333; IF-EVL-NEXT: [[MIN]] = select i1 [[CMP]], float [[TMP19]], float [[RDX]] 1334; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1335; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 1336; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] 1337; IF-EVL: for.end: 1338; IF-EVL-NEXT: [[MIN_LCSSA:%.*]] = phi float [ [[MIN]], [[FOR_BODY]] ], [ [[TMP18]], [[MIDDLE_BLOCK]] ] 1339; IF-EVL-NEXT: ret float [[MIN_LCSSA]] 1340; 1341; NO-VP-LABEL: @fmin( 1342; NO-VP-NEXT: entry: 1343; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 1344; NO-VP-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 1345; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] 1346; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1347; NO-VP: vector.ph: 1348; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 1349; NO-VP-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 1350; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] 1351; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] 1352; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 1353; NO-VP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 1354; NO-VP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[START:%.*]], i64 0 1355; NO-VP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x float> [[BROADCAST_SPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer 1356; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] 1357; NO-VP: vector.body: 1358; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1359; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x float> [ [[BROADCAST_SPLAT]], [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] 1360; NO-VP-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 1361; NO-VP-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP6]] 1362; NO-VP-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0 1363; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP8]], align 4 1364; NO-VP-NEXT: [[TMP9:%.*]] = fcmp fast olt <vscale x 4 x float> [[WIDE_LOAD]], [[VEC_PHI]] 1365; NO-VP-NEXT: [[TMP10]] = select <vscale x 4 x i1> [[TMP9]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x float> [[VEC_PHI]] 1366; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] 1367; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1368; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] 1369; NO-VP: middle.block: 1370; NO-VP-NEXT: [[TMP12:%.*]] = call fast float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> [[TMP10]]) 1371; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] 1372; NO-VP-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 1373; NO-VP: scalar.ph: 1374; NO-VP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1375; NO-VP-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP12]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 1376; NO-VP-NEXT: br label [[FOR_BODY:%.*]] 1377; NO-VP: for.body: 1378; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 1379; NO-VP-NEXT: [[RDX:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MIN:%.*]], [[FOR_BODY]] ] 1380; NO-VP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] 1381; NO-VP-NEXT: [[TMP13:%.*]] = load float, ptr [[ARRAYIDX]], align 4 1382; NO-VP-NEXT: [[CMP:%.*]] = fcmp fast olt float [[TMP13]], [[RDX]] 1383; NO-VP-NEXT: [[MIN]] = select i1 [[CMP]], float [[TMP13]], float [[RDX]] 1384; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1385; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 1386; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] 1387; NO-VP: for.end: 1388; NO-VP-NEXT: [[MIN_LCSSA:%.*]] = phi float [ [[MIN]], [[FOR_BODY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] 1389; NO-VP-NEXT: ret float [[MIN_LCSSA]] 1390; 1391entry: 1392 br label %for.body 1393 1394for.body: 1395 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 1396 %rdx = phi float [ %start, %entry ], [ %min, %for.body ] 1397 %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv 1398 %0 = load float, ptr %arrayidx, align 4 1399 %cmp = fcmp fast olt float %0, %rdx 1400 %min = select i1 %cmp, float %0, float %rdx 1401 %iv.next = add nuw nsw i64 %iv, 1 1402 %exitcond.not = icmp eq i64 %iv.next, %n 1403 br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 1404 1405for.end: 1406 ret float %min 1407} 1408 1409define float @fmax(ptr %a, i64 %n, float %start) #0 { 1410; IF-EVL-LABEL: @fmax( 1411; IF-EVL-NEXT: entry: 1412; IF-EVL-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N:%.*]] 1413; IF-EVL-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() 1414; IF-EVL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4 1415; IF-EVL-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]] 1416; IF-EVL-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1417; IF-EVL: vector.ph: 1418; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 1419; IF-EVL-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 1420; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 1421; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]] 1422; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] 1423; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] 1424; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() 1425; IF-EVL-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 1426; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[START:%.*]], i64 0 1427; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x float> [[BROADCAST_SPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer 1428; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] 1429; IF-EVL: vector.body: 1430; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1431; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] 1432; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x float> [ [[BROADCAST_SPLAT]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] 1433; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]] 1434; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) 1435; IF-EVL-NEXT: [[TMP10:%.*]] = add i64 [[EVL_BASED_IV]], 0 1436; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP10]] 1437; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i32 0 1438; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]]) 1439; IF-EVL-NEXT: [[TMP13:%.*]] = fcmp fast ogt <vscale x 4 x float> [[VP_OP_LOAD]], [[VEC_PHI]] 1440; IF-EVL-NEXT: [[TMP14:%.*]] = call <vscale x 4 x float> @llvm.vp.select.nxv4f32(<vscale x 4 x i1> [[TMP13]], <vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x float> [[VEC_PHI]], i32 [[TMP9]]) 1441; IF-EVL-NEXT: [[TMP15]] = call <vscale x 4 x float> @llvm.vp.merge.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[TMP14]], <vscale x 4 x float> [[VEC_PHI]], i32 [[TMP9]]) 1442; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64 1443; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] 1444; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] 1445; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1446; IF-EVL-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] 1447; IF-EVL: middle.block: 1448; IF-EVL-NEXT: [[TMP18:%.*]] = call fast float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> [[TMP15]]) 1449; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] 1450; IF-EVL: scalar.ph: 1451; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1452; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP18]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 1453; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] 1454; IF-EVL: for.body: 1455; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 1456; IF-EVL-NEXT: [[RDX:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MAX:%.*]], [[FOR_BODY]] ] 1457; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] 1458; IF-EVL-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4 1459; IF-EVL-NEXT: [[CMP:%.*]] = fcmp fast ogt float [[TMP19]], [[RDX]] 1460; IF-EVL-NEXT: [[MAX]] = select i1 [[CMP]], float [[TMP19]], float [[RDX]] 1461; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1462; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 1463; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] 1464; IF-EVL: for.end: 1465; IF-EVL-NEXT: [[MAX_LCSSA:%.*]] = phi float [ [[MAX]], [[FOR_BODY]] ], [ [[TMP18]], [[MIDDLE_BLOCK]] ] 1466; IF-EVL-NEXT: ret float [[MAX_LCSSA]] 1467; 1468; NO-VP-LABEL: @fmax( 1469; NO-VP-NEXT: entry: 1470; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 1471; NO-VP-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 1472; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] 1473; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1474; NO-VP: vector.ph: 1475; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 1476; NO-VP-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 1477; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] 1478; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] 1479; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 1480; NO-VP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 1481; NO-VP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[START:%.*]], i64 0 1482; NO-VP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x float> [[BROADCAST_SPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer 1483; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] 1484; NO-VP: vector.body: 1485; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1486; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x float> [ [[BROADCAST_SPLAT]], [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] 1487; NO-VP-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 1488; NO-VP-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP6]] 1489; NO-VP-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0 1490; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP8]], align 4 1491; NO-VP-NEXT: [[TMP9:%.*]] = fcmp fast ogt <vscale x 4 x float> [[WIDE_LOAD]], [[VEC_PHI]] 1492; NO-VP-NEXT: [[TMP10]] = select <vscale x 4 x i1> [[TMP9]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x float> [[VEC_PHI]] 1493; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] 1494; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1495; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] 1496; NO-VP: middle.block: 1497; NO-VP-NEXT: [[TMP12:%.*]] = call fast float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> [[TMP10]]) 1498; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] 1499; NO-VP-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 1500; NO-VP: scalar.ph: 1501; NO-VP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1502; NO-VP-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP12]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 1503; NO-VP-NEXT: br label [[FOR_BODY:%.*]] 1504; NO-VP: for.body: 1505; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 1506; NO-VP-NEXT: [[RDX:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MAX:%.*]], [[FOR_BODY]] ] 1507; NO-VP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] 1508; NO-VP-NEXT: [[TMP13:%.*]] = load float, ptr [[ARRAYIDX]], align 4 1509; NO-VP-NEXT: [[CMP:%.*]] = fcmp fast ogt float [[TMP13]], [[RDX]] 1510; NO-VP-NEXT: [[MAX]] = select i1 [[CMP]], float [[TMP13]], float [[RDX]] 1511; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1512; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 1513; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] 1514; NO-VP: for.end: 1515; NO-VP-NEXT: [[MAX_LCSSA:%.*]] = phi float [ [[MAX]], [[FOR_BODY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] 1516; NO-VP-NEXT: ret float [[MAX_LCSSA]] 1517; 1518entry: 1519 br label %for.body 1520 1521for.body: 1522 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 1523 %rdx = phi float [ %start, %entry ], [ %max, %for.body ] 1524 %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv 1525 %0 = load float, ptr %arrayidx, align 4 1526 %cmp = fcmp fast ogt float %0, %rdx 1527 %max = select i1 %cmp, float %0, float %rdx 1528 %iv.next = add nuw nsw i64 %iv, 1 1529 %exitcond.not = icmp eq i64 %iv.next, %n 1530 br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 1531 1532for.end: 1533 ret float %max 1534} 1535 1536define float @fminimum(ptr %a, i64 %n, float %start) { 1537; IF-EVL-LABEL: @fminimum( 1538; IF-EVL-NEXT: entry: 1539; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] 1540; IF-EVL: for.body: 1541; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 1542; IF-EVL-NEXT: [[RDX:%.*]] = phi float [ [[START:%.*]], [[ENTRY]] ], [ [[MIN:%.*]], [[FOR_BODY]] ] 1543; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[IV]] 1544; IF-EVL-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4 1545; IF-EVL-NEXT: [[MIN]] = tail call float @llvm.minimum.f32(float [[RDX]], float [[TMP0]]) 1546; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1547; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N:%.*]] 1548; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP4]] 1549; IF-EVL: for.end: 1550; IF-EVL-NEXT: [[MIN_LCSSA:%.*]] = phi float [ [[MIN]], [[FOR_BODY]] ] 1551; IF-EVL-NEXT: ret float [[MIN_LCSSA]] 1552; 1553; NO-VP-LABEL: @fminimum( 1554; NO-VP-NEXT: entry: 1555; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 16 1556; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1557; NO-VP: vector.ph: 1558; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16 1559; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] 1560; NO-VP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x float> poison, float [[START:%.*]], i64 0 1561; NO-VP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x float> [[BROADCAST_SPLATINSERT]], <8 x float> poison, <8 x i32> zeroinitializer 1562; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] 1563; NO-VP: vector.body: 1564; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1565; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi <8 x float> [ [[BROADCAST_SPLAT]], [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] 1566; NO-VP-NEXT: [[VEC_PHI1:%.*]] = phi <8 x float> [ [[BROADCAST_SPLAT]], [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] 1567; NO-VP-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 1568; NO-VP-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP0]] 1569; NO-VP-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0 1570; NO-VP-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 8 1571; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <8 x float>, ptr [[TMP2]], align 4 1572; NO-VP-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x float>, ptr [[TMP3]], align 4 1573; NO-VP-NEXT: [[TMP4]] = call <8 x float> @llvm.minimum.v8f32(<8 x float> [[VEC_PHI]], <8 x float> [[WIDE_LOAD]]) 1574; NO-VP-NEXT: [[TMP5]] = call <8 x float> @llvm.minimum.v8f32(<8 x float> [[VEC_PHI1]], <8 x float> [[WIDE_LOAD2]]) 1575; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 1576; NO-VP-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1577; NO-VP-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] 1578; NO-VP: middle.block: 1579; NO-VP-NEXT: [[RDX_MINMAX:%.*]] = call <8 x float> @llvm.minimum.v8f32(<8 x float> [[TMP4]], <8 x float> [[TMP5]]) 1580; NO-VP-NEXT: [[TMP7:%.*]] = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> [[RDX_MINMAX]]) 1581; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] 1582; NO-VP-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 1583; NO-VP: scalar.ph: 1584; NO-VP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1585; NO-VP-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 1586; NO-VP-NEXT: br label [[FOR_BODY:%.*]] 1587; NO-VP: for.body: 1588; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 1589; NO-VP-NEXT: [[RDX:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MIN:%.*]], [[FOR_BODY]] ] 1590; NO-VP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] 1591; NO-VP-NEXT: [[TMP8:%.*]] = load float, ptr [[ARRAYIDX]], align 4 1592; NO-VP-NEXT: [[MIN]] = tail call float @llvm.minimum.f32(float [[RDX]], float [[TMP8]]) 1593; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1594; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 1595; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]] 1596; NO-VP: for.end: 1597; NO-VP-NEXT: [[MIN_LCSSA:%.*]] = phi float [ [[MIN]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] 1598; NO-VP-NEXT: ret float [[MIN_LCSSA]] 1599; 1600entry: 1601 br label %for.body 1602 1603for.body: 1604 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 1605 %rdx = phi float [ %start, %entry ], [ %min, %for.body ] 1606 %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv 1607 %0 = load float, ptr %arrayidx, align 4 1608 %min = tail call float @llvm.minimum.f32(float %rdx, float %0) 1609 %iv.next = add nuw nsw i64 %iv, 1 1610 %exitcond.not = icmp eq i64 %iv.next, %n 1611 br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 1612 1613for.end: 1614 ret float %min 1615} 1616 1617define float @fmaximum(ptr %a, i64 %n, float %start) { 1618; IF-EVL-LABEL: @fmaximum( 1619; IF-EVL-NEXT: entry: 1620; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] 1621; IF-EVL: for.body: 1622; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 1623; IF-EVL-NEXT: [[RDX:%.*]] = phi float [ [[START:%.*]], [[ENTRY]] ], [ [[MAX:%.*]], [[FOR_BODY]] ] 1624; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[IV]] 1625; IF-EVL-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4 1626; IF-EVL-NEXT: [[MAX]] = tail call float @llvm.maximum.f32(float [[RDX]], float [[TMP0]]) 1627; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1628; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N:%.*]] 1629; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP4]] 1630; IF-EVL: for.end: 1631; IF-EVL-NEXT: [[MAX_LCSSA:%.*]] = phi float [ [[MAX]], [[FOR_BODY]] ] 1632; IF-EVL-NEXT: ret float [[MAX_LCSSA]] 1633; 1634; NO-VP-LABEL: @fmaximum( 1635; NO-VP-NEXT: entry: 1636; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 16 1637; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1638; NO-VP: vector.ph: 1639; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16 1640; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] 1641; NO-VP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x float> poison, float [[START:%.*]], i64 0 1642; NO-VP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x float> [[BROADCAST_SPLATINSERT]], <8 x float> poison, <8 x i32> zeroinitializer 1643; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] 1644; NO-VP: vector.body: 1645; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1646; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi <8 x float> [ [[BROADCAST_SPLAT]], [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] 1647; NO-VP-NEXT: [[VEC_PHI1:%.*]] = phi <8 x float> [ [[BROADCAST_SPLAT]], [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] 1648; NO-VP-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 1649; NO-VP-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP0]] 1650; NO-VP-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0 1651; NO-VP-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 8 1652; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <8 x float>, ptr [[TMP2]], align 4 1653; NO-VP-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x float>, ptr [[TMP3]], align 4 1654; NO-VP-NEXT: [[TMP4]] = call <8 x float> @llvm.maximum.v8f32(<8 x float> [[VEC_PHI]], <8 x float> [[WIDE_LOAD]]) 1655; NO-VP-NEXT: [[TMP5]] = call <8 x float> @llvm.maximum.v8f32(<8 x float> [[VEC_PHI1]], <8 x float> [[WIDE_LOAD2]]) 1656; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 1657; NO-VP-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1658; NO-VP-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] 1659; NO-VP: middle.block: 1660; NO-VP-NEXT: [[RDX_MINMAX:%.*]] = call <8 x float> @llvm.maximum.v8f32(<8 x float> [[TMP4]], <8 x float> [[TMP5]]) 1661; NO-VP-NEXT: [[TMP7:%.*]] = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> [[RDX_MINMAX]]) 1662; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] 1663; NO-VP-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 1664; NO-VP: scalar.ph: 1665; NO-VP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1666; NO-VP-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 1667; NO-VP-NEXT: br label [[FOR_BODY:%.*]] 1668; NO-VP: for.body: 1669; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 1670; NO-VP-NEXT: [[RDX:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MAX:%.*]], [[FOR_BODY]] ] 1671; NO-VP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] 1672; NO-VP-NEXT: [[TMP8:%.*]] = load float, ptr [[ARRAYIDX]], align 4 1673; NO-VP-NEXT: [[MAX]] = tail call float @llvm.maximum.f32(float [[RDX]], float [[TMP8]]) 1674; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1675; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 1676; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]] 1677; NO-VP: for.end: 1678; NO-VP-NEXT: [[MAX_LCSSA:%.*]] = phi float [ [[MAX]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] 1679; NO-VP-NEXT: ret float [[MAX_LCSSA]] 1680; 1681entry: 1682 br label %for.body 1683 1684for.body: 1685 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 1686 %rdx = phi float [ %start, %entry ], [ %max, %for.body ] 1687 %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv 1688 %0 = load float, ptr %arrayidx, align 4 1689 %max = tail call float @llvm.maximum.f32(float %rdx, float %0) 1690 %iv.next = add nuw nsw i64 %iv, 1 1691 %exitcond.not = icmp eq i64 %iv.next, %n 1692 br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 1693 1694for.end: 1695 ret float %max 1696} 1697 1698define float @fmuladd(ptr %a, ptr %b, i64 %n, float %start) { 1699; IF-EVL-LABEL: @fmuladd( 1700; IF-EVL-NEXT: entry: 1701; IF-EVL-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N:%.*]] 1702; IF-EVL-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() 1703; IF-EVL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4 1704; IF-EVL-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]] 1705; IF-EVL-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1706; IF-EVL: vector.ph: 1707; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 1708; IF-EVL-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 1709; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 1710; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]] 1711; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] 1712; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] 1713; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() 1714; IF-EVL-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 1715; IF-EVL-NEXT: [[TMP9:%.*]] = insertelement <vscale x 4 x float> splat (float -0.000000e+00), float [[START:%.*]], i32 0 1716; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] 1717; IF-EVL: vector.body: 1718; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1719; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] 1720; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x float> [ [[TMP9]], [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ] 1721; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]] 1722; IF-EVL-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) 1723; IF-EVL-NEXT: [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0 1724; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP11]] 1725; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i32 0 1726; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) 1727; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[TMP11]] 1728; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0 1729; IF-EVL-NEXT: [[VP_OP_LOAD1:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP15]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) 1730; IF-EVL-NEXT: [[TMP16:%.*]] = call <vscale x 4 x float> @llvm.vp.fmuladd.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x float> [[VP_OP_LOAD1]], <vscale x 4 x float> [[VEC_PHI]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) 1731; IF-EVL-NEXT: [[TMP17]] = call <vscale x 4 x float> @llvm.vp.merge.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[TMP16]], <vscale x 4 x float> [[VEC_PHI]], i32 [[TMP10]]) 1732; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP10]] to i64 1733; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]] 1734; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] 1735; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1736; IF-EVL-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] 1737; IF-EVL: middle.block: 1738; IF-EVL-NEXT: [[TMP20:%.*]] = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, <vscale x 4 x float> [[TMP17]]) 1739; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] 1740; IF-EVL: scalar.ph: 1741; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1742; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP20]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 1743; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] 1744; IF-EVL: for.body: 1745; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 1746; IF-EVL-NEXT: [[RDX:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MULADD:%.*]], [[FOR_BODY]] ] 1747; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] 1748; IF-EVL-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX]], align 4 1749; IF-EVL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]] 1750; IF-EVL-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 1751; IF-EVL-NEXT: [[MULADD]] = tail call reassoc float @llvm.fmuladd.f32(float [[TMP21]], float [[TMP22]], float [[RDX]]) 1752; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1753; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 1754; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] 1755; IF-EVL: for.end: 1756; IF-EVL-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP20]], [[MIDDLE_BLOCK]] ] 1757; IF-EVL-NEXT: ret float [[MULADD_LCSSA]] 1758; 1759; NO-VP-LABEL: @fmuladd( 1760; NO-VP-NEXT: entry: 1761; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 1762; NO-VP-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 1763; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] 1764; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1765; NO-VP: vector.ph: 1766; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 1767; NO-VP-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 1768; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] 1769; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] 1770; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 1771; NO-VP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 1772; NO-VP-NEXT: [[TMP6:%.*]] = insertelement <vscale x 4 x float> splat (float -0.000000e+00), float [[START:%.*]], i32 0 1773; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] 1774; NO-VP: vector.body: 1775; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1776; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x float> [ [[TMP6]], [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ] 1777; NO-VP-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0 1778; NO-VP-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP7]] 1779; NO-VP-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 0 1780; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP9]], align 4 1781; NO-VP-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[TMP7]] 1782; NO-VP-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i32 0 1783; NO-VP-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x float>, ptr [[TMP11]], align 4 1784; NO-VP-NEXT: [[TMP12]] = call reassoc <vscale x 4 x float> @llvm.fmuladd.nxv4f32(<vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x float> [[WIDE_LOAD1]], <vscale x 4 x float> [[VEC_PHI]]) 1785; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] 1786; NO-VP-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1787; NO-VP-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] 1788; NO-VP: middle.block: 1789; NO-VP-NEXT: [[TMP14:%.*]] = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, <vscale x 4 x float> [[TMP12]]) 1790; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] 1791; NO-VP-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 1792; NO-VP: scalar.ph: 1793; NO-VP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1794; NO-VP-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP14]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 1795; NO-VP-NEXT: br label [[FOR_BODY:%.*]] 1796; NO-VP: for.body: 1797; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 1798; NO-VP-NEXT: [[RDX:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MULADD:%.*]], [[FOR_BODY]] ] 1799; NO-VP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] 1800; NO-VP-NEXT: [[TMP15:%.*]] = load float, ptr [[ARRAYIDX]], align 4 1801; NO-VP-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]] 1802; NO-VP-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 1803; NO-VP-NEXT: [[MULADD]] = tail call reassoc float @llvm.fmuladd.f32(float [[TMP15]], float [[TMP16]], float [[RDX]]) 1804; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1805; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 1806; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP33:![0-9]+]] 1807; NO-VP: for.end: 1808; NO-VP-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ] 1809; NO-VP-NEXT: ret float [[MULADD_LCSSA]] 1810; 1811entry: 1812 br label %for.body 1813 1814for.body: 1815 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 1816 %rdx = phi float [ %start, %entry ], [ %muladd, %for.body ] 1817 %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv 1818 %0 = load float, ptr %arrayidx, align 4 1819 %arrayidx2 = getelementptr inbounds float, ptr %b, i64 %iv 1820 %1 = load float, ptr %arrayidx2, align 4 1821 %muladd = tail call reassoc float @llvm.fmuladd.f32(float %0, float %1, float %rdx) 1822 %iv.next = add nuw nsw i64 %iv, 1 1823 %exitcond.not = icmp eq i64 %iv.next, %n 1824 br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 1825 1826for.end: 1827 ret float %muladd 1828} 1829 1830define i32 @anyof_icmp(ptr %a, i64 %n, i32 %start, i32 %inv) { 1831; IF-EVL-LABEL: @anyof_icmp( 1832; IF-EVL-NEXT: entry: 1833; IF-EVL-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N:%.*]] 1834; IF-EVL-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() 1835; IF-EVL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4 1836; IF-EVL-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]] 1837; IF-EVL-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1838; IF-EVL: vector.ph: 1839; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 1840; IF-EVL-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 1841; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 1842; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]] 1843; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] 1844; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] 1845; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() 1846; IF-EVL-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 1847; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] 1848; IF-EVL: vector.body: 1849; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1850; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] 1851; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] 1852; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]] 1853; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) 1854; IF-EVL-NEXT: [[TMP10:%.*]] = add i64 [[EVL_BASED_IV]], 0 1855; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP10]] 1856; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 0 1857; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]]) 1858; IF-EVL-NEXT: [[TMP13:%.*]] = icmp slt <vscale x 4 x i32> [[VP_OP_LOAD]], splat (i32 3) 1859; IF-EVL-NEXT: [[TMP14:%.*]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP13]] 1860; IF-EVL-NEXT: [[TMP15]] = call <vscale x 4 x i1> @llvm.vp.merge.nxv4i1(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i1> [[TMP14]], <vscale x 4 x i1> [[VEC_PHI]], i32 [[TMP9]]) 1861; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64 1862; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] 1863; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] 1864; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1865; IF-EVL-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] 1866; IF-EVL: middle.block: 1867; IF-EVL-NEXT: [[TMP18:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP15]]) 1868; IF-EVL-NEXT: [[TMP19:%.*]] = freeze i1 [[TMP18]] 1869; IF-EVL-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP19]], i32 [[INV:%.*]], i32 [[START:%.*]] 1870; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] 1871; IF-EVL: scalar.ph: 1872; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1873; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 1874; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] 1875; IF-EVL: for.body: 1876; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 1877; IF-EVL-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ANYOF:%.*]], [[FOR_BODY]] ] 1878; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 1879; IF-EVL-NEXT: [[TMP20:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 1880; IF-EVL-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[TMP20]], 3 1881; IF-EVL-NEXT: [[ANYOF]] = select i1 [[CMP_I]], i32 [[INV]], i32 [[RDX]] 1882; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1883; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 1884; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]] 1885; IF-EVL: for.end: 1886; IF-EVL-NEXT: [[ANYOF_LCSSA:%.*]] = phi i32 [ [[ANYOF]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] 1887; IF-EVL-NEXT: ret i32 [[ANYOF_LCSSA]] 1888; 1889; NO-VP-LABEL: @anyof_icmp( 1890; NO-VP-NEXT: entry: 1891; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 1892; NO-VP-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 1893; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] 1894; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1895; NO-VP: vector.ph: 1896; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 1897; NO-VP-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 1898; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] 1899; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] 1900; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 1901; NO-VP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 1902; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] 1903; NO-VP: vector.body: 1904; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1905; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] 1906; NO-VP-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 1907; NO-VP-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP6]] 1908; NO-VP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 1909; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP8]], align 4 1910; NO-VP-NEXT: [[TMP9:%.*]] = icmp slt <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 3) 1911; NO-VP-NEXT: [[TMP10]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP9]] 1912; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] 1913; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1914; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] 1915; NO-VP: middle.block: 1916; NO-VP-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP10]]) 1917; NO-VP-NEXT: [[TMP13:%.*]] = freeze i1 [[TMP12]] 1918; NO-VP-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP13]], i32 [[INV:%.*]], i32 [[START:%.*]] 1919; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] 1920; NO-VP-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 1921; NO-VP: scalar.ph: 1922; NO-VP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1923; NO-VP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 1924; NO-VP-NEXT: br label [[FOR_BODY:%.*]] 1925; NO-VP: for.body: 1926; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 1927; NO-VP-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ANYOF:%.*]], [[FOR_BODY]] ] 1928; NO-VP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 1929; NO-VP-NEXT: [[TMP14:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 1930; NO-VP-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[TMP14]], 3 1931; NO-VP-NEXT: [[ANYOF]] = select i1 [[CMP_I]], i32 [[INV]], i32 [[RDX]] 1932; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1933; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 1934; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP35:![0-9]+]] 1935; NO-VP: for.end: 1936; NO-VP-NEXT: [[ANYOF_LCSSA:%.*]] = phi i32 [ [[ANYOF]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] 1937; NO-VP-NEXT: ret i32 [[ANYOF_LCSSA]] 1938; 1939entry: 1940 br label %for.body 1941 1942for.body: 1943 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 1944 %rdx = phi i32 [ %start, %entry ], [ %anyof, %for.body ] 1945 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv 1946 %0 = load i32, ptr %arrayidx, align 4 1947 %cmp.i = icmp slt i32 %0, 3 1948 %anyof = select i1 %cmp.i, i32 %inv, i32 %rdx 1949 %iv.next = add nuw nsw i64 %iv, 1 1950 %exitcond.not = icmp eq i64 %iv.next, %n 1951 br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 1952 1953for.end: 1954 ret i32 %anyof 1955} 1956 1957define i32 @anyof_fcmp(ptr %a, i64 %n, i32 %start, i32 %inv) { 1958; IF-EVL-LABEL: @anyof_fcmp( 1959; IF-EVL-NEXT: entry: 1960; IF-EVL-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N:%.*]] 1961; IF-EVL-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() 1962; IF-EVL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4 1963; IF-EVL-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]] 1964; IF-EVL-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1965; IF-EVL: vector.ph: 1966; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 1967; IF-EVL-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 1968; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 1969; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]] 1970; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] 1971; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] 1972; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() 1973; IF-EVL-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 1974; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] 1975; IF-EVL: vector.body: 1976; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1977; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] 1978; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] 1979; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]] 1980; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) 1981; IF-EVL-NEXT: [[TMP10:%.*]] = add i64 [[EVL_BASED_IV]], 0 1982; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP10]] 1983; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i32 0 1984; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]]) 1985; IF-EVL-NEXT: [[TMP13:%.*]] = fcmp fast olt <vscale x 4 x float> [[VP_OP_LOAD]], splat (float 3.000000e+00) 1986; IF-EVL-NEXT: [[TMP14:%.*]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP13]] 1987; IF-EVL-NEXT: [[TMP15]] = call <vscale x 4 x i1> @llvm.vp.merge.nxv4i1(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i1> [[TMP14]], <vscale x 4 x i1> [[VEC_PHI]], i32 [[TMP9]]) 1988; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64 1989; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] 1990; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] 1991; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1992; IF-EVL-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] 1993; IF-EVL: middle.block: 1994; IF-EVL-NEXT: [[TMP18:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP15]]) 1995; IF-EVL-NEXT: [[TMP19:%.*]] = freeze i1 [[TMP18]] 1996; IF-EVL-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP19]], i32 [[INV:%.*]], i32 [[START:%.*]] 1997; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] 1998; IF-EVL: scalar.ph: 1999; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 2000; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 2001; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] 2002; IF-EVL: for.body: 2003; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 2004; IF-EVL-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ANYOF:%.*]], [[FOR_BODY]] ] 2005; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 2006; IF-EVL-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4 2007; IF-EVL-NEXT: [[CMP_I:%.*]] = fcmp fast olt float [[TMP20]], 3.000000e+00 2008; IF-EVL-NEXT: [[ANYOF]] = select i1 [[CMP_I]], i32 [[INV]], i32 [[RDX]] 2009; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 2010; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 2011; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]] 2012; IF-EVL: for.end: 2013; IF-EVL-NEXT: [[ANYOF_LCSSA:%.*]] = phi i32 [ [[ANYOF]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] 2014; IF-EVL-NEXT: ret i32 [[ANYOF_LCSSA]] 2015; 2016; NO-VP-LABEL: @anyof_fcmp( 2017; NO-VP-NEXT: entry: 2018; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 2019; NO-VP-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 2020; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] 2021; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 2022; NO-VP: vector.ph: 2023; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 2024; NO-VP-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 2025; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] 2026; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] 2027; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 2028; NO-VP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 2029; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] 2030; NO-VP: vector.body: 2031; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 2032; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] 2033; NO-VP-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 2034; NO-VP-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP6]] 2035; NO-VP-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0 2036; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP8]], align 4 2037; NO-VP-NEXT: [[TMP9:%.*]] = fcmp fast olt <vscale x 4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00) 2038; NO-VP-NEXT: [[TMP10]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP9]] 2039; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] 2040; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 2041; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] 2042; NO-VP: middle.block: 2043; NO-VP-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP10]]) 2044; NO-VP-NEXT: [[TMP13:%.*]] = freeze i1 [[TMP12]] 2045; NO-VP-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP13]], i32 [[INV:%.*]], i32 [[START:%.*]] 2046; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] 2047; NO-VP-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 2048; NO-VP: scalar.ph: 2049; NO-VP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 2050; NO-VP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 2051; NO-VP-NEXT: br label [[FOR_BODY:%.*]] 2052; NO-VP: for.body: 2053; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 2054; NO-VP-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ANYOF:%.*]], [[FOR_BODY]] ] 2055; NO-VP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 2056; NO-VP-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4 2057; NO-VP-NEXT: [[CMP_I:%.*]] = fcmp fast olt float [[TMP14]], 3.000000e+00 2058; NO-VP-NEXT: [[ANYOF]] = select i1 [[CMP_I]], i32 [[INV]], i32 [[RDX]] 2059; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 2060; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 2061; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP37:![0-9]+]] 2062; NO-VP: for.end: 2063; NO-VP-NEXT: [[ANYOF_LCSSA:%.*]] = phi i32 [ [[ANYOF]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] 2064; NO-VP-NEXT: ret i32 [[ANYOF_LCSSA]] 2065; 2066entry: 2067 br label %for.body 2068 2069for.body: 2070 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 2071 %rdx = phi i32 [ %start, %entry ], [ %anyof, %for.body ] 2072 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv 2073 %0 = load float, ptr %arrayidx, align 4 2074 %cmp.i = fcmp fast olt float %0, 3.0 2075 %anyof = select i1 %cmp.i, i32 %inv, i32 %rdx 2076 %iv.next = add nuw nsw i64 %iv, 1 2077 %exitcond.not = icmp eq i64 %iv.next, %n 2078 br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 2079 2080for.end: 2081 ret i32 %anyof 2082} 2083 2084declare float @llvm.minimum.f32(float, float) 2085declare float @llvm.maximum.f32(float, float) 2086declare float @llvm.fmuladd.f32(float, float, float) 2087 2088attributes #0 = { "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" } 2089 2090!0 = distinct !{!0, !1} 2091!1 = !{!"llvm.loop.vectorize.enable", i1 true} 2092