; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S \
; RUN: < %s | FileCheck %s
; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 \
; RUN: -scalable-vectorization=on -S < %s | FileCheck %s -check-prefix=SCALABLE

; Tests that "any-of" select reductions (a select between a loop-carried phi
; and loop-invariant values, keyed on a per-iteration compare) are vectorized
; on RISC-V for both fixed-width vectors (CHECK) and scalable vectors
; (SCALABLE). The reduction is expected to be lowered to an i1 or-reduction
; in the loop plus a single frozen scalar select in the middle block.

target triple = "riscv64"

; Any-of reduction keyed on a signed integer compare; both select arms
; (the start value 0 and %y) are loop-invariant.
define i32 @select_icmp(i32 %x, i32 %y, ptr nocapture readonly %c, i64 %n) #0 {
; CHECK-LABEL: @select_icmp
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[NOT:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
; CHECK-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI]], [[NOT]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
; CHECK-NEXT: [[FR:%.*]] = freeze i1 [[TMP7]]
; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 %y, i32 0
; CHECK-NEXT: %cmp.n = icmp eq i64 %n, %n.vec
;
; SCALABLE-LABEL: @select_icmp
; SCALABLE: vector.ph:
; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, [[TMP3]]
; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]]
; SCALABLE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4
; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[X:%.*]], i64 0
; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
; SCALABLE: vector.body:
; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; SCALABLE-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
; SCALABLE-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[TMP4]]
; SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP6]], align 4
; SCALABLE-NEXT: [[TMP8:%.*]] = icmp slt <vscale x 4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; SCALABLE-NEXT: [[NOT:%.*]] = xor <vscale x 4 x i1> [[TMP8]], splat (i1 true)
; SCALABLE-NEXT: [[TMP9]] = or <vscale x 4 x i1> [[VEC_PHI]], [[NOT]]
; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]]
; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; SCALABLE: middle.block:
; SCALABLE-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP9]])
; SCALABLE-NEXT: [[FR:%.*]] = freeze i1 [[TMP13]]
; SCALABLE-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 %y, i32 0
; SCALABLE-NEXT: %cmp.n = icmp eq i64 %n, %n.vec
;
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %a = phi i32 [ 0, %entry], [ %cond, %for.body ]
  %arrayidx = getelementptr inbounds i32, ptr %c, i64 %indvars.iv
  %0 = load i32, ptr %arrayidx, align 4
  %cmp1 = icmp slt i32 %0, %x
  %cond = select i1 %cmp1, i32 %a, i32 %y
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond.not = icmp eq i64 %indvars.iv.next, %n
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret i32 %cond
}

; Same as @select_icmp but keyed on a fast-math float compare; the reduction
; itself is still an integer any-of reduction.
define i32 @select_fcmp(float %x, i32 %y, ptr nocapture readonly %c, i64 %n) #0 {
; CHECK-LABEL: @select_fcmp
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[X:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[C:%.*]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = fcmp fast olt <4 x float> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[NOT:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
; CHECK-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI]], [[NOT]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
; CHECK-NEXT: [[FR:%.*]] = freeze i1 [[TMP7]]
; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 %y, i32 0
; CHECK-NEXT: %cmp.n = icmp eq i64 %n, %n.vec
;
; SCALABLE-LABEL: @select_fcmp
; SCALABLE: vector.ph:
; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, [[TMP3]]
; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]]
; SCALABLE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4
; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[X:%.*]], i64 0
; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x float> [[BROADCAST_SPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
; SCALABLE: vector.body:
; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; SCALABLE-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
; SCALABLE-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[C:%.*]], i64 [[TMP4]]
; SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i32 0
; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP6]], align 4
; SCALABLE-NEXT: [[TMP8:%.*]] = fcmp fast olt <vscale x 4 x float> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; SCALABLE-NEXT: [[NOT:%.*]] = xor <vscale x 4 x i1> [[TMP8]], splat (i1 true)
; SCALABLE-NEXT: [[TMP9]] = or <vscale x 4 x i1> [[VEC_PHI]], [[NOT]]
; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]]
; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; SCALABLE: middle.block:
; SCALABLE-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP9]])
; SCALABLE-NEXT: [[FR:%.*]] = freeze i1 [[TMP13]]
; SCALABLE-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 %y, i32 0
; SCALABLE-NEXT: %cmp.n = icmp eq i64 %n, %n.vec
;
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %a = phi i32 [ 0, %entry], [ %cond, %for.body ]
  %arrayidx = getelementptr inbounds float, ptr %c, i64 %indvars.iv
  %0 = load float, ptr %arrayidx, align 4
  %cmp1 = fcmp fast olt float %0, %x
  %cond = select i1 %cmp1, i32 %a, i32 %y
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond.not = icmp eq i64 %indvars.iv.next, %n
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret i32 %cond
}

; Any-of reduction where both select arms are constants (3 and 7).
define i32 @select_const_i32_from_icmp(ptr nocapture readonly %v, i64 %n) #0 {
; CHECK-LABEL: @select_const_i32_from_icmp
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[V:%.*]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], splat (i32 3)
; CHECK-NEXT: [[NOT:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
; CHECK-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI]], [[NOT]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
; CHECK-NEXT: [[FR:%.*]] = freeze i1 [[TMP7]]
; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 7, i32 3
; CHECK-NEXT: %cmp.n = icmp eq i64 %n, %n.vec
;
; SCALABLE-LABEL: @select_const_i32_from_icmp
; SCALABLE: vector.ph:
; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, [[TMP3]]
; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]]
; SCALABLE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4
; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
; SCALABLE: vector.body:
; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; SCALABLE-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
; SCALABLE-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[V:%.*]], i64 [[TMP4]]
; SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP6]], align 4
; SCALABLE-NEXT: [[TMP8:%.*]] = icmp eq <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 3)
; SCALABLE-NEXT: [[NOT:%.*]] = xor <vscale x 4 x i1> [[TMP8]], splat (i1 true)
; SCALABLE-NEXT: [[TMP9]] = or <vscale x 4 x i1> [[VEC_PHI]], [[NOT]]
; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]]
; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; SCALABLE: middle.block:
; SCALABLE-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP9]])
; SCALABLE-NEXT: [[FR:%.*]] = freeze i1 [[TMP13]]
; SCALABLE-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 7, i32 3
; SCALABLE-NEXT: %cmp.n = icmp eq i64 %n, %n.vec
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %0 = phi i64 [ 0, %entry ], [ %6, %for.body ]
  %1 = phi i32 [ 3, %entry ], [ %5, %for.body ]
  %2 = getelementptr inbounds i32, ptr %v, i64 %0
  %3 = load i32, ptr %2, align 4
  %4 = icmp eq i32 %3, 3
  %5 = select i1 %4, i32 %1, i32 7
  %6 = add nuw nsw i64 %0, 1
  %7 = icmp eq i64 %6, %n
  br i1 %7, label %exit, label %for.body

exit:                                             ; preds = %for.body
  ret i32 %5
}

; Any-of reduction where both select arms are loop-invariant arguments
; (%a is also the reduction start value).
define i32 @select_i32_from_icmp(ptr nocapture readonly %v, i32 %a, i32 %b, i64 %n) #0 {
; CHECK-LABEL: @select_i32_from_icmp
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[V:%.*]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], splat (i32 3)
; CHECK-NEXT: [[NOT:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
; CHECK-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI]], [[NOT]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
; CHECK-NEXT: [[FR:%.*]] = freeze i1 [[TMP7]]
; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 %b, i32 %a
; CHECK-NEXT: %cmp.n = icmp eq i64 %n, %n.vec
;
; SCALABLE-LABEL: @select_i32_from_icmp
; SCALABLE: vector.ph:
; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, [[TMP3]]
; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]]
; SCALABLE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4
; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
; SCALABLE: vector.body:
; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; SCALABLE-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
; SCALABLE-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[V:%.*]], i64 [[TMP4]]
; SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP6]], align 4
; SCALABLE-NEXT: [[TMP8:%.*]] = icmp eq <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 3)
; SCALABLE-NEXT: [[NOT:%.*]] = xor <vscale x 4 x i1> [[TMP8]], splat (i1 true)
; SCALABLE-NEXT: [[TMP9]] = or <vscale x 4 x i1> [[VEC_PHI]], [[NOT]]
; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]]
; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; SCALABLE: middle.block:
; SCALABLE-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP9]])
; SCALABLE-NEXT: [[FR:%.*]] = freeze i1 [[TMP13]]
; SCALABLE-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 %b, i32 %a
; SCALABLE-NEXT: %cmp.n = icmp eq i64 %n, %n.vec
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %0 = phi i64 [ 0, %entry ], [ %6, %for.body ]
  %1 = phi i32 [ %a, %entry ], [ %5, %for.body ]
  %2 = getelementptr inbounds i32, ptr %v, i64 %0
  %3 = load i32, ptr %2, align 4
  %4 = icmp eq i32 %3, 3
  %5 = select i1 %4, i32 %1, i32 %b
  %6 = add nuw nsw i64 %0, 1
  %7 = icmp eq i64 %6, %n
  br i1 %7, label %exit, label %for.body

exit:                                             ; preds = %for.body
  ret i32 %5
}

; Any-of reduction with constant arms keyed on a fast-math float compare.
define i32 @select_const_i32_from_fcmp(ptr nocapture readonly %v, i64 %n) #0 {
; CHECK-LABEL: @select_const_i32_from_fcmp
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[V:%.*]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = fcmp fast ueq <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00)
; CHECK-NEXT: [[NOT:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
; CHECK-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI]], [[NOT]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
; CHECK-NEXT: [[FR:%.*]] = freeze i1 [[TMP7]]
; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 1, i32 2
; CHECK-NEXT: %cmp.n = icmp eq i64 %n, %n.vec
;
; SCALABLE-LABEL: @select_const_i32_from_fcmp
; SCALABLE: vector.ph:
; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, [[TMP3]]
; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]]
; SCALABLE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4
; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
; SCALABLE: vector.body:
; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; SCALABLE-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
; SCALABLE-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[V:%.*]], i64 [[TMP4]]
; SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i32 0
; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP6]], align 4
; SCALABLE-NEXT: [[TMP8:%.*]] = fcmp fast ueq <vscale x 4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00)
; SCALABLE-NEXT: [[NOT:%.*]] = xor <vscale x 4 x i1> [[TMP8]], splat (i1 true)
; SCALABLE-NEXT: [[TMP9]] = or <vscale x 4 x i1> [[VEC_PHI]], [[NOT]]
; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]]
; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; SCALABLE: middle.block:
; SCALABLE-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP9]])
; SCALABLE-NEXT: [[FR:%.*]] = freeze i1 [[TMP13]]
; SCALABLE-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 1, i32 2
; SCALABLE-NEXT: %cmp.n = icmp eq i64 %n, %n.vec
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %0 = phi i64 [ 0, %entry ], [ %6, %for.body ]
  %1 = phi i32 [ 2, %entry ], [ %5, %for.body ]
  %2 = getelementptr inbounds float, ptr %v, i64 %0
  %3 = load float, ptr %2, align 4
  %4 = fcmp fast ueq float %3, 3.0
  %5 = select i1 %4, i32 %1, i32 1
  %6 = add nuw nsw i64 %0, 1
  %7 = icmp eq i64 %6, %n
  br i1 %7, label %exit, label %for.body

exit:                                             ; preds = %for.body
  ret i32 %5
}

; Negative test: the reduction phi and select arms are floats, which this
; any-of reduction pattern does not support, so no vector loop is created.
define float @select_const_f32_from_icmp(ptr nocapture readonly %v, i64 %n) #0 {
; CHECK-LABEL: @select_const_f32_from_icmp
; CHECK-NOT: vector.body
;
; SCALABLE-LABEL: @select_const_f32_from_icmp
; SCALABLE-NOT: vector.body
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %0 = phi i64 [ 0, %entry ], [ %6, %for.body ]
  %1 = phi fast float [ 3.0, %entry ], [ %5, %for.body ]
  %2 = getelementptr inbounds i32, ptr %v, i64 %0
  %3 = load i32, ptr %2, align 4
  %4 = icmp eq i32 %3, 3
  %5 = select fast i1 %4, float %1, float 7.0
  %6 = add nuw nsw i64 %0, 1
  %7 = icmp eq i64 %6, %n
  br i1 %7, label %exit, label %for.body

exit:                                             ; preds = %for.body
  ret float %5
}

; Predicated any-of reduction: the select sits in an if-block, so the
; vectorized reduction update is masked (masked load + predicated phi).
define i32 @pred_select_const_i32_from_icmp(ptr noalias nocapture readonly %src1, ptr noalias nocapture readonly %src2, i64 %n) #0 {
; CHECK-LABEL: @pred_select_const_i32_from_icmp
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC1:%.*]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], splat (i32 35)
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[SRC2:%.*]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP5]], i32 0
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP6]], i32 4, <4 x i1> [[TMP4]], <4 x i32> poison)
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <4 x i32> [[WIDE_MASKED_LOAD]], splat (i32 2)
; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i1> [[VEC_PHI]], [[TMP8]]
; CHECK-NEXT: [[PREDPHI]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP9]], <4 x i1> [[VEC_PHI]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[PREDPHI]])
; CHECK-NEXT: [[FR:%.*]] = freeze i1 [[TMP12]]
; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 1, i32 0
; CHECK-NEXT: %cmp.n = icmp eq i64 %n, %n.vec
;
; SCALABLE-LABEL: @pred_select_const_i32_from_icmp
; SCALABLE: vector.ph:
; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, [[TMP3]]
; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]]
; SCALABLE-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4
; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
; SCALABLE: vector.body:
; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; SCALABLE-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[VECTOR_BODY]] ]
; SCALABLE-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[SRC1:%.*]], i64 [[TMP4]]
; SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP6]], align 4
; SCALABLE-NEXT: [[TMP8:%.*]] = icmp sgt <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 35)
; SCALABLE-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[SRC2:%.*]], i64 [[TMP4]]
; SCALABLE-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP9]], i32 0
; SCALABLE-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP10]], i32 4, <vscale x 4 x i1> [[TMP8]], <vscale x 4 x i32> poison)
; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], splat (i32 2)
; SCALABLE-NEXT: [[TMP13:%.*]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP12]]
; SCALABLE-NEXT: [[PREDPHI]] = select <vscale x 4 x i1> [[TMP8]], <vscale x 4 x i1> [[TMP13]], <vscale x 4 x i1> [[VEC_PHI]]
; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP16]]
; SCALABLE-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; SCALABLE-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; SCALABLE: middle.block:
; SCALABLE-NEXT: [[TMP18:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[PREDPHI]])
; SCALABLE-NEXT: [[FR:%.*]] = freeze i1 [[TMP18]]
; SCALABLE-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 1, i32 0
; SCALABLE-NEXT: %cmp.n = icmp eq i64 %n, %n.vec
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.inc
  %i.013 = phi i64 [ %inc, %for.inc ], [ 0, %entry ]
  %r.012 = phi i32 [ %r.1, %for.inc ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %src1, i64 %i.013
  %0 = load i32, ptr %arrayidx, align 4
  %cmp1 = icmp sgt i32 %0, 35
  br i1 %cmp1, label %if.then, label %for.inc

if.then:                                          ; preds = %for.body
  %arrayidx2 = getelementptr inbounds i32, ptr %src2, i64 %i.013
  %1 = load i32, ptr %arrayidx2, align 4
  %cmp3 = icmp eq i32 %1, 2
  %spec.select = select i1 %cmp3, i32 1, i32 %r.012
  br label %for.inc

for.inc:                                          ; preds = %if.then, %for.body
  %r.1 = phi i32 [ %r.012, %for.body ], [ %spec.select, %if.then ]
  %inc = add nuw nsw i64 %i.013, 1
  %exitcond.not = icmp eq i64 %inc, %n
  br i1 %exitcond.not, label %for.end.loopexit, label %for.body

for.end.loopexit:                                 ; preds = %for.inc
  %r.1.lcssa = phi i32 [ %r.1, %for.inc ]
  ret i32 %r.1.lcssa
}

attributes #0 = { "target-features"="+f,+v" }