; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -mtriple=riscv64 -mcpu=sifive-x280 -passes=slp-vectorizer -S -slp-revec -slp-max-reg-size=1024 -slp-threshold=-100 %s | FileCheck --check-prefixes=CHECK,POWEROF2 %s
; RUN: opt -mtriple=riscv64 -mcpu=sifive-x280 -passes=slp-vectorizer -S -slp-revec -slp-max-reg-size=1024 -slp-threshold=-100 -slp-vectorize-non-power-of-2 %s | FileCheck --check-prefixes=CHECK,NONPOWEROF2 %s

; SLP vectorizer tests for riscv64 (sifive-x280) with -slp-revec enabled.
; Every input below already operates on vector values (or, in @test4, on
; lanes extracted from a vector), and the assertions expect those operations
; to be combined into wider vector operations.
;
; The two invocations above differ only in -slp-vectorize-non-power-of-2; the
; expected output diverges between them only for @test4, which is why that
; function alone uses the per-invocation prefixes.

; Two <2 x i32> loads from constant addresses feed two <2 x i32> phis in a
; block reached through a switch. The second phi takes zeroinitializer on both
; switch edges. Expected output: one <4 x i32> masked gather and a single
; <4 x i32> phi.
define i32 @test() {
; CHECK-LABEL: @test(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[IF_END_I87:%.*]]
; CHECK:       if.end.i87:
; CHECK-NEXT:    [[TMP0:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> getelementptr (i32, <4 x ptr> <ptr inttoptr (i64 64036 to ptr), ptr inttoptr (i64 64036 to ptr), ptr inttoptr (i64 64064 to ptr), ptr inttoptr (i64 64064 to ptr)>, <4 x i64> <i64 0, i64 1, i64 0, i64 1>), i32 4, <4 x i1> splat (i1 true), <4 x i32> poison)
; CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> poison, <2 x i32> zeroinitializer, i64 2)
; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT:    switch i32 0, label [[SW_BB509_I:%.*]] [
; CHECK-NEXT:      i32 1, label [[SW_BB509_I]]
; CHECK-NEXT:      i32 0, label [[IF_THEN458_I:%.*]]
; CHECK-NEXT:    ]
; CHECK:       if.then458.i:
; CHECK-NEXT:    br label [[SW_BB509_I]]
; CHECK:       sw.bb509.i:
; CHECK-NEXT:    [[TMP4:%.*]] = phi <4 x i32> [ [[TMP0]], [[IF_THEN458_I]] ], [ [[TMP3]], [[IF_END_I87]] ], [ [[TMP3]], [[IF_END_I87]] ]
; CHECK-NEXT:    ret i32 0
;
entry:
  %getelementptr0 = getelementptr i8, ptr null, i64 64036
  %getelementptr1 = getelementptr i8, ptr null, i64 64064
  br label %if.end.i87

if.end.i87:                                       ; preds = %entry
  %0 = load <2 x i32>, ptr %getelementptr0, align 4
  %1 = load <2 x i32>, ptr %getelementptr1, align 8
  switch i32 0, label %sw.bb509.i [
    i32 1, label %sw.bb509.i
    i32 0, label %if.then458.i
  ]

if.then458.i:                                     ; preds = %if.end.i87
  br label %sw.bb509.i

sw.bb509.i:                                       ; preds = %if.then458.i, %if.end.i87, %if.end.i87
  %4 = phi <2 x i32> [ %0, %if.then458.i ], [ %0, %if.end.i87 ], [ %0, %if.end.i87 ]
  %5 = phi <2 x i32> [ %1, %if.then458.i ], [ zeroinitializer, %if.end.i87 ], [ zeroinitializer, %if.end.i87 ]
  ret i32 0
}

; Four parallel chains of load <8 x float> -> fpext -> fadd -> fptrunc ->
; fcmp ogt, loading from byte offsets 132, 164, 200 and 300. Expected output:
; each stage becomes a single <32 x ...> operation, and the loads at offsets
; 132 and 164 (32 bytes apart) are emitted as one <16 x float> load.
define void @test2() {
; CHECK-LABEL: @test2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr null, i64 132
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr null, i64 200
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr null, i64 300
; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x float>, ptr [[TMP1]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x float>, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP5:%.*]] = load <16 x float>, ptr [[TMP0]], align 4
; CHECK-NEXT:    [[TMP6:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> poison, <8 x float> [[TMP4]], i64 0)
; CHECK-NEXT:    [[TMP7:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> [[TMP6]], <8 x float> [[TMP3]], i64 8)
; CHECK-NEXT:    [[TMP8:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v16f32(<32 x float> [[TMP7]], <16 x float> [[TMP5]], i64 16)
; CHECK-NEXT:    [[TMP9:%.*]] = fpext <32 x float> [[TMP8]] to <32 x double>
; CHECK-NEXT:    [[TMP10:%.*]] = call <32 x double> @llvm.vector.insert.v32f64.v8f64(<32 x double> poison, <8 x double> zeroinitializer, i64 0)
; CHECK-NEXT:    [[TMP11:%.*]] = call <32 x double> @llvm.vector.insert.v32f64.v8f64(<32 x double> [[TMP10]], <8 x double> zeroinitializer, i64 8)
; CHECK-NEXT:    [[TMP12:%.*]] = call <32 x double> @llvm.vector.insert.v32f64.v8f64(<32 x double> [[TMP11]], <8 x double> zeroinitializer, i64 16)
; CHECK-NEXT:    [[TMP13:%.*]] = call <32 x double> @llvm.vector.insert.v32f64.v8f64(<32 x double> [[TMP12]], <8 x double> zeroinitializer, i64 24)
; CHECK-NEXT:    [[TMP14:%.*]] = fadd <32 x double> [[TMP13]], [[TMP9]]
; CHECK-NEXT:    [[TMP15:%.*]] = fptrunc <32 x double> [[TMP14]] to <32 x float>
; CHECK-NEXT:    [[TMP16:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> poison, <8 x float> zeroinitializer, i64 0)
; CHECK-NEXT:    [[TMP17:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> [[TMP16]], <8 x float> zeroinitializer, i64 8)
; CHECK-NEXT:    [[TMP18:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> [[TMP17]], <8 x float> zeroinitializer, i64 16)
; CHECK-NEXT:    [[TMP19:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> [[TMP18]], <8 x float> zeroinitializer, i64 24)
; CHECK-NEXT:    [[TMP20:%.*]] = fcmp ogt <32 x float> [[TMP19]], [[TMP15]]
; CHECK-NEXT:    ret void
;
entry:
  %0 = getelementptr i8, ptr null, i64 132
  %1 = getelementptr i8, ptr null, i64 164
  %2 = getelementptr i8, ptr null, i64 200
  %3 = getelementptr i8, ptr null, i64 300
  %4 = load <8 x float>, ptr %0, align 4
  %5 = load <8 x float>, ptr %1, align 4
  %6 = load <8 x float>, ptr %2, align 4
  %7 = load <8 x float>, ptr %3, align 4
  %8 = fpext <8 x float> %4 to <8 x double>
  %9 = fpext <8 x float> %5 to <8 x double>
  %10 = fpext <8 x float> %6 to <8 x double>
  %11 = fpext <8 x float> %7 to <8 x double>
  %12 = fadd <8 x double> zeroinitializer, %8
  %13 = fadd <8 x double> zeroinitializer, %9
  %14 = fadd <8 x double> zeroinitializer, %10
  %15 = fadd <8 x double> zeroinitializer, %11
  %16 = fptrunc <8 x double> %12 to <8 x float>
  %17 = fptrunc <8 x double> %13 to <8 x float>
  %18 = fptrunc <8 x double> %14 to <8 x float>
  %19 = fptrunc <8 x double> %15 to <8 x float>
  %20 = fcmp ogt <8 x float> zeroinitializer, %16
  %21 = fcmp ogt <8 x float> zeroinitializer, %17
  %22 = fcmp ogt <8 x float> zeroinitializer, %18
  %23 = fcmp ogt <8 x float> zeroinitializer, %19
  ret void
}

; Two <2 x float> phis fed by two <2 x float> selects that share the same
; loaded operand; one select uses an all-true constant mask, the other an fcmp
; result. Expected output: one <4 x float> phi and one <4 x float> select
; driven by a <4 x i1> mask assembled from both conditions.
define void @test3(float %0) {
; CHECK-LABEL: @test3(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[FOR_BODY_LR_PH:%.*]]
; CHECK:       for.body.lr.ph:
; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> poison, <2 x float> zeroinitializer, i64 0)
; CHECK-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> [[TMP1]], <2 x float> zeroinitializer, i64 2)
; CHECK-NEXT:    br i1 false, label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY:%.*]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    [[TMP3:%.*]] = phi <4 x float> [ [[TMP2]], [[FOR_BODY_LR_PH]] ], [ [[TMP10:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    ret void
; CHECK:       for.body:
; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x float>, ptr null, align 4
; CHECK-NEXT:    [[TMP5:%.*]] = fcmp olt <2 x float> zeroinitializer, [[TMP4]]
; CHECK-NEXT:    [[TMP6:%.*]] = call <4 x i1> @llvm.vector.insert.v4i1.v2i1(<4 x i1> poison, <2 x i1> splat (i1 true), i64 0)
; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i1> @llvm.vector.insert.v4i1.v2i1(<4 x i1> [[TMP6]], <2 x i1> [[TMP5]], i64 2)
; CHECK-NEXT:    [[TMP8:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> poison, <2 x float> [[TMP4]], i64 0)
; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; CHECK-NEXT:    [[TMP10]] = select <4 x i1> [[TMP7]], <4 x float> [[TMP9]], <4 x float> [[TMP2]]
; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
;
entry:
  br label %for.body.lr.ph

for.body.lr.ph:
  br i1 false, label %for.cond.cleanup, label %for.body

for.cond.cleanup:                                 ; preds = %for.body, %for.body.lr.ph
  %1 = phi <2 x float> [ zeroinitializer, %for.body.lr.ph ], [ %5, %for.body ]
  %2 = phi <2 x float> [ zeroinitializer, %for.body.lr.ph ], [ %6, %for.body ]
  ret void

for.body:
  %3 = load <2 x float>, ptr null, align 4
  %4 = fcmp olt <2 x float> zeroinitializer, %3
  %5 = select <2 x i1> <i1 true, i1 true>, <2 x float> %3, <2 x float> zeroinitializer
  %6 = select <2 x i1> %4, <2 x float> %3, <2 x float> zeroinitializer
  br label %for.cond.cleanup
}

; Scalar lanes 0, 1, 2 and 4, 5, 6 of an <8 x float> value flow through phis
; and fmuls into two three-term `fadd reassoc nsz` chains, each feeding
; llvm.sqrt.f32. This is the only function whose expected output depends on
; -slp-vectorize-non-power-of-2:
;   * without the flag, <2 x float>/<4 x float> vectors are expected and the
;     additions stay scalar;
;   * with the flag, <3 x float>/<6 x float> vectors are expected and each
;     chain becomes an llvm.vector.reduce.fadd.v3f32 call.
define ptr @test4() {
; POWEROF2-LABEL: @test4(
; POWEROF2-NEXT:    [[TMP1:%.*]] = fadd <8 x float> zeroinitializer, zeroinitializer
; POWEROF2-NEXT:    [[TMP2:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <2 x i32> <i32 1, i32 2>
; POWEROF2-NEXT:    [[TMP3:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <2 x i32> <i32 5, i32 6>
; POWEROF2-NEXT:    [[TMP4:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <2 x i32> <i32 4, i32 0>
; POWEROF2-NEXT:    [[TMP5:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> poison, <2 x float> [[TMP2]], i64 0)
; POWEROF2-NEXT:    [[TMP6:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> [[TMP5]], <2 x float> [[TMP3]], i64 2)
; POWEROF2-NEXT:    br label [[TMP8:%.*]]
; POWEROF2:       7:
; POWEROF2-NEXT:    br label [[TMP8]]
; POWEROF2:       8:
; POWEROF2-NEXT:    [[TMP9:%.*]] = phi <2 x float> [ poison, [[TMP7:%.*]] ], [ [[TMP4]], [[TMP0:%.*]] ]
; POWEROF2-NEXT:    [[TMP10:%.*]] = phi <4 x float> [ poison, [[TMP7]] ], [ [[TMP6]], [[TMP0]] ]
; POWEROF2-NEXT:    br label [[TMP11:%.*]]
; POWEROF2:       11:
; POWEROF2-NEXT:    [[TMP12:%.*]] = call <2 x float> @llvm.vector.extract.v2f32.v4f32(<4 x float> [[TMP10]], i64 0)
; POWEROF2-NEXT:    [[TMP13:%.*]] = fmul <2 x float> [[TMP12]], zeroinitializer
; POWEROF2-NEXT:    [[TMP14:%.*]] = call <2 x float> @llvm.vector.extract.v2f32.v4f32(<4 x float> [[TMP10]], i64 2)
; POWEROF2-NEXT:    [[TMP15:%.*]] = fmul <2 x float> zeroinitializer, [[TMP14]]
; POWEROF2-NEXT:    [[TMP16:%.*]] = extractelement <2 x float> [[TMP9]], i32 1
; POWEROF2-NEXT:    [[TMP17:%.*]] = fmul float 0.000000e+00, [[TMP16]]
; POWEROF2-NEXT:    [[TMP18:%.*]] = extractelement <2 x float> [[TMP9]], i32 0
; POWEROF2-NEXT:    [[TMP19:%.*]] = fmul float [[TMP18]], 0.000000e+00
; POWEROF2-NEXT:    [[TMP20:%.*]] = extractelement <2 x float> [[TMP13]], i32 0
; POWEROF2-NEXT:    [[TMP21:%.*]] = fadd reassoc nsz float [[TMP20]], [[TMP17]]
; POWEROF2-NEXT:    [[TMP22:%.*]] = extractelement <2 x float> [[TMP15]], i32 0
; POWEROF2-NEXT:    [[TMP23:%.*]] = fadd reassoc nsz float [[TMP22]], [[TMP19]]
; POWEROF2-NEXT:    [[TMP24:%.*]] = extractelement <2 x float> [[TMP13]], i32 1
; POWEROF2-NEXT:    [[TMP25:%.*]] = fadd reassoc nsz float [[TMP21]], [[TMP24]]
; POWEROF2-NEXT:    [[TMP26:%.*]] = extractelement <2 x float> [[TMP15]], i32 1
; POWEROF2-NEXT:    [[TMP27:%.*]] = fadd reassoc nsz float [[TMP23]], [[TMP26]]
; POWEROF2-NEXT:    [[TMP28:%.*]] = tail call float @llvm.sqrt.f32(float [[TMP25]])
; POWEROF2-NEXT:    [[TMP29:%.*]] = tail call float @llvm.sqrt.f32(float [[TMP27]])
; POWEROF2-NEXT:    ret ptr null
;
; NONPOWEROF2-LABEL: @test4(
; NONPOWEROF2-NEXT:    [[TMP1:%.*]] = fadd <8 x float> zeroinitializer, zeroinitializer
; NONPOWEROF2-NEXT:    [[TMP2:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
; NONPOWEROF2-NEXT:    [[TMP3:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <3 x i32> <i32 4, i32 5, i32 6>
; NONPOWEROF2-NEXT:    [[TMP4:%.*]] = call <6 x float> @llvm.vector.insert.v6f32.v3f32(<6 x float> poison, <3 x float> [[TMP2]], i64 0)
; NONPOWEROF2-NEXT:    [[TMP5:%.*]] = call <6 x float> @llvm.vector.insert.v6f32.v3f32(<6 x float> [[TMP4]], <3 x float> [[TMP3]], i64 3)
; NONPOWEROF2-NEXT:    br label [[TMP7:%.*]]
; NONPOWEROF2:       6:
; NONPOWEROF2-NEXT:    br label [[TMP7]]
; NONPOWEROF2:       7:
; NONPOWEROF2-NEXT:    [[TMP8:%.*]] = phi <6 x float> [ poison, [[TMP6:%.*]] ], [ [[TMP5]], [[TMP0:%.*]] ]
; NONPOWEROF2-NEXT:    br label [[TMP9:%.*]]
; NONPOWEROF2:       9:
; NONPOWEROF2-NEXT:    [[TMP10:%.*]] = call <3 x float> @llvm.vector.extract.v3f32.v6f32(<6 x float> [[TMP8]], i64 0)
; NONPOWEROF2-NEXT:    [[TMP11:%.*]] = fmul <3 x float> zeroinitializer, [[TMP10]]
; NONPOWEROF2-NEXT:    [[TMP12:%.*]] = call <3 x float> @llvm.vector.extract.v3f32.v6f32(<6 x float> [[TMP8]], i64 3)
; NONPOWEROF2-NEXT:    [[TMP13:%.*]] = fmul <3 x float> zeroinitializer, [[TMP12]]
; NONPOWEROF2-NEXT:    [[TMP14:%.*]] = call reassoc nsz float @llvm.vector.reduce.fadd.v3f32(float 0.000000e+00, <3 x float> [[TMP11]])
; NONPOWEROF2-NEXT:    [[TMP15:%.*]] = call reassoc nsz float @llvm.vector.reduce.fadd.v3f32(float 0.000000e+00, <3 x float> [[TMP13]])
; NONPOWEROF2-NEXT:    [[TMP16:%.*]] = tail call float @llvm.sqrt.f32(float [[TMP14]])
; NONPOWEROF2-NEXT:    [[TMP17:%.*]] = tail call float @llvm.sqrt.f32(float [[TMP15]])
; NONPOWEROF2-NEXT:    ret ptr null
;
  %1 = fadd <8 x float> zeroinitializer, zeroinitializer
  %2 = extractelement <8 x float> %1, i64 0
  %3 = extractelement <8 x float> %1, i64 1
  %4 = extractelement <8 x float> %1, i64 2
  %5 = extractelement <8 x float> %1, i64 4
  %6 = extractelement <8 x float> %1, i64 5
  %7 = extractelement <8 x float> %1, i64 6
  br label %9

8:
  br label %9

9:
  %10 = phi float [ 0.000000e+00, %8 ], [ %7, %0 ]
  %11 = phi float [ 0.000000e+00, %8 ], [ %6, %0 ]
  %12 = phi float [ 0.000000e+00, %8 ], [ %5, %0 ]
  %13 = phi float [ 0.000000e+00, %8 ], [ %4, %0 ]
  %14 = phi float [ 0.000000e+00, %8 ], [ %3, %0 ]
  %15 = phi float [ 0.000000e+00, %8 ], [ %2, %0 ]
  br label %16

16:
  %17 = fmul float %14, 0.000000e+00
  %18 = fmul float 0.000000e+00, %11
  %19 = fmul float 0.000000e+00, %15
  %20 = fmul float %12, 0.000000e+00
  %21 = fadd reassoc nsz float %17, %19
  %22 = fadd reassoc nsz float %18, %20
  %23 = fmul float %13, 0.000000e+00
  %24 = fmul float %10, 0.000000e+00
  %25 = fadd reassoc nsz float %21, %23
  %26 = fadd reassoc nsz float %22, %24
  %27 = tail call float @llvm.sqrt.f32(float %25)
  %28 = tail call float @llvm.sqrt.f32(float %26)
  ret ptr null
}

; Four <2 x double> fadds feed four single-predecessor phis in for.end47; only
; two of the fadds consume an fdiv result. Expected output: one <4 x double>
; fdiv, one <8 x double> fadd and one <8 x double> phi, with the fdiv lanes
; interleaved with zero lanes by a shufflevector.
define i32 @test5() {
; CHECK-LABEL: @test5(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = call <4 x double> @llvm.vector.insert.v4f64.v2f64(<4 x double> poison, <2 x double> zeroinitializer, i64 0)
; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x double> @llvm.vector.insert.v4f64.v2f64(<4 x double> [[TMP0]], <2 x double> zeroinitializer, i64 2)
; CHECK-NEXT:    [[TMP2:%.*]] = fdiv <4 x double> [[TMP1]], [[TMP1]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x double> @llvm.vector.insert.v8f64.v2f64(<8 x double> poison, <2 x double> zeroinitializer, i64 0)
; CHECK-NEXT:    [[TMP4:%.*]] = call <8 x double> @llvm.vector.insert.v8f64.v2f64(<8 x double> [[TMP3]], <2 x double> zeroinitializer, i64 2)
; CHECK-NEXT:    [[TMP5:%.*]] = call <8 x double> @llvm.vector.insert.v8f64.v2f64(<8 x double> [[TMP4]], <2 x double> zeroinitializer, i64 4)
; CHECK-NEXT:    [[TMP6:%.*]] = call <8 x double> @llvm.vector.insert.v8f64.v2f64(<8 x double> [[TMP5]], <2 x double> zeroinitializer, i64 6)
; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x double> @llvm.vector.insert.v8f64.v2f64(<8 x double> poison, <2 x double> zeroinitializer, i64 2)
; CHECK-NEXT:    [[TMP8:%.*]] = call <8 x double> @llvm.vector.insert.v8f64.v2f64(<8 x double> [[TMP7]], <2 x double> zeroinitializer, i64 6)
; CHECK-NEXT:    [[TMP9:%.*]] = call <8 x double> @llvm.vector.insert.v8f64.v4f64(<8 x double> poison, <4 x double> [[TMP2]], i64 0)
; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <8 x double> [[TMP9]], <8 x double> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 10, i32 11, i32 2, i32 3, i32 14, i32 15>
; CHECK-NEXT:    [[TMP11:%.*]] = fadd <8 x double> [[TMP6]], [[TMP10]]
; CHECK-NEXT:    br label [[FOR_END47:%.*]]
; CHECK:       for.end47:
; CHECK-NEXT:    [[TMP12:%.*]] = phi <8 x double> [ [[TMP11]], [[ENTRY:%.*]] ]
; CHECK-NEXT:    ret i32 0
;
entry:
  %div0 = fdiv <2 x double> zeroinitializer, zeroinitializer
  %div1 = fdiv <2 x double> zeroinitializer, zeroinitializer
  %add0 = fadd <2 x double> zeroinitializer, %div0
  %add1 = fadd <2 x double> zeroinitializer, zeroinitializer
  %add2 = fadd <2 x double> %div1, zeroinitializer
  %add3 = fadd <2 x double> zeroinitializer, zeroinitializer
  br label %for.end47

for.end47:                                        ; preds = %entry
  %add0.lcssa = phi <2 x double> [ %add0, %entry ]
  %add1.lcssa = phi <2 x double> [ %add1, %entry ]
  %add2.lcssa = phi <2 x double> [ %add2, %entry ]
  %add3.lcssa = phi <2 x double> [ %add3, %entry ]
  ret i32 0
}