; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
; RUN: opt -passes='print<cost-model>' -disable-output -mtriple=s390x-unknown-linux \
; RUN:  -mcpu=z15 < %s 2>&1 | FileCheck %s --check-prefix=Z15

; Cost-model expectations for the llvm.vector.reduce.* intrinsics on SystemZ z15.
;
; TODO(review): the expectations in the integer min/max functions and in
; @reducemul were not enforced before (the directive colon was missing, or a
; check prefix that is not enabled on the RUN line was used). Re-run
; utils/update_analyze_test_checks.py to confirm the expected costs.

; Ordered (strict) floating-point add reductions.
define void @fadd_reductions() {
; Z15-LABEL: 'fadd_reductions'
; Z15-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f64 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f128 = call fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> undef)
  %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.0, <8 x float> undef)
  %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.0, <2 x double> undef)
  %fadd_v4f64 = call double @llvm.vector.reduce.fadd.v4f64(double 0.0, <4 x double> undef)
  %fadd_v4f128 = call fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
  ret void
}

; Floating-point add reductions carrying the 'fast' flag.
define void @fast_fadd_reductions(ptr %src, ptr %dst) {
; Z15-LABEL: 'fast_fadd_reductions'
; Z15-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %fadd_v4f32 = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fadd_v8f32 = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fadd_v2f64 = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v4f64 = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v4f128 = call fast fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %fadd_v4f32 = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> undef)
  %fadd_v8f32 = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.0, <8 x float> undef)
  %fadd_v2f64 = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.0, <2 x double> undef)
  %fadd_v4f64 = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.0, <4 x double> undef)
  %fadd_v4f128 = call fast fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
  ret void
}

; Ordered (strict) floating-point multiply reductions.
define void @fmul_reductions() {
; Z15-LABEL: 'fmul_reductions'
; Z15-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fmul_v4f32 = call float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fmul_v8f32 = call float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fmul_v2f64 = call double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fmul_v4f64 = call double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fmul_v4f128 = call fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %fmul_v4f32 = call float @llvm.vector.reduce.fmul.v4f32(float 0.0, <4 x float> undef)
  %fmul_v8f32 = call float @llvm.vector.reduce.fmul.v8f32(float 0.0, <8 x float> undef)
  %fmul_v2f64 = call double @llvm.vector.reduce.fmul.v2f64(double 0.0, <2 x double> undef)
  %fmul_v4f64 = call double @llvm.vector.reduce.fmul.v4f64(double 0.0, <4 x double> undef)
  %fmul_v4f128 = call fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
  ret void
}

; Floating-point multiply reductions carrying the 'fast' flag.
define void @fast_fmul_reductions() {
; Z15-LABEL: 'fast_fmul_reductions'
; Z15-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %fmul_v4f32 = call fast float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fmul_v8f32 = call fast float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fmul_v2f64 = call fast double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fmul_v4f64 = call fast double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fmul_v4f128 = call fast fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %fmul_v4f32 = call fast float @llvm.vector.reduce.fmul.v4f32(float 0.0, <4 x float> undef)
  %fmul_v8f32 = call fast float @llvm.vector.reduce.fmul.v8f32(float 0.0, <8 x float> undef)
  %fmul_v2f64 = call fast double @llvm.vector.reduce.fmul.v2f64(double 0.0, <2 x double> undef)
  %fmul_v4f64 = call fast double @llvm.vector.reduce.fmul.v4f64(double 0.0, <4 x double> undef)
  %fmul_v4f128 = call fast fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
  ret void
}

; Floating-point minimum reductions.
define void @fmin_reductions() {
; Z15-LABEL: 'fmin_reductions'
; Z15-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4f32 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8f32 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2f64 = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4f64 = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4f128 = call fp128 @llvm.vector.reduce.fmin.v4f128(<4 x fp128> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %V4f32 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef)
  %V8f32 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef)
  %V2f64 = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef)
  %V4f64 = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef)
  %V4f128 = call fp128 @llvm.vector.reduce.fmin.v4f128(<4 x fp128> undef)
  ret void
}

; Floating-point maximum reductions.
define void @fmax_reductions() {
; Z15-LABEL: 'fmax_reductions'
; Z15-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4f32 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8f32 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2f64 = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4f64 = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4f128 = call fp128 @llvm.vector.reduce.fmax.v4f128(<4 x fp128> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %V4f32 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef)
  %V8f32 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef)
  %V2f64 = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef)
  %V4f64 = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef)
  %V4f128 = call fp128 @llvm.vector.reduce.fmax.v4f128(<4 x fp128> undef)
  ret void
}

; Unsigned integer minimum reductions.
define void @reduceumin() {
; Z15-LABEL: 'reduceumin'
; Z15-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_64 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_64 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4_32 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8_32 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V128_8 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_128 = call i128 @llvm.vector.reduce.umin.v4i128(<4 x i128> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %V2_64 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
  %V4_64 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
  %V4_32 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef)
  %V8_32 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef)

  %V128_8 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
  %V4_128 = call i128 @llvm.vector.reduce.umin.v4i128(<4 x i128> undef)

  ret void
}

; Unsigned integer maximum reductions.
define void @reduceumax() {
; Z15-LABEL: 'reduceumax'
; Z15-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_64 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_64 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4_32 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8_32 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V128_8 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_128 = call i128 @llvm.vector.reduce.umax.v4i128(<4 x i128> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %V2_64 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
  %V4_64 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
  %V4_32 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef)
  %V8_32 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef)

  %V128_8 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
  %V4_128 = call i128 @llvm.vector.reduce.umax.v4i128(<4 x i128> undef)

  ret void
}

; Signed integer minimum reductions.
define void @reducesmin() {
; Z15-LABEL: 'reducesmin'
; Z15-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_64 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_64 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4_32 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8_32 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V128_8 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_128 = call i128 @llvm.vector.reduce.smin.v4i128(<4 x i128> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %V2_64 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef)
  %V4_64 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef)
  %V4_32 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef)
  %V8_32 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef)

  %V128_8 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef)
  %V4_128 = call i128 @llvm.vector.reduce.smin.v4i128(<4 x i128> undef)

  ret void
}

; Signed integer maximum reductions.
define void @reducesmax() {
; Z15-LABEL: 'reducesmax'
; Z15-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_64 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_64 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4_32 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8_32 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V128_8 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_128 = call i128 @llvm.vector.reduce.smax.v4i128(<4 x i128> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %V2_64 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
  %V4_64 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)
  %V4_32 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef)
  %V8_32 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef)

  %V128_8 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef)
  %V4_128 = call i128 @llvm.vector.reduce.smax.v4i128(<4 x i128> undef)

  ret void
}

; Integer add reductions over i64/i32/i16/i8 elements plus two extreme shapes.
define void @reduceadd() {
; Z15-LABEL: 'reduceadd'
; Z15-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_64 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_64 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_64 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16_64 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_32 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4_32 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_32 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16_32 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_16 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_16 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_16 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16_16 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_8 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_8 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_8 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128_8 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4_256 = call i256 @llvm.vector.reduce.add.v4i256(<4 x i256> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  ; REDUCEADD64
  %V2_64 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
  %V4_64 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
  %V8_64 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
  %V16_64 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
  ; REDUCEADD32
  %V2_32 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef)
  %V4_32 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef)
  %V8_32 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)
  %V16_32 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef)
  ; REDUCEADD16
  %V2_16 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef)
  %V4_16 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef)
  %V8_16 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef)
  %V16_16 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef)
  ; REDUCEADD8
  %V2_8 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
  %V4_8 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
  %V8_8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
  %V16_8 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
  ; EXTREME VALUES
  %V128_8 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
  %V4_256 = call i256 @llvm.vector.reduce.add.v4i256(<4 x i256> undef)

  ret void
}

; Integer multiply reductions over i64/i32/i16/i8 elements plus two extreme shapes.
define void @reducemul() {
; Z15-LABEL: 'reducemul'
; Z15-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_64 = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_64 = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_64 = call i64 @llvm.vector.reduce.mul.v8i64(<8 x i64> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16_64 = call i64 @llvm.vector.reduce.mul.v16i64(<16 x i64> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_32 = call i32 @llvm.vector.reduce.mul.v2i32(<2 x i32> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_32 = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_32 = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16_32 = call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_16 = call i16 @llvm.vector.reduce.mul.v2i16(<2 x i16> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_16 = call i16 @llvm.vector.reduce.mul.v4i16(<4 x i16> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8_16 = call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16_16 = call i16 @llvm.vector.reduce.mul.v16i16(<16 x i16> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_8 = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_8 = call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8_8 = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16_8 = call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V128_8 = call i8 @llvm.vector.reduce.mul.v128i8(<128 x i8> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4_256 = call i256 @llvm.vector.reduce.mul.v4i256(<4 x i256> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  ; REDUCEMUL64
  %V2_64 = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> undef)
  %V4_64 = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> undef)
  %V8_64 = call i64 @llvm.vector.reduce.mul.v8i64(<8 x i64> undef)
  %V16_64 = call i64 @llvm.vector.reduce.mul.v16i64(<16 x i64> undef)
  ; REDUCEMUL32
  %V2_32 = call i32 @llvm.vector.reduce.mul.v2i32(<2 x i32> undef)
  %V4_32 = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> undef)
  %V8_32 = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> undef)
  %V16_32 = call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> undef)
  ; REDUCEMUL16
  %V2_16 = call i16 @llvm.vector.reduce.mul.v2i16(<2 x i16> undef)
  %V4_16 = call i16 @llvm.vector.reduce.mul.v4i16(<4 x i16> undef)
  %V8_16 = call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> undef)
  %V16_16 = call i16 @llvm.vector.reduce.mul.v16i16(<16 x i16> undef)
  ; REDUCEMUL8
  %V2_8 = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> undef)
  %V4_8 = call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> undef)
  %V8_8 = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> undef)
  %V16_8 = call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> undef)
  ; EXTREME VALUES
  %V128_8 = call i8 @llvm.vector.reduce.mul.v128i8(<128 x i8> undef)
  %V4_256 = call i256 @llvm.vector.reduce.mul.v4i256(<4 x i256> undef)

  ret void
}

; Intrinsic declarations, grouped by reduction kind.
declare float @llvm.vector.reduce.fadd.v4f32(float, <4 x float>)
declare float @llvm.vector.reduce.fadd.v8f32(float, <8 x float>)
declare double @llvm.vector.reduce.fadd.v2f64(double, <2 x double>)
declare double @llvm.vector.reduce.fadd.v4f64(double, <4 x double>)
declare fp128 @llvm.vector.reduce.fadd.v4f128(fp128, <4 x fp128>)

declare float @llvm.vector.reduce.fmul.v4f32(float, <4 x float>)
declare float @llvm.vector.reduce.fmul.v8f32(float, <8 x float>)
declare double @llvm.vector.reduce.fmul.v2f64(double, <2 x double>)
declare double @llvm.vector.reduce.fmul.v4f64(double, <4 x double>)
declare fp128 @llvm.vector.reduce.fmul.v4f128(fp128, <4 x fp128>)

declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)
declare float @llvm.vector.reduce.fmin.v8f32(<8 x float>)
declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>)
declare double @llvm.vector.reduce.fmin.v4f64(<4 x double>)
declare fp128 @llvm.vector.reduce.fmin.v4f128(<4 x fp128>)

declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>)
declare float @llvm.vector.reduce.fmax.v8f32(<8 x float>)
declare double @llvm.vector.reduce.fmax.v2f64(<2 x double>)
declare double @llvm.vector.reduce.fmax.v4f64(<4 x double>)
declare fp128 @llvm.vector.reduce.fmax.v4f128(<4 x fp128>)

declare i64 @llvm.vector.reduce.umin.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.umin.v4i64(<4 x i64>)
declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.umin.v8i32(<8 x i32>)
declare i8 @llvm.vector.reduce.umin.v128i8(<128 x i8>)
declare i128 @llvm.vector.reduce.umin.v4i128(<4 x i128>)

declare i64 @llvm.vector.reduce.umax.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.umax.v4i64(<4 x i64>)
declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.umax.v8i32(<8 x i32>)
declare i8 @llvm.vector.reduce.umax.v128i8(<128 x i8>)
declare i128 @llvm.vector.reduce.umax.v4i128(<4 x i128>)

declare i64 @llvm.vector.reduce.smin.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.smin.v4i64(<4 x i64>)
declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.smin.v8i32(<8 x i32>)
declare i8 @llvm.vector.reduce.smin.v128i8(<128 x i8>)
declare i128 @llvm.vector.reduce.smin.v4i128(<4 x i128>)

declare i64 @llvm.vector.reduce.smax.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.smax.v4i64(<4 x i64>)
declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.smax.v8i32(<8 x i32>)
declare i8 @llvm.vector.reduce.smax.v128i8(<128 x i8>)
declare i128 @llvm.vector.reduce.smax.v4i128(<4 x i128>)

declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.add.v8i64(<8 x i64>)
declare i64 @llvm.vector.reduce.add.v16i64(<16 x i64>)
declare i32 @llvm.vector.reduce.add.v2i32(<2 x i32>)
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)
declare i16 @llvm.vector.reduce.add.v2i16(<2 x i16>)
declare i16 @llvm.vector.reduce.add.v4i16(<4 x i16>)
declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>)
declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>)
declare i8 @llvm.vector.reduce.add.v2i8(<2 x i8>)
declare i8 @llvm.vector.reduce.add.v4i8(<4 x i8>)
declare i8 @llvm.vector.reduce.add.v8i8(<8 x i8>)
declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>)

declare i8 @llvm.vector.reduce.add.v128i8(<128 x i8>)
declare i256 @llvm.vector.reduce.add.v4i256(<4 x i256>)

declare i64 @llvm.vector.reduce.mul.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.mul.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.mul.v8i64(<8 x i64>)
declare i64 @llvm.vector.reduce.mul.v16i64(<16 x i64>)
declare i32 @llvm.vector.reduce.mul.v2i32(<2 x i32>)
declare i32 @llvm.vector.reduce.mul.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.mul.v8i32(<8 x i32>)
declare i32 @llvm.vector.reduce.mul.v16i32(<16 x i32>)
declare i16 @llvm.vector.reduce.mul.v2i16(<2 x i16>)
declare i16 @llvm.vector.reduce.mul.v4i16(<4 x i16>)
declare i16 @llvm.vector.reduce.mul.v8i16(<8 x i16>)
declare i16 @llvm.vector.reduce.mul.v16i16(<16 x i16>)
declare i8 @llvm.vector.reduce.mul.v2i8(<2 x i8>)
declare i8 @llvm.vector.reduce.mul.v4i8(<4 x i8>)
declare i8 @llvm.vector.reduce.mul.v8i8(<8 x i8>)
declare i8 @llvm.vector.reduce.mul.v16i8(<16 x i8>)

declare i8 @llvm.vector.reduce.mul.v128i8(<128 x i8>)
declare i256 @llvm.vector.reduce.mul.v4i256(<4 x i256>)