; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
; RUN: opt -passes='print<cost-model>' -disable-output -mtriple=s390x-unknown-linux \
; RUN:   -mcpu=z15 < %s 2>&1 | FileCheck %s --check-prefix=Z15

; Cost-model checks for the llvm.vector.reduce.* intrinsics on s390x with
; -mcpu=z15. Every check directive in this file uses the single prefix that
; is enabled on the RUN line above; directives written with another prefix,
; or without the trailing colon, are silently ignored by FileCheck.

; fadd reductions called without fast-math flags.
define void @fadd_reductions() {
; Z15-LABEL: 'fadd_reductions'
; Z15-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f64 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f128 = call fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> undef)
  %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.0, <8 x float> undef)
  %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.0, <2 x double> undef)
  %fadd_v4f64 = call double @llvm.vector.reduce.fadd.v4f64(double 0.0, <4 x double> undef)
  %fadd_v4f128 = call fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
  ret void
}

; fadd reductions called with the `fast` flag.
define void @fast_fadd_reductions(ptr %src, ptr %dst) {
; Z15-LABEL: 'fast_fadd_reductions'
; Z15-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %fadd_v4f32 = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fadd_v8f32 = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fadd_v2f64 = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v4f64 = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v4f128 = call fast fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %fadd_v4f32 = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> undef)
  %fadd_v8f32 = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.0, <8 x float> undef)
  %fadd_v2f64 = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.0, <2 x double> undef)
  %fadd_v4f64 = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.0, <4 x double> undef)
  %fadd_v4f128 = call fast fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
  ret void
}

; fmul reductions called without fast-math flags.
define void @fmul_reductions() {
; Z15-LABEL: 'fmul_reductions'
; Z15-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fmul_v4f32 = call float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fmul_v8f32 = call float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fmul_v2f64 = call double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fmul_v4f64 = call double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fmul_v4f128 = call fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %fmul_v4f32 = call float @llvm.vector.reduce.fmul.v4f32(float 0.0, <4 x float> undef)
  %fmul_v8f32 = call float @llvm.vector.reduce.fmul.v8f32(float 0.0, <8 x float> undef)
  %fmul_v2f64 = call double @llvm.vector.reduce.fmul.v2f64(double 0.0, <2 x double> undef)
  %fmul_v4f64 = call double @llvm.vector.reduce.fmul.v4f64(double 0.0, <4 x double> undef)
  %fmul_v4f128 = call fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
  ret void
}

; fmul reductions called with the `fast` flag.
define void @fast_fmul_reductions() {
; Z15-LABEL: 'fast_fmul_reductions'
; Z15-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %fmul_v4f32 = call fast float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fmul_v8f32 = call fast float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fmul_v2f64 = call fast double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fmul_v4f64 = call fast double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fmul_v4f128 = call fast fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %fmul_v4f32 = call fast float @llvm.vector.reduce.fmul.v4f32(float 0.0, <4 x float> undef)
  %fmul_v8f32 = call fast float @llvm.vector.reduce.fmul.v8f32(float 0.0, <8 x float> undef)
  %fmul_v2f64 = call fast double @llvm.vector.reduce.fmul.v2f64(double 0.0, <2 x double> undef)
  %fmul_v4f64 = call fast double @llvm.vector.reduce.fmul.v4f64(double 0.0, <4 x double> undef)
  %fmul_v4f128 = call fast fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)

  ret void
}

; Floating-point minimum reductions.
define void @fmin_reductions() {
; Z15-LABEL: 'fmin_reductions'
; Z15-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4f32 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8f32 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2f64 = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4f64 = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4f128 = call fp128 @llvm.vector.reduce.fmin.v4f128(<4 x fp128> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %V4f32 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef)
  %V8f32 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef)
  %V2f64 = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef)
  %V4f64 = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef)
  %V4f128 = call fp128 @llvm.vector.reduce.fmin.v4f128(<4 x fp128> undef)
  ret void
}

; Floating-point maximum reductions.
define void @fmax_reductions() {
; Z15-LABEL: 'fmax_reductions'
; Z15-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4f32 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8f32 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2f64 = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4f64 = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4f128 = call fp128 @llvm.vector.reduce.fmax.v4f128(<4 x fp128> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %V4f32 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef)
  %V8f32 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef)
  %V2f64 = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef)
  %V4f64 = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef)
  %V4f128 = call fp128 @llvm.vector.reduce.fmax.v4f128(<4 x fp128> undef)
  ret void
}

; Unsigned integer minimum reductions.
; NOTE(review): all but the first directive here were previously written
; without the trailing colon and therefore never checked; re-run the test (or
; regenerate with update_analyze_test_checks.py) to confirm the values.
define void @reduceumin() {
; Z15-LABEL: 'reduceumin'
; Z15-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_64 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_64 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4_32 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8_32 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V128_8 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_128 = call i128 @llvm.vector.reduce.umin.v4i128(<4 x i128> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %V2_64 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
  %V4_64 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
  %V4_32 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef)
  %V8_32 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef)

  %V128_8 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
  %V4_128 = call i128 @llvm.vector.reduce.umin.v4i128(<4 x i128> undef)

  ret void
}

; Unsigned integer maximum reductions.
; NOTE(review): all but the first directive here were previously written
; without the trailing colon and therefore never checked; re-run the test (or
; regenerate with update_analyze_test_checks.py) to confirm the values.
define void @reduceumax() {
; Z15-LABEL: 'reduceumax'
; Z15-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_64 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_64 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4_32 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8_32 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V128_8 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_128 = call i128 @llvm.vector.reduce.umax.v4i128(<4 x i128> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %V2_64 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
  %V4_64 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
  %V4_32 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef)
  %V8_32 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef)

  %V128_8 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
  %V4_128 = call i128 @llvm.vector.reduce.umax.v4i128(<4 x i128> undef)

  ret void
}

; Signed integer minimum reductions.
; NOTE(review): all but the first directive here were previously written
; without the trailing colon and therefore never checked; re-run the test (or
; regenerate with update_analyze_test_checks.py) to confirm the values.
define void @reducesmin() {
; Z15-LABEL: 'reducesmin'
; Z15-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_64 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_64 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4_32 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8_32 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V128_8 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_128 = call i128 @llvm.vector.reduce.smin.v4i128(<4 x i128> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %V2_64 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef)
  %V4_64 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef)
  %V4_32 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef)
  %V8_32 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef)

  %V128_8 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef)
  %V4_128 = call i128 @llvm.vector.reduce.smin.v4i128(<4 x i128> undef)

  ret void
}

; Signed integer maximum reductions.
; NOTE(review): all but the first directive here were previously written
; without the trailing colon and therefore never checked; re-run the test (or
; regenerate with update_analyze_test_checks.py) to confirm the values.
define void @reducesmax() {
; Z15-LABEL: 'reducesmax'
; Z15-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_64 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_64 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4_32 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8_32 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V128_8 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_128 = call i128 @llvm.vector.reduce.smax.v4i128(<4 x i128> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %V2_64 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
  %V4_64 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)
  %V4_32 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef)
  %V8_32 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef)

  %V128_8 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef)
  %V4_128 = call i128 @llvm.vector.reduce.smax.v4i128(<4 x i128> undef)

  ret void
}

; Integer add reductions over i64/i32/i16/i8 element types, plus two
; outsized cases (<128 x i8> and <4 x i256>).
define void @reduceadd() {
; Z15-LABEL: 'reduceadd'
; Z15-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_64 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_64 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_64 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16_64 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_32 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4_32 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_32 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16_32 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_16 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_16 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_16 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16_16 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_8 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_8 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_8 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128_8 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4_256 = call i256 @llvm.vector.reduce.add.v4i256(<4 x i256> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  ; REDUCEADD64
  %V2_64 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
  %V4_64 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
  %V8_64 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
  %V16_64 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
  ; REDUCEADD32
  %V2_32 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef)
  %V4_32 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef)
  %V8_32 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)
  %V16_32 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef)
  ; REDUCEADD16
  %V2_16 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef)
  %V4_16 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef)
  %V8_16 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef)
  %V16_16 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef)
  ; REDUCEADD8
  %V2_8 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
  %V4_8 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
  %V8_8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
  %V16_8 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
  ; EXTREME VALUES
  %V128_8 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
  %V4_256 = call i256 @llvm.vector.reduce.add.v4i256(<4 x i256> undef)

  ret void
}

; Integer mul reductions over i64/i32/i16/i8 element types, plus two
; outsized cases (<128 x i8> and <4 x i256>).
; NOTE(review): these directives previously used a prefix that the RUN line
; does not enable, so they were never checked; re-run the test (or regenerate
; with update_analyze_test_checks.py) to confirm the values.
define void @reducemul() {
; Z15-LABEL: 'reducemul'
; Z15-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_64 = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_64 = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_64 = call i64 @llvm.vector.reduce.mul.v8i64(<8 x i64> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16_64 = call i64 @llvm.vector.reduce.mul.v16i64(<16 x i64> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_32 = call i32 @llvm.vector.reduce.mul.v2i32(<2 x i32> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_32 = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_32 = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16_32 = call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_16 = call i16 @llvm.vector.reduce.mul.v2i16(<2 x i16> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_16 = call i16 @llvm.vector.reduce.mul.v4i16(<4 x i16> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8_16 = call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16_16 = call i16 @llvm.vector.reduce.mul.v16i16(<16 x i16> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_8 = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_8 = call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8_8 = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16_8 = call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V128_8 = call i8 @llvm.vector.reduce.mul.v128i8(<128 x i8> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4_256 = call i256 @llvm.vector.reduce.mul.v4i256(<4 x i256> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  ; REDUCEMUL64
  %V2_64 = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> undef)
  %V4_64 = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> undef)
  %V8_64 = call i64 @llvm.vector.reduce.mul.v8i64(<8 x i64> undef)
  %V16_64 = call i64 @llvm.vector.reduce.mul.v16i64(<16 x i64> undef)
  ; REDUCEMUL32
  %V2_32 = call i32 @llvm.vector.reduce.mul.v2i32(<2 x i32> undef)
  %V4_32 = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> undef)
  %V8_32 = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> undef)
  %V16_32 = call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> undef)
  ; REDUCEMUL16
  %V2_16 = call i16 @llvm.vector.reduce.mul.v2i16(<2 x i16> undef)
  %V4_16 = call i16 @llvm.vector.reduce.mul.v4i16(<4 x i16> undef)
  %V8_16 = call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> undef)
  %V16_16 = call i16 @llvm.vector.reduce.mul.v16i16(<16 x i16> undef)
  ; REDUCEMUL8
  %V2_8 = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> undef)
  %V4_8 = call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> undef)
  %V8_8 = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> undef)
  %V16_8 = call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> undef)
  ; EXTREME VALUES
  %V128_8 = call i8 @llvm.vector.reduce.mul.v128i8(<128 x i8> undef)
  %V4_256 = call i256 @llvm.vector.reduce.mul.v4i256(<4 x i256> undef)

  ret void
}

; Declarations of the reduction intrinsics called above.
declare float @llvm.vector.reduce.fadd.v4f32(float, <4 x float>)
declare float @llvm.vector.reduce.fadd.v8f32(float, <8 x float>)
declare double @llvm.vector.reduce.fadd.v2f64(double, <2 x double>)
declare double @llvm.vector.reduce.fadd.v4f64(double, <4 x double>)
declare fp128 @llvm.vector.reduce.fadd.v4f128(fp128, <4 x fp128>)

declare float @llvm.vector.reduce.fmul.v4f32(float, <4 x float>)
declare float @llvm.vector.reduce.fmul.v8f32(float, <8 x float>)
declare double @llvm.vector.reduce.fmul.v2f64(double, <2 x double>)
declare double @llvm.vector.reduce.fmul.v4f64(double, <4 x double>)
declare fp128 @llvm.vector.reduce.fmul.v4f128(fp128, <4 x fp128>)

declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)
declare float @llvm.vector.reduce.fmin.v8f32(<8 x float>)
declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>)
declare double @llvm.vector.reduce.fmin.v4f64(<4 x double>)
declare fp128 @llvm.vector.reduce.fmin.v4f128(<4 x fp128>)

declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>)
declare float @llvm.vector.reduce.fmax.v8f32(<8 x float>)
declare double @llvm.vector.reduce.fmax.v2f64(<2 x double>)
declare double @llvm.vector.reduce.fmax.v4f64(<4 x double>)
declare fp128 @llvm.vector.reduce.fmax.v4f128(<4 x fp128>)

declare i64 @llvm.vector.reduce.umin.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.umin.v4i64(<4 x i64>)
declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.umin.v8i32(<8 x i32>)
declare i8 @llvm.vector.reduce.umin.v128i8(<128 x i8>)
declare i128 @llvm.vector.reduce.umin.v4i128(<4 x i128>)

declare i64 @llvm.vector.reduce.umax.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.umax.v4i64(<4 x i64>)
declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.umax.v8i32(<8 x i32>)
declare i8 @llvm.vector.reduce.umax.v128i8(<128 x i8>)
declare i128 @llvm.vector.reduce.umax.v4i128(<4 x i128>)

declare i64 @llvm.vector.reduce.smin.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.smin.v4i64(<4 x i64>)
declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.smin.v8i32(<8 x i32>)
declare i8 @llvm.vector.reduce.smin.v128i8(<128 x i8>)
declare i128 @llvm.vector.reduce.smin.v4i128(<4 x i128>)

declare i64 @llvm.vector.reduce.smax.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.smax.v4i64(<4 x i64>)
declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.smax.v8i32(<8 x i32>)
declare i8 @llvm.vector.reduce.smax.v128i8(<128 x i8>)
declare i128 @llvm.vector.reduce.smax.v4i128(<4 x i128>)

declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.add.v8i64(<8 x i64>)
declare i64 @llvm.vector.reduce.add.v16i64(<16 x i64>)
declare i32 @llvm.vector.reduce.add.v2i32(<2 x i32>)
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)
declare i16 @llvm.vector.reduce.add.v2i16(<2 x i16>)
declare i16 @llvm.vector.reduce.add.v4i16(<4 x i16>)
declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>)
declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>)
declare i8 @llvm.vector.reduce.add.v2i8(<2 x i8>)
declare i8 @llvm.vector.reduce.add.v4i8(<4 x i8>)
declare i8 @llvm.vector.reduce.add.v8i8(<8 x i8>)
declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>)

declare i8 @llvm.vector.reduce.add.v128i8(<128 x i8>)
declare i256 @llvm.vector.reduce.add.v4i256(<4 x i256>)

declare i64 @llvm.vector.reduce.mul.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.mul.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.mul.v8i64(<8 x i64>)
declare i64 @llvm.vector.reduce.mul.v16i64(<16 x i64>)
declare i32 @llvm.vector.reduce.mul.v2i32(<2 x i32>)
declare i32 @llvm.vector.reduce.mul.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.mul.v8i32(<8 x i32>)
declare i32 @llvm.vector.reduce.mul.v16i32(<16 x i32>)
declare i16 @llvm.vector.reduce.mul.v2i16(<2 x i16>)
declare i16 @llvm.vector.reduce.mul.v4i16(<4 x i16>)
declare i16 @llvm.vector.reduce.mul.v8i16(<8 x i16>)
declare i16 @llvm.vector.reduce.mul.v16i16(<16 x i16>)
declare i8 @llvm.vector.reduce.mul.v2i8(<2 x i8>)
declare i8 @llvm.vector.reduce.mul.v4i8(<4 x i8>)
declare i8 @llvm.vector.reduce.mul.v8i8(<8 x i8>)
declare i8 @llvm.vector.reduce.mul.v16i8(<16 x i8>)

declare i8 @llvm.vector.reduce.mul.v128i8(<128 x i8>)
declare i256 @llvm.vector.reduce.mul.v4i256(<4 x i256>)