; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefixes=CHECK,CHECK-NOF16
; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -mattr=+fullfp16 -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefixes=CHECK,CHECK-F16

; Cost-model test for the vector min/max reduction intrinsics. Both run lines
; above target aarch64-unknown-linux-gnu and print the throughput cost of every
; instruction; the second one additionally enables +fullfp16. Expectations that
; hold for both runs use the common check prefix, while the NOF16 and F16
; prefixes carry expectations that apply to only one of the two runs.

target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"

; Unsigned-minimum integer reductions over i8, i16, i32 and i64 elements, from
; <1 x i8> up to <64 x i8> and <4 x i64>, including the odd-sized <3 x i8>
; case. Every operand is undef and the results are unused: only the cost
; printed for each call is checked.
define void @reduce_umin() {
; CHECK-LABEL: 'reduce_umin'
; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1i8 = call i8 @llvm.vector.reduce.umin.v1i8(<1 x i8> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V3i8 = call i8 @llvm.vector.reduce.umin.v3i8(<3 x i8> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i8 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V64i8 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
;
  %V1i8 = call i8 @llvm.vector.reduce.umin.v1i8(<1 x i8> undef)
  %V3i8 = call i8 @llvm.vector.reduce.umin.v3i8(<3 x i8> undef)
  %V4i8 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef)
  %V8i8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef)
  %V16i8 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef)
  %V32i8 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
  %V64i8 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
  %V2i16 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef)
  %V4i16 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef)
  %V8i16 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef)
  %V16i16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef)
  %V2i32 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef)
  %V4i32 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef)
  %V8i32 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef)
  %V2i64 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
  %V4i64 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
  ret void
}

; Unsigned-maximum integer reductions over the same set of vector types as
; @reduce_umin. Operands are undef; only the printed cost of each call is
; checked, and the expectations are shared by both run configurations.
define void @reduce_umax() {
; CHECK-LABEL: 'reduce_umax'
; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1i8 = call i8 @llvm.vector.reduce.umax.v1i8(<1 x i8> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V3i8 = call i8 @llvm.vector.reduce.umax.v3i8(<3 x i8> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i8 = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i8 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V64i8 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
;
  %V1i8 = call i8 @llvm.vector.reduce.umax.v1i8(<1 x i8> undef)
  %V3i8 = call i8 @llvm.vector.reduce.umax.v3i8(<3 x i8> undef)
  %V4i8 = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> undef)
  %V8i8 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> undef)
  %V16i8 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> undef)
  %V32i8 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef)
  %V64i8 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
  %V2i16 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef)
  %V4i16 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> undef)
  %V8i16 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> undef)
  %V16i16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
  %V2i32 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef)
  %V4i32 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef)
  %V8i32 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef)
  %V2i64 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
  %V4i64 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
  ret void
}

; Signed-minimum integer reductions over the same set of vector types as the
; unsigned tests. Expectations are shared by both run configurations.
define void @reduce_smin() {
; CHECK-LABEL: 'reduce_smin'
; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1i8 = call i8 @llvm.vector.reduce.smin.v1i8(<1 x i8> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V3i8 = call i8 @llvm.vector.reduce.smin.v3i8(<3 x i8> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i8 = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i8 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V64i8 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
;
  %V1i8 = call i8 @llvm.vector.reduce.smin.v1i8(<1 x i8> undef)
  %V3i8 = call i8 @llvm.vector.reduce.smin.v3i8(<3 x i8> undef)
  %V4i8 = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> undef)
  %V8i8 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> undef)
  %V16i8 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> undef)
  %V32i8 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef)
  %V64i8 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef)
  %V2i16 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef)
  %V4i16 = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> undef)
  %V8i16 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> undef)
  %V16i16 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> undef)
  %V2i32 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> undef)
  %V4i32 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef)
  %V8i32 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef)
  %V2i64 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef)
  %V4i64 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef)
  ret void
}

; Signed-maximum integer reductions over the same set of vector types as the
; unsigned tests. Expectations are shared by both run configurations.
define void @reduce_smax() {
; CHECK-LABEL: 'reduce_smax'
; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1i8 = call i8 @llvm.vector.reduce.smax.v1i8(<1 x i8> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V3i8 = call i8 @llvm.vector.reduce.smax.v3i8(<3 x i8> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i8 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V64i8 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
;
  %V1i8 = call i8 @llvm.vector.reduce.smax.v1i8(<1 x i8> undef)
  %V3i8 = call i8 @llvm.vector.reduce.smax.v3i8(<3 x i8> undef)
  %V4i8 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef)
  %V8i8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef)
  %V16i8 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef)
  %V32i8 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef)
  %V64i8 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef)
  %V2i16 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef)
  %V4i16 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef)
  %V8i16 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef)
  %V16i16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef)
  %V2i32 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef)
  %V4i32 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef)
  %V8i32 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef)
  %V2i64 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
  %V4i64 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)
  ret void
}

; Half-precision minimum reductions, covering both the fmin and the fminimum
; intrinsic for <2 x half> through <16 x half>. The expected costs differ
; between the run without and the run with +fullfp16, so this function is
; checked with the two run-specific prefixes instead of the shared one.
define void @reduce_fmin16() {
; CHECK-NOF16-LABEL: 'reduce_fmin16'
; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2f16 = call half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef)
; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V4f16 = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef)
; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 174 for instruction: %V8f16 = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef)
; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 240 for instruction: %V16f16 = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef)
; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2f16m = call half @llvm.vector.reduce.fminimum.v2f16(<2 x half> undef)
; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V4f16m = call half @llvm.vector.reduce.fminimum.v4f16(<4 x half> undef)
; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 174 for instruction: %V8f16m = call half @llvm.vector.reduce.fminimum.v8f16(<8 x half> undef)
; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 240 for instruction: %V16f16m = call half @llvm.vector.reduce.fminimum.v16f16(<16 x half> undef)
; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-F16-LABEL: 'reduce_fmin16'
; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2f16 = call half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef)
; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4f16 = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef)
; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8f16 = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef)
; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16f16 = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef)
; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2f16m = call half @llvm.vector.reduce.fminimum.v2f16(<2 x half> undef)
; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4f16m = call half @llvm.vector.reduce.fminimum.v4f16(<4 x half> undef)
; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8f16m = call half @llvm.vector.reduce.fminimum.v8f16(<8 x half> undef)
; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16f16m = call half @llvm.vector.reduce.fminimum.v16f16(<16 x half> undef)
; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
;
  %V2f16 = call half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef)
  %V4f16 = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef)
  %V8f16 = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef)
  %V16f16 = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef)
  %V2f16m = call half @llvm.vector.reduce.fminimum.v2f16(<2 x half> undef)
  %V4f16m = call half @llvm.vector.reduce.fminimum.v4f16(<4 x half> undef)
  %V8f16m = call half @llvm.vector.reduce.fminimum.v8f16(<8 x half> undef)
  %V16f16m = call half @llvm.vector.reduce.fminimum.v16f16(<16 x half> undef)
  ret void
}

; Half-precision maximum reductions, covering both the fmax and the fmaximum
; intrinsic for <2 x half> through <16 x half>. As with @reduce_fmin16, the
; expectations are split between the two run-specific prefixes.
define void @reduce_fmax16() {
; CHECK-NOF16-LABEL: 'reduce_fmax16'
; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2f16 = call half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef)
; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V4f16 = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef)
; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 174 for instruction: %V8f16 = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef)
; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 240 for instruction: %V16f16 = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef)
; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2f16m = call half @llvm.vector.reduce.fmaximum.v2f16(<2 x half> undef)
; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V4f16m = call half @llvm.vector.reduce.fmaximum.v4f16(<4 x half> undef)
; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 174 for instruction: %V8f16m = call half @llvm.vector.reduce.fmaximum.v8f16(<8 x half> undef)
; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 240 for instruction: %V16f16m = call half @llvm.vector.reduce.fmaximum.v16f16(<16 x half> undef)
; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-F16-LABEL: 'reduce_fmax16'
; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2f16 = call half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef)
; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4f16 = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef)
; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8f16 = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef)
; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16f16 = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef)
; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2f16m = call half @llvm.vector.reduce.fmaximum.v2f16(<2 x half> undef)
; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4f16m = call half @llvm.vector.reduce.fmaximum.v4f16(<4 x half> undef)
; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8f16m = call half @llvm.vector.reduce.fmaximum.v8f16(<8 x half> undef)
; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16f16m = call half @llvm.vector.reduce.fmaximum.v16f16(<16 x half> undef)
; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
;
  %V2f16 = call half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef)
  %V4f16 = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef)
  %V8f16 = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef)
  %V16f16 = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef)
  %V2f16m = call half @llvm.vector.reduce.fmaximum.v2f16(<2 x half> undef)
  %V4f16m = call half @llvm.vector.reduce.fmaximum.v4f16(<4 x half> undef)
  %V8f16m = call half @llvm.vector.reduce.fmaximum.v8f16(<8 x half> undef)
  %V16f16m = call half @llvm.vector.reduce.fmaximum.v16f16(<16 x half> undef)
  ret void
}

; Single- and double-precision minimum reductions (fmin and fminimum) for
; <2 x float>, <4 x float>, <8 x float>, <2 x double> and <4 x double>.
; Expectations are shared by both run configurations.
define void @reduce_fmin() {
; CHECK-LABEL: 'reduce_fmin'
; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2f32 = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4f32 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8f32 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2f64 = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4f64 = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2f32m = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4f32m = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8f32m = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2f64m = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4f64m = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
;
  %V2f32 = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> undef)
  %V4f32 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef)
  %V8f32 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef)
  %V2f64 = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef)
  %V4f64 = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef)
  %V2f32m = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> undef)
  %V4f32m = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> undef)
  %V8f32m = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> undef)
  %V2f64m = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef)
  %V4f64m = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> undef)
  ret void
}

; Single- and double-precision maximum reductions (fmax and fmaximum) for the
; same vector types as @reduce_fmin. Expectations are shared by both run
; configurations.
define void @reduce_fmax() {
; CHECK-LABEL: 'reduce_fmax'
; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2f32 = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4f32 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8f32 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2f64 = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4f64 = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2f32m = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4f32m = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8f32m = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2f64m = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4f64m = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
;
  %V2f32 = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef)
  %V4f32 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef)
  %V8f32 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef)
  %V2f64 = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef)
  %V4f64 = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef)
  %V2f32m = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef)
  %V4f32m = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> undef)
  %V8f32m = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> undef)
  %V2f64m = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
  %V4f64m = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> undef)
  ret void
}

; Declarations of every reduction intrinsic called by the test functions,
; grouped by operation: umin, umax, smin, smax, fmin, fmax, fminimum, fmaximum.
declare i8 @llvm.vector.reduce.umin.v1i8(<1 x i8>)
declare i8 @llvm.vector.reduce.umin.v3i8(<3 x i8>)
declare i8 @llvm.vector.reduce.umin.v4i8(<4 x i8>)
declare i8 @llvm.vector.reduce.umin.v8i8(<8 x i8>)
declare i8 @llvm.vector.reduce.umin.v16i8(<16 x i8>)
declare i8 @llvm.vector.reduce.umin.v32i8(<32 x i8>)
declare i8 @llvm.vector.reduce.umin.v64i8(<64 x i8>)
declare i16 @llvm.vector.reduce.umin.v2i16(<2 x i16>)
declare i16 @llvm.vector.reduce.umin.v4i16(<4 x i16>)
declare i16 @llvm.vector.reduce.umin.v8i16(<8 x i16>)
declare i16 @llvm.vector.reduce.umin.v16i16(<16 x i16>)
declare i32 @llvm.vector.reduce.umin.v2i32(<2 x i32>)
declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.umin.v8i32(<8 x i32>)
declare i64 @llvm.vector.reduce.umin.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.umin.v4i64(<4 x i64>)

declare i8 @llvm.vector.reduce.umax.v1i8(<1 x i8>)
declare i8 @llvm.vector.reduce.umax.v3i8(<3 x i8>)
declare i8 @llvm.vector.reduce.umax.v4i8(<4 x i8>)
declare i8 @llvm.vector.reduce.umax.v8i8(<8 x i8>)
declare i8 @llvm.vector.reduce.umax.v16i8(<16 x i8>)
declare i8 @llvm.vector.reduce.umax.v32i8(<32 x i8>)
declare i8 @llvm.vector.reduce.umax.v64i8(<64 x i8>)
declare i16 @llvm.vector.reduce.umax.v2i16(<2 x i16>)
declare i16 @llvm.vector.reduce.umax.v4i16(<4 x i16>)
declare i16 @llvm.vector.reduce.umax.v8i16(<8 x i16>)
declare i16 @llvm.vector.reduce.umax.v16i16(<16 x i16>)
declare i32 @llvm.vector.reduce.umax.v2i32(<2 x i32>)
declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.umax.v8i32(<8 x i32>)
declare i64 @llvm.vector.reduce.umax.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.umax.v4i64(<4 x i64>)

declare i8 @llvm.vector.reduce.smin.v1i8(<1 x i8>)
declare i8 @llvm.vector.reduce.smin.v3i8(<3 x i8>)
declare i8 @llvm.vector.reduce.smin.v4i8(<4 x i8>)
declare i8 @llvm.vector.reduce.smin.v8i8(<8 x i8>)
declare i8 @llvm.vector.reduce.smin.v16i8(<16 x i8>)
declare i8 @llvm.vector.reduce.smin.v32i8(<32 x i8>)
declare i8 @llvm.vector.reduce.smin.v64i8(<64 x i8>)
declare i16 @llvm.vector.reduce.smin.v2i16(<2 x i16>)
declare i16 @llvm.vector.reduce.smin.v4i16(<4 x i16>)
declare i16 @llvm.vector.reduce.smin.v8i16(<8 x i16>)
declare i16 @llvm.vector.reduce.smin.v16i16(<16 x i16>)
declare i32 @llvm.vector.reduce.smin.v2i32(<2 x i32>)
declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.smin.v8i32(<8 x i32>)
declare i64 @llvm.vector.reduce.smin.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.smin.v4i64(<4 x i64>)

declare i8 @llvm.vector.reduce.smax.v1i8(<1 x i8>)
declare i8 @llvm.vector.reduce.smax.v3i8(<3 x i8>)
declare i8 @llvm.vector.reduce.smax.v4i8(<4 x i8>)
declare i8 @llvm.vector.reduce.smax.v8i8(<8 x i8>)
declare i8 @llvm.vector.reduce.smax.v16i8(<16 x i8>)
declare i8 @llvm.vector.reduce.smax.v32i8(<32 x i8>)
declare i8 @llvm.vector.reduce.smax.v64i8(<64 x i8>)
declare i16 @llvm.vector.reduce.smax.v2i16(<2 x i16>)
declare i16 @llvm.vector.reduce.smax.v4i16(<4 x i16>)
declare i16 @llvm.vector.reduce.smax.v8i16(<8 x i16>)
declare i16 @llvm.vector.reduce.smax.v16i16(<16 x i16>)
declare i32 @llvm.vector.reduce.smax.v2i32(<2 x i32>)
declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.smax.v8i32(<8 x i32>)
declare i64 @llvm.vector.reduce.smax.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.smax.v4i64(<4 x i64>)

declare half @llvm.vector.reduce.fmin.v2f16(<2 x half>)
declare half @llvm.vector.reduce.fmin.v4f16(<4 x half>)
declare half @llvm.vector.reduce.fmin.v8f16(<8 x half>)
declare half @llvm.vector.reduce.fmin.v16f16(<16 x half>)
declare float @llvm.vector.reduce.fmin.v2f32(<2 x float>)
declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)
declare float @llvm.vector.reduce.fmin.v8f32(<8 x float>)
declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>)
declare double @llvm.vector.reduce.fmin.v4f64(<4 x double>)

declare half @llvm.vector.reduce.fmax.v2f16(<2 x half>)
declare half @llvm.vector.reduce.fmax.v4f16(<4 x half>)
declare half @llvm.vector.reduce.fmax.v8f16(<8 x half>)
declare half @llvm.vector.reduce.fmax.v16f16(<16 x half>)
declare float @llvm.vector.reduce.fmax.v2f32(<2 x float>)
declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>)
declare float @llvm.vector.reduce.fmax.v8f32(<8 x float>)
declare double @llvm.vector.reduce.fmax.v2f64(<2 x double>)
declare double @llvm.vector.reduce.fmax.v4f64(<4 x double>)

declare half @llvm.vector.reduce.fminimum.v2f16(<2 x half>)
declare half @llvm.vector.reduce.fminimum.v4f16(<4 x half>)
declare half @llvm.vector.reduce.fminimum.v8f16(<8 x half>)
declare half @llvm.vector.reduce.fminimum.v16f16(<16 x half>)
declare float @llvm.vector.reduce.fminimum.v2f32(<2 x float>)
declare float @llvm.vector.reduce.fminimum.v4f32(<4 x float>)
declare float @llvm.vector.reduce.fminimum.v8f32(<8 x float>)
declare double @llvm.vector.reduce.fminimum.v2f64(<2 x double>)
declare double @llvm.vector.reduce.fminimum.v4f64(<4 x double>)

declare half @llvm.vector.reduce.fmaximum.v2f16(<2 x half>)
declare half @llvm.vector.reduce.fmaximum.v4f16(<4 x half>)
declare half @llvm.vector.reduce.fmaximum.v8f16(<8 x half>)
declare half @llvm.vector.reduce.fmaximum.v16f16(<16 x half>)
declare float @llvm.vector.reduce.fmaximum.v2f32(<2 x float>)
declare float @llvm.vector.reduce.fmaximum.v4f32(<4 x float>)
declare float @llvm.vector.reduce.fmaximum.v8f32(<8 x float>)
declare double @llvm.vector.reduce.fmaximum.v2f64(<2 x double>)
declare double @llvm.vector.reduce.fmaximum.v4f64(<4 x double>)