1; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py 2; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFH 3; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFHMIN 4; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE 5 6define void @reduce_fadd_bfloat() { 7; FP-REDUCE-LABEL: 'reduce_fadd_bfloat' 8; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call fast bfloat @llvm.vector.reduce.fadd.v1bf16(bfloat 0xR0000, <1 x bfloat> undef) 9; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call fast bfloat @llvm.vector.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef) 10; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call fast bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef) 11; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call fast bfloat @llvm.vector.reduce.fadd.v8bf16(bfloat 0xR0000, <8 x bfloat> undef) 12; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V16 = call fast bfloat @llvm.vector.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> undef) 13; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 211 for instruction: %v32 = call fast bfloat @llvm.vector.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef) 14; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 541 for instruction: %V64 = call fast bfloat @llvm.vector.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef) 15; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 573 for instruction: %V128 = call fast bfloat @llvm.vector.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef) 16; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call fast bfloat @llvm.vector.reduce.fadd.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef) 17; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call fast bfloat @llvm.vector.reduce.fadd.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef) 18; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call fast bfloat @llvm.vector.reduce.fadd.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef) 19; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast bfloat @llvm.vector.reduce.fadd.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef) 20; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call fast bfloat @llvm.vector.reduce.fadd.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef) 21; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call fast bfloat @llvm.vector.reduce.fadd.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef) 22; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void 23; 24; SIZE-LABEL: 'reduce_fadd_bfloat' 25; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call fast bfloat @llvm.vector.reduce.fadd.v1bf16(bfloat 0xR0000, <1 x bfloat> undef) 26; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call fast bfloat @llvm.vector.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef) 27; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call fast bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef) 28; SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call fast bfloat @llvm.vector.reduce.fadd.v8bf16(bfloat 0xR0000, <8 x bfloat> undef) 29; SIZE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call fast bfloat @llvm.vector.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> undef) 30; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %v32 = call fast bfloat @llvm.vector.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef) 31; SIZE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V64 = call fast bfloat @llvm.vector.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef) 32; SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V128 = call fast bfloat @llvm.vector.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef) 33; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call fast bfloat @llvm.vector.reduce.fadd.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef) 34; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call fast bfloat @llvm.vector.reduce.fadd.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef) 35; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call fast bfloat @llvm.vector.reduce.fadd.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef) 36; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast bfloat @llvm.vector.reduce.fadd.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef) 37; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call fast bfloat @llvm.vector.reduce.fadd.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef) 38; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call fast bfloat @llvm.vector.reduce.fadd.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef) 39; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void 40; 41 %V1 = call fast bfloat @llvm.vector.reduce.fadd.v1bf16(bfloat 0.0, <1 x bfloat> undef) 42 %V2 = call fast bfloat @llvm.vector.reduce.fadd.v2bf16(bfloat 0.0, <2 x bfloat> undef) 43 %V4 = call fast bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0.0, <4 x bfloat> undef) 44 %V8 = call fast bfloat @llvm.vector.reduce.fadd.v8bf16(bfloat 0.0, <8 x bfloat> undef) 45 %V16 = call fast bfloat @llvm.vector.reduce.fadd.v16bf16(bfloat 0.0, <16 x bfloat> undef) 46 %v32 = call fast bfloat @llvm.vector.reduce.fadd.v32bf16(bfloat 0.0, <32 x bfloat> undef) 47 %V64 = call fast bfloat @llvm.vector.reduce.fadd.v64bf16(bfloat 0.0, <64 x bfloat> undef) 48 %V128 = call fast bfloat @llvm.vector.reduce.fadd.v128bf16(bfloat 0.0, <128 x bfloat> undef) 49 %NXV1 = call fast bfloat @llvm.vector.reduce.fadd.nxv1bf16(bfloat 0.0, <vscale x 1 x bfloat> undef) 50 %NXV2 = call fast bfloat @llvm.vector.reduce.fadd.nxv2bf16(bfloat 0.0, <vscale x 2 x bfloat> undef) 51 %NXV4 = call fast bfloat @llvm.vector.reduce.fadd.nxv4bf16(bfloat 0.0, <vscale x 4 x bfloat> undef) 52 %NXV8 = call fast bfloat @llvm.vector.reduce.fadd.nxv8bf16(bfloat 0.0, <vscale x 8 x bfloat> undef) 53 %NXV16 = call fast bfloat @llvm.vector.reduce.fadd.nxv16bf16(bfloat 0.0, <vscale x 16 x bfloat> undef) 54 %NXV32 = call fast bfloat @llvm.vector.reduce.fadd.nxv32bf16(bfloat 0.0, <vscale x 32 x bfloat> undef) 55 ret void 56} 57 58define void @reduce_fadd_half() { 59; FP-REDUCE-ZVFH-LABEL: 'reduce_fadd_half' 60; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call fast half @llvm.vector.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef) 61; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call fast half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef) 62; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef) 63; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef) 64; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef) 65; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v32 = call fast half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef) 66; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call fast half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef) 67; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call fast half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef) 68; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1 = call fast half @llvm.vector.reduce.fadd.nxv1f16(half 0xH0000, <vscale x 1 x half> undef) 69; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2 = call fast half @llvm.vector.reduce.fadd.nxv2f16(half 0xH0000, <vscale x 2 x half> undef) 70; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV4 = call fast half @llvm.vector.reduce.fadd.nxv4f16(half 0xH0000, <vscale x 4 x half> undef) 71; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %NXV8 = call fast half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> undef) 72; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %NXV16 = call fast half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef) 73; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV32 = call fast half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef) 74; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void 75; 76; FP-REDUCE-ZVFHMIN-LABEL: 'reduce_fadd_half' 77; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call fast half @llvm.vector.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef) 78; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call fast half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef) 79; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef) 80; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef) 81; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V16 = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef) 82; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 211 for instruction: %v32 = call fast half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef) 83; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 541 for instruction: %V64 = call fast half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef) 84; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 573 for instruction: %V128 = call fast half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef) 85; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call fast half @llvm.vector.reduce.fadd.nxv1f16(half 0xH0000, <vscale x 1 x half> undef) 86; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call fast half @llvm.vector.reduce.fadd.nxv2f16(half 0xH0000, <vscale x 2 x half> undef) 87; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call fast half @llvm.vector.reduce.fadd.nxv4f16(half 0xH0000, <vscale x 4 x half> undef) 88; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> undef) 89; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call fast half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef) 90; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call fast half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef) 91; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void 92; 93; SIZE-LABEL: 'reduce_fadd_half' 94; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call fast half @llvm.vector.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef) 95; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call fast half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef) 96; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef) 97; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef) 98; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef) 99; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32 = call fast half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef) 100; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call fast half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef) 101; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call fast half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef) 102; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1 = call fast half @llvm.vector.reduce.fadd.nxv1f16(half 0xH0000, <vscale x 1 x half> undef) 103; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2 = call fast half @llvm.vector.reduce.fadd.nxv2f16(half 0xH0000, <vscale x 2 x half> undef) 104; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV4 = call fast half @llvm.vector.reduce.fadd.nxv4f16(half 0xH0000, <vscale x 4 x half> undef) 105; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV8 = call fast half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> undef) 106; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV16 = call fast half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef) 107; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV32 = call fast half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef) 108; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void 109; 110 %V1 = call fast half @llvm.vector.reduce.fadd.v1f16(half 0.0, <1 x half> undef) 111 %V2 = call fast half @llvm.vector.reduce.fadd.v2f16(half 0.0, <2 x half> undef) 112 %V4 = call fast half @llvm.vector.reduce.fadd.v4f16(half 0.0, <4 x half> undef) 113 %V8 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0.0, <8 x half> undef) 114 %V16 = call fast half @llvm.vector.reduce.fadd.v16f16(half 0.0, <16 x half> undef) 115 %v32 = call fast half @llvm.vector.reduce.fadd.v32f16(half 0.0, <32 x half> undef) 116 %V64 = call fast half @llvm.vector.reduce.fadd.v64f16(half 0.0, <64 x half> undef) 117 %V128 = call fast half @llvm.vector.reduce.fadd.v128f16(half 0.0, <128 x half> undef) 118 %NXV1 = call fast half @llvm.vector.reduce.fadd.nxv1f16(half 0.0, <vscale x 1 x half> undef) 119 %NXV2 = call fast half @llvm.vector.reduce.fadd.nxv2f16(half 0.0, <vscale x 2 x half> undef) 120 %NXV4 = call fast half @llvm.vector.reduce.fadd.nxv4f16(half 0.0, <vscale x 4 x half> undef) 121 %NXV8 = call fast half @llvm.vector.reduce.fadd.nxv8f16(half 0.0, <vscale x 8 x half> undef) 122 %NXV16 = call fast half @llvm.vector.reduce.fadd.nxv16f16(half 0.0, <vscale x 16 x half> undef) 123 %NXV32 = call fast half @llvm.vector.reduce.fadd.nxv32f16(half 0.0, <vscale x 32 x half> undef) 124 ret void 125} 126 127define void @reduce_fadd_float() { 128; FP-REDUCE-LABEL: 'reduce_fadd_float' 129; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call fast float @llvm.vector.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef) 130; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call fast float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef) 131; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef) 132; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef) 133; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef) 134; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v32 = call fast float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef) 135; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call fast float @llvm.vector.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef) 136; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V128 = call fast float @llvm.vector.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef) 137; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1 = call fast float @llvm.vector.reduce.fadd.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef) 138; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2 = call fast float @llvm.vector.reduce.fadd.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef) 139; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV4 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef) 140; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %NXV8 = call fast float @llvm.vector.reduce.fadd.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef) 141; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %NXV16 = call fast float @llvm.vector.reduce.fadd.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef) 142; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void 143; 144; SIZE-LABEL: 'reduce_fadd_float' 145; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call fast float @llvm.vector.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef) 146; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call fast float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef) 147; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef) 148; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef) 149; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef) 150; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32 = call fast float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef) 151; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call fast float @llvm.vector.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef) 152; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128 = call fast float @llvm.vector.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef) 153; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1 = call fast float @llvm.vector.reduce.fadd.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef) 154; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2 = call fast float @llvm.vector.reduce.fadd.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef) 155; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV4 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef) 156; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV8 = call fast float @llvm.vector.reduce.fadd.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef) 157; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV16 = call fast float @llvm.vector.reduce.fadd.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef) 158; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void 159; 160 %V1 = call fast float @llvm.vector.reduce.fadd.v1f32(float 0.0, <1 x float> undef) 161 %V2 = call fast float @llvm.vector.reduce.fadd.v2f32(float 0.0, <2 x float> undef) 162 %V4 = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> undef) 163 %V8 = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.0, <8 x float> undef) 164 %V16 = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.0, <16 x float> undef) 165 %v32 = call fast float @llvm.vector.reduce.fadd.v32f32(float 0.0, <32 x float> undef) 166 %V64 = call fast float @llvm.vector.reduce.fadd.v64f32(float 0.0, <64 x float> undef) 167 %V128 = call fast float @llvm.vector.reduce.fadd.v128f32(float 0.0, <128 x float> undef) 168 %NXV1 = call fast float @llvm.vector.reduce.fadd.nxv1f32(float 0.0, <vscale x 1 x float> undef) 169 %NXV2 = call fast float @llvm.vector.reduce.fadd.nxv2f32(float 0.0, <vscale x 2 x float> undef) 170 %NXV4 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.0, <vscale x 4 x float> undef) 171 %NXV8 = call fast float @llvm.vector.reduce.fadd.nxv8f32(float 0.0, <vscale x 8 x float> undef) 172 %NXV16 = call fast float @llvm.vector.reduce.fadd.nxv16f32(float 0.0, <vscale x 16 x float> undef) 173 ret void 174} 175 176define void @reduce_fadd_double() { 177; FP-REDUCE-LABEL: 'reduce_fadd_double' 178; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call fast double @llvm.vector.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef) 179; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef) 180; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef) 181; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call fast double @llvm.vector.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef) 182; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call fast double @llvm.vector.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef) 183; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32 = call fast double @llvm.vector.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef) 184; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64 = call fast double @llvm.vector.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef) 185; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V128 = call fast double @llvm.vector.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef) 186; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1 = call fast double @llvm.vector.reduce.fadd.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef) 187; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2 = call fast double @llvm.vector.reduce.fadd.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef) 188; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV4 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef) 189; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %NXV8 = call fast double @llvm.vector.reduce.fadd.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef) 190; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void 191; 192; SIZE-LABEL: 'reduce_fadd_double' 193; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call fast double @llvm.vector.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef) 194; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef) 195; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef) 196; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call fast double @llvm.vector.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef) 197; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call fast double @llvm.vector.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef) 198; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32 = call fast double @llvm.vector.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef) 199; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64 = call fast double @llvm.vector.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef) 200; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call fast double @llvm.vector.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef) 201; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1 = call fast double @llvm.vector.reduce.fadd.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef) 202; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2 = call fast double @llvm.vector.reduce.fadd.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef) 203; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV4 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef) 204; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV8 = call fast double @llvm.vector.reduce.fadd.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef) 205; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void 206; 207 %V1 = call fast double @llvm.vector.reduce.fadd.v1f64(double 0.0, <1 x double> undef) 208 %V2 = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.0, <2 x double> undef) 209 %V4 = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.0, <4 x double> undef) 210 %V8 = call fast double @llvm.vector.reduce.fadd.v8f64(double 0.0, <8 x double> undef) 211 %V16 = call fast double @llvm.vector.reduce.fadd.v16f64(double 0.0, <16 x double> undef) 212 %v32 = call fast double @llvm.vector.reduce.fadd.v32f64(double 0.0, <32 x double> undef) 213 %V64 = call fast double @llvm.vector.reduce.fadd.v64f64(double 0.0, <64 x double> undef) 214 %V128 = call fast double @llvm.vector.reduce.fadd.v128f64(double 0.0, <128 x double> undef) 215 %NXV1 = call fast double @llvm.vector.reduce.fadd.nxv1f64(double 0.0, <vscale x 1 x double> undef) 216 %NXV2 = call fast double @llvm.vector.reduce.fadd.nxv2f64(double 0.0, <vscale x 2 x double> undef) 217 %NXV4 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.0, <vscale x 4 x double> undef) 218 %NXV8 = call fast double @llvm.vector.reduce.fadd.nxv8f64(double 0.0, <vscale x 8 x double> undef) 219 ret void 220} 221 222define void @reduce_ordered_fadd_bfloat() { 223; FP-REDUCE-LABEL: 'reduce_ordered_fadd_bfloat' 224; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call bfloat @llvm.vector.reduce.fadd.v1bf16(bfloat 0xR0000, <1 x bfloat> undef) 225; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call bfloat @llvm.vector.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef) 226; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef) 227; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8 = call bfloat @llvm.vector.reduce.fadd.v8bf16(bfloat 0xR0000, <8 x bfloat> undef) 228; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16 = call bfloat @llvm.vector.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> undef) 229; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %v32 = call bfloat @llvm.vector.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef) 230; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 255 for instruction: %V64 = call bfloat @llvm.vector.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef) 231; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 510 for instruction: %V128 = call bfloat @llvm.vector.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef) 232; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call bfloat @llvm.vector.reduce.fadd.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef) 233; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call bfloat @llvm.vector.reduce.fadd.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef) 234; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call bfloat @llvm.vector.reduce.fadd.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef) 235; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call bfloat @llvm.vector.reduce.fadd.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef) 236; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call bfloat @llvm.vector.reduce.fadd.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef) 237; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call bfloat @llvm.vector.reduce.fadd.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef) 238; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void 239; 240; SIZE-LABEL: 'reduce_ordered_fadd_bfloat' 241; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call bfloat @llvm.vector.reduce.fadd.v1bf16(bfloat 0xR0000, <1 x bfloat> undef) 242; SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call bfloat @llvm.vector.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef) 243; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef) 244; SIZE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8 = call bfloat @llvm.vector.reduce.fadd.v8bf16(bfloat 0xR0000, <8 x bfloat> undef) 245; SIZE-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16 = call bfloat @llvm.vector.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> undef) 246; SIZE-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %v32 = call bfloat @llvm.vector.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef) 247; SIZE-NEXT: Cost Model: Found an estimated cost of 191 for instruction: %V64 = call bfloat @llvm.vector.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef) 248; SIZE-NEXT: Cost Model: Found an estimated cost of 382 for instruction: %V128 = call bfloat @llvm.vector.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef) 249; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call bfloat @llvm.vector.reduce.fadd.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef) 250; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call bfloat @llvm.vector.reduce.fadd.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef) 251; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call bfloat @llvm.vector.reduce.fadd.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef) 252; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call bfloat @llvm.vector.reduce.fadd.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef) 253; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call bfloat @llvm.vector.reduce.fadd.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef) 254; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call bfloat @llvm.vector.reduce.fadd.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef) 255; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void 256; 257 %V1 = call bfloat @llvm.vector.reduce.fadd.v1bf16(bfloat 0.0, <1 x bfloat> undef) 258 %V2 = call bfloat @llvm.vector.reduce.fadd.v2bf16(bfloat 0.0, <2 x bfloat> undef) 259 %V4 = call bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0.0, <4 x bfloat> undef) 260 %V8 = call bfloat @llvm.vector.reduce.fadd.v8bf16(bfloat 0.0, <8 x bfloat> undef) 261 %V16 = call bfloat @llvm.vector.reduce.fadd.v16bf16(bfloat 0.0, <16 x bfloat> undef) 262 %v32 = call bfloat @llvm.vector.reduce.fadd.v32bf16(bfloat 0.0, <32 x bfloat> undef) 263 %V64 = call bfloat @llvm.vector.reduce.fadd.v64bf16(bfloat 0.0, <64 x bfloat> undef) 264 %V128 = call bfloat @llvm.vector.reduce.fadd.v128bf16(bfloat 0.0, <128 x bfloat> undef) 265 %NXV1 = call bfloat @llvm.vector.reduce.fadd.nxv1bf16(bfloat 0.0, <vscale x 1 x bfloat> undef) 266 %NXV2 = call bfloat @llvm.vector.reduce.fadd.nxv2bf16(bfloat 0.0, <vscale x 2 x bfloat> undef) 267 %NXV4 = call bfloat @llvm.vector.reduce.fadd.nxv4bf16(bfloat 0.0, <vscale x 4 x bfloat> undef) 268 %NXV8 = call bfloat @llvm.vector.reduce.fadd.nxv8bf16(bfloat 0.0, <vscale x 8 x bfloat> undef) 269 %NXV16 = call bfloat @llvm.vector.reduce.fadd.nxv16bf16(bfloat 0.0, <vscale x 16 x bfloat> undef) 270 %NXV32 = call bfloat @llvm.vector.reduce.fadd.nxv32bf16(bfloat 0.0, <vscale x 32 x bfloat> undef) 271 ret void 272} 273 274define void @reduce_ordered_fadd_half() { 275; FP-REDUCE-ZVFH-LABEL: 'reduce_ordered_fadd_half' 276; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call half @llvm.vector.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef) 277; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef) 278; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef) 279; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef) 280; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef) 281; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v32 = call half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef) 282; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64 = call half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef) 283; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V128 = call half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef) 284; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV1 = call half @llvm.vector.reduce.fadd.nxv1f16(half 0xH0000, <vscale x 1 x half> undef) 285; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %NXV2 = call half @llvm.vector.reduce.fadd.nxv2f16(half 0xH0000, <vscale x 2 x half> undef) 286; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV4 = call half @llvm.vector.reduce.fadd.nxv4f16(half 0xH0000, <vscale x 4 x half> undef) 287; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8 = call half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> undef) 288; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %NXV16 = call half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef) 289; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %NXV32 = call half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef) 290; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void 291; 292; FP-REDUCE-ZVFHMIN-LABEL: 'reduce_ordered_fadd_half' 293; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call half @llvm.vector.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef) 294; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef) 295; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef) 296; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef) 297; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef) 298; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %v32 = call half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef) 299; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 255 for instruction: %V64 = call half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef) 300; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 510 for instruction: %V128 = call half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef) 301; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call half @llvm.vector.reduce.fadd.nxv1f16(half 0xH0000, <vscale x 1 x half> undef) 302; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call half @llvm.vector.reduce.fadd.nxv2f16(half 0xH0000, <vscale x 2 x half> undef) 303; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call half @llvm.vector.reduce.fadd.nxv4f16(half 0xH0000, <vscale x 4 x half> undef) 304; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> undef) 305; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef) 306; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef) 307; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void 308; 309; SIZE-LABEL: 'reduce_ordered_fadd_half' 310; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call half @llvm.vector.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef) 311; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef) 312; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef) 313; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef) 314; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef) 315; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32 = call half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef) 316; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef) 317; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef) 318; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1 = call half @llvm.vector.reduce.fadd.nxv1f16(half 0xH0000, <vscale x 1 x half> undef) 319; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2 = call half @llvm.vector.reduce.fadd.nxv2f16(half 0xH0000, <vscale x 2 x half> undef) 320; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV4 = call half @llvm.vector.reduce.fadd.nxv4f16(half 0xH0000, <vscale x 4 x half> undef) 321; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV8 = call half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> undef) 322; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV16 = call half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef) 323; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV32 = call half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef) 324; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void 325; 326 %V1 = call half @llvm.vector.reduce.fadd.v1f16(half 0.0, <1 x half> undef) 327 %V2 = call half @llvm.vector.reduce.fadd.v2f16(half 0.0, <2 x half> undef) 328 %V4 = call half @llvm.vector.reduce.fadd.v4f16(half 0.0, <4 x half> undef) 329 %V8 = call half @llvm.vector.reduce.fadd.v8f16(half 0.0, <8 x half> undef) 330 %V16 = call half @llvm.vector.reduce.fadd.v16f16(half 0.0, <16 x half> undef) 331 %v32 = call half @llvm.vector.reduce.fadd.v32f16(half 0.0, <32 x half> undef) 332 %V64 = call half @llvm.vector.reduce.fadd.v64f16(half 0.0, <64 x half> undef) 333 %V128 = call half @llvm.vector.reduce.fadd.v128f16(half 0.0, <128 x half> undef) 334 %NXV1 = call half @llvm.vector.reduce.fadd.nxv1f16(half 0.0, <vscale x 1 x half> undef) 335 %NXV2 = call half @llvm.vector.reduce.fadd.nxv2f16(half 0.0, <vscale x 2 x half> undef) 336 %NXV4 = call half @llvm.vector.reduce.fadd.nxv4f16(half 0.0, <vscale x 4 x half> undef) 337 %NXV8 = call half @llvm.vector.reduce.fadd.nxv8f16(half 0.0, <vscale x 8 x half> undef) 338 %NXV16 = call half @llvm.vector.reduce.fadd.nxv16f16(half 0.0, <vscale x 16 x half> undef) 339 %NXV32 = call half @llvm.vector.reduce.fadd.nxv32f16(half 0.0, <vscale x 32 x half> undef) 340 ret void 341} 342 343define void @reduce_ordered_fadd_float() { 344; FP-REDUCE-LABEL: 'reduce_ordered_fadd_float' 345; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call float @llvm.vector.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef) 346; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef) 347; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef) 348; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef) 349; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16 = call float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef) 350; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v32 = call float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef) 351; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64 = call float @llvm.vector.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef) 352; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V128 = call float @llvm.vector.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef) 353; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV1 = call float @llvm.vector.reduce.fadd.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef) 354; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %NXV2 = call float @llvm.vector.reduce.fadd.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef) 355; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV4 = call float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef) 356; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8 = call float @llvm.vector.reduce.fadd.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef) 357; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %NXV16 = call float @llvm.vector.reduce.fadd.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef) 358; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void 359; 360; SIZE-LABEL: 'reduce_ordered_fadd_float' 361; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call float @llvm.vector.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef) 362; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef) 363; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef) 364; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef) 365; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef) 366; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32 = call float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef) 367; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call float @llvm.vector.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef) 368; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128 = call float @llvm.vector.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef) 369; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1 = call float @llvm.vector.reduce.fadd.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef) 370; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2 = call float @llvm.vector.reduce.fadd.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef) 371; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV4 = call float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef) 372; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV8 = call float @llvm.vector.reduce.fadd.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef) 373; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV16 = call float @llvm.vector.reduce.fadd.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef) 374; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void 375; 376 %V1 = call float @llvm.vector.reduce.fadd.v1f32(float 0.0, <1 x float> undef) 377 %V2 = call float @llvm.vector.reduce.fadd.v2f32(float 0.0, <2 x float> undef) 378 %V4 = call float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> undef) 379 %V8 = call float @llvm.vector.reduce.fadd.v8f32(float 0.0, <8 x float> undef) 380 %V16 = call float @llvm.vector.reduce.fadd.v16f32(float 0.0, <16 x float> undef) 381 %v32 = call float @llvm.vector.reduce.fadd.v32f32(float 0.0, <32 x float> undef) 382 %V64 = call float @llvm.vector.reduce.fadd.v64f32(float 0.0, <64 x float> undef) 383 %V128 = call float @llvm.vector.reduce.fadd.v128f32(float 0.0, <128 x float> undef) 384 %NXV1 = call float @llvm.vector.reduce.fadd.nxv1f32(float 0.0, <vscale x 1 x float> undef) 385 %NXV2 = call float @llvm.vector.reduce.fadd.nxv2f32(float 0.0, <vscale x 2 x float> undef) 386 %NXV4 = call float @llvm.vector.reduce.fadd.nxv4f32(float 0.0, <vscale x 4 x float> undef) 387 %NXV8 = call float @llvm.vector.reduce.fadd.nxv8f32(float 0.0, <vscale x 8 x float> undef) 388 %NXV16 = call float @llvm.vector.reduce.fadd.nxv16f32(float 0.0, <vscale x 16 x float> undef) 389 ret void 390} 391 392define void @reduce_ordered_fadd_double() { 393; FP-REDUCE-LABEL: 'reduce_ordered_fadd_double' 394; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call double @llvm.vector.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef) 395; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef) 396; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef) 397; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call double @llvm.vector.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef) 398; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16 = call double @llvm.vector.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef) 399; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v32 = call double @llvm.vector.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef) 400; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64 = call double @llvm.vector.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef) 401; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V128 = call double @llvm.vector.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef) 402; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV1 = call double @llvm.vector.reduce.fadd.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef) 403; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %NXV2 = call double @llvm.vector.reduce.fadd.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef) 404; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV4 = call double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef) 405; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8 = call double @llvm.vector.reduce.fadd.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef) 406; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void 407; 408; SIZE-LABEL: 'reduce_ordered_fadd_double' 409; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call double @llvm.vector.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef) 410; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef) 411; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef) 412; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call double @llvm.vector.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef) 413; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call double @llvm.vector.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef) 414; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32 = call double @llvm.vector.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef) 415; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64 = call double @llvm.vector.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef) 416; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call double @llvm.vector.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef) 417; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1 = call double @llvm.vector.reduce.fadd.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef) 418; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2 = call double @llvm.vector.reduce.fadd.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef) 419; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV4 = call double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef) 420; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV8 = call double @llvm.vector.reduce.fadd.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef) 421; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void 422; 423 %V1 = call double @llvm.vector.reduce.fadd.v1f64(double 0.0, <1 x double> undef) 424 %V2 = call double @llvm.vector.reduce.fadd.v2f64(double 0.0, <2 x double> undef) 425 %V4 = call double @llvm.vector.reduce.fadd.v4f64(double 0.0, <4 x double> undef) 426 %V8 = call double @llvm.vector.reduce.fadd.v8f64(double 0.0, <8 x double> undef) 427 %V16 = call double @llvm.vector.reduce.fadd.v16f64(double 0.0, <16 x double> undef) 428 %v32 = call double @llvm.vector.reduce.fadd.v32f64(double 0.0, <32 x double> undef) 429 %V64 = call double @llvm.vector.reduce.fadd.v64f64(double 0.0, <64 x double> undef) 430 %V128 = call double @llvm.vector.reduce.fadd.v128f64(double 0.0, <128 x double> undef) 431 %NXV1 = call double @llvm.vector.reduce.fadd.nxv1f64(double 0.0, <vscale x 1 x double> undef) 432 %NXV2 = call double @llvm.vector.reduce.fadd.nxv2f64(double 0.0, <vscale x 2 x double> undef) 433 %NXV4 = call double @llvm.vector.reduce.fadd.nxv4f64(double 0.0, <vscale x 4 x double> undef) 434 %NXV8 = call double @llvm.vector.reduce.fadd.nxv8f64(double 0.0, <vscale x 8 x double> undef) 435 ret void 436} 437