xref: /llvm-project/llvm/test/Analysis/CostModel/RISCV/reduce-fmul.ll (revision 8b55162e195783dd27e1c69fb4d97971ef76725b)
1; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
2; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFH
3; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFHMIN
4; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s  --check-prefix=SIZE
5
; Cost-model coverage for llvm.vector.reduce.fmul calls carrying the `fast`
; flag on bfloat vectors: fixed widths <1 x bfloat> through <128 x bfloat>,
; then scalable widths <vscale x 1 x bfloat> through <vscale x 32 x bfloat>.
; Throughput expectations sit under the shared FP-REDUCE prefix, which both
; throughput RUN configurations (+zvfh and +zvfhmin) enable, so the two must
; report identical numbers here. Code-size expectations use the SIZE prefix.
; Every scalable-vector call is expected to print "Invalid cost".
; NOTE(review): %v32 is spelled lower-case unlike its siblings; the check
; lines use the same spelling, so any rename has to touch both.
6define void @reduce_fmul_bfloat() {
7; FP-REDUCE-LABEL: 'reduce_fmul_bfloat'
8; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1 = call fast bfloat @llvm.vector.reduce.fmul.v1bf16(bfloat 0xR0000, <1 x bfloat> undef)
9; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2 = call fast bfloat @llvm.vector.reduce.fmul.v2bf16(bfloat 0xR0000, <2 x bfloat> undef)
10; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4 = call fast bfloat @llvm.vector.reduce.fmul.v4bf16(bfloat 0xR0000, <4 x bfloat> undef)
11; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8 = call fast bfloat @llvm.vector.reduce.fmul.v8bf16(bfloat 0xR0000, <8 x bfloat> undef)
12; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 73 for instruction: %V16 = call fast bfloat @llvm.vector.reduce.fmul.v16bf16(bfloat 0xR0000, <16 x bfloat> undef)
13; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 211 for instruction: %v32 = call fast bfloat @llvm.vector.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef)
14; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 541 for instruction: %V64 = call fast bfloat @llvm.vector.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef)
15; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 573 for instruction: %V128 = call fast bfloat @llvm.vector.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef)
16; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call fast bfloat @llvm.vector.reduce.fmul.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef)
17; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call fast bfloat @llvm.vector.reduce.fmul.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef)
18; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call fast bfloat @llvm.vector.reduce.fmul.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef)
19; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call fast bfloat @llvm.vector.reduce.fmul.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef)
20; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call fast bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef)
21; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32 = call fast bfloat @llvm.vector.reduce.fmul.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef)
22; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
23;
24; SIZE-LABEL: 'reduce_fmul_bfloat'
25; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1 = call fast bfloat @llvm.vector.reduce.fmul.v1bf16(bfloat 0xR0000, <1 x bfloat> undef)
26; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2 = call fast bfloat @llvm.vector.reduce.fmul.v2bf16(bfloat 0xR0000, <2 x bfloat> undef)
27; SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4 = call fast bfloat @llvm.vector.reduce.fmul.v4bf16(bfloat 0xR0000, <4 x bfloat> undef)
28; SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8 = call fast bfloat @llvm.vector.reduce.fmul.v8bf16(bfloat 0xR0000, <8 x bfloat> undef)
29; SIZE-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V16 = call fast bfloat @llvm.vector.reduce.fmul.v16bf16(bfloat 0xR0000, <16 x bfloat> undef)
30; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %v32 = call fast bfloat @llvm.vector.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef)
31; SIZE-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V64 = call fast bfloat @llvm.vector.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef)
32; SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V128 = call fast bfloat @llvm.vector.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef)
33; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call fast bfloat @llvm.vector.reduce.fmul.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef)
34; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call fast bfloat @llvm.vector.reduce.fmul.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef)
35; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call fast bfloat @llvm.vector.reduce.fmul.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef)
36; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call fast bfloat @llvm.vector.reduce.fmul.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef)
37; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call fast bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef)
38; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32 = call fast bfloat @llvm.vector.reduce.fmul.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef)
39; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
40;
  ; Fixed-width vectors; each call uses start value 0.0 and an undef vector.
41  %V1 = call fast bfloat @llvm.vector.reduce.fmul.v1bf16(bfloat 0.0, <1 x bfloat> undef)
42  %V2 = call fast bfloat @llvm.vector.reduce.fmul.v2bf16(bfloat 0.0, <2 x bfloat> undef)
43  %V4 = call fast bfloat @llvm.vector.reduce.fmul.v4bf16(bfloat 0.0, <4 x bfloat> undef)
44  %V8 = call fast bfloat @llvm.vector.reduce.fmul.v8bf16(bfloat 0.0, <8 x bfloat> undef)
45  %V16 = call fast bfloat @llvm.vector.reduce.fmul.v16bf16(bfloat 0.0, <16 x bfloat> undef)
46  %v32 = call fast bfloat @llvm.vector.reduce.fmul.v32bf16(bfloat 0.0, <32 x bfloat> undef)
47  %V64 = call fast bfloat @llvm.vector.reduce.fmul.v64bf16(bfloat 0.0, <64 x bfloat> undef)
48  %V128 = call fast bfloat @llvm.vector.reduce.fmul.v128bf16(bfloat 0.0, <128 x bfloat> undef)
  ; Scalable vectors; the expectations above require "Invalid cost" for these.
49  %NXV1 = call fast bfloat @llvm.vector.reduce.fmul.nxv1bf16(bfloat 0.0, <vscale x 1 x bfloat> undef)
50  %NXV2 = call fast bfloat @llvm.vector.reduce.fmul.nxv2bf16(bfloat 0.0, <vscale x 2 x bfloat> undef)
51  %NXV4 = call fast bfloat @llvm.vector.reduce.fmul.nxv4bf16(bfloat 0.0, <vscale x 4 x bfloat> undef)
52  %NXV8 = call fast bfloat @llvm.vector.reduce.fmul.nxv8bf16(bfloat 0.0, <vscale x 8 x bfloat> undef)
53  %NXV16 = call fast bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0.0, <vscale x 16 x bfloat> undef)
54  %NXV32 = call fast bfloat @llvm.vector.reduce.fmul.nxv32bf16(bfloat 0.0, <vscale x 32 x bfloat> undef)
55  ret void
56}
57
; Cost-model coverage for llvm.vector.reduce.fmul calls carrying the `fast`
; flag on half vectors: fixed widths <1 x half> through <128 x half>, then
; scalable widths <vscale x 1 x half> through <vscale x 32 x half>.
; Throughput expectations are split per configuration (FP-REDUCE-ZVFH for the
; +zvfh run, FP-REDUCE-ZVFHMIN for the +zvfhmin run) because the expected
; costs for %V2 through %v32 differ: 7/13/19/49/151 versus 8/15/28/73/211.
; %V1, %V64 and %V128 are expected to cost the same in both configurations.
; Code-size expectations (SIZE prefix) come from the +zvfh configuration only.
; Every scalable-vector call is expected to print "Invalid cost".
; NOTE(review): the higher +zvfhmin numbers presumably reflect f16 vector
; arithmetic not being natively available there - this file does not show
; the reason, confirm against the RISC-V cost model.
58define void @reduce_fmul_half() {
59; FP-REDUCE-ZVFH-LABEL: 'reduce_fmul_half'
60; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1 = call fast half @llvm.vector.reduce.fmul.v1f16(half 0xH0000, <1 x half> undef)
61; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2 = call fast half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)
62; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4 = call fast half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)
63; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V8 = call fast half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef)
64; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 49 for instruction: %V16 = call fast half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef)
65; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 151 for instruction: %v32 = call fast half @llvm.vector.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef)
66; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 541 for instruction: %V64 = call fast half @llvm.vector.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef)
67; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 573 for instruction: %V128 = call fast half @llvm.vector.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef)
68; FP-REDUCE-ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call fast half @llvm.vector.reduce.fmul.nxv1f16(half 0xH0000, <vscale x 1 x half> undef)
69; FP-REDUCE-ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call fast half @llvm.vector.reduce.fmul.nxv2f16(half 0xH0000, <vscale x 2 x half> undef)
70; FP-REDUCE-ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call fast half @llvm.vector.reduce.fmul.nxv4f16(half 0xH0000, <vscale x 4 x half> undef)
71; FP-REDUCE-ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call fast half @llvm.vector.reduce.fmul.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
72; FP-REDUCE-ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call fast half @llvm.vector.reduce.fmul.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
73; FP-REDUCE-ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV32 = call fast half @llvm.vector.reduce.fmul.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
74; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
75;
76; FP-REDUCE-ZVFHMIN-LABEL: 'reduce_fmul_half'
77; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1 = call fast half @llvm.vector.reduce.fmul.v1f16(half 0xH0000, <1 x half> undef)
78; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2 = call fast half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)
79; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4 = call fast half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)
80; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8 = call fast half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef)
81; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 73 for instruction: %V16 = call fast half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef)
82; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 211 for instruction: %v32 = call fast half @llvm.vector.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef)
83; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 541 for instruction: %V64 = call fast half @llvm.vector.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef)
84; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 573 for instruction: %V128 = call fast half @llvm.vector.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef)
85; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call fast half @llvm.vector.reduce.fmul.nxv1f16(half 0xH0000, <vscale x 1 x half> undef)
86; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call fast half @llvm.vector.reduce.fmul.nxv2f16(half 0xH0000, <vscale x 2 x half> undef)
87; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call fast half @llvm.vector.reduce.fmul.nxv4f16(half 0xH0000, <vscale x 4 x half> undef)
88; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call fast half @llvm.vector.reduce.fmul.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
89; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call fast half @llvm.vector.reduce.fmul.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
90; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV32 = call fast half @llvm.vector.reduce.fmul.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
91; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
92;
93; SIZE-LABEL: 'reduce_fmul_half'
94; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1 = call fast half @llvm.vector.reduce.fmul.v1f16(half 0xH0000, <1 x half> undef)
95; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2 = call fast half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)
96; SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4 = call fast half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)
97; SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8 = call fast half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef)
98; SIZE-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V16 = call fast half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef)
99; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %v32 = call fast half @llvm.vector.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef)
100; SIZE-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V64 = call fast half @llvm.vector.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef)
101; SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V128 = call fast half @llvm.vector.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef)
102; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call fast half @llvm.vector.reduce.fmul.nxv1f16(half 0xH0000, <vscale x 1 x half> undef)
103; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call fast half @llvm.vector.reduce.fmul.nxv2f16(half 0xH0000, <vscale x 2 x half> undef)
104; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call fast half @llvm.vector.reduce.fmul.nxv4f16(half 0xH0000, <vscale x 4 x half> undef)
105; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call fast half @llvm.vector.reduce.fmul.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
106; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call fast half @llvm.vector.reduce.fmul.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
107; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32 = call fast half @llvm.vector.reduce.fmul.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
108; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
109;
  ; Fixed-width vectors; each call uses start value 0.0 and an undef vector.
110  %V1 = call fast half @llvm.vector.reduce.fmul.v1f16(half 0.0, <1 x half> undef)
111  %V2 = call fast half @llvm.vector.reduce.fmul.v2f16(half 0.0, <2 x half> undef)
112  %V4 = call fast half @llvm.vector.reduce.fmul.v4f16(half 0.0, <4 x half> undef)
113  %V8 = call fast half @llvm.vector.reduce.fmul.v8f16(half 0.0, <8 x half> undef)
114  %V16 = call fast half @llvm.vector.reduce.fmul.v16f16(half 0.0, <16 x half> undef)
115  %v32 = call fast half @llvm.vector.reduce.fmul.v32f16(half 0.0, <32 x half> undef)
116  %V64 = call fast half @llvm.vector.reduce.fmul.v64f16(half 0.0, <64 x half> undef)
117  %V128 = call fast half @llvm.vector.reduce.fmul.v128f16(half 0.0, <128 x half> undef)
  ; Scalable vectors; the expectations above require "Invalid cost" for these.
118  %NXV1 = call fast half @llvm.vector.reduce.fmul.nxv1f16(half 0.0, <vscale x 1 x half> undef)
119  %NXV2 = call fast half @llvm.vector.reduce.fmul.nxv2f16(half 0.0, <vscale x 2 x half> undef)
120  %NXV4 = call fast half @llvm.vector.reduce.fmul.nxv4f16(half 0.0, <vscale x 4 x half> undef)
121  %NXV8 = call fast half @llvm.vector.reduce.fmul.nxv8f16(half 0.0, <vscale x 8 x half> undef)
122  %NXV16 = call fast half @llvm.vector.reduce.fmul.nxv16f16(half 0.0, <vscale x 16 x half> undef)
123  %NXV32 = call fast half @llvm.vector.reduce.fmul.nxv32f16(half 0.0, <vscale x 32 x half> undef)
124  ret void
125}
126
; Cost-model coverage for llvm.vector.reduce.fmul calls carrying the `fast`
; flag on float vectors: fixed widths <1 x float> through <128 x float>, then
; scalable widths <vscale x 1 x float> through <vscale x 16 x float>.
; Throughput expectations sit under the shared FP-REDUCE prefix, which both
; throughput RUN configurations enable, so they must report identical numbers.
; Code-size expectations use the SIZE prefix.
; Every scalable-vector call is expected to print "Invalid cost".
127define void @reduce_fmul_float() {
128; FP-REDUCE-LABEL: 'reduce_fmul_float'
129; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1 = call fast float @llvm.vector.reduce.fmul.v1f32(float 0.000000e+00, <1 x float> undef)
130; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2 = call fast float @llvm.vector.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef)
131; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4 = call fast float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
132; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V8 = call fast float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
133; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 121 for instruction: %V16 = call fast float @llvm.vector.reduce.fmul.v16f32(float 0.000000e+00, <16 x float> undef)
134; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 451 for instruction: %v32 = call fast float @llvm.vector.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef)
135; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 483 for instruction: %V64 = call fast float @llvm.vector.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef)
136; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 547 for instruction: %V128 = call fast float @llvm.vector.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef)
137; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call fast float @llvm.vector.reduce.fmul.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef)
138; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call fast float @llvm.vector.reduce.fmul.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef)
139; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call fast float @llvm.vector.reduce.fmul.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef)
140; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call fast float @llvm.vector.reduce.fmul.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef)
141; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call fast float @llvm.vector.reduce.fmul.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef)
142; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
143;
144; SIZE-LABEL: 'reduce_fmul_float'
145; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1 = call fast float @llvm.vector.reduce.fmul.v1f32(float 0.000000e+00, <1 x float> undef)
146; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2 = call fast float @llvm.vector.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef)
147; SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4 = call fast float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
148; SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8 = call fast float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
149; SIZE-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V16 = call fast float @llvm.vector.reduce.fmul.v16f32(float 0.000000e+00, <16 x float> undef)
150; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %v32 = call fast float @llvm.vector.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef)
151; SIZE-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V64 = call fast float @llvm.vector.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef)
152; SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V128 = call fast float @llvm.vector.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef)
153; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call fast float @llvm.vector.reduce.fmul.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef)
154; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call fast float @llvm.vector.reduce.fmul.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef)
155; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call fast float @llvm.vector.reduce.fmul.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef)
156; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call fast float @llvm.vector.reduce.fmul.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef)
157; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call fast float @llvm.vector.reduce.fmul.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef)
158; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
159;
  ; Fixed-width vectors; each call uses start value 0.0 and an undef vector.
160  %V1 = call fast float @llvm.vector.reduce.fmul.v1f32(float 0.0, <1 x float> undef)
161  %V2 = call fast float @llvm.vector.reduce.fmul.v2f32(float 0.0, <2 x float> undef)
162  %V4 = call fast float @llvm.vector.reduce.fmul.v4f32(float 0.0, <4 x float> undef)
163  %V8 = call fast float @llvm.vector.reduce.fmul.v8f32(float 0.0, <8 x float> undef)
164  %V16 = call fast float @llvm.vector.reduce.fmul.v16f32(float 0.0, <16 x float> undef)
165  %v32 = call fast float @llvm.vector.reduce.fmul.v32f32(float 0.0, <32 x float> undef)
166  %V64 = call fast float @llvm.vector.reduce.fmul.v64f32(float 0.0, <64 x float> undef)
167  %V128 = call fast float @llvm.vector.reduce.fmul.v128f32(float 0.0, <128 x float> undef)
  ; Scalable vectors; the expectations above require "Invalid cost" for these.
168  %NXV1 = call fast float @llvm.vector.reduce.fmul.nxv1f32(float 0.0, <vscale x 1 x float> undef)
169  %NXV2 = call fast float @llvm.vector.reduce.fmul.nxv2f32(float 0.0, <vscale x 2 x float> undef)
170  %NXV4 = call fast float @llvm.vector.reduce.fmul.nxv4f32(float 0.0, <vscale x 4 x float> undef)
171  %NXV8 = call fast float @llvm.vector.reduce.fmul.nxv8f32(float 0.0, <vscale x 8 x float> undef)
172  %NXV16 = call fast float @llvm.vector.reduce.fmul.nxv16f32(float 0.0, <vscale x 16 x float> undef)
173  ret void
174}
175
; Cost-model coverage for llvm.vector.reduce.fmul calls carrying the `fast`
; flag on double vectors: fixed widths <1 x double> through <128 x double>,
; then scalable widths <vscale x 1 x double> through <vscale x 8 x double>.
; Throughput expectations sit under the shared FP-REDUCE prefix, which both
; throughput RUN configurations enable, so they must report identical numbers.
; Code-size expectations use the SIZE prefix.
; Every scalable-vector call is expected to print "Invalid cost".
176define void @reduce_fmul_double() {
177; FP-REDUCE-LABEL: 'reduce_fmul_double'
178; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1 = call fast double @llvm.vector.reduce.fmul.v1f64(double 0.000000e+00, <1 x double> undef)
179; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2 = call fast double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
180; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V4 = call fast double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
181; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 91 for instruction: %V8 = call fast double @llvm.vector.reduce.fmul.v8f64(double 0.000000e+00, <8 x double> undef)
182; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 361 for instruction: %V16 = call fast double @llvm.vector.reduce.fmul.v16f64(double 0.000000e+00, <16 x double> undef)
183; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 393 for instruction: %v32 = call fast double @llvm.vector.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef)
184; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 457 for instruction: %V64 = call fast double @llvm.vector.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef)
185; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 585 for instruction: %V128 = call fast double @llvm.vector.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef)
186; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call fast double @llvm.vector.reduce.fmul.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef)
187; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call fast double @llvm.vector.reduce.fmul.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef)
188; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call fast double @llvm.vector.reduce.fmul.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef)
189; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call fast double @llvm.vector.reduce.fmul.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef)
190; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
191;
192; SIZE-LABEL: 'reduce_fmul_double'
193; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1 = call fast double @llvm.vector.reduce.fmul.v1f64(double 0.000000e+00, <1 x double> undef)
194; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2 = call fast double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
195; SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4 = call fast double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
196; SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8 = call fast double @llvm.vector.reduce.fmul.v8f64(double 0.000000e+00, <8 x double> undef)
197; SIZE-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V16 = call fast double @llvm.vector.reduce.fmul.v16f64(double 0.000000e+00, <16 x double> undef)
198; SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v32 = call fast double @llvm.vector.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef)
199; SIZE-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V64 = call fast double @llvm.vector.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef)
200; SIZE-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V128 = call fast double @llvm.vector.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef)
201; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call fast double @llvm.vector.reduce.fmul.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef)
202; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call fast double @llvm.vector.reduce.fmul.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef)
203; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call fast double @llvm.vector.reduce.fmul.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef)
204; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call fast double @llvm.vector.reduce.fmul.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef)
205; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
206;
  ; Fixed-width vectors; each call uses start value 0.0 and an undef vector.
207  %V1 = call fast double @llvm.vector.reduce.fmul.v1f64(double 0.0, <1 x double> undef)
208  %V2 = call fast double @llvm.vector.reduce.fmul.v2f64(double 0.0, <2 x double> undef)
209  %V4 = call fast double @llvm.vector.reduce.fmul.v4f64(double 0.0, <4 x double> undef)
210  %V8 = call fast double @llvm.vector.reduce.fmul.v8f64(double 0.0, <8 x double> undef)
211  %V16 = call fast double @llvm.vector.reduce.fmul.v16f64(double 0.0, <16 x double> undef)
212  %v32 = call fast double @llvm.vector.reduce.fmul.v32f64(double 0.0, <32 x double> undef)
213  %V64 = call fast double @llvm.vector.reduce.fmul.v64f64(double 0.0, <64 x double> undef)
214  %V128 = call fast double @llvm.vector.reduce.fmul.v128f64(double 0.0, <128 x double> undef)
  ; Scalable vectors; the expectations above require "Invalid cost" for these.
215  %NXV1 = call fast double @llvm.vector.reduce.fmul.nxv1f64(double 0.0, <vscale x 1 x double> undef)
216  %NXV2 = call fast double @llvm.vector.reduce.fmul.nxv2f64(double 0.0, <vscale x 2 x double> undef)
217  %NXV4 = call fast double @llvm.vector.reduce.fmul.nxv4f64(double 0.0, <vscale x 4 x double> undef)
218  %NXV8 = call fast double @llvm.vector.reduce.fmul.nxv8f64(double 0.0, <vscale x 8 x double> undef)
219  ret void
220}
221
; Cost-model coverage for llvm.vector.reduce.fmul on bfloat vectors when the
; call carries no fast-math flags (the "ordered" form named by this function).
; Operand types run from <1 x bfloat> through <128 x bfloat>, then from
; <vscale x 1 x bfloat> through <vscale x 32 x bfloat>.
; Throughput expectations sit under the shared FP-REDUCE prefix, which both
; throughput RUN configurations enable; code-size expectations use the SIZE
; prefix. Even the single-element %V1 call is expected to cost more than 1
; (3 for throughput, 2 for code size), and the <128 x bfloat> expectation is
; exactly twice the <64 x bfloat> one under both cost kinds.
; Every scalable-vector call is expected to print "Invalid cost".
222define void @reduce_ordered_fmul_bfloat() {
223; FP-REDUCE-LABEL: 'reduce_ordered_fmul_bfloat'
224; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call bfloat @llvm.vector.reduce.fmul.v1bf16(bfloat 0xR0000, <1 x bfloat> undef)
225; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2 = call bfloat @llvm.vector.reduce.fmul.v2bf16(bfloat 0xR0000, <2 x bfloat> undef)
226; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4 = call bfloat @llvm.vector.reduce.fmul.v4bf16(bfloat 0xR0000, <4 x bfloat> undef)
227; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V8 = call bfloat @llvm.vector.reduce.fmul.v8bf16(bfloat 0xR0000, <8 x bfloat> undef)
228; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V16 = call bfloat @llvm.vector.reduce.fmul.v16bf16(bfloat 0xR0000, <16 x bfloat> undef)
229; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %v32 = call bfloat @llvm.vector.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef)
230; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 255 for instruction: %V64 = call bfloat @llvm.vector.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef)
231; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 510 for instruction: %V128 = call bfloat @llvm.vector.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef)
232; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call bfloat @llvm.vector.reduce.fmul.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef)
233; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call bfloat @llvm.vector.reduce.fmul.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef)
234; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call bfloat @llvm.vector.reduce.fmul.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef)
235; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call bfloat @llvm.vector.reduce.fmul.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef)
236; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef)
237; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32 = call bfloat @llvm.vector.reduce.fmul.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef)
238; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
239;
240; SIZE-LABEL: 'reduce_ordered_fmul_bfloat'
241; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call bfloat @llvm.vector.reduce.fmul.v1bf16(bfloat 0xR0000, <1 x bfloat> undef)
242; SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2 = call bfloat @llvm.vector.reduce.fmul.v2bf16(bfloat 0xR0000, <2 x bfloat> undef)
243; SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4 = call bfloat @llvm.vector.reduce.fmul.v4bf16(bfloat 0xR0000, <4 x bfloat> undef)
244; SIZE-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V8 = call bfloat @llvm.vector.reduce.fmul.v8bf16(bfloat 0xR0000, <8 x bfloat> undef)
245; SIZE-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %V16 = call bfloat @llvm.vector.reduce.fmul.v16bf16(bfloat 0xR0000, <16 x bfloat> undef)
246; SIZE-NEXT:  Cost Model: Found an estimated cost of 95 for instruction: %v32 = call bfloat @llvm.vector.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef)
247; SIZE-NEXT:  Cost Model: Found an estimated cost of 191 for instruction: %V64 = call bfloat @llvm.vector.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef)
248; SIZE-NEXT:  Cost Model: Found an estimated cost of 382 for instruction: %V128 = call bfloat @llvm.vector.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef)
249; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call bfloat @llvm.vector.reduce.fmul.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef)
250; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call bfloat @llvm.vector.reduce.fmul.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef)
251; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call bfloat @llvm.vector.reduce.fmul.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef)
252; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call bfloat @llvm.vector.reduce.fmul.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef)
253; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef)
254; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32 = call bfloat @llvm.vector.reduce.fmul.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef)
255; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
256;
  ; Fixed-width vectors; no fast-math flags on any call in this function.
257  %V1 = call bfloat @llvm.vector.reduce.fmul.v1bf16(bfloat 0.0, <1 x bfloat> undef)
258  %V2 = call bfloat @llvm.vector.reduce.fmul.v2bf16(bfloat 0.0, <2 x bfloat> undef)
259  %V4 = call bfloat @llvm.vector.reduce.fmul.v4bf16(bfloat 0.0, <4 x bfloat> undef)
260  %V8 = call bfloat @llvm.vector.reduce.fmul.v8bf16(bfloat 0.0, <8 x bfloat> undef)
261  %V16 = call bfloat @llvm.vector.reduce.fmul.v16bf16(bfloat 0.0, <16 x bfloat> undef)
262  %v32 = call bfloat @llvm.vector.reduce.fmul.v32bf16(bfloat 0.0, <32 x bfloat> undef)
263  %V64 = call bfloat @llvm.vector.reduce.fmul.v64bf16(bfloat 0.0, <64 x bfloat> undef)
264  %V128 = call bfloat @llvm.vector.reduce.fmul.v128bf16(bfloat 0.0, <128 x bfloat> undef)
  ; Scalable vectors; the expectations above require "Invalid cost" for these.
265  %NXV1 = call bfloat @llvm.vector.reduce.fmul.nxv1bf16(bfloat 0.0, <vscale x 1 x bfloat> undef)
266  %NXV2 = call bfloat @llvm.vector.reduce.fmul.nxv2bf16(bfloat 0.0, <vscale x 2 x bfloat> undef)
267  %NXV4 = call bfloat @llvm.vector.reduce.fmul.nxv4bf16(bfloat 0.0, <vscale x 4 x bfloat> undef)
268  %NXV8 = call bfloat @llvm.vector.reduce.fmul.nxv8bf16(bfloat 0.0, <vscale x 8 x bfloat> undef)
269  %NXV16 = call bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0.0, <vscale x 16 x bfloat> undef)
270  %NXV32 = call bfloat @llvm.vector.reduce.fmul.nxv32bf16(bfloat 0.0, <vscale x 32 x bfloat> undef)
271  ret void
272}
273
; Cost-model expectations for llvm.vector.reduce.fmul on half vectors when the
; call carries no fast-math flags (the "ordered" case in this function's name).
; Covers fixed widths <1 x half> through <128 x half> and scalable widths
; <vscale x 1 x half> through <vscale x 32 x half>. Every scalable case is
; expected to report "Invalid cost" under both check prefixes.
; The FP-REDUCE checks are shared by both throughput RUN lines (+zvfh and
; +zvfhmin); the SIZE checks belong to the code-size RUN line.
; The check lines below are autogenerated by utils/update_analyze_test_checks.py;
; regenerate them with that script rather than editing them by hand.
274define void @reduce_ordered_fmul_half() {
275; FP-REDUCE-LABEL: 'reduce_ordered_fmul_half'
276; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call half @llvm.vector.reduce.fmul.v1f16(half 0xH0000, <1 x half> undef)
277; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2 = call half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)
278; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4 = call half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)
279; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V8 = call half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef)
280; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V16 = call half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef)
281; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %v32 = call half @llvm.vector.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef)
282; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 255 for instruction: %V64 = call half @llvm.vector.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef)
283; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 510 for instruction: %V128 = call half @llvm.vector.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef)
284; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call half @llvm.vector.reduce.fmul.nxv1f16(half 0xH0000, <vscale x 1 x half> undef)
285; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call half @llvm.vector.reduce.fmul.nxv2f16(half 0xH0000, <vscale x 2 x half> undef)
286; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call half @llvm.vector.reduce.fmul.nxv4f16(half 0xH0000, <vscale x 4 x half> undef)
287; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call half @llvm.vector.reduce.fmul.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
288; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call half @llvm.vector.reduce.fmul.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
289; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32 = call half @llvm.vector.reduce.fmul.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
290; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
291;
292; SIZE-LABEL: 'reduce_ordered_fmul_half'
293; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call half @llvm.vector.reduce.fmul.v1f16(half 0xH0000, <1 x half> undef)
294; SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2 = call half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)
295; SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4 = call half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)
296; SIZE-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V8 = call half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef)
297; SIZE-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %V16 = call half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef)
298; SIZE-NEXT:  Cost Model: Found an estimated cost of 95 for instruction: %v32 = call half @llvm.vector.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef)
299; SIZE-NEXT:  Cost Model: Found an estimated cost of 191 for instruction: %V64 = call half @llvm.vector.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef)
300; SIZE-NEXT:  Cost Model: Found an estimated cost of 382 for instruction: %V128 = call half @llvm.vector.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef)
301; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call half @llvm.vector.reduce.fmul.nxv1f16(half 0xH0000, <vscale x 1 x half> undef)
302; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call half @llvm.vector.reduce.fmul.nxv2f16(half 0xH0000, <vscale x 2 x half> undef)
303; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call half @llvm.vector.reduce.fmul.nxv4f16(half 0xH0000, <vscale x 4 x half> undef)
304; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call half @llvm.vector.reduce.fmul.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
305; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call half @llvm.vector.reduce.fmul.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
306; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32 = call half @llvm.vector.reduce.fmul.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
307; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
308;
309  %V1 = call half @llvm.vector.reduce.fmul.v1f16(half 0.0, <1 x half> undef)
310  %V2 = call half @llvm.vector.reduce.fmul.v2f16(half 0.0, <2 x half> undef)
311  %V4 = call half @llvm.vector.reduce.fmul.v4f16(half 0.0, <4 x half> undef)
312  %V8 = call half @llvm.vector.reduce.fmul.v8f16(half 0.0, <8 x half> undef)
313  %V16 = call half @llvm.vector.reduce.fmul.v16f16(half 0.0, <16 x half> undef)
314  %v32 = call half @llvm.vector.reduce.fmul.v32f16(half 0.0, <32 x half> undef)
315  %V64 = call half @llvm.vector.reduce.fmul.v64f16(half 0.0, <64 x half> undef)
316  %V128 = call half @llvm.vector.reduce.fmul.v128f16(half 0.0, <128 x half> undef)
317  %NXV1 = call half @llvm.vector.reduce.fmul.nxv1f16(half 0.0, <vscale x 1 x half> undef)
318  %NXV2 = call half @llvm.vector.reduce.fmul.nxv2f16(half 0.0, <vscale x 2 x half> undef)
319  %NXV4 = call half @llvm.vector.reduce.fmul.nxv4f16(half 0.0, <vscale x 4 x half> undef)
320  %NXV8 = call half @llvm.vector.reduce.fmul.nxv8f16(half 0.0, <vscale x 8 x half> undef)
321  %NXV16 = call half @llvm.vector.reduce.fmul.nxv16f16(half 0.0, <vscale x 16 x half> undef)
322  %NXV32 = call half @llvm.vector.reduce.fmul.nxv32f16(half 0.0, <vscale x 32 x half> undef)
323  ret void
324}
325
; Cost-model expectations for llvm.vector.reduce.fmul on float vectors when the
; call carries no fast-math flags (the "ordered" case in this function's name).
; Covers fixed widths <1 x float> through <128 x float> and scalable widths
; <vscale x 1 x float> through <vscale x 16 x float>. Every scalable case is
; expected to report "Invalid cost" under both check prefixes.
; The FP-REDUCE checks are shared by both throughput RUN lines; the SIZE checks
; belong to the code-size RUN line.
; The check lines below are autogenerated by utils/update_analyze_test_checks.py;
; regenerate them with that script rather than editing them by hand.
326define void @reduce_ordered_fmul_float() {
327; FP-REDUCE-LABEL: 'reduce_ordered_fmul_float'
328; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call float @llvm.vector.reduce.fmul.v1f32(float 0.000000e+00, <1 x float> undef)
329; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2 = call float @llvm.vector.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef)
330; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4 = call float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
331; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V8 = call float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
332; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V16 = call float @llvm.vector.reduce.fmul.v16f32(float 0.000000e+00, <16 x float> undef)
333; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %v32 = call float @llvm.vector.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef)
334; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 254 for instruction: %V64 = call float @llvm.vector.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef)
335; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 508 for instruction: %V128 = call float @llvm.vector.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef)
336; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call float @llvm.vector.reduce.fmul.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef)
337; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call float @llvm.vector.reduce.fmul.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef)
338; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call float @llvm.vector.reduce.fmul.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef)
339; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call float @llvm.vector.reduce.fmul.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef)
340; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call float @llvm.vector.reduce.fmul.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef)
341; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
342;
343; SIZE-LABEL: 'reduce_ordered_fmul_float'
344; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call float @llvm.vector.reduce.fmul.v1f32(float 0.000000e+00, <1 x float> undef)
345; SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2 = call float @llvm.vector.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef)
346; SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4 = call float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
347; SIZE-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V8 = call float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
348; SIZE-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %V16 = call float @llvm.vector.reduce.fmul.v16f32(float 0.000000e+00, <16 x float> undef)
349; SIZE-NEXT:  Cost Model: Found an estimated cost of 95 for instruction: %v32 = call float @llvm.vector.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef)
350; SIZE-NEXT:  Cost Model: Found an estimated cost of 190 for instruction: %V64 = call float @llvm.vector.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef)
351; SIZE-NEXT:  Cost Model: Found an estimated cost of 380 for instruction: %V128 = call float @llvm.vector.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef)
352; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call float @llvm.vector.reduce.fmul.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef)
353; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call float @llvm.vector.reduce.fmul.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef)
354; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call float @llvm.vector.reduce.fmul.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef)
355; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call float @llvm.vector.reduce.fmul.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef)
356; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call float @llvm.vector.reduce.fmul.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef)
357; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
358;
359  %V1 = call float @llvm.vector.reduce.fmul.v1f32(float 0.0, <1 x float> undef)
360  %V2 = call float @llvm.vector.reduce.fmul.v2f32(float 0.0, <2 x float> undef)
361  %V4 = call float @llvm.vector.reduce.fmul.v4f32(float 0.0, <4 x float> undef)
362  %V8 = call float @llvm.vector.reduce.fmul.v8f32(float 0.0, <8 x float> undef)
363  %V16 = call float @llvm.vector.reduce.fmul.v16f32(float 0.0, <16 x float> undef)
364  %v32 = call float @llvm.vector.reduce.fmul.v32f32(float 0.0, <32 x float> undef)
365  %V64 = call float @llvm.vector.reduce.fmul.v64f32(float 0.0, <64 x float> undef)
366  %V128 = call float @llvm.vector.reduce.fmul.v128f32(float 0.0, <128 x float> undef)
367  %NXV1 = call float @llvm.vector.reduce.fmul.nxv1f32(float 0.0, <vscale x 1 x float> undef)
368  %NXV2 = call float @llvm.vector.reduce.fmul.nxv2f32(float 0.0, <vscale x 2 x float> undef)
369  %NXV4 = call float @llvm.vector.reduce.fmul.nxv4f32(float 0.0, <vscale x 4 x float> undef)
370  %NXV8 = call float @llvm.vector.reduce.fmul.nxv8f32(float 0.0, <vscale x 8 x float> undef)
371  %NXV16 = call float @llvm.vector.reduce.fmul.nxv16f32(float 0.0, <vscale x 16 x float> undef)
372  ret void
373}
374
; Cost-model expectations for llvm.vector.reduce.fmul on double vectors when the
; call carries no fast-math flags (the "ordered" case in this function's name).
; Covers fixed widths <1 x double> through <128 x double> and scalable widths
; <vscale x 1 x double> through <vscale x 8 x double>. Every scalable case is
; expected to report "Invalid cost" under both check prefixes.
; The FP-REDUCE checks are shared by both throughput RUN lines; the SIZE checks
; belong to the code-size RUN line.
; The check lines below are autogenerated by utils/update_analyze_test_checks.py;
; regenerate them with that script rather than editing them by hand.
375define void @reduce_ordered_fmul_double() {
376; FP-REDUCE-LABEL: 'reduce_ordered_fmul_double'
377; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call double @llvm.vector.reduce.fmul.v1f64(double 0.000000e+00, <1 x double> undef)
378; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2 = call double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
379; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4 = call double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
380; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V8 = call double @llvm.vector.reduce.fmul.v8f64(double 0.000000e+00, <8 x double> undef)
381; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V16 = call double @llvm.vector.reduce.fmul.v16f64(double 0.000000e+00, <16 x double> undef)
382; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 126 for instruction: %v32 = call double @llvm.vector.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef)
383; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 252 for instruction: %V64 = call double @llvm.vector.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef)
384; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 504 for instruction: %V128 = call double @llvm.vector.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef)
385; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call double @llvm.vector.reduce.fmul.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef)
386; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call double @llvm.vector.reduce.fmul.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef)
387; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call double @llvm.vector.reduce.fmul.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef)
388; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call double @llvm.vector.reduce.fmul.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef)
389; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
390;
391; SIZE-LABEL: 'reduce_ordered_fmul_double'
392; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call double @llvm.vector.reduce.fmul.v1f64(double 0.000000e+00, <1 x double> undef)
393; SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2 = call double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
394; SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4 = call double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
395; SIZE-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V8 = call double @llvm.vector.reduce.fmul.v8f64(double 0.000000e+00, <8 x double> undef)
396; SIZE-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %V16 = call double @llvm.vector.reduce.fmul.v16f64(double 0.000000e+00, <16 x double> undef)
397; SIZE-NEXT:  Cost Model: Found an estimated cost of 94 for instruction: %v32 = call double @llvm.vector.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef)
398; SIZE-NEXT:  Cost Model: Found an estimated cost of 188 for instruction: %V64 = call double @llvm.vector.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef)
399; SIZE-NEXT:  Cost Model: Found an estimated cost of 376 for instruction: %V128 = call double @llvm.vector.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef)
400; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call double @llvm.vector.reduce.fmul.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef)
401; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call double @llvm.vector.reduce.fmul.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef)
402; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call double @llvm.vector.reduce.fmul.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef)
403; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call double @llvm.vector.reduce.fmul.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef)
404; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
405;
406  %V1 = call double @llvm.vector.reduce.fmul.v1f64(double 0.0, <1 x double> undef)
407  %V2 = call double @llvm.vector.reduce.fmul.v2f64(double 0.0, <2 x double> undef)
408  %V4 = call double @llvm.vector.reduce.fmul.v4f64(double 0.0, <4 x double> undef)
409  %V8 = call double @llvm.vector.reduce.fmul.v8f64(double 0.0, <8 x double> undef)
410  %V16 = call double @llvm.vector.reduce.fmul.v16f64(double 0.0, <16 x double> undef)
411  %v32 = call double @llvm.vector.reduce.fmul.v32f64(double 0.0, <32 x double> undef)
412  %V64 = call double @llvm.vector.reduce.fmul.v64f64(double 0.0, <64 x double> undef)
413  %V128 = call double @llvm.vector.reduce.fmul.v128f64(double 0.0, <128 x double> undef)
414  %NXV1 = call double @llvm.vector.reduce.fmul.nxv1f64(double 0.0, <vscale x 1 x double> undef)
415  %NXV2 = call double @llvm.vector.reduce.fmul.nxv2f64(double 0.0, <vscale x 2 x double> undef)
416  %NXV4 = call double @llvm.vector.reduce.fmul.nxv4f64(double 0.0, <vscale x 4 x double> undef)
417  %NXV8 = call double @llvm.vector.reduce.fmul.nxv8f64(double 0.0, <vscale x 8 x double> undef)
418  ret void
419}
420