; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfh,+f,+d -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=1 | FileCheck %s --check-prefixes=NO_FOLDING
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfh,+f,+d -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=2 | FileCheck %s --check-prefixes=NO_FOLDING
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfh,+f,+d -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=3 | FileCheck %s --check-prefixes=FOLDING,ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfhmin,+f,+d -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=3 | FileCheck %s --check-prefixes=FOLDING,ZVFHMIN
; Check that the default value of --riscv-lower-ext-max-web-size enables the
; web folding, i.e. that it is greater than 3.
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfh,+f,+d -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=FOLDING
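; Each test below builds a small "web" of extends: fpext results shared by
; several arithmetic users. When every user of the extends can become a
; widening instruction and the web is no larger than the limit, the extends
; fold away (vfwmul/vfwadd/vfwsub/vfwmacc); otherwise they are emitted as
; separate vfwcvt.f.f.v conversions.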
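; f16 -> f32. With zvfh the three users fold into vfwmul/vfwadd/vfwsub.
; zvfhmin only provides the f16<->f32 conversions, not the widening
; arithmetic, so even with folding enabled the output matches NO_FOLDING.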
define void @vfwmul_v2f16_multiple_users(ptr %x, ptr %y, ptr %z, <2 x half> %a, <2 x half> %b, <2 x half> %b2) {
; NO_FOLDING-LABEL: vfwmul_v2f16_multiple_users:
; NO_FOLDING:       # %bb.0:
; NO_FOLDING-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; NO_FOLDING-NEXT:    vfwcvt.f.f.v v11, v8
; NO_FOLDING-NEXT:    vfwcvt.f.f.v v8, v9
; NO_FOLDING-NEXT:    vfwcvt.f.f.v v9, v10
; NO_FOLDING-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; NO_FOLDING-NEXT:    vfmul.vv v10, v11, v8
; NO_FOLDING-NEXT:    vfadd.vv v11, v11, v9
; NO_FOLDING-NEXT:    vfsub.vv v8, v8, v9
; NO_FOLDING-NEXT:    vse32.v v10, (a0)
; NO_FOLDING-NEXT:    vse32.v v11, (a1)
; NO_FOLDING-NEXT:    vse32.v v8, (a2)
; NO_FOLDING-NEXT:    ret
;
; ZVFH-LABEL: vfwmul_v2f16_multiple_users:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; ZVFH-NEXT:    vfwmul.vv v11, v8, v9
; ZVFH-NEXT:    vfwadd.vv v12, v8, v10
; ZVFH-NEXT:    vfwsub.vv v8, v9, v10
; ZVFH-NEXT:    vse32.v v11, (a0)
; ZVFH-NEXT:    vse32.v v12, (a1)
; ZVFH-NEXT:    vse32.v v8, (a2)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfwmul_v2f16_multiple_users:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmul.vv v10, v11, v8
; ZVFHMIN-NEXT:    vfadd.vv v11, v11, v9
; ZVFHMIN-NEXT:    vfsub.vv v8, v8, v9
; ZVFHMIN-NEXT:    vse32.v v10, (a0)
; ZVFHMIN-NEXT:    vse32.v v11, (a1)
; ZVFHMIN-NEXT:    vse32.v v8, (a2)
; ZVFHMIN-NEXT:    ret
  %c = fpext <2 x half> %a to <2 x float>
  %d = fpext <2 x half> %b to <2 x float>
  %d2 = fpext <2 x half> %b2 to <2 x float>
  %e = fmul <2 x float> %c, %d
  %f = fadd <2 x float> %c, %d2
  %g = fsub <2 x float> %d, %d2
  store <2 x float> %e, ptr %x
  store <2 x float> %f, ptr %y
  store <2 x float> %g, ptr %z
  ret void
}
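; Same web as above, widening f32 -> f64 instead; all three users fold.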
define void @vfwmul_v2f32_multiple_users(ptr %x, ptr %y, ptr %z, <2 x float> %a, <2 x float> %b, <2 x float> %b2) {
; NO_FOLDING-LABEL: vfwmul_v2f32_multiple_users:
; NO_FOLDING:       # %bb.0:
; NO_FOLDING-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; NO_FOLDING-NEXT:    vfwcvt.f.f.v v11, v8
; NO_FOLDING-NEXT:    vfwcvt.f.f.v v8, v9
; NO_FOLDING-NEXT:    vfwcvt.f.f.v v9, v10
; NO_FOLDING-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; NO_FOLDING-NEXT:    vfmul.vv v10, v11, v8
; NO_FOLDING-NEXT:    vfadd.vv v11, v11, v9
; NO_FOLDING-NEXT:    vfsub.vv v8, v8, v9
; NO_FOLDING-NEXT:    vse64.v v10, (a0)
; NO_FOLDING-NEXT:    vse64.v v11, (a1)
; NO_FOLDING-NEXT:    vse64.v v8, (a2)
; NO_FOLDING-NEXT:    ret
;
; FOLDING-LABEL: vfwmul_v2f32_multiple_users:
; FOLDING:       # %bb.0:
; FOLDING-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; FOLDING-NEXT:    vfwmul.vv v11, v8, v9
; FOLDING-NEXT:    vfwadd.vv v12, v8, v10
; FOLDING-NEXT:    vfwsub.vv v8, v9, v10
; FOLDING-NEXT:    vse64.v v11, (a0)
; FOLDING-NEXT:    vse64.v v12, (a1)
; FOLDING-NEXT:    vse64.v v8, (a2)
; FOLDING-NEXT:    ret
  %c = fpext <2 x float> %a to <2 x double>
  %d = fpext <2 x float> %b to <2 x double>
  %d2 = fpext <2 x float> %b2 to <2 x double>
  %e = fmul <2 x double> %c, %d
  %f = fadd <2 x double> %c, %d2
  %g = fsub <2 x double> %d, %d2
  store <2 x double> %e, ptr %x
  store <2 x double> %f, ptr %y
  store <2 x double> %g, ptr %z
  ret void
}
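; The fma addend %w is already a <2 x double>, so the multiplicands' extends
; can fold and the fma becomes vfwmacc; the fmul and fsub fold as above.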
define void @vfwmacc_v2f32_multiple_users(ptr %x, ptr %y, ptr %z, <2 x float> %a, <2 x float> %b, <2 x float> %b2, <2 x double> %w) {
; NO_FOLDING-LABEL: vfwmacc_v2f32_multiple_users:
; NO_FOLDING:       # %bb.0:
; NO_FOLDING-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; NO_FOLDING-NEXT:    vfwcvt.f.f.v v12, v8
; NO_FOLDING-NEXT:    vfwcvt.f.f.v v8, v9
; NO_FOLDING-NEXT:    vfwcvt.f.f.v v9, v10
; NO_FOLDING-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; NO_FOLDING-NEXT:    vfmul.vv v10, v12, v8
; NO_FOLDING-NEXT:    vfmadd.vv v12, v9, v11
; NO_FOLDING-NEXT:    vfsub.vv v8, v8, v9
; NO_FOLDING-NEXT:    vse64.v v10, (a0)
; NO_FOLDING-NEXT:    vse64.v v12, (a1)
; NO_FOLDING-NEXT:    vse64.v v8, (a2)
; NO_FOLDING-NEXT:    ret
;
; FOLDING-LABEL: vfwmacc_v2f32_multiple_users:
; FOLDING:       # %bb.0:
; FOLDING-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; FOLDING-NEXT:    vfwmul.vv v12, v8, v9
; FOLDING-NEXT:    vfwsub.vv v13, v9, v10
; FOLDING-NEXT:    vfwmacc.vv v11, v8, v10
; FOLDING-NEXT:    vse64.v v12, (a0)
; FOLDING-NEXT:    vse64.v v11, (a1)
; FOLDING-NEXT:    vse64.v v13, (a2)
; FOLDING-NEXT:    ret
  %c = fpext <2 x float> %a to <2 x double>
  %d = fpext <2 x float> %b to <2 x double>
  %d2 = fpext <2 x float> %b2 to <2 x double>
  %e = fmul <2 x double> %c, %d
  %f = call <2 x double> @llvm.fma(<2 x double> %c, <2 x double> %d2, <2 x double> %w)
  %g = fsub <2 x double> %d, %d2
  store <2 x double> %e, ptr %x
  store <2 x double> %f, ptr %y
  store <2 x double> %g, ptr %z
  ret void
}
; Negative test. We can't fold because the FMA addend is a user of one of
; the fpexts: the addend must already be wide, so that extend has to stay.
define void @vfwmacc_v2f32_multiple_users_addend_user(ptr %x, ptr %y, ptr %z, <2 x float> %a, <2 x float> %b, <2 x float> %b2) {
; NO_FOLDING-LABEL: vfwmacc_v2f32_multiple_users_addend_user:
; NO_FOLDING:       # %bb.0:
; NO_FOLDING-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; NO_FOLDING-NEXT:    vfwcvt.f.f.v v11, v8
; NO_FOLDING-NEXT:    vfwcvt.f.f.v v8, v9
; NO_FOLDING-NEXT:    vfwcvt.f.f.v v9, v10
; NO_FOLDING-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; NO_FOLDING-NEXT:    vfmul.vv v10, v11, v8
; NO_FOLDING-NEXT:    vfmadd.vv v11, v9, v8
; NO_FOLDING-NEXT:    vfsub.vv v8, v8, v9
; NO_FOLDING-NEXT:    vse64.v v10, (a0)
; NO_FOLDING-NEXT:    vse64.v v11, (a1)
; NO_FOLDING-NEXT:    vse64.v v8, (a2)
; NO_FOLDING-NEXT:    ret
;
; FOLDING-LABEL: vfwmacc_v2f32_multiple_users_addend_user:
; FOLDING:       # %bb.0:
; FOLDING-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; FOLDING-NEXT:    vfwcvt.f.f.v v11, v8
; FOLDING-NEXT:    vfwcvt.f.f.v v8, v9
; FOLDING-NEXT:    vfwcvt.f.f.v v9, v10
; FOLDING-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; FOLDING-NEXT:    vfmul.vv v10, v11, v8
; FOLDING-NEXT:    vfmadd.vv v11, v9, v8
; FOLDING-NEXT:    vfsub.vv v8, v8, v9
; FOLDING-NEXT:    vse64.v v10, (a0)
; FOLDING-NEXT:    vse64.v v11, (a1)
; FOLDING-NEXT:    vse64.v v8, (a2)
; FOLDING-NEXT:    ret
  %c = fpext <2 x float> %a to <2 x double>
  %d = fpext <2 x float> %b to <2 x double>
  %d2 = fpext <2 x float> %b2 to <2 x double>
  %e = fmul <2 x double> %c, %d
  %f = call <2 x double> @llvm.fma(<2 x double> %c, <2 x double> %d2, <2 x double> %d)
  %g = fsub <2 x double> %d, %d2
  store <2 x double> %e, ptr %x
  store <2 x double> %f, ptr %y
  store <2 x double> %g, ptr %z
  ret void
}
; Negative test. We can't fold because the FMA addend is a user of the
; fpext: %d feeds both a multiplicand and the addend, and the addend must
; already be wide.
define void @vfwmacc_v2f32_addend_user(ptr %x, <2 x float> %a, <2 x float> %b) {
; NO_FOLDING-LABEL: vfwmacc_v2f32_addend_user:
; NO_FOLDING:       # %bb.0:
; NO_FOLDING-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; NO_FOLDING-NEXT:    vfwcvt.f.f.v v10, v8
; NO_FOLDING-NEXT:    vfwcvt.f.f.v v8, v9
; NO_FOLDING-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; NO_FOLDING-NEXT:    vfmadd.vv v8, v10, v8
; NO_FOLDING-NEXT:    vse64.v v8, (a0)
; NO_FOLDING-NEXT:    ret
;
; FOLDING-LABEL: vfwmacc_v2f32_addend_user:
; FOLDING:       # %bb.0:
; FOLDING-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; FOLDING-NEXT:    vfwcvt.f.f.v v10, v8
; FOLDING-NEXT:    vfwcvt.f.f.v v8, v9
; FOLDING-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; FOLDING-NEXT:    vfmadd.vv v8, v10, v8
; FOLDING-NEXT:    vse64.v v8, (a0)
; FOLDING-NEXT:    ret
  %c = fpext <2 x float> %a to <2 x double>
  %d = fpext <2 x float> %b to <2 x double>
  %f = call <2 x double> @llvm.fma(<2 x double> %c, <2 x double> %d, <2 x double> %d)
  store <2 x double> %f, ptr %x
  ret void
}