; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v,+zvfh,+f,+d -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfh,+f,+d -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s

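; Make sure that an fpext of both f16 operands followed by an f32 fmul is
; selected as a single widening vfwmul.vv, with vtype set from the narrower
; f16 source operands.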
define <2 x float> @vfwmul_v2f16(ptr %x, ptr %y) {
; CHECK-LABEL: vfwmul_v2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vle16.v v10, (a1)
; CHECK-NEXT:    vfwmul.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <2 x half>, ptr %x
  %b = load <2 x half>, ptr %y
  %c = fpext <2 x half> %a to <2 x float>
  %d = fpext <2 x half> %b to <2 x float>
  %e = fmul <2 x float> %c, %d
  ret <2 x float> %e
}

define <4 x float> @vfwmul_v4f16(ptr %x, ptr %y) {
; CHECK-LABEL: vfwmul_v4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vle16.v v10, (a1)
; CHECK-NEXT:    vfwmul.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <4 x half>, ptr %x
  %b = load <4 x half>, ptr %y
  %c = fpext <4 x half> %a to <4 x float>
  %d = fpext <4 x half> %b to <4 x float>
  %e = fmul <4 x float> %c, %d
  ret <4 x float> %e
}

define <8 x float> @vfwmul_v8f16(ptr %x, ptr %y) {
; CHECK-LABEL: vfwmul_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v10, (a0)
; CHECK-NEXT:    vle16.v v11, (a1)
; CHECK-NEXT:    vfwmul.vv v8, v10, v11
; CHECK-NEXT:    ret
  %a = load <8 x half>, ptr %x
  %b = load <8 x half>, ptr %y
  %c = fpext <8 x half> %a to <8 x float>
  %d = fpext <8 x half> %b to <8 x float>
  %e = fmul <8 x float> %c, %d
  ret <8 x float> %e
}

define <16 x float> @vfwmul_v16f16(ptr %x, ptr %y) {
; CHECK-LABEL: vfwmul_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v12, (a0)
; CHECK-NEXT:    vle16.v v14, (a1)
; CHECK-NEXT:    vfwmul.vv v8, v12, v14
; CHECK-NEXT:    ret
  %a = load <16 x half>, ptr %x
  %b = load <16 x half>, ptr %y
  %c = fpext <16 x half> %a to <16 x float>
  %d = fpext <16 x half> %b to <16 x float>
  %e = fmul <16 x float> %c, %d
  ret <16 x float> %e
}

define <32 x float> @vfwmul_v32f16(ptr %x, ptr %y) {
; CHECK-LABEL: vfwmul_v32f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT:    vle16.v v16, (a0)
; CHECK-NEXT:    vle16.v v20, (a1)
; CHECK-NEXT:    vfwmul.vv v8, v16, v20
; CHECK-NEXT:    ret
  %a = load <32 x half>, ptr %x
  %b = load <32 x half>, ptr %y
  %c = fpext <32 x half> %a to <32 x float>
  %d = fpext <32 x half> %b to <32 x float>
  %e = fmul <32 x float> %c, %d
  ret <32 x float> %e
}

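; <64 x float> does not fit in an LMUL=8 register group under the default
; 128-bit minimum VLEN, so the multiply is split into two <32 x half> halves
; via vslidedown and spill code is emitted around the two vfwmul.vv
; instructions.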
define <64 x float> @vfwmul_v64f16(ptr %x, ptr %y) {
; CHECK-LABEL: vfwmul_v64f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 4
; CHECK-NEXT:    sub sp, sp, a2
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT:    li a2, 64
; CHECK-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vle16.v v0, (a1)
; CHECK-NEXT:    li a0, 32
; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT:    vslidedown.vx v16, v8, a0
; CHECK-NEXT:    vslidedown.vx v8, v0, a0
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vfwmul.vv v24, v16, v8
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vfwmul.vv v8, v16, v0
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %a = load <64 x half>, ptr %x
  %b = load <64 x half>, ptr %y
  %c = fpext <64 x half> %a to <64 x float>
  %d = fpext <64 x half> %b to <64 x float>
  %e = fmul <64 x float> %c, %d
  ret <64 x float> %e
}

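; The same widening applies one element size up: fpext f32 -> f64 plus fmul
; becomes vfwmul.vv with e32 vtype.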
define <2 x double> @vfwmul_v2f32(ptr %x, ptr %y) {
; CHECK-LABEL: vfwmul_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vle32.v v10, (a1)
; CHECK-NEXT:    vfwmul.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <2 x float>, ptr %x
  %b = load <2 x float>, ptr %y
  %c = fpext <2 x float> %a to <2 x double>
  %d = fpext <2 x float> %b to <2 x double>
  %e = fmul <2 x double> %c, %d
  ret <2 x double> %e
}

define <4 x double> @vfwmul_v4f32(ptr %x, ptr %y) {
; CHECK-LABEL: vfwmul_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v10, (a0)
; CHECK-NEXT:    vle32.v v11, (a1)
; CHECK-NEXT:    vfwmul.vv v8, v10, v11
; CHECK-NEXT:    ret
  %a = load <4 x float>, ptr %x
  %b = load <4 x float>, ptr %y
  %c = fpext <4 x float> %a to <4 x double>
  %d = fpext <4 x float> %b to <4 x double>
  %e = fmul <4 x double> %c, %d
  ret <4 x double> %e
}

define <8 x double> @vfwmul_v8f32(ptr %x, ptr %y) {
; CHECK-LABEL: vfwmul_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v12, (a0)
; CHECK-NEXT:    vle32.v v14, (a1)
; CHECK-NEXT:    vfwmul.vv v8, v12, v14
; CHECK-NEXT:    ret
  %a = load <8 x float>, ptr %x
  %b = load <8 x float>, ptr %y
  %c = fpext <8 x float> %a to <8 x double>
  %d = fpext <8 x float> %b to <8 x double>
  %e = fmul <8 x double> %c, %d
  ret <8 x double> %e
}

define <16 x double> @vfwmul_v16f32(ptr %x, ptr %y) {
; CHECK-LABEL: vfwmul_v16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle32.v v16, (a0)
; CHECK-NEXT:    vle32.v v20, (a1)
; CHECK-NEXT:    vfwmul.vv v8, v16, v20
; CHECK-NEXT:    ret
  %a = load <16 x float>, ptr %x
  %b = load <16 x float>, ptr %y
  %c = fpext <16 x float> %a to <16 x double>
  %d = fpext <16 x float> %b to <16 x double>
  %e = fmul <16 x double> %c, %d
  ret <16 x double> %e
}

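; As in the v64f16 case, the widened <32 x double> result exceeds an LMUL=8
; register group, so the operation is split and spills are emitted.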
define <32 x double> @vfwmul_v32f32(ptr %x, ptr %y) {
; CHECK-LABEL: vfwmul_v32f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 4
; CHECK-NEXT:    sub sp, sp, a2
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vle32.v v0, (a1)
; CHECK-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
; CHECK-NEXT:    vslidedown.vi v16, v8, 16
; CHECK-NEXT:    vslidedown.vi v8, v0, 16
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vfwmul.vv v24, v16, v8
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vfwmul.vv v8, v16, v0
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %a = load <32 x float>, ptr %x
  %b = load <32 x float>, ptr %y
  %c = fpext <32 x float> %a to <32 x double>
  %d = fpext <32 x float> %b to <32 x double>
  %e = fmul <32 x double> %c, %d
  ret <32 x double> %e
}

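; A splat of a scalar operand feeding the widening multiply should use the
; vector-scalar form vfwmul.vf, keeping the f16 scalar in fa0.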
define <2 x float> @vfwmul_vf_v2f16(ptr %x, half %y) {
; CHECK-LABEL: vfwmul_vf_v2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vfwmul.vf v8, v9, fa0
; CHECK-NEXT:    ret
  %a = load <2 x half>, ptr %x
  %b = insertelement <2 x half> poison, half %y, i32 0
  %c = shufflevector <2 x half> %b, <2 x half> poison, <2 x i32> zeroinitializer
  %d = fpext <2 x half> %a to <2 x float>
  %e = fpext <2 x half> %c to <2 x float>
  %f = fmul <2 x float> %d, %e
  ret <2 x float> %f
}

define <4 x float> @vfwmul_vf_v4f16(ptr %x, half %y) {
; CHECK-LABEL: vfwmul_vf_v4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vfwmul.vf v8, v9, fa0
; CHECK-NEXT:    ret
  %a = load <4 x half>, ptr %x
  %b = insertelement <4 x half> poison, half %y, i32 0
  %c = shufflevector <4 x half> %b, <4 x half> poison, <4 x i32> zeroinitializer
  %d = fpext <4 x half> %a to <4 x float>
  %e = fpext <4 x half> %c to <4 x float>
  %f = fmul <4 x float> %d, %e
  ret <4 x float> %f
}

define <8 x float> @vfwmul_vf_v8f16(ptr %x, half %y) {
; CHECK-LABEL: vfwmul_vf_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v10, (a0)
; CHECK-NEXT:    vfwmul.vf v8, v10, fa0
; CHECK-NEXT:    ret
  %a = load <8 x half>, ptr %x
  %b = insertelement <8 x half> poison, half %y, i32 0
  %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
  %d = fpext <8 x half> %a to <8 x float>
  %e = fpext <8 x half> %c to <8 x float>
  %f = fmul <8 x float> %d, %e
  ret <8 x float> %f
}

define <16 x float> @vfwmul_vf_v16f16(ptr %x, half %y) {
; CHECK-LABEL: vfwmul_vf_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v12, (a0)
; CHECK-NEXT:    vfwmul.vf v8, v12, fa0
; CHECK-NEXT:    ret
  %a = load <16 x half>, ptr %x
  %b = insertelement <16 x half> poison, half %y, i32 0
  %c = shufflevector <16 x half> %b, <16 x half> poison, <16 x i32> zeroinitializer
  %d = fpext <16 x half> %a to <16 x float>
  %e = fpext <16 x half> %c to <16 x float>
  %f = fmul <16 x float> %d, %e
  ret <16 x float> %f
}

define <32 x float> @vfwmul_vf_v32f16(ptr %x, half %y) {
; CHECK-LABEL: vfwmul_vf_v32f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT:    vle16.v v16, (a0)
; CHECK-NEXT:    vfwmul.vf v8, v16, fa0
; CHECK-NEXT:    ret
  %a = load <32 x half>, ptr %x
  %b = insertelement <32 x half> poison, half %y, i32 0
  %c = shufflevector <32 x half> %b, <32 x half> poison, <32 x i32> zeroinitializer
  %d = fpext <32 x half> %a to <32 x float>
  %e = fpext <32 x half> %c to <32 x float>
  %f = fmul <32 x float> %d, %e
  ret <32 x float> %f
}

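; Vector-scalar splat tests for the f32 -> f64 widening multiply.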
define <2 x double> @vfwmul_vf_v2f32(ptr %x, float %y) {
; CHECK-LABEL: vfwmul_vf_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vfwmul.vf v8, v9, fa0
; CHECK-NEXT:    ret
  %a = load <2 x float>, ptr %x
  %b = insertelement <2 x float> poison, float %y, i32 0
  %c = shufflevector <2 x float> %b, <2 x float> poison, <2 x i32> zeroinitializer
  %d = fpext <2 x float> %a to <2 x double>
  %e = fpext <2 x float> %c to <2 x double>
  %f = fmul <2 x double> %d, %e
  ret <2 x double> %f
}

define <4 x double> @vfwmul_vf_v4f32(ptr %x, float %y) {
; CHECK-LABEL: vfwmul_vf_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v10, (a0)
; CHECK-NEXT:    vfwmul.vf v8, v10, fa0
; CHECK-NEXT:    ret
  %a = load <4 x float>, ptr %x
  %b = insertelement <4 x float> poison, float %y, i32 0
  %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
  %d = fpext <4 x float> %a to <4 x double>
  %e = fpext <4 x float> %c to <4 x double>
  %f = fmul <4 x double> %d, %e
  ret <4 x double> %f
}

define <8 x double> @vfwmul_vf_v8f32(ptr %x, float %y) {
; CHECK-LABEL: vfwmul_vf_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v12, (a0)
; CHECK-NEXT:    vfwmul.vf v8, v12, fa0
; CHECK-NEXT:    ret
  %a = load <8 x float>, ptr %x
  %b = insertelement <8 x float> poison, float %y, i32 0
  %c = shufflevector <8 x float> %b, <8 x float> poison, <8 x i32> zeroinitializer
  %d = fpext <8 x float> %a to <8 x double>
  %e = fpext <8 x float> %c to <8 x double>
  %f = fmul <8 x double> %d, %e
  ret <8 x double> %f
}

define <16 x double> @vfwmul_vf_v16f32(ptr %x, float %y) {
; CHECK-LABEL: vfwmul_vf_v16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle32.v v16, (a0)
; CHECK-NEXT:    vfwmul.vf v8, v16, fa0
; CHECK-NEXT:    ret
  %a = load <16 x float>, ptr %x
  %b = insertelement <16 x float> poison, float %y, i32 0
  %c = shufflevector <16 x float> %b, <16 x float> poison, <16 x i32> zeroinitializer
  %d = fpext <16 x float> %a to <16 x double>
  %e = fpext <16 x float> %c to <16 x double>
  %f = fmul <16 x double> %d, %e
  ret <16 x double> %f
}

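; The split <32 x double> vector-scalar case needs no spills: the scalar stays
; in fa0 and only the high half of the vector source is slid down.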
define <32 x double> @vfwmul_vf_v32f32(ptr %x, float %y) {
; CHECK-LABEL: vfwmul_vf_v32f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT:    vle32.v v24, (a0)
; CHECK-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v24, 16
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vfwmul.vf v16, v8, fa0
; CHECK-NEXT:    vfwmul.vf v8, v24, fa0
; CHECK-NEXT:    ret
  %a = load <32 x float>, ptr %x
  %b = insertelement <32 x float> poison, float %y, i32 0
  %c = shufflevector <32 x float> %b, <32 x float> poison, <32 x i32> zeroinitializer
  %d = fpext <32 x float> %a to <32 x double>
  %e = fpext <32 x float> %c to <32 x double>
  %f = fmul <32 x double> %d, %e
  ret <32 x double> %f
}

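; Multiplying a widened value by itself should still form vfwmul.vv, with the
; same register used for both sources.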
define <2 x float> @vfwmul_squared_v2f16_v2f32(ptr %x) {
; CHECK-LABEL: vfwmul_squared_v2f16_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vfwmul.vv v8, v9, v9
; CHECK-NEXT:    ret
  %a = load <2 x half>, ptr %x
  %b = fpext <2 x half> %a to <2 x float>
  %c = fmul <2 x float> %b, %b
  ret <2 x float> %c
}

define <2 x double> @vfwmul_squared_v2f32_v2f64(ptr %x) {
; CHECK-LABEL: vfwmul_squared_v2f32_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vfwmul.vv v8, v9, v9
; CHECK-NEXT:    ret
  %a = load <2 x float>, ptr %x
  %b = fpext <2 x float> %a to <2 x double>
  %c = fmul <2 x double> %b, %b
  ret <2 x double> %c
}

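; vfwmul only widens by one step (SEW to 2*SEW), so an f16 -> f64 widening
; multiply needs an explicit vfwcvt.f.f.v to f32 first, then vfwmul.vv at e32.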
define <2 x double> @vfwmul_squared_v2f16_v2f64(ptr %x) {
; CHECK-LABEL: vfwmul_squared_v2f16_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfwcvt.f.f.v v9, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfwmul.vv v8, v9, v9
; CHECK-NEXT:    ret
  %a = load <2 x half>, ptr %x
  %b = fpext <2 x half> %a to <2 x double>
  %c = fmul <2 x double> %b, %b
  ret <2 x double> %c
}

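; Here the scalar is extended to f32 before being splatted; the fpext should
; still fold so that vfwmul.vf is used on the f16 operands.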
define <2 x float> @vfwmul_vf2_v2f32(<2 x half> %x, half %y) {
; CHECK-LABEL: vfwmul_vf2_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vfwmul.vf v9, v8, fa0
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %a = fpext <2 x half> %x to <2 x float>
  %b = fpext half %y to float
  %c = insertelement <2 x float> poison, float %b, i32 0
  %d = shufflevector <2 x float> %c, <2 x float> poison, <2 x i32> zeroinitializer
  %e = fmul <2 x float> %a, %d
  ret <2 x float> %e
}