; xref: /llvm-project/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwadd.ll (revision 97982a8c605fac7c86d02e641a6cd7898b3ca343)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v,+zvfh,+f,+d -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfh,+f,+d -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s

; Both <2 x half> operands are fpext'ed before the fadd, so llc forms vfwadd.vv.
define <2 x float> @vfwadd_v2f16(ptr %x, ptr %y) {
; CHECK-LABEL: vfwadd_v2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vle16.v v10, (a1)
; CHECK-NEXT:    vfwadd.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <2 x half>, ptr %x
  %b = load <2 x half>, ptr %y
  %c = fpext <2 x half> %a to <2 x float>
  %d = fpext <2 x half> %b to <2 x float>
  %e = fadd <2 x float> %c, %d
  ret <2 x float> %e
}

; Both <4 x half> operands are fpext'ed before the fadd, so llc forms vfwadd.vv.
define <4 x float> @vfwadd_v4f16(ptr %x, ptr %y) {
; CHECK-LABEL: vfwadd_v4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vle16.v v10, (a1)
; CHECK-NEXT:    vfwadd.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <4 x half>, ptr %x
  %b = load <4 x half>, ptr %y
  %c = fpext <4 x half> %a to <4 x float>
  %d = fpext <4 x half> %b to <4 x float>
  %e = fadd <4 x float> %c, %d
  ret <4 x float> %e
}

; Both <8 x half> operands are fpext'ed before the fadd, so llc forms vfwadd.vv.
define <8 x float> @vfwadd_v8f16(ptr %x, ptr %y) {
; CHECK-LABEL: vfwadd_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v10, (a0)
; CHECK-NEXT:    vle16.v v11, (a1)
; CHECK-NEXT:    vfwadd.vv v8, v10, v11
; CHECK-NEXT:    ret
  %a = load <8 x half>, ptr %x
  %b = load <8 x half>, ptr %y
  %c = fpext <8 x half> %a to <8 x float>
  %d = fpext <8 x half> %b to <8 x float>
  %e = fadd <8 x float> %c, %d
  ret <8 x float> %e
}

; Both <16 x half> operands are fpext'ed before the fadd, so llc forms vfwadd.vv.
define <16 x float> @vfwadd_v16f16(ptr %x, ptr %y) {
; CHECK-LABEL: vfwadd_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v12, (a0)
; CHECK-NEXT:    vle16.v v14, (a1)
; CHECK-NEXT:    vfwadd.vv v8, v12, v14
; CHECK-NEXT:    ret
  %a = load <16 x half>, ptr %x
  %b = load <16 x half>, ptr %y
  %c = fpext <16 x half> %a to <16 x float>
  %d = fpext <16 x half> %b to <16 x float>
  %e = fadd <16 x float> %c, %d
  ret <16 x float> %e
}

; VL of 32 does not fit vsetivli's immediate, so a scalar li feeds vsetvli.
define <32 x float> @vfwadd_v32f16(ptr %x, ptr %y) {
; CHECK-LABEL: vfwadd_v32f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT:    vle16.v v16, (a0)
; CHECK-NEXT:    vle16.v v20, (a1)
; CHECK-NEXT:    vfwadd.vv v8, v16, v20
; CHECK-NEXT:    ret
  %a = load <32 x half>, ptr %x
  %b = load <32 x half>, ptr %y
  %c = fpext <32 x half> %a to <32 x float>
  %d = fpext <32 x half> %b to <32 x float>
  %e = fadd <32 x float> %c, %d
  ret <32 x float> %e
}

; Result exceeds the register budget: the vectors are split with vslidedown and
; two m4 vfwadd.vv ops, with vector spills/reloads on a vlenb-scaled stack frame.
define <64 x float> @vfwadd_v64f16(ptr %x, ptr %y) {
; CHECK-LABEL: vfwadd_v64f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 4
; CHECK-NEXT:    sub sp, sp, a2
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT:    li a2, 64
; CHECK-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vle16.v v0, (a1)
; CHECK-NEXT:    li a0, 32
; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT:    vslidedown.vx v16, v8, a0
; CHECK-NEXT:    vslidedown.vx v8, v0, a0
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vfwadd.vv v24, v16, v8
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vfwadd.vv v8, v16, v0
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %a = load <64 x half>, ptr %x
  %b = load <64 x half>, ptr %y
  %c = fpext <64 x half> %a to <64 x float>
  %d = fpext <64 x half> %b to <64 x float>
  %e = fadd <64 x float> %c, %d
  ret <64 x float> %e
}

; f32 -> f64 variant: both <2 x float> operands are fpext'ed, forming vfwadd.vv.
define <2 x double> @vfwadd_v2f32(ptr %x, ptr %y) {
; CHECK-LABEL: vfwadd_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vle32.v v10, (a1)
; CHECK-NEXT:    vfwadd.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <2 x float>, ptr %x
  %b = load <2 x float>, ptr %y
  %c = fpext <2 x float> %a to <2 x double>
  %d = fpext <2 x float> %b to <2 x double>
  %e = fadd <2 x double> %c, %d
  ret <2 x double> %e
}

; f32 -> f64 variant: both <4 x float> operands are fpext'ed, forming vfwadd.vv.
define <4 x double> @vfwadd_v4f32(ptr %x, ptr %y) {
; CHECK-LABEL: vfwadd_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v10, (a0)
; CHECK-NEXT:    vle32.v v11, (a1)
; CHECK-NEXT:    vfwadd.vv v8, v10, v11
; CHECK-NEXT:    ret
  %a = load <4 x float>, ptr %x
  %b = load <4 x float>, ptr %y
  %c = fpext <4 x float> %a to <4 x double>
  %d = fpext <4 x float> %b to <4 x double>
  %e = fadd <4 x double> %c, %d
  ret <4 x double> %e
}

; f32 -> f64 variant: both <8 x float> operands are fpext'ed, forming vfwadd.vv.
define <8 x double> @vfwadd_v8f32(ptr %x, ptr %y) {
; CHECK-LABEL: vfwadd_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v12, (a0)
; CHECK-NEXT:    vle32.v v14, (a1)
; CHECK-NEXT:    vfwadd.vv v8, v12, v14
; CHECK-NEXT:    ret
  %a = load <8 x float>, ptr %x
  %b = load <8 x float>, ptr %y
  %c = fpext <8 x float> %a to <8 x double>
  %d = fpext <8 x float> %b to <8 x double>
  %e = fadd <8 x double> %c, %d
  ret <8 x double> %e
}

; f32 -> f64 variant: both <16 x float> operands are fpext'ed, forming vfwadd.vv.
define <16 x double> @vfwadd_v16f32(ptr %x, ptr %y) {
; CHECK-LABEL: vfwadd_v16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle32.v v16, (a0)
; CHECK-NEXT:    vle32.v v20, (a1)
; CHECK-NEXT:    vfwadd.vv v8, v16, v20
; CHECK-NEXT:    ret
  %a = load <16 x float>, ptr %x
  %b = load <16 x float>, ptr %y
  %c = fpext <16 x float> %a to <16 x double>
  %d = fpext <16 x float> %b to <16 x double>
  %e = fadd <16 x double> %c, %d
  ret <16 x double> %e
}

; Result exceeds the register budget: split into two m4 halves via vslidedown
; with vector spills/reloads on a vlenb-scaled stack frame.
define <32 x double> @vfwadd_v32f32(ptr %x, ptr %y) {
; CHECK-LABEL: vfwadd_v32f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 4
; CHECK-NEXT:    sub sp, sp, a2
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vle32.v v0, (a1)
; CHECK-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
; CHECK-NEXT:    vslidedown.vi v16, v8, 16
; CHECK-NEXT:    vslidedown.vi v8, v0, 16
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vfwadd.vv v24, v16, v8
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vfwadd.vv v8, v16, v0
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %a = load <32 x float>, ptr %x
  %b = load <32 x float>, ptr %y
  %c = fpext <32 x float> %a to <32 x double>
  %d = fpext <32 x float> %b to <32 x double>
  %e = fadd <32 x double> %c, %d
  ret <32 x double> %e
}

; Splatted scalar half operand folds into the vector-scalar form vfwadd.vf.
define <2 x float> @vfwadd_vf_v2f16(ptr %x, half %y) {
; CHECK-LABEL: vfwadd_vf_v2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vfwadd.vf v8, v9, fa0
; CHECK-NEXT:    ret
  %a = load <2 x half>, ptr %x
  %b = insertelement <2 x half> poison, half %y, i32 0
  %c = shufflevector <2 x half> %b, <2 x half> poison, <2 x i32> zeroinitializer
  %d = fpext <2 x half> %a to <2 x float>
  %e = fpext <2 x half> %c to <2 x float>
  %f = fadd <2 x float> %d, %e
  ret <2 x float> %f
}

; Splatted scalar half operand folds into the vector-scalar form vfwadd.vf.
define <4 x float> @vfwadd_vf_v4f16(ptr %x, half %y) {
; CHECK-LABEL: vfwadd_vf_v4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vfwadd.vf v8, v9, fa0
; CHECK-NEXT:    ret
  %a = load <4 x half>, ptr %x
  %b = insertelement <4 x half> poison, half %y, i32 0
  %c = shufflevector <4 x half> %b, <4 x half> poison, <4 x i32> zeroinitializer
  %d = fpext <4 x half> %a to <4 x float>
  %e = fpext <4 x half> %c to <4 x float>
  %f = fadd <4 x float> %d, %e
  ret <4 x float> %f
}

; Splatted scalar half operand folds into the vector-scalar form vfwadd.vf.
define <8 x float> @vfwadd_vf_v8f16(ptr %x, half %y) {
; CHECK-LABEL: vfwadd_vf_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v10, (a0)
; CHECK-NEXT:    vfwadd.vf v8, v10, fa0
; CHECK-NEXT:    ret
  %a = load <8 x half>, ptr %x
  %b = insertelement <8 x half> poison, half %y, i32 0
  %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
  %d = fpext <8 x half> %a to <8 x float>
  %e = fpext <8 x half> %c to <8 x float>
  %f = fadd <8 x float> %d, %e
  ret <8 x float> %f
}

; Splatted scalar half operand folds into the vector-scalar form vfwadd.vf.
define <16 x float> @vfwadd_vf_v16f16(ptr %x, half %y) {
; CHECK-LABEL: vfwadd_vf_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v12, (a0)
; CHECK-NEXT:    vfwadd.vf v8, v12, fa0
; CHECK-NEXT:    ret
  %a = load <16 x half>, ptr %x
  %b = insertelement <16 x half> poison, half %y, i32 0
  %c = shufflevector <16 x half> %b, <16 x half> poison, <16 x i32> zeroinitializer
  %d = fpext <16 x half> %a to <16 x float>
  %e = fpext <16 x half> %c to <16 x float>
  %f = fadd <16 x float> %d, %e
  ret <16 x float> %f
}

; Splatted scalar half operand folds into vfwadd.vf; VL 32 needs li + vsetvli.
define <32 x float> @vfwadd_vf_v32f16(ptr %x, half %y) {
; CHECK-LABEL: vfwadd_vf_v32f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT:    vle16.v v16, (a0)
; CHECK-NEXT:    vfwadd.vf v8, v16, fa0
; CHECK-NEXT:    ret
  %a = load <32 x half>, ptr %x
  %b = insertelement <32 x half> poison, half %y, i32 0
  %c = shufflevector <32 x half> %b, <32 x half> poison, <32 x i32> zeroinitializer
  %d = fpext <32 x half> %a to <32 x float>
  %e = fpext <32 x half> %c to <32 x float>
  %f = fadd <32 x float> %d, %e
  ret <32 x float> %f
}

; Splatted scalar float operand folds into the vector-scalar form vfwadd.vf.
define <2 x double> @vfwadd_vf_v2f32(ptr %x, float %y) {
; CHECK-LABEL: vfwadd_vf_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vfwadd.vf v8, v9, fa0
; CHECK-NEXT:    ret
  %a = load <2 x float>, ptr %x
  %b = insertelement <2 x float> poison, float %y, i32 0
  %c = shufflevector <2 x float> %b, <2 x float> poison, <2 x i32> zeroinitializer
  %d = fpext <2 x float> %a to <2 x double>
  %e = fpext <2 x float> %c to <2 x double>
  %f = fadd <2 x double> %d, %e
  ret <2 x double> %f
}

; Splatted scalar float operand folds into the vector-scalar form vfwadd.vf.
define <4 x double> @vfwadd_vf_v4f32(ptr %x, float %y) {
; CHECK-LABEL: vfwadd_vf_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v10, (a0)
; CHECK-NEXT:    vfwadd.vf v8, v10, fa0
; CHECK-NEXT:    ret
  %a = load <4 x float>, ptr %x
  %b = insertelement <4 x float> poison, float %y, i32 0
  %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
  %d = fpext <4 x float> %a to <4 x double>
  %e = fpext <4 x float> %c to <4 x double>
  %f = fadd <4 x double> %d, %e
  ret <4 x double> %f
}

; Splatted scalar float operand folds into the vector-scalar form vfwadd.vf.
define <8 x double> @vfwadd_vf_v8f32(ptr %x, float %y) {
; CHECK-LABEL: vfwadd_vf_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v12, (a0)
; CHECK-NEXT:    vfwadd.vf v8, v12, fa0
; CHECK-NEXT:    ret
  %a = load <8 x float>, ptr %x
  %b = insertelement <8 x float> poison, float %y, i32 0
  %c = shufflevector <8 x float> %b, <8 x float> poison, <8 x i32> zeroinitializer
  %d = fpext <8 x float> %a to <8 x double>
  %e = fpext <8 x float> %c to <8 x double>
  %f = fadd <8 x double> %d, %e
  ret <8 x double> %f
}

; Splatted scalar float operand folds into the vector-scalar form vfwadd.vf.
define <16 x double> @vfwadd_vf_v16f32(ptr %x, float %y) {
; CHECK-LABEL: vfwadd_vf_v16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle32.v v16, (a0)
; CHECK-NEXT:    vfwadd.vf v8, v16, fa0
; CHECK-NEXT:    ret
  %a = load <16 x float>, ptr %x
  %b = insertelement <16 x float> poison, float %y, i32 0
  %c = shufflevector <16 x float> %b, <16 x float> poison, <16 x i32> zeroinitializer
  %d = fpext <16 x float> %a to <16 x double>
  %e = fpext <16 x float> %c to <16 x double>
  %f = fadd <16 x double> %d, %e
  ret <16 x double> %f
}

; Oversized result is split in two; the scalar operand lets both halves use
; vfwadd.vf without any spill.
define <32 x double> @vfwadd_vf_v32f32(ptr %x, float %y) {
; CHECK-LABEL: vfwadd_vf_v32f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT:    vle32.v v24, (a0)
; CHECK-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v24, 16
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vfwadd.vf v16, v8, fa0
; CHECK-NEXT:    vfwadd.vf v8, v24, fa0
; CHECK-NEXT:    ret
  %a = load <32 x float>, ptr %x
  %b = insertelement <32 x float> poison, float %y, i32 0
  %c = shufflevector <32 x float> %b, <32 x float> poison, <32 x i32> zeroinitializer
  %d = fpext <32 x float> %a to <32 x double>
  %e = fpext <32 x float> %c to <32 x double>
  %f = fadd <32 x double> %d, %e
  ret <32 x double> %f
}

; Only one operand is widened; the other is already wide, selecting vfwadd.wv.
define <2 x float> @vfwadd_wv_v2f16(ptr %x, ptr %y) {
; CHECK-LABEL: vfwadd_wv_v2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vfwadd.wv v8, v8, v9
; CHECK-NEXT:    ret
  %a = load <2 x float>, ptr %x
  %b = load <2 x half>, ptr %y
  %c = fpext <2 x half> %b to <2 x float>
  %d = fadd <2 x float> %c, %a
  ret <2 x float> %d
}

; Only one operand is widened; the other is already wide, selecting vfwadd.wv.
define <4 x float> @vfwadd_wv_v4f16(ptr %x, ptr %y) {
; CHECK-LABEL: vfwadd_wv_v4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vfwadd.wv v8, v8, v9
; CHECK-NEXT:    ret
  %a = load <4 x float>, ptr %x
  %b = load <4 x half>, ptr %y
  %c = fpext <4 x half> %b to <4 x float>
  %d = fadd <4 x float> %c, %a
  ret <4 x float> %d
}

; Only one operand is widened; the other is already wide, selecting vfwadd.wv.
define <8 x float> @vfwadd_wv_v8f16(ptr %x, ptr %y) {
; CHECK-LABEL: vfwadd_wv_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle16.v v10, (a1)
; CHECK-NEXT:    vfwadd.wv v8, v8, v10
; CHECK-NEXT:    ret
  %a = load <8 x float>, ptr %x
  %b = load <8 x half>, ptr %y
  %c = fpext <8 x half> %b to <8 x float>
  %d = fadd <8 x float> %c, %a
  ret <8 x float> %d
}

; Only one operand is widened; the other is already wide, selecting vfwadd.wv.
define <16 x float> @vfwadd_wv_v16f16(ptr %x, ptr %y) {
; CHECK-LABEL: vfwadd_wv_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle16.v v12, (a1)
; CHECK-NEXT:    vfwadd.wv v8, v8, v12
; CHECK-NEXT:    ret
  %a = load <16 x float>, ptr %x
  %b = load <16 x half>, ptr %y
  %c = fpext <16 x half> %b to <16 x float>
  %d = fadd <16 x float> %c, %a
  ret <16 x float> %d
}

; Only one operand is widened, selecting vfwadd.wv; VL 32 needs li + vsetvli.
define <32 x float> @vfwadd_wv_v32f16(ptr %x, ptr %y) {
; CHECK-LABEL: vfwadd_wv_v32f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle16.v v16, (a1)
; CHECK-NEXT:    vfwadd.wv v8, v8, v16
; CHECK-NEXT:    ret
  %a = load <32 x float>, ptr %x
  %b = load <32 x half>, ptr %y
  %c = fpext <32 x half> %b to <32 x float>
  %d = fadd <32 x float> %c, %a
  ret <32 x float> %d
}

; f32 -> f64 wide-operand variant: one operand already wide, so vfwadd.wv.
define <2 x double> @vfwadd_wv_v2f32(ptr %x, ptr %y) {
; CHECK-LABEL: vfwadd_wv_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle32.v v9, (a1)
; CHECK-NEXT:    vfwadd.wv v8, v8, v9
; CHECK-NEXT:    ret
  %a = load <2 x double>, ptr %x
  %b = load <2 x float>, ptr %y
  %c = fpext <2 x float> %b to <2 x double>
  %d = fadd <2 x double> %c, %a
  ret <2 x double> %d
}

; f32 -> f64 wide-operand variant: one operand already wide, so vfwadd.wv.
define <4 x double> @vfwadd_wv_v4f32(ptr %x, ptr %y) {
; CHECK-LABEL: vfwadd_wv_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle32.v v10, (a1)
; CHECK-NEXT:    vfwadd.wv v8, v8, v10
; CHECK-NEXT:    ret
  %a = load <4 x double>, ptr %x
  %b = load <4 x float>, ptr %y
  %c = fpext <4 x float> %b to <4 x double>
  %d = fadd <4 x double> %c, %a
  ret <4 x double> %d
}

; f32 -> f64 wide-operand variant: one operand already wide, so vfwadd.wv.
define <8 x double> @vfwadd_wv_v8f32(ptr %x, ptr %y) {
; CHECK-LABEL: vfwadd_wv_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle32.v v12, (a1)
; CHECK-NEXT:    vfwadd.wv v8, v8, v12
; CHECK-NEXT:    ret
  %a = load <8 x double>, ptr %x
  %b = load <8 x float>, ptr %y
  %c = fpext <8 x float> %b to <8 x double>
  %d = fadd <8 x double> %c, %a
  ret <8 x double> %d
}

; f32 -> f64 wide-operand variant: one operand already wide, so vfwadd.wv.
define <16 x double> @vfwadd_wv_v16f32(ptr %x, ptr %y) {
; CHECK-LABEL: vfwadd_wv_v16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle32.v v16, (a1)
; CHECK-NEXT:    vfwadd.wv v8, v8, v16
; CHECK-NEXT:    ret
  %a = load <16 x double>, ptr %x
  %b = load <16 x float>, ptr %y
  %c = fpext <16 x float> %b to <16 x double>
  %d = fadd <16 x double> %c, %a
  ret <16 x double> %d
}

; Wide vector plus splatted narrow scalar selects the wide-scalar form vfwadd.wf.
define <2 x float> @vfwadd_wf_v2f16(ptr %x, half %y) {
; CHECK-LABEL: vfwadd_wf_v2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfwadd.wf v8, v8, fa0
; CHECK-NEXT:    ret
  %a = load <2 x float>, ptr %x
  %b = insertelement <2 x half> poison, half %y, i32 0
  %c = shufflevector <2 x half> %b, <2 x half> poison, <2 x i32> zeroinitializer
  %d = fpext <2 x half> %c to <2 x float>
  %e = fadd <2 x float> %d, %a
  ret <2 x float> %e
}

; Wide vector plus splatted narrow scalar selects the wide-scalar form vfwadd.wf.
define <4 x float> @vfwadd_wf_v4f16(ptr %x, half %y) {
; CHECK-LABEL: vfwadd_wf_v4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfwadd.wf v8, v8, fa0
; CHECK-NEXT:    ret
  %a = load <4 x float>, ptr %x
  %b = insertelement <4 x half> poison, half %y, i32 0
  %c = shufflevector <4 x half> %b, <4 x half> poison, <4 x i32> zeroinitializer
  %d = fpext <4 x half> %c to <4 x float>
  %e = fadd <4 x float> %d, %a
  ret <4 x float> %e
}

; Wide vector plus splatted narrow scalar selects the wide-scalar form vfwadd.wf.
define <8 x float> @vfwadd_wf_v8f16(ptr %x, half %y) {
; CHECK-LABEL: vfwadd_wf_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfwadd.wf v8, v8, fa0
; CHECK-NEXT:    ret
  %a = load <8 x float>, ptr %x
  %b = insertelement <8 x half> poison, half %y, i32 0
  %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
  %d = fpext <8 x half> %c to <8 x float>
  %e = fadd <8 x float> %d, %a
  ret <8 x float> %e
}

; Wide vector plus splatted narrow scalar selects the wide-scalar form vfwadd.wf.
define <16 x float> @vfwadd_wf_v16f16(ptr %x, half %y) {
; CHECK-LABEL: vfwadd_wf_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfwadd.wf v8, v8, fa0
; CHECK-NEXT:    ret
  %a = load <16 x float>, ptr %x
  %b = insertelement <16 x half> poison, half %y, i32 0
  %c = shufflevector <16 x half> %b, <16 x half> poison, <16 x i32> zeroinitializer
  %d = fpext <16 x half> %c to <16 x float>
  %e = fadd <16 x float> %d, %a
  ret <16 x float> %e
}

; f64 wide vector plus splatted float scalar selects vfwadd.wf.
define <2 x double> @vfwadd_wf_v2f32(ptr %x, float %y) {
; CHECK-LABEL: vfwadd_wf_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vfwadd.wf v8, v8, fa0
; CHECK-NEXT:    ret
  %a = load <2 x double>, ptr %x
  %b = insertelement <2 x float> poison, float %y, i32 0
  %c = shufflevector <2 x float> %b, <2 x float> poison, <2 x i32> zeroinitializer
  %d = fpext <2 x float> %c to <2 x double>
  %e = fadd <2 x double> %d, %a
  ret <2 x double> %e
}

; f64 wide vector plus splatted float scalar selects vfwadd.wf.
define <4 x double> @vfwadd_wf_v4f32(ptr %x, float %y) {
; CHECK-LABEL: vfwadd_wf_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vfwadd.wf v8, v8, fa0
; CHECK-NEXT:    ret
  %a = load <4 x double>, ptr %x
  %b = insertelement <4 x float> poison, float %y, i32 0
  %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
  %d = fpext <4 x float> %c to <4 x double>
  %e = fadd <4 x double> %d, %a
  ret <4 x double> %e
}

; f64 wide vector plus splatted float scalar selects vfwadd.wf.
define <8 x double> @vfwadd_wf_v8f32(ptr %x, float %y) {
; CHECK-LABEL: vfwadd_wf_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vfwadd.wf v8, v8, fa0
; CHECK-NEXT:    ret
  %a = load <8 x double>, ptr %x
  %b = insertelement <8 x float> poison, float %y, i32 0
  %c = shufflevector <8 x float> %b, <8 x float> poison, <8 x i32> zeroinitializer
  %d = fpext <8 x float> %c to <8 x double>
  %e = fadd <8 x double> %d, %a
  ret <8 x double> %e
}

; f64 wide vector plus splatted float scalar selects vfwadd.wf.
define <16 x double> @vfwadd_wf_v16f32(ptr %x, float %y) {
; CHECK-LABEL: vfwadd_wf_v16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vfwadd.wf v8, v8, fa0
; CHECK-NEXT:    ret
  %a = load <16 x double>, ptr %x
  %b = insertelement <16 x float> poison, float %y, i32 0
  %c = shufflevector <16 x float> %b, <16 x float> poison, <16 x i32> zeroinitializer
  %d = fpext <16 x float> %c to <16 x double>
  %e = fadd <16 x double> %d, %a
  ret <16 x double> %e
}

; Scalar is fpext'ed before the splat; still recognized as vfwadd.vf.
define <2 x float> @vfwadd_vf2_v2f32(<2 x half> %x, half %y) {
; CHECK-LABEL: vfwadd_vf2_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vfwadd.vf v9, v8, fa0
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %a = fpext <2 x half> %x to <2 x float>
  %b = fpext half %y to float
  %c = insertelement <2 x float> poison, float %b, i32 0
  %d = shufflevector <2 x float> %c, <2 x float> poison, <2 x i32> zeroinitializer
  %e = fadd <2 x float> %a, %d
  ret <2 x float> %e
}

; Scalar is fpext'ed before the splat; still recognized as vfwadd.wf.
define <2 x float> @vfwadd_wf2_v2f32(<2 x float> %x, half %y) {
; CHECK-LABEL: vfwadd_wf2_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vfwadd.wf v8, v8, fa0
; CHECK-NEXT:    ret
  %b = fpext half %y to float
  %c = insertelement <2 x float> poison, float %b, i32 0
  %d = shufflevector <2 x float> %c, <2 x float> poison, <2 x i32> zeroinitializer
  %e = fadd <2 x float> %x, %d
  ret <2 x float> %e
}

698