xref: /llvm-project/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll (revision cb6f021af2354761357684ffa26ebbe718615244)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfh,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
3; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfh,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
4; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
5; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
6
7
; fadd on <8 x bfloat>: loads %x and %y, adds them, and stores the sum to %x.
; Every RUN configuration expects both inputs to be widened to e32 with
; vfwcvtbf16.f.f.v, added at e32/m2, and narrowed back with vfncvtbf16.f.f.w.
8define void @fadd_v8bf16(ptr %x, ptr %y) {
9; CHECK-LABEL: fadd_v8bf16:
10; CHECK:       # %bb.0:
11; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
12; CHECK-NEXT:    vle16.v v8, (a1)
13; CHECK-NEXT:    vle16.v v9, (a0)
14; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
15; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
16; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
17; CHECK-NEXT:    vfadd.vv v8, v12, v10
18; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
19; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
20; CHECK-NEXT:    vse16.v v10, (a0)
21; CHECK-NEXT:    ret
22  %a = load <8 x bfloat>, ptr %x
23  %b = load <8 x bfloat>, ptr %y
24  %c = fadd <8 x bfloat> %a, %b
25  store <8 x bfloat> %c, ptr %x
26  ret void
27}
28
; fadd on <6 x bfloat> (non-power-of-two element count): same widen / add /
; narrow sequence as the 8-element bfloat case, but the expected vsetivli uses
; an AVL of 6.
29define void @fadd_v6bf16(ptr %x, ptr %y) {
30; CHECK-LABEL: fadd_v6bf16:
31; CHECK:       # %bb.0:
32; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
33; CHECK-NEXT:    vle16.v v8, (a1)
34; CHECK-NEXT:    vle16.v v9, (a0)
35; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
36; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
37; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
38; CHECK-NEXT:    vfadd.vv v8, v12, v10
39; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
40; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
41; CHECK-NEXT:    vse16.v v10, (a0)
42; CHECK-NEXT:    ret
43  %a = load <6 x bfloat>, ptr %x
44  %b = load <6 x bfloat>, ptr %y
45  %c = fadd <6 x bfloat> %a, %b
46  store <6 x bfloat> %c, ptr %x
47  ret void
48}
49
; fadd on <8 x half>. The ZVFH runs (+zvfh) expect a single e16 vfadd.vv; the
; ZVFHMIN runs (+zvfhmin) expect widening to e32 with vfwcvt.f.f.v, the add at
; e32/m2, and narrowing back with vfncvt.f.f.w.
50define void @fadd_v8f16(ptr %x, ptr %y) {
51; ZVFH-LABEL: fadd_v8f16:
52; ZVFH:       # %bb.0:
53; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
54; ZVFH-NEXT:    vle16.v v8, (a0)
55; ZVFH-NEXT:    vle16.v v9, (a1)
56; ZVFH-NEXT:    vfadd.vv v8, v8, v9
57; ZVFH-NEXT:    vse16.v v8, (a0)
58; ZVFH-NEXT:    ret
59;
60; ZVFHMIN-LABEL: fadd_v8f16:
61; ZVFHMIN:       # %bb.0:
62; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
63; ZVFHMIN-NEXT:    vle16.v v8, (a1)
64; ZVFHMIN-NEXT:    vle16.v v9, (a0)
65; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
66; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
67; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
68; ZVFHMIN-NEXT:    vfadd.vv v8, v12, v10
69; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
70; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
71; ZVFHMIN-NEXT:    vse16.v v10, (a0)
72; ZVFHMIN-NEXT:    ret
73  %a = load <8 x half>, ptr %x
74  %b = load <8 x half>, ptr %y
75  %c = fadd <8 x half> %a, %b
76  store <8 x half> %c, ptr %x
77  ret void
78}
79
; fadd on <6 x half> (non-power-of-two element count). The ZVFH runs expect a
; direct e16 vfadd.vv with AVL 6; the ZVFHMIN runs expect the widen-to-e32 /
; add / narrow sequence, also with AVL 6.
80define void @fadd_v6f16(ptr %x, ptr %y) {
81; ZVFH-LABEL: fadd_v6f16:
82; ZVFH:       # %bb.0:
83; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
84; ZVFH-NEXT:    vle16.v v8, (a0)
85; ZVFH-NEXT:    vle16.v v9, (a1)
86; ZVFH-NEXT:    vfadd.vv v8, v8, v9
87; ZVFH-NEXT:    vse16.v v8, (a0)
88; ZVFH-NEXT:    ret
89;
90; ZVFHMIN-LABEL: fadd_v6f16:
91; ZVFHMIN:       # %bb.0:
92; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
93; ZVFHMIN-NEXT:    vle16.v v8, (a1)
94; ZVFHMIN-NEXT:    vle16.v v9, (a0)
95; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
96; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
97; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
98; ZVFHMIN-NEXT:    vfadd.vv v8, v12, v10
99; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
100; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
101; ZVFHMIN-NEXT:    vse16.v v10, (a0)
102; ZVFHMIN-NEXT:    ret
103  %a = load <6 x half>, ptr %x
104  %b = load <6 x half>, ptr %y
105  %c = fadd <6 x half> %a, %b
106  store <6 x half> %c, ptr %x
107  ret void
108}
109
; fadd on <4 x float>: every RUN configuration expects a direct e32/m1
; vfadd.vv between the two loaded vectors, stored back to %x.
110define void @fadd_v4f32(ptr %x, ptr %y) {
111; CHECK-LABEL: fadd_v4f32:
112; CHECK:       # %bb.0:
113; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
114; CHECK-NEXT:    vle32.v v8, (a0)
115; CHECK-NEXT:    vle32.v v9, (a1)
116; CHECK-NEXT:    vfadd.vv v8, v8, v9
117; CHECK-NEXT:    vse32.v v8, (a0)
118; CHECK-NEXT:    ret
119  %a = load <4 x float>, ptr %x
120  %b = load <4 x float>, ptr %y
121  %c = fadd <4 x float> %a, %b
122  store <4 x float> %c, ptr %x
123  ret void
124}
125
; fadd on <2 x double>: every RUN configuration expects a direct e64/m1
; vfadd.vv between the two loaded vectors, stored back to %x.
126define void @fadd_v2f64(ptr %x, ptr %y) {
127; CHECK-LABEL: fadd_v2f64:
128; CHECK:       # %bb.0:
129; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
130; CHECK-NEXT:    vle64.v v8, (a0)
131; CHECK-NEXT:    vle64.v v9, (a1)
132; CHECK-NEXT:    vfadd.vv v8, v8, v9
133; CHECK-NEXT:    vse64.v v8, (a0)
134; CHECK-NEXT:    ret
135  %a = load <2 x double>, ptr %x
136  %b = load <2 x double>, ptr %y
137  %c = fadd <2 x double> %a, %b
138  store <2 x double> %c, ptr %x
139  ret void
140}
141
; fsub on <8 x bfloat>: computes %x - %y and stores to %x. Every RUN
; configuration expects widening of both inputs with vfwcvtbf16.f.f.v, vfsub.vv
; at e32/m2 (widened %x as the minuend), and narrowing with vfncvtbf16.f.f.w.
142define void @fsub_v8bf16(ptr %x, ptr %y) {
143; CHECK-LABEL: fsub_v8bf16:
144; CHECK:       # %bb.0:
145; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
146; CHECK-NEXT:    vle16.v v8, (a1)
147; CHECK-NEXT:    vle16.v v9, (a0)
148; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
149; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
150; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
151; CHECK-NEXT:    vfsub.vv v8, v12, v10
152; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
153; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
154; CHECK-NEXT:    vse16.v v10, (a0)
155; CHECK-NEXT:    ret
156  %a = load <8 x bfloat>, ptr %x
157  %b = load <8 x bfloat>, ptr %y
158  %c = fsub <8 x bfloat> %a, %b
159  store <8 x bfloat> %c, ptr %x
160  ret void
161}
162
; fsub on <6 x bfloat> (non-power-of-two element count): same widen / vfsub.vv
; / narrow sequence as the 8-element bfloat case, with an expected AVL of 6.
163define void @fsub_v6bf16(ptr %x, ptr %y) {
164; CHECK-LABEL: fsub_v6bf16:
165; CHECK:       # %bb.0:
166; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
167; CHECK-NEXT:    vle16.v v8, (a1)
168; CHECK-NEXT:    vle16.v v9, (a0)
169; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
170; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
171; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
172; CHECK-NEXT:    vfsub.vv v8, v12, v10
173; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
174; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
175; CHECK-NEXT:    vse16.v v10, (a0)
176; CHECK-NEXT:    ret
177  %a = load <6 x bfloat>, ptr %x
178  %b = load <6 x bfloat>, ptr %y
179  %c = fsub <6 x bfloat> %a, %b
180  store <6 x bfloat> %c, ptr %x
181  ret void
182}
183
; fsub on <8 x half>. The ZVFH runs (+zvfh) expect a single e16 vfsub.vv; the
; ZVFHMIN runs (+zvfhmin) expect widening to e32 with vfwcvt.f.f.v, the
; subtract at e32/m2, and narrowing back with vfncvt.f.f.w.
184define void @fsub_v8f16(ptr %x, ptr %y) {
185; ZVFH-LABEL: fsub_v8f16:
186; ZVFH:       # %bb.0:
187; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
188; ZVFH-NEXT:    vle16.v v8, (a0)
189; ZVFH-NEXT:    vle16.v v9, (a1)
190; ZVFH-NEXT:    vfsub.vv v8, v8, v9
191; ZVFH-NEXT:    vse16.v v8, (a0)
192; ZVFH-NEXT:    ret
193;
194; ZVFHMIN-LABEL: fsub_v8f16:
195; ZVFHMIN:       # %bb.0:
196; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
197; ZVFHMIN-NEXT:    vle16.v v8, (a1)
198; ZVFHMIN-NEXT:    vle16.v v9, (a0)
199; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
200; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
201; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
202; ZVFHMIN-NEXT:    vfsub.vv v8, v12, v10
203; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
204; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
205; ZVFHMIN-NEXT:    vse16.v v10, (a0)
206; ZVFHMIN-NEXT:    ret
207  %a = load <8 x half>, ptr %x
208  %b = load <8 x half>, ptr %y
209  %c = fsub <8 x half> %a, %b
210  store <8 x half> %c, ptr %x
211  ret void
212}
213
; fsub on <6 x half> (non-power-of-two element count). The ZVFH runs expect a
; direct e16 vfsub.vv with AVL 6; the ZVFHMIN runs expect the widen-to-e32 /
; subtract / narrow sequence, also with AVL 6.
214define void @fsub_v6f16(ptr %x, ptr %y) {
215; ZVFH-LABEL: fsub_v6f16:
216; ZVFH:       # %bb.0:
217; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
218; ZVFH-NEXT:    vle16.v v8, (a0)
219; ZVFH-NEXT:    vle16.v v9, (a1)
220; ZVFH-NEXT:    vfsub.vv v8, v8, v9
221; ZVFH-NEXT:    vse16.v v8, (a0)
222; ZVFH-NEXT:    ret
223;
224; ZVFHMIN-LABEL: fsub_v6f16:
225; ZVFHMIN:       # %bb.0:
226; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
227; ZVFHMIN-NEXT:    vle16.v v8, (a1)
228; ZVFHMIN-NEXT:    vle16.v v9, (a0)
229; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
230; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
231; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
232; ZVFHMIN-NEXT:    vfsub.vv v8, v12, v10
233; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
234; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
235; ZVFHMIN-NEXT:    vse16.v v10, (a0)
236; ZVFHMIN-NEXT:    ret
237  %a = load <6 x half>, ptr %x
238  %b = load <6 x half>, ptr %y
239  %c = fsub <6 x half> %a, %b
240  store <6 x half> %c, ptr %x
241  ret void
242}
243
; fsub on <4 x float>: every RUN configuration expects a direct e32/m1
; vfsub.vv (%x minus %y), stored back to %x.
244define void @fsub_v4f32(ptr %x, ptr %y) {
245; CHECK-LABEL: fsub_v4f32:
246; CHECK:       # %bb.0:
247; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
248; CHECK-NEXT:    vle32.v v8, (a0)
249; CHECK-NEXT:    vle32.v v9, (a1)
250; CHECK-NEXT:    vfsub.vv v8, v8, v9
251; CHECK-NEXT:    vse32.v v8, (a0)
252; CHECK-NEXT:    ret
253  %a = load <4 x float>, ptr %x
254  %b = load <4 x float>, ptr %y
255  %c = fsub <4 x float> %a, %b
256  store <4 x float> %c, ptr %x
257  ret void
258}
259
; fsub on <2 x double>: every RUN configuration expects a direct e64/m1
; vfsub.vv (%x minus %y), stored back to %x.
260define void @fsub_v2f64(ptr %x, ptr %y) {
261; CHECK-LABEL: fsub_v2f64:
262; CHECK:       # %bb.0:
263; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
264; CHECK-NEXT:    vle64.v v8, (a0)
265; CHECK-NEXT:    vle64.v v9, (a1)
266; CHECK-NEXT:    vfsub.vv v8, v8, v9
267; CHECK-NEXT:    vse64.v v8, (a0)
268; CHECK-NEXT:    ret
269  %a = load <2 x double>, ptr %x
270  %b = load <2 x double>, ptr %y
271  %c = fsub <2 x double> %a, %b
272  store <2 x double> %c, ptr %x
273  ret void
274}
275
; fmul on <8 x bfloat>: multiplies %x by %y and stores to %x. Every RUN
; configuration expects widening of both inputs with vfwcvtbf16.f.f.v, vfmul.vv
; at e32/m2, and narrowing with vfncvtbf16.f.f.w.
276define void @fmul_v8bf16(ptr %x, ptr %y) {
277; CHECK-LABEL: fmul_v8bf16:
278; CHECK:       # %bb.0:
279; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
280; CHECK-NEXT:    vle16.v v8, (a1)
281; CHECK-NEXT:    vle16.v v9, (a0)
282; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
283; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
284; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
285; CHECK-NEXT:    vfmul.vv v8, v12, v10
286; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
287; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
288; CHECK-NEXT:    vse16.v v10, (a0)
289; CHECK-NEXT:    ret
290  %a = load <8 x bfloat>, ptr %x
291  %b = load <8 x bfloat>, ptr %y
292  %c = fmul <8 x bfloat> %a, %b
293  store <8 x bfloat> %c, ptr %x
294  ret void
295}
296
; fmul on <6 x bfloat> (non-power-of-two element count): same widen / vfmul.vv
; / narrow sequence as the 8-element bfloat case, with an expected AVL of 6.
297define void @fmul_v6bf16(ptr %x, ptr %y) {
298; CHECK-LABEL: fmul_v6bf16:
299; CHECK:       # %bb.0:
300; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
301; CHECK-NEXT:    vle16.v v8, (a1)
302; CHECK-NEXT:    vle16.v v9, (a0)
303; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
304; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
305; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
306; CHECK-NEXT:    vfmul.vv v8, v12, v10
307; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
308; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
309; CHECK-NEXT:    vse16.v v10, (a0)
310; CHECK-NEXT:    ret
311  %a = load <6 x bfloat>, ptr %x
312  %b = load <6 x bfloat>, ptr %y
313  %c = fmul <6 x bfloat> %a, %b
314  store <6 x bfloat> %c, ptr %x
315  ret void
316}
317
; fmul on <8 x half>. The ZVFH runs (+zvfh) expect a single e16 vfmul.vv; the
; ZVFHMIN runs (+zvfhmin) expect widening to e32 with vfwcvt.f.f.v, the
; multiply at e32/m2, and narrowing back with vfncvt.f.f.w.
318define void @fmul_v8f16(ptr %x, ptr %y) {
319; ZVFH-LABEL: fmul_v8f16:
320; ZVFH:       # %bb.0:
321; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
322; ZVFH-NEXT:    vle16.v v8, (a0)
323; ZVFH-NEXT:    vle16.v v9, (a1)
324; ZVFH-NEXT:    vfmul.vv v8, v8, v9
325; ZVFH-NEXT:    vse16.v v8, (a0)
326; ZVFH-NEXT:    ret
327;
328; ZVFHMIN-LABEL: fmul_v8f16:
329; ZVFHMIN:       # %bb.0:
330; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
331; ZVFHMIN-NEXT:    vle16.v v8, (a1)
332; ZVFHMIN-NEXT:    vle16.v v9, (a0)
333; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
334; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
335; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
336; ZVFHMIN-NEXT:    vfmul.vv v8, v12, v10
337; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
338; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
339; ZVFHMIN-NEXT:    vse16.v v10, (a0)
340; ZVFHMIN-NEXT:    ret
341  %a = load <8 x half>, ptr %x
342  %b = load <8 x half>, ptr %y
343  %c = fmul <8 x half> %a, %b
344  store <8 x half> %c, ptr %x
345  ret void
346}
347
; fmul on <6 x half> (non-power-of-two element count). The ZVFH runs expect a
; direct e16 vfmul.vv with AVL 6; the ZVFHMIN runs expect the widen-to-e32 /
; multiply / narrow sequence, also with AVL 6.
348define void @fmul_v6f16(ptr %x, ptr %y) {
349; ZVFH-LABEL: fmul_v6f16:
350; ZVFH:       # %bb.0:
351; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
352; ZVFH-NEXT:    vle16.v v8, (a0)
353; ZVFH-NEXT:    vle16.v v9, (a1)
354; ZVFH-NEXT:    vfmul.vv v8, v8, v9
355; ZVFH-NEXT:    vse16.v v8, (a0)
356; ZVFH-NEXT:    ret
357;
358; ZVFHMIN-LABEL: fmul_v6f16:
359; ZVFHMIN:       # %bb.0:
360; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
361; ZVFHMIN-NEXT:    vle16.v v8, (a1)
362; ZVFHMIN-NEXT:    vle16.v v9, (a0)
363; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
364; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
365; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
366; ZVFHMIN-NEXT:    vfmul.vv v8, v12, v10
367; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
368; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
369; ZVFHMIN-NEXT:    vse16.v v10, (a0)
370; ZVFHMIN-NEXT:    ret
371  %a = load <6 x half>, ptr %x
372  %b = load <6 x half>, ptr %y
373  %c = fmul <6 x half> %a, %b
374  store <6 x half> %c, ptr %x
375  ret void
376}
377
; fmul on <4 x float>: every RUN configuration expects a direct e32/m1
; vfmul.vv between the two loaded vectors, stored back to %x.
378define void @fmul_v4f32(ptr %x, ptr %y) {
379; CHECK-LABEL: fmul_v4f32:
380; CHECK:       # %bb.0:
381; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
382; CHECK-NEXT:    vle32.v v8, (a0)
383; CHECK-NEXT:    vle32.v v9, (a1)
384; CHECK-NEXT:    vfmul.vv v8, v8, v9
385; CHECK-NEXT:    vse32.v v8, (a0)
386; CHECK-NEXT:    ret
387  %a = load <4 x float>, ptr %x
388  %b = load <4 x float>, ptr %y
389  %c = fmul <4 x float> %a, %b
390  store <4 x float> %c, ptr %x
391  ret void
392}
393
; fmul on <2 x double>: every RUN configuration expects a direct e64/m1
; vfmul.vv between the two loaded vectors, stored back to %x.
394define void @fmul_v2f64(ptr %x, ptr %y) {
395; CHECK-LABEL: fmul_v2f64:
396; CHECK:       # %bb.0:
397; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
398; CHECK-NEXT:    vle64.v v8, (a0)
399; CHECK-NEXT:    vle64.v v9, (a1)
400; CHECK-NEXT:    vfmul.vv v8, v8, v9
401; CHECK-NEXT:    vse64.v v8, (a0)
402; CHECK-NEXT:    ret
403  %a = load <2 x double>, ptr %x
404  %b = load <2 x double>, ptr %y
405  %c = fmul <2 x double> %a, %b
406  store <2 x double> %c, ptr %x
407  ret void
408}
409
; fdiv on <8 x bfloat>: computes %x / %y and stores to %x. Every RUN
; configuration expects widening of both inputs with vfwcvtbf16.f.f.v, vfdiv.vv
; at e32/m2 (widened %x as the dividend), and narrowing with vfncvtbf16.f.f.w.
410define void @fdiv_v8bf16(ptr %x, ptr %y) {
411; CHECK-LABEL: fdiv_v8bf16:
412; CHECK:       # %bb.0:
413; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
414; CHECK-NEXT:    vle16.v v8, (a1)
415; CHECK-NEXT:    vle16.v v9, (a0)
416; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
417; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
418; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
419; CHECK-NEXT:    vfdiv.vv v8, v12, v10
420; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
421; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
422; CHECK-NEXT:    vse16.v v10, (a0)
423; CHECK-NEXT:    ret
424  %a = load <8 x bfloat>, ptr %x
425  %b = load <8 x bfloat>, ptr %y
426  %c = fdiv <8 x bfloat> %a, %b
427  store <8 x bfloat> %c, ptr %x
428  ret void
429}
430
; fdiv on <6 x bfloat> (non-power-of-two element count): same widen / vfdiv.vv
; / narrow sequence as the 8-element bfloat case, with an expected AVL of 6.
431define void @fdiv_v6bf16(ptr %x, ptr %y) {
432; CHECK-LABEL: fdiv_v6bf16:
433; CHECK:       # %bb.0:
434; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
435; CHECK-NEXT:    vle16.v v8, (a1)
436; CHECK-NEXT:    vle16.v v9, (a0)
437; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
438; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
439; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
440; CHECK-NEXT:    vfdiv.vv v8, v12, v10
441; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
442; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
443; CHECK-NEXT:    vse16.v v10, (a0)
444; CHECK-NEXT:    ret
445  %a = load <6 x bfloat>, ptr %x
446  %b = load <6 x bfloat>, ptr %y
447  %c = fdiv <6 x bfloat> %a, %b
448  store <6 x bfloat> %c, ptr %x
449  ret void
450}
451
; fdiv on <8 x half>. The ZVFH runs (+zvfh) expect a single e16 vfdiv.vv; the
; ZVFHMIN runs (+zvfhmin) expect widening to e32 with vfwcvt.f.f.v, the divide
; at e32/m2, and narrowing back with vfncvt.f.f.w.
452define void @fdiv_v8f16(ptr %x, ptr %y) {
453; ZVFH-LABEL: fdiv_v8f16:
454; ZVFH:       # %bb.0:
455; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
456; ZVFH-NEXT:    vle16.v v8, (a0)
457; ZVFH-NEXT:    vle16.v v9, (a1)
458; ZVFH-NEXT:    vfdiv.vv v8, v8, v9
459; ZVFH-NEXT:    vse16.v v8, (a0)
460; ZVFH-NEXT:    ret
461;
462; ZVFHMIN-LABEL: fdiv_v8f16:
463; ZVFHMIN:       # %bb.0:
464; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
465; ZVFHMIN-NEXT:    vle16.v v8, (a1)
466; ZVFHMIN-NEXT:    vle16.v v9, (a0)
467; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
468; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
469; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
470; ZVFHMIN-NEXT:    vfdiv.vv v8, v12, v10
471; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
472; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
473; ZVFHMIN-NEXT:    vse16.v v10, (a0)
474; ZVFHMIN-NEXT:    ret
475  %a = load <8 x half>, ptr %x
476  %b = load <8 x half>, ptr %y
477  %c = fdiv <8 x half> %a, %b
478  store <8 x half> %c, ptr %x
479  ret void
480}
481
; fdiv on <6 x half> (non-power-of-two element count). The ZVFH runs expect a
; direct e16 vfdiv.vv with AVL 6; the ZVFHMIN runs expect the widen-to-e32 /
; divide / narrow sequence, also with AVL 6.
482define void @fdiv_v6f16(ptr %x, ptr %y) {
483; ZVFH-LABEL: fdiv_v6f16:
484; ZVFH:       # %bb.0:
485; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
486; ZVFH-NEXT:    vle16.v v8, (a0)
487; ZVFH-NEXT:    vle16.v v9, (a1)
488; ZVFH-NEXT:    vfdiv.vv v8, v8, v9
489; ZVFH-NEXT:    vse16.v v8, (a0)
490; ZVFH-NEXT:    ret
491;
492; ZVFHMIN-LABEL: fdiv_v6f16:
493; ZVFHMIN:       # %bb.0:
494; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
495; ZVFHMIN-NEXT:    vle16.v v8, (a1)
496; ZVFHMIN-NEXT:    vle16.v v9, (a0)
497; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
498; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
499; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
500; ZVFHMIN-NEXT:    vfdiv.vv v8, v12, v10
501; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
502; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
503; ZVFHMIN-NEXT:    vse16.v v10, (a0)
504; ZVFHMIN-NEXT:    ret
505  %a = load <6 x half>, ptr %x
506  %b = load <6 x half>, ptr %y
507  %c = fdiv <6 x half> %a, %b
508  store <6 x half> %c, ptr %x
509  ret void
510}
511
; fdiv on <4 x float>: every RUN configuration expects a direct e32/m1
; vfdiv.vv (%x divided by %y), stored back to %x.
512define void @fdiv_v4f32(ptr %x, ptr %y) {
513; CHECK-LABEL: fdiv_v4f32:
514; CHECK:       # %bb.0:
515; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
516; CHECK-NEXT:    vle32.v v8, (a0)
517; CHECK-NEXT:    vle32.v v9, (a1)
518; CHECK-NEXT:    vfdiv.vv v8, v8, v9
519; CHECK-NEXT:    vse32.v v8, (a0)
520; CHECK-NEXT:    ret
521  %a = load <4 x float>, ptr %x
522  %b = load <4 x float>, ptr %y
523  %c = fdiv <4 x float> %a, %b
524  store <4 x float> %c, ptr %x
525  ret void
526}
527
; fdiv on <2 x double>: every RUN configuration expects a direct e64/m1
; vfdiv.vv (%x divided by %y), stored back to %x.
528define void @fdiv_v2f64(ptr %x, ptr %y) {
529; CHECK-LABEL: fdiv_v2f64:
530; CHECK:       # %bb.0:
531; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
532; CHECK-NEXT:    vle64.v v8, (a0)
533; CHECK-NEXT:    vle64.v v9, (a1)
534; CHECK-NEXT:    vfdiv.vv v8, v8, v9
535; CHECK-NEXT:    vse64.v v8, (a0)
536; CHECK-NEXT:    ret
537  %a = load <2 x double>, ptr %x
538  %b = load <2 x double>, ptr %y
539  %c = fdiv <2 x double> %a, %b
540  store <2 x double> %c, ptr %x
541  ret void
542}
543
; fneg on <8 x bfloat>: expected to be lowered as an integer operation rather
; than an FP one. lui a1, 8 forms 0x8000 (bit 15, the sign bit of a 16-bit
; element) and vxor.vx flips that bit in every element.
544define void @fneg_v8bf16(ptr %x) {
545; CHECK-LABEL: fneg_v8bf16:
546; CHECK:       # %bb.0:
547; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
548; CHECK-NEXT:    vle16.v v8, (a0)
549; CHECK-NEXT:    lui a1, 8
550; CHECK-NEXT:    vxor.vx v8, v8, a1
551; CHECK-NEXT:    vse16.v v8, (a0)
552; CHECK-NEXT:    ret
553  %a = load <8 x bfloat>, ptr %x
554  %b = fneg <8 x bfloat> %a
555  store <8 x bfloat> %b, ptr %x
556  ret void
557}
558
; fneg on <6 x bfloat> (non-power-of-two element count): same sign-bit XOR
; with 0x8000 as the 8-element bfloat case, with an expected AVL of 6.
559define void @fneg_v6bf16(ptr %x) {
560; CHECK-LABEL: fneg_v6bf16:
561; CHECK:       # %bb.0:
562; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
563; CHECK-NEXT:    vle16.v v8, (a0)
564; CHECK-NEXT:    lui a1, 8
565; CHECK-NEXT:    vxor.vx v8, v8, a1
566; CHECK-NEXT:    vse16.v v8, (a0)
567; CHECK-NEXT:    ret
568  %a = load <6 x bfloat>, ptr %x
569  %b = fneg <6 x bfloat> %a
570  store <6 x bfloat> %b, ptr %x
571  ret void
572}
573
; fneg on <8 x half>. The ZVFH runs (+zvfh) expect vfneg.v at e16; the ZVFHMIN
; runs (+zvfhmin) expect an integer XOR of each element with 0x8000 (lui a1, 8)
; to flip the sign bit.
574define void @fneg_v8f16(ptr %x) {
575; ZVFH-LABEL: fneg_v8f16:
576; ZVFH:       # %bb.0:
577; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
578; ZVFH-NEXT:    vle16.v v8, (a0)
579; ZVFH-NEXT:    vfneg.v v8, v8
580; ZVFH-NEXT:    vse16.v v8, (a0)
581; ZVFH-NEXT:    ret
582;
583; ZVFHMIN-LABEL: fneg_v8f16:
584; ZVFHMIN:       # %bb.0:
585; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
586; ZVFHMIN-NEXT:    vle16.v v8, (a0)
587; ZVFHMIN-NEXT:    lui a1, 8
588; ZVFHMIN-NEXT:    vxor.vx v8, v8, a1
589; ZVFHMIN-NEXT:    vse16.v v8, (a0)
590; ZVFHMIN-NEXT:    ret
591  %a = load <8 x half>, ptr %x
592  %b = fneg <8 x half> %a
593  store <8 x half> %b, ptr %x
594  ret void
595}
596
; fneg on <6 x half> (non-power-of-two element count). The ZVFH runs expect
; vfneg.v with AVL 6; the ZVFHMIN runs expect the sign-bit XOR with 0x8000,
; also with AVL 6.
597define void @fneg_v6f16(ptr %x) {
598; ZVFH-LABEL: fneg_v6f16:
599; ZVFH:       # %bb.0:
600; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
601; ZVFH-NEXT:    vle16.v v8, (a0)
602; ZVFH-NEXT:    vfneg.v v8, v8
603; ZVFH-NEXT:    vse16.v v8, (a0)
604; ZVFH-NEXT:    ret
605;
606; ZVFHMIN-LABEL: fneg_v6f16:
607; ZVFHMIN:       # %bb.0:
608; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
609; ZVFHMIN-NEXT:    vle16.v v8, (a0)
610; ZVFHMIN-NEXT:    lui a1, 8
611; ZVFHMIN-NEXT:    vxor.vx v8, v8, a1
612; ZVFHMIN-NEXT:    vse16.v v8, (a0)
613; ZVFHMIN-NEXT:    ret
614  %a = load <6 x half>, ptr %x
615  %b = fneg <6 x half> %a
616  store <6 x half> %b, ptr %x
617  ret void
618}
619
; fneg on <4 x float>: every RUN configuration expects vfneg.v at e32/m1 on
; the loaded vector, stored back to %x.
620define void @fneg_v4f32(ptr %x) {
621; CHECK-LABEL: fneg_v4f32:
622; CHECK:       # %bb.0:
623; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
624; CHECK-NEXT:    vle32.v v8, (a0)
625; CHECK-NEXT:    vfneg.v v8, v8
626; CHECK-NEXT:    vse32.v v8, (a0)
627; CHECK-NEXT:    ret
628  %a = load <4 x float>, ptr %x
629  %b = fneg <4 x float> %a
630  store <4 x float> %b, ptr %x
631  ret void
632}
633
; fneg on <2 x double>: every RUN configuration expects vfneg.v at e64/m1 on
; the loaded vector, stored back to %x.
634define void @fneg_v2f64(ptr %x) {
635; CHECK-LABEL: fneg_v2f64:
636; CHECK:       # %bb.0:
637; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
638; CHECK-NEXT:    vle64.v v8, (a0)
639; CHECK-NEXT:    vfneg.v v8, v8
640; CHECK-NEXT:    vse64.v v8, (a0)
641; CHECK-NEXT:    ret
642  %a = load <2 x double>, ptr %x
643  %b = fneg <2 x double> %a
644  store <2 x double> %b, ptr %x
645  ret void
646}
647
; llvm.fabs on <8 x bfloat>: expected to be lowered as an integer operation.
; lui a1, 8 followed by addi a1, a1, -1 forms 0x7fff, and vand.vx clears
; bit 15 (the sign bit) of every 16-bit element.
648define void @fabs_v8bf16(ptr %x) {
649; CHECK-LABEL: fabs_v8bf16:
650; CHECK:       # %bb.0:
651; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
652; CHECK-NEXT:    vle16.v v8, (a0)
653; CHECK-NEXT:    lui a1, 8
654; CHECK-NEXT:    addi a1, a1, -1
655; CHECK-NEXT:    vand.vx v8, v8, a1
656; CHECK-NEXT:    vse16.v v8, (a0)
657; CHECK-NEXT:    ret
658  %a = load <8 x bfloat>, ptr %x
659  %b = call <8 x bfloat> @llvm.fabs.v8bf16(<8 x bfloat> %a)
660  store <8 x bfloat> %b, ptr %x
661  ret void
662}
663
; llvm.fabs on <6 x bfloat> (non-power-of-two element count): same AND with
; 0x7fff as the 8-element bfloat case, with an expected AVL of 6.
664define void @fabs_v6bf16(ptr %x) {
665; CHECK-LABEL: fabs_v6bf16:
666; CHECK:       # %bb.0:
667; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
668; CHECK-NEXT:    vle16.v v8, (a0)
669; CHECK-NEXT:    lui a1, 8
670; CHECK-NEXT:    addi a1, a1, -1
671; CHECK-NEXT:    vand.vx v8, v8, a1
672; CHECK-NEXT:    vse16.v v8, (a0)
673; CHECK-NEXT:    ret
674  %a = load <6 x bfloat>, ptr %x
675  %b = call <6 x bfloat> @llvm.fabs.v6bf16(<6 x bfloat> %a)
676  store <6 x bfloat> %b, ptr %x
677  ret void
678}
679
; llvm.fabs on <8 x half>. The ZVFH runs (+zvfh) expect vfabs.v at e16; the
; ZVFHMIN runs (+zvfhmin) expect an integer AND of each element with 0x7fff
; (lui a1, 8; addi a1, a1, -1) to clear the sign bit.
680define void @fabs_v8f16(ptr %x) {
681; ZVFH-LABEL: fabs_v8f16:
682; ZVFH:       # %bb.0:
683; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
684; ZVFH-NEXT:    vle16.v v8, (a0)
685; ZVFH-NEXT:    vfabs.v v8, v8
686; ZVFH-NEXT:    vse16.v v8, (a0)
687; ZVFH-NEXT:    ret
688;
689; ZVFHMIN-LABEL: fabs_v8f16:
690; ZVFHMIN:       # %bb.0:
691; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
692; ZVFHMIN-NEXT:    vle16.v v8, (a0)
693; ZVFHMIN-NEXT:    lui a1, 8
694; ZVFHMIN-NEXT:    addi a1, a1, -1
695; ZVFHMIN-NEXT:    vand.vx v8, v8, a1
696; ZVFHMIN-NEXT:    vse16.v v8, (a0)
697; ZVFHMIN-NEXT:    ret
698  %a = load <8 x half>, ptr %x
699  %b = call <8 x half> @llvm.fabs.v8f16(<8 x half> %a)
700  store <8 x half> %b, ptr %x
701  ret void
702}
703
; llvm.fabs on <6 x half> (non-power-of-two element count). The ZVFH runs
; expect vfabs.v with AVL 6; the ZVFHMIN runs expect the AND with 0x7fff, also
; with AVL 6.
704define void @fabs_v6f16(ptr %x) {
705; ZVFH-LABEL: fabs_v6f16:
706; ZVFH:       # %bb.0:
707; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
708; ZVFH-NEXT:    vle16.v v8, (a0)
709; ZVFH-NEXT:    vfabs.v v8, v8
710; ZVFH-NEXT:    vse16.v v8, (a0)
711; ZVFH-NEXT:    ret
712;
713; ZVFHMIN-LABEL: fabs_v6f16:
714; ZVFHMIN:       # %bb.0:
715; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
716; ZVFHMIN-NEXT:    vle16.v v8, (a0)
717; ZVFHMIN-NEXT:    lui a1, 8
718; ZVFHMIN-NEXT:    addi a1, a1, -1
719; ZVFHMIN-NEXT:    vand.vx v8, v8, a1
720; ZVFHMIN-NEXT:    vse16.v v8, (a0)
721; ZVFHMIN-NEXT:    ret
722  %a = load <6 x half>, ptr %x
723  %b = call <6 x half> @llvm.fabs.v6f16(<6 x half> %a)
724  store <6 x half> %b, ptr %x
725  ret void
726}
727
; llvm.fabs on <4 x float>: every RUN configuration expects vfabs.v at e32/m1
; on the loaded vector, stored back to %x.
728define void @fabs_v4f32(ptr %x) {
729; CHECK-LABEL: fabs_v4f32:
730; CHECK:       # %bb.0:
731; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
732; CHECK-NEXT:    vle32.v v8, (a0)
733; CHECK-NEXT:    vfabs.v v8, v8
734; CHECK-NEXT:    vse32.v v8, (a0)
735; CHECK-NEXT:    ret
736  %a = load <4 x float>, ptr %x
737  %b = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a)
738  store <4 x float> %b, ptr %x
739  ret void
740}
741
; llvm.fabs on <2 x double>: every RUN configuration expects vfabs.v at e64/m1
; on the loaded vector, stored back to %x.
742define void @fabs_v2f64(ptr %x) {
743; CHECK-LABEL: fabs_v2f64:
744; CHECK:       # %bb.0:
745; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
746; CHECK-NEXT:    vle64.v v8, (a0)
747; CHECK-NEXT:    vfabs.v v8, v8
748; CHECK-NEXT:    vse64.v v8, (a0)
749; CHECK-NEXT:    ret
750  %a = load <2 x double>, ptr %x
751  %b = call <2 x double> @llvm.fabs.v2f64(<2 x double> %a)
752  store <2 x double> %b, ptr %x
753  ret void
754}
755
; llvm.copysign on <8 x bfloat> (magnitude from %x, sign from %y): expected to
; be lowered with integer ops. The %y vector is ANDed with 0x8000 (lui a1, 8)
; to keep only the sign bit, the %x vector is ANDed with 0x7fff (a1 - 1) to
; drop its sign bit, and vor.vv merges the two.
756define void @copysign_v8bf16(ptr %x, ptr %y) {
757; CHECK-LABEL: copysign_v8bf16:
758; CHECK:       # %bb.0:
759; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
760; CHECK-NEXT:    vle16.v v8, (a1)
761; CHECK-NEXT:    vle16.v v9, (a0)
762; CHECK-NEXT:    lui a1, 8
763; CHECK-NEXT:    vand.vx v8, v8, a1
764; CHECK-NEXT:    addi a1, a1, -1
765; CHECK-NEXT:    vand.vx v9, v9, a1
766; CHECK-NEXT:    vor.vv v8, v9, v8
767; CHECK-NEXT:    vse16.v v8, (a0)
768; CHECK-NEXT:    ret
769  %a = load <8 x bfloat>, ptr %x
770  %b = load <8 x bfloat>, ptr %y
771  %c = call <8 x bfloat> @llvm.copysign.v8bf16(<8 x bfloat> %a, <8 x bfloat> %b)
772  store <8 x bfloat> %c, ptr %x
773  ret void
774}
775
; llvm.copysign on <6 x bfloat> (non-power-of-two element count): same
; mask-and-merge integer sequence (0x8000 on %y, 0x7fff on %x, vor.vv) as the
; 8-element bfloat case, with an expected AVL of 6.
776define void @copysign_v6bf16(ptr %x, ptr %y) {
777; CHECK-LABEL: copysign_v6bf16:
778; CHECK:       # %bb.0:
779; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
780; CHECK-NEXT:    vle16.v v8, (a1)
781; CHECK-NEXT:    vle16.v v9, (a0)
782; CHECK-NEXT:    lui a1, 8
783; CHECK-NEXT:    vand.vx v8, v8, a1
784; CHECK-NEXT:    addi a1, a1, -1
785; CHECK-NEXT:    vand.vx v9, v9, a1
786; CHECK-NEXT:    vor.vv v8, v9, v8
787; CHECK-NEXT:    vse16.v v8, (a0)
788; CHECK-NEXT:    ret
789  %a = load <6 x bfloat>, ptr %x
790  %b = load <6 x bfloat>, ptr %y
791  %c = call <6 x bfloat> @llvm.copysign.v6bf16(<6 x bfloat> %a, <6 x bfloat> %b)
792  store <6 x bfloat> %c, ptr %x
793  ret void
794}
795
; llvm.copysign on <8 x half> (magnitude from %x, sign from %y). The ZVFH runs
; (+zvfh) expect a single vfsgnj.vv; the ZVFHMIN runs (+zvfhmin) expect the
; integer sequence: %y AND 0x8000, %x AND 0x7fff, then vor.vv.
796define void @copysign_v8f16(ptr %x, ptr %y) {
797; ZVFH-LABEL: copysign_v8f16:
798; ZVFH:       # %bb.0:
799; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
800; ZVFH-NEXT:    vle16.v v8, (a0)
801; ZVFH-NEXT:    vle16.v v9, (a1)
802; ZVFH-NEXT:    vfsgnj.vv v8, v8, v9
803; ZVFH-NEXT:    vse16.v v8, (a0)
804; ZVFH-NEXT:    ret
805;
806; ZVFHMIN-LABEL: copysign_v8f16:
807; ZVFHMIN:       # %bb.0:
808; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
809; ZVFHMIN-NEXT:    vle16.v v8, (a1)
810; ZVFHMIN-NEXT:    vle16.v v9, (a0)
811; ZVFHMIN-NEXT:    lui a1, 8
812; ZVFHMIN-NEXT:    vand.vx v8, v8, a1
813; ZVFHMIN-NEXT:    addi a1, a1, -1
814; ZVFHMIN-NEXT:    vand.vx v9, v9, a1
815; ZVFHMIN-NEXT:    vor.vv v8, v9, v8
816; ZVFHMIN-NEXT:    vse16.v v8, (a0)
817; ZVFHMIN-NEXT:    ret
818  %a = load <8 x half>, ptr %x
819  %b = load <8 x half>, ptr %y
820  %c = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %b)
821  store <8 x half> %c, ptr %x
822  ret void
823}
824
; llvm.copysign on <6 x half> (non-power-of-two element count). The ZVFH runs
; expect vfsgnj.vv with AVL 6; the ZVFHMIN runs expect the mask-and-merge
; integer sequence (0x8000 on %y, 0x7fff on %x, vor.vv), also with AVL 6.
825define void @copysign_v6f16(ptr %x, ptr %y) {
826; ZVFH-LABEL: copysign_v6f16:
827; ZVFH:       # %bb.0:
828; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
829; ZVFH-NEXT:    vle16.v v8, (a0)
830; ZVFH-NEXT:    vle16.v v9, (a1)
831; ZVFH-NEXT:    vfsgnj.vv v8, v8, v9
832; ZVFH-NEXT:    vse16.v v8, (a0)
833; ZVFH-NEXT:    ret
834;
835; ZVFHMIN-LABEL: copysign_v6f16:
836; ZVFHMIN:       # %bb.0:
837; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
838; ZVFHMIN-NEXT:    vle16.v v8, (a1)
839; ZVFHMIN-NEXT:    vle16.v v9, (a0)
840; ZVFHMIN-NEXT:    lui a1, 8
841; ZVFHMIN-NEXT:    vand.vx v8, v8, a1
842; ZVFHMIN-NEXT:    addi a1, a1, -1
843; ZVFHMIN-NEXT:    vand.vx v9, v9, a1
844; ZVFHMIN-NEXT:    vor.vv v8, v9, v8
845; ZVFHMIN-NEXT:    vse16.v v8, (a0)
846; ZVFHMIN-NEXT:    ret
847  %a = load <6 x half>, ptr %x
848  %b = load <6 x half>, ptr %y
849  %c = call <6 x half> @llvm.copysign.v6f16(<6 x half> %a, <6 x half> %b)
850  store <6 x half> %c, ptr %x
851  ret void
852}
853
; llvm.copysign on <4 x float> (magnitude from %x, sign from %y): every RUN
; configuration expects a single e32/m1 vfsgnj.vv, stored back to %x.
854define void @copysign_v4f32(ptr %x, ptr %y) {
855; CHECK-LABEL: copysign_v4f32:
856; CHECK:       # %bb.0:
857; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
858; CHECK-NEXT:    vle32.v v8, (a0)
859; CHECK-NEXT:    vle32.v v9, (a1)
860; CHECK-NEXT:    vfsgnj.vv v8, v8, v9
861; CHECK-NEXT:    vse32.v v8, (a0)
862; CHECK-NEXT:    ret
863  %a = load <4 x float>, ptr %x
864  %b = load <4 x float>, ptr %y
865  %c = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %b)
866  store <4 x float> %c, ptr %x
867  ret void
868}
869
; llvm.copysign on <2 x double> (magnitude from %x, sign from %y): every RUN
; configuration expects a single e64/m1 vfsgnj.vv, stored back to %x.
870define void @copysign_v2f64(ptr %x, ptr %y) {
871; CHECK-LABEL: copysign_v2f64:
872; CHECK:       # %bb.0:
873; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
874; CHECK-NEXT:    vle64.v v8, (a0)
875; CHECK-NEXT:    vle64.v v9, (a1)
876; CHECK-NEXT:    vfsgnj.vv v8, v8, v9
877; CHECK-NEXT:    vse64.v v8, (a0)
878; CHECK-NEXT:    ret
879  %a = load <2 x double>, ptr %x
880  %b = load <2 x double>, ptr %y
881  %c = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %b)
882  store <2 x double> %c, ptr %x
883  ret void
884}
885
; llvm.copysign on <8 x bfloat> where the sign operand is a splat of the scalar
; %y (insertelement into lane 0 + shufflevector with a zeroinitializer mask).
; Expected lowering uses integer ops: fmv.x.w moves the scalar bits from fa0 to
; a1, vmv.v.x splats them, the loaded %x vector is ANDed with 0x7fff, the splat
; is ANDed with 0x8000 (lui a2, 8), and vor.vv merges magnitude and sign.
886define void @copysign_vf_v8bf16(ptr %x, bfloat %y) {
887; CHECK-LABEL: copysign_vf_v8bf16:
888; CHECK:       # %bb.0:
889; CHECK-NEXT:    fmv.x.w a1, fa0
890; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
891; CHECK-NEXT:    vle16.v v8, (a0)
892; CHECK-NEXT:    lui a2, 8
893; CHECK-NEXT:    vmv.v.x v9, a1
894; CHECK-NEXT:    addi a1, a2, -1
895; CHECK-NEXT:    vand.vx v8, v8, a1
896; CHECK-NEXT:    vand.vx v9, v9, a2
897; CHECK-NEXT:    vor.vv v8, v8, v9
898; CHECK-NEXT:    vse16.v v8, (a0)
899; CHECK-NEXT:    ret
900  %a = load <8 x bfloat>, ptr %x
901  %b = insertelement <8 x bfloat> poison, bfloat %y, i32 0
902  %c = shufflevector <8 x bfloat> %b, <8 x bfloat> poison, <8 x i32> zeroinitializer
903  %d = call <8 x bfloat> @llvm.copysign.v8bf16(<8 x bfloat> %a, <8 x bfloat> %c)
904  store <8 x bfloat> %d, ptr %x
905  ret void
906}
907
; llvm.copysign on <6 x bfloat> (non-power-of-two element count) with a
; splatted scalar sign operand: same fmv.x.w / vmv.v.x / mask / vor.vv integer
; sequence as the 8-element bfloat case, with an expected AVL of 6.
908define void @copysign_vf_v6bf16(ptr %x, bfloat %y) {
909; CHECK-LABEL: copysign_vf_v6bf16:
910; CHECK:       # %bb.0:
911; CHECK-NEXT:    fmv.x.w a1, fa0
912; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
913; CHECK-NEXT:    vle16.v v8, (a0)
914; CHECK-NEXT:    lui a2, 8
915; CHECK-NEXT:    vmv.v.x v9, a1
916; CHECK-NEXT:    addi a1, a2, -1
917; CHECK-NEXT:    vand.vx v8, v8, a1
918; CHECK-NEXT:    vand.vx v9, v9, a2
919; CHECK-NEXT:    vor.vv v8, v8, v9
920; CHECK-NEXT:    vse16.v v8, (a0)
921; CHECK-NEXT:    ret
922  %a = load <6 x bfloat>, ptr %x
923  %b = insertelement <6 x bfloat> poison, bfloat %y, i32 0
924  %c = shufflevector <6 x bfloat> %b, <6 x bfloat> poison, <6 x i32> zeroinitializer
925  %d = call <6 x bfloat> @llvm.copysign.v6bf16(<6 x bfloat> %a, <6 x bfloat> %c)
926  store <6 x bfloat> %d, ptr %x
927  ret void
928}
929
; llvm.copysign on <8 x half> with a splatted scalar sign operand %y. The ZVFH
; runs (+zvfh) expect the splat to fold into vfsgnj.vf using fa0 directly; the
; ZVFHMIN runs (+zvfhmin) expect the integer sequence: fmv.x.w + vmv.v.x to
; splat the bits, AND with 0x7fff / 0x8000, then vor.vv.
930define void @copysign_vf_v8f16(ptr %x, half %y) {
931; ZVFH-LABEL: copysign_vf_v8f16:
932; ZVFH:       # %bb.0:
933; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
934; ZVFH-NEXT:    vle16.v v8, (a0)
935; ZVFH-NEXT:    vfsgnj.vf v8, v8, fa0
936; ZVFH-NEXT:    vse16.v v8, (a0)
937; ZVFH-NEXT:    ret
938;
939; ZVFHMIN-LABEL: copysign_vf_v8f16:
940; ZVFHMIN:       # %bb.0:
941; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
942; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
943; ZVFHMIN-NEXT:    vle16.v v8, (a0)
944; ZVFHMIN-NEXT:    lui a2, 8
945; ZVFHMIN-NEXT:    vmv.v.x v9, a1
946; ZVFHMIN-NEXT:    addi a1, a2, -1
947; ZVFHMIN-NEXT:    vand.vx v8, v8, a1
948; ZVFHMIN-NEXT:    vand.vx v9, v9, a2
949; ZVFHMIN-NEXT:    vor.vv v8, v8, v9
950; ZVFHMIN-NEXT:    vse16.v v8, (a0)
951; ZVFHMIN-NEXT:    ret
952  %a = load <8 x half>, ptr %x
953  %b = insertelement <8 x half> poison, half %y, i32 0
954  %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
955  %d = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %c)
956  store <8 x half> %d, ptr %x
957  ret void
958}
959
; copysign(<6 x half> loaded from %x, splat of scalar %y), stored back to %x.
; ZVFH expects vfsgnj.vf with VL=6; ZVFHMIN expects the integer
; and/and/or sign-merge sequence with VL=6.
960define void @copysign_vf_v6f16(ptr %x, half %y) {
961; ZVFH-LABEL: copysign_vf_v6f16:
962; ZVFH:       # %bb.0:
963; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
964; ZVFH-NEXT:    vle16.v v8, (a0)
965; ZVFH-NEXT:    vfsgnj.vf v8, v8, fa0
966; ZVFH-NEXT:    vse16.v v8, (a0)
967; ZVFH-NEXT:    ret
968;
969; ZVFHMIN-LABEL: copysign_vf_v6f16:
970; ZVFHMIN:       # %bb.0:
971; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
972; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
973; ZVFHMIN-NEXT:    vle16.v v8, (a0)
974; ZVFHMIN-NEXT:    lui a2, 8
975; ZVFHMIN-NEXT:    vmv.v.x v9, a1
976; ZVFHMIN-NEXT:    addi a1, a2, -1
977; ZVFHMIN-NEXT:    vand.vx v8, v8, a1
978; ZVFHMIN-NEXT:    vand.vx v9, v9, a2
979; ZVFHMIN-NEXT:    vor.vv v8, v8, v9
980; ZVFHMIN-NEXT:    vse16.v v8, (a0)
981; ZVFHMIN-NEXT:    ret
982  %a = load <6 x half>, ptr %x
983  %b = insertelement <6 x half> poison, half %y, i32 0
984  %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
985  %d = call <6 x half> @llvm.copysign.v6f16(<6 x half> %a, <6 x half> %c)
986  store <6 x half> %d, ptr %x
987  ret void
988}
989
; copysign(<4 x float> loaded from %x, splat of scalar %y), stored back to %x.
; All RUN configurations expect a single vfsgnj.vf at e32.
990define void @copysign_vf_v4f32(ptr %x, float %y) {
991; CHECK-LABEL: copysign_vf_v4f32:
992; CHECK:       # %bb.0:
993; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
994; CHECK-NEXT:    vle32.v v8, (a0)
995; CHECK-NEXT:    vfsgnj.vf v8, v8, fa0
996; CHECK-NEXT:    vse32.v v8, (a0)
997; CHECK-NEXT:    ret
998  %a = load <4 x float>, ptr %x
999  %b = insertelement <4 x float> poison, float %y, i32 0
1000  %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
1001  %d = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %c)
1002  store <4 x float> %d, ptr %x
1003  ret void
1004}
1005
; copysign(<2 x double> loaded from %x, splat of scalar %y), stored back to %x.
; All RUN configurations expect a single vfsgnj.vf at e64.
1006define void @copysign_vf_v2f64(ptr %x, double %y) {
1007; CHECK-LABEL: copysign_vf_v2f64:
1008; CHECK:       # %bb.0:
1009; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
1010; CHECK-NEXT:    vle64.v v8, (a0)
1011; CHECK-NEXT:    vfsgnj.vf v8, v8, fa0
1012; CHECK-NEXT:    vse64.v v8, (a0)
1013; CHECK-NEXT:    ret
1014  %a = load <2 x double>, ptr %x
1015  %b = insertelement <2 x double> poison, double %y, i32 0
1016  %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer
1017  %d = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %c)
1018  store <2 x double> %d, ptr %x
1019  ret void
1020}
1021
; copysign(%a, fneg(%b)) on <8 x bfloat>, result stored back to %x.
; Expected lowering is integer-only: vxor with 0x8000 flips the sign of %b,
; then the magnitude of %a (0x7fff) and that sign bit (0x8000) are OR-ed.
1022define void @copysign_neg_v8bf16(ptr %x, ptr %y) {
1023; CHECK-LABEL: copysign_neg_v8bf16:
1024; CHECK:       # %bb.0:
1025; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
1026; CHECK-NEXT:    vle16.v v8, (a1)
1027; CHECK-NEXT:    vle16.v v9, (a0)
1028; CHECK-NEXT:    lui a1, 8
1029; CHECK-NEXT:    addi a2, a1, -1
1030; CHECK-NEXT:    vxor.vx v8, v8, a1
1031; CHECK-NEXT:    vand.vx v9, v9, a2
1032; CHECK-NEXT:    vand.vx v8, v8, a1
1033; CHECK-NEXT:    vor.vv v8, v9, v8
1034; CHECK-NEXT:    vse16.v v8, (a0)
1035; CHECK-NEXT:    ret
1036  %a = load <8 x bfloat>, ptr %x
1037  %b = load <8 x bfloat>, ptr %y
1038  %c = fneg <8 x bfloat> %b
1039  %d = call <8 x bfloat> @llvm.copysign.v8bf16(<8 x bfloat> %a, <8 x bfloat> %c)
1040  store <8 x bfloat> %d, ptr %x
1041  ret void
1042}
1043
; copysign(%a, fneg(%b)) on <6 x bfloat> (non-power-of-two length), result
; stored back to %x. Expected lowering is the integer xor/and/and/or sequence
; executed with VL=6.
1044define void @copysign_neg_v6bf16(ptr %x, ptr %y) {
1045; CHECK-LABEL: copysign_neg_v6bf16:
1046; CHECK:       # %bb.0:
1047; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
1048; CHECK-NEXT:    vle16.v v8, (a1)
1049; CHECK-NEXT:    vle16.v v9, (a0)
1050; CHECK-NEXT:    lui a1, 8
1051; CHECK-NEXT:    addi a2, a1, -1
1052; CHECK-NEXT:    vxor.vx v8, v8, a1
1053; CHECK-NEXT:    vand.vx v9, v9, a2
1054; CHECK-NEXT:    vand.vx v8, v8, a1
1055; CHECK-NEXT:    vor.vv v8, v9, v8
1056; CHECK-NEXT:    vse16.v v8, (a0)
1057; CHECK-NEXT:    ret
1058  %a = load <6 x bfloat>, ptr %x
1059  %b = load <6 x bfloat>, ptr %y
1060  %c = fneg <6 x bfloat> %b
1061  %d = call <6 x bfloat> @llvm.copysign.v6bf16(<6 x bfloat> %a, <6 x bfloat> %c)
1062  store <6 x bfloat> %d, ptr %x
1063  ret void
1064}
1065
; copysign(%a, fneg(%b)) on <8 x half>, result stored back to %x.
; ZVFH expects the negate folded into one vfsgnjn.vv; ZVFHMIN expects the
; integer xor/and/and/or sequence on the 16-bit lanes.
1066define void @copysign_neg_v8f16(ptr %x, ptr %y) {
1067; ZVFH-LABEL: copysign_neg_v8f16:
1068; ZVFH:       # %bb.0:
1069; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
1070; ZVFH-NEXT:    vle16.v v8, (a0)
1071; ZVFH-NEXT:    vle16.v v9, (a1)
1072; ZVFH-NEXT:    vfsgnjn.vv v8, v8, v9
1073; ZVFH-NEXT:    vse16.v v8, (a0)
1074; ZVFH-NEXT:    ret
1075;
1076; ZVFHMIN-LABEL: copysign_neg_v8f16:
1077; ZVFHMIN:       # %bb.0:
1078; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
1079; ZVFHMIN-NEXT:    vle16.v v8, (a1)
1080; ZVFHMIN-NEXT:    vle16.v v9, (a0)
1081; ZVFHMIN-NEXT:    lui a1, 8
1082; ZVFHMIN-NEXT:    addi a2, a1, -1
1083; ZVFHMIN-NEXT:    vxor.vx v8, v8, a1
1084; ZVFHMIN-NEXT:    vand.vx v9, v9, a2
1085; ZVFHMIN-NEXT:    vand.vx v8, v8, a1
1086; ZVFHMIN-NEXT:    vor.vv v8, v9, v8
1087; ZVFHMIN-NEXT:    vse16.v v8, (a0)
1088; ZVFHMIN-NEXT:    ret
1089  %a = load <8 x half>, ptr %x
1090  %b = load <8 x half>, ptr %y
1091  %c = fneg <8 x half> %b
1092  %d = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %c)
1093  store <8 x half> %d, ptr %x
1094  ret void
1095}
1096
; copysign(%a, fneg(%b)) on <6 x half>, result stored back to %x.
; ZVFH expects vfsgnjn.vv with VL=6; ZVFHMIN expects the integer
; xor/and/and/or sequence with VL=6.
1097define void @copysign_neg_v6f16(ptr %x, ptr %y) {
1098; ZVFH-LABEL: copysign_neg_v6f16:
1099; ZVFH:       # %bb.0:
1100; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
1101; ZVFH-NEXT:    vle16.v v8, (a0)
1102; ZVFH-NEXT:    vle16.v v9, (a1)
1103; ZVFH-NEXT:    vfsgnjn.vv v8, v8, v9
1104; ZVFH-NEXT:    vse16.v v8, (a0)
1105; ZVFH-NEXT:    ret
1106;
1107; ZVFHMIN-LABEL: copysign_neg_v6f16:
1108; ZVFHMIN:       # %bb.0:
1109; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
1110; ZVFHMIN-NEXT:    vle16.v v8, (a1)
1111; ZVFHMIN-NEXT:    vle16.v v9, (a0)
1112; ZVFHMIN-NEXT:    lui a1, 8
1113; ZVFHMIN-NEXT:    addi a2, a1, -1
1114; ZVFHMIN-NEXT:    vxor.vx v8, v8, a1
1115; ZVFHMIN-NEXT:    vand.vx v9, v9, a2
1116; ZVFHMIN-NEXT:    vand.vx v8, v8, a1
1117; ZVFHMIN-NEXT:    vor.vv v8, v9, v8
1118; ZVFHMIN-NEXT:    vse16.v v8, (a0)
1119; ZVFHMIN-NEXT:    ret
1120  %a = load <6 x half>, ptr %x
1121  %b = load <6 x half>, ptr %y
1122  %c = fneg <6 x half> %b
1123  %d = call <6 x half> @llvm.copysign.v6f16(<6 x half> %a, <6 x half> %c)
1124  store <6 x half> %d, ptr %x
1125  ret void
1126}
1127
; copysign(%a, fneg(%b)) on <4 x float>, result stored back to %x.
; All RUN configurations expect the negate folded into vfsgnjn.vv.
1128define void @copysign_neg_v4f32(ptr %x, ptr %y) {
1129; CHECK-LABEL: copysign_neg_v4f32:
1130; CHECK:       # %bb.0:
1131; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
1132; CHECK-NEXT:    vle32.v v8, (a0)
1133; CHECK-NEXT:    vle32.v v9, (a1)
1134; CHECK-NEXT:    vfsgnjn.vv v8, v8, v9
1135; CHECK-NEXT:    vse32.v v8, (a0)
1136; CHECK-NEXT:    ret
1137  %a = load <4 x float>, ptr %x
1138  %b = load <4 x float>, ptr %y
1139  %c = fneg <4 x float> %b
1140  %d = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %c)
1141  store <4 x float> %d, ptr %x
1142  ret void
1143}
1144
; copysign(%a, fneg(%b)) on <2 x double>, result stored back to %x.
; All RUN configurations expect the negate folded into vfsgnjn.vv.
1145define void @copysign_neg_v2f64(ptr %x, ptr %y) {
1146; CHECK-LABEL: copysign_neg_v2f64:
1147; CHECK:       # %bb.0:
1148; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
1149; CHECK-NEXT:    vle64.v v8, (a0)
1150; CHECK-NEXT:    vle64.v v9, (a1)
1151; CHECK-NEXT:    vfsgnjn.vv v8, v8, v9
1152; CHECK-NEXT:    vse64.v v8, (a0)
1153; CHECK-NEXT:    ret
1154  %a = load <2 x double>, ptr %x
1155  %b = load <2 x double>, ptr %y
1156  %c = fneg <2 x double> %b
1157  %d = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %c)
1158  store <2 x double> %d, ptr %x
1159  ret void
1160}
1161
; copysign(<4 x bfloat> %a, fptrunc(fneg(<4 x float> %b))), stored back to %x.
; Expected lowering narrows %b with vfncvtbf16.f.f.w first, then flips and
; extracts the sign bit (0x8000) with integer ops and ORs it onto |%a|.
1162define void @copysign_neg_trunc_v4bf16_v4f32(ptr %x, ptr %y) {
1163; CHECK-LABEL: copysign_neg_trunc_v4bf16_v4f32:
1164; CHECK:       # %bb.0:
1165; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
1166; CHECK-NEXT:    vle16.v v8, (a0)
1167; CHECK-NEXT:    vle32.v v9, (a1)
1168; CHECK-NEXT:    lui a1, 8
1169; CHECK-NEXT:    addi a2, a1, -1
1170; CHECK-NEXT:    vand.vx v8, v8, a2
1171; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v9
1172; CHECK-NEXT:    vxor.vx v9, v10, a1
1173; CHECK-NEXT:    vand.vx v9, v9, a1
1174; CHECK-NEXT:    vor.vv v8, v8, v9
1175; CHECK-NEXT:    vse16.v v8, (a0)
1176; CHECK-NEXT:    ret
1177  %a = load <4 x bfloat>, ptr %x
1178  %b = load <4 x float>, ptr %y
1179  %c = fneg <4 x float> %b
1180  %d = fptrunc <4 x float> %c to <4 x bfloat>
1181  %e = call <4 x bfloat> @llvm.copysign.v4bf16(<4 x bfloat> %a, <4 x bfloat> %d)
1182  store <4 x bfloat> %e, ptr %x
1183  ret void
1184}
1185
; copysign(<3 x bfloat> %a, fptrunc(fneg(<3 x float> %b))), stored back to %x.
; Odd-length variant: the expected sequence (narrowing convert followed by
; integer sign manipulation) runs entirely with VL=3.
1186define void @copysign_neg_trunc_v3bf16_v3f32(ptr %x, ptr %y) {
1187; CHECK-LABEL: copysign_neg_trunc_v3bf16_v3f32:
1188; CHECK:       # %bb.0:
1189; CHECK-NEXT:    vsetivli zero, 3, e16, mf2, ta, ma
1190; CHECK-NEXT:    vle16.v v8, (a0)
1191; CHECK-NEXT:    vle32.v v9, (a1)
1192; CHECK-NEXT:    lui a1, 8
1193; CHECK-NEXT:    addi a2, a1, -1
1194; CHECK-NEXT:    vand.vx v8, v8, a2
1195; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v9
1196; CHECK-NEXT:    vxor.vx v9, v10, a1
1197; CHECK-NEXT:    vand.vx v9, v9, a1
1198; CHECK-NEXT:    vor.vv v8, v8, v9
1199; CHECK-NEXT:    vse16.v v8, (a0)
1200; CHECK-NEXT:    ret
1201  %a = load <3 x bfloat>, ptr %x
1202  %b = load <3 x float>, ptr %y
1203  %c = fneg <3 x float> %b
1204  %d = fptrunc <3 x float> %c to <3 x bfloat>
1205  %e = call <3 x bfloat> @llvm.copysign.v3bf16(<3 x bfloat> %a, <3 x bfloat> %d)
1206  store <3 x bfloat> %e, ptr %x
1207  ret void
1208}
1209
; copysign(<4 x half> %a, fptrunc(fneg(<4 x float> %b))), stored back to %x.
; ZVFH expects vfncvt.f.f.w followed by vfsgnjn.vv; ZVFHMIN expects
; vfncvt.f.f.w followed by the integer xor/and/or sign merge.
1210define void @copysign_neg_trunc_v4f16_v4f32(ptr %x, ptr %y) {
1211; ZVFH-LABEL: copysign_neg_trunc_v4f16_v4f32:
1212; ZVFH:       # %bb.0:
1213; ZVFH-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
1214; ZVFH-NEXT:    vle32.v v8, (a1)
1215; ZVFH-NEXT:    vle16.v v9, (a0)
1216; ZVFH-NEXT:    vfncvt.f.f.w v10, v8
1217; ZVFH-NEXT:    vfsgnjn.vv v8, v9, v10
1218; ZVFH-NEXT:    vse16.v v8, (a0)
1219; ZVFH-NEXT:    ret
1220;
1221; ZVFHMIN-LABEL: copysign_neg_trunc_v4f16_v4f32:
1222; ZVFHMIN:       # %bb.0:
1223; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
1224; ZVFHMIN-NEXT:    vle16.v v8, (a0)
1225; ZVFHMIN-NEXT:    vle32.v v9, (a1)
1226; ZVFHMIN-NEXT:    lui a1, 8
1227; ZVFHMIN-NEXT:    addi a2, a1, -1
1228; ZVFHMIN-NEXT:    vand.vx v8, v8, a2
1229; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v9
1230; ZVFHMIN-NEXT:    vxor.vx v9, v10, a1
1231; ZVFHMIN-NEXT:    vand.vx v9, v9, a1
1232; ZVFHMIN-NEXT:    vor.vv v8, v8, v9
1233; ZVFHMIN-NEXT:    vse16.v v8, (a0)
1234; ZVFHMIN-NEXT:    ret
1235  %a = load <4 x half>, ptr %x
1236  %b = load <4 x float>, ptr %y
1237  %c = fneg <4 x float> %b
1238  %d = fptrunc <4 x float> %c to <4 x half>
1239  %e = call <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %d)
1240  store <4 x half> %e, ptr %x
1241  ret void
1242}
1243
; copysign(<3 x half> %a, fptrunc(fneg(<3 x float> %b))), stored back to %x.
; Odd-length variant with VL=3: ZVFH expects vfncvt.f.f.w + vfsgnjn.vv;
; ZVFHMIN expects vfncvt.f.f.w + integer xor/and/or sign merge.
1244define void @copysign_neg_trunc_v3f16_v3f32(ptr %x, ptr %y) {
1245; ZVFH-LABEL: copysign_neg_trunc_v3f16_v3f32:
1246; ZVFH:       # %bb.0:
1247; ZVFH-NEXT:    vsetivli zero, 3, e16, mf2, ta, ma
1248; ZVFH-NEXT:    vle32.v v8, (a1)
1249; ZVFH-NEXT:    vle16.v v9, (a0)
1250; ZVFH-NEXT:    vfncvt.f.f.w v10, v8
1251; ZVFH-NEXT:    vfsgnjn.vv v8, v9, v10
1252; ZVFH-NEXT:    vse16.v v8, (a0)
1253; ZVFH-NEXT:    ret
1254;
1255; ZVFHMIN-LABEL: copysign_neg_trunc_v3f16_v3f32:
1256; ZVFHMIN:       # %bb.0:
1257; ZVFHMIN-NEXT:    vsetivli zero, 3, e16, mf2, ta, ma
1258; ZVFHMIN-NEXT:    vle16.v v8, (a0)
1259; ZVFHMIN-NEXT:    vle32.v v9, (a1)
1260; ZVFHMIN-NEXT:    lui a1, 8
1261; ZVFHMIN-NEXT:    addi a2, a1, -1
1262; ZVFHMIN-NEXT:    vand.vx v8, v8, a2
1263; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v9
1264; ZVFHMIN-NEXT:    vxor.vx v9, v10, a1
1265; ZVFHMIN-NEXT:    vand.vx v9, v9, a1
1266; ZVFHMIN-NEXT:    vor.vv v8, v8, v9
1267; ZVFHMIN-NEXT:    vse16.v v8, (a0)
1268; ZVFHMIN-NEXT:    ret
1269  %a = load <3 x half>, ptr %x
1270  %b = load <3 x float>, ptr %y
1271  %c = fneg <3 x float> %b
1272  %d = fptrunc <3 x float> %c to <3 x half>
1273  %e = call <3 x half> @llvm.copysign.v3f16(<3 x half> %a, <3 x half> %d)
1274  store <3 x half> %e, ptr %x
1275  ret void
1276}
1277
; copysign(<2 x double> %a, fpext(fneg(<2 x float> %b))), stored back to %x.
; Expected lowering widens %b with vfwcvt.f.f.v, then uses vfsgnjn.vv at e64
; so the negate is absorbed by the sign-injection instruction.
1278define void @copysign_neg_ext_v2f64_v2f32(ptr %x, ptr %y) {
1279; CHECK-LABEL: copysign_neg_ext_v2f64_v2f32:
1280; CHECK:       # %bb.0:
1281; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
1282; CHECK-NEXT:    vle32.v v8, (a1)
1283; CHECK-NEXT:    vle64.v v9, (a0)
1284; CHECK-NEXT:    vfwcvt.f.f.v v10, v8
1285; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
1286; CHECK-NEXT:    vfsgnjn.vv v8, v9, v10
1287; CHECK-NEXT:    vse64.v v8, (a0)
1288; CHECK-NEXT:    ret
1289  %a = load <2 x double>, ptr %x
1290  %b = load <2 x float>, ptr %y
1291  %c = fneg <2 x float> %b
1292  %d = fpext <2 x float> %c to <2 x double>
1293  %e = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %d)
1294  store <2 x double> %e, ptr %x
1295  ret void
1296}
1297
; llvm.sqrt on <8 x bfloat>, in place at %x.
; Expected lowering widens to e32 with vfwcvtbf16.f.f.v, runs vfsqrt.v at
; e32/m2, and narrows back with vfncvtbf16.f.f.w.
1298define void @sqrt_v8bf16(ptr %x) {
1299; CHECK-LABEL: sqrt_v8bf16:
1300; CHECK:       # %bb.0:
1301; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
1302; CHECK-NEXT:    vle16.v v8, (a0)
1303; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
1304; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
1305; CHECK-NEXT:    vfsqrt.v v8, v10
1306; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
1307; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
1308; CHECK-NEXT:    vse16.v v10, (a0)
1309; CHECK-NEXT:    ret
1310  %a = load <8 x bfloat>, ptr %x
1311  %b = call <8 x bfloat> @llvm.sqrt.v8bf16(<8 x bfloat> %a)
1312  store <8 x bfloat> %b, ptr %x
1313  ret void
1314}
1315
; llvm.sqrt on <6 x bfloat> (non-power-of-two length), in place at %x.
; Expected output loads, narrows and stores with VL=6, while the widening
; convert and vfsqrt.v run with VL=8.
1316define void @sqrt_v6bf16(ptr %x) {
1317; CHECK-LABEL: sqrt_v6bf16:
1318; CHECK:       # %bb.0:
1319; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
1320; CHECK-NEXT:    vle16.v v8, (a0)
1321; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
1322; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
1323; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
1324; CHECK-NEXT:    vfsqrt.v v8, v10
1325; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
1326; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
1327; CHECK-NEXT:    vse16.v v10, (a0)
1328; CHECK-NEXT:    ret
1329  %a = load <6 x bfloat>, ptr %x
1330  %b = call <6 x bfloat> @llvm.sqrt.v6bf16(<6 x bfloat> %a)
1331  store <6 x bfloat> %b, ptr %x
1332  ret void
1333}
1334
; llvm.sqrt on <8 x half>, in place at %x.
; ZVFH expects vfsqrt.v directly at e16; ZVFHMIN expects widen to e32
; (vfwcvt.f.f.v), vfsqrt.v, then narrow (vfncvt.f.f.w).
1335define void @sqrt_v8f16(ptr %x) {
1336; ZVFH-LABEL: sqrt_v8f16:
1337; ZVFH:       # %bb.0:
1338; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
1339; ZVFH-NEXT:    vle16.v v8, (a0)
1340; ZVFH-NEXT:    vfsqrt.v v8, v8
1341; ZVFH-NEXT:    vse16.v v8, (a0)
1342; ZVFH-NEXT:    ret
1343;
1344; ZVFHMIN-LABEL: sqrt_v8f16:
1345; ZVFHMIN:       # %bb.0:
1346; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
1347; ZVFHMIN-NEXT:    vle16.v v8, (a0)
1348; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
1349; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
1350; ZVFHMIN-NEXT:    vfsqrt.v v8, v10
1351; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
1352; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
1353; ZVFHMIN-NEXT:    vse16.v v10, (a0)
1354; ZVFHMIN-NEXT:    ret
1355  %a = load <8 x half>, ptr %x
1356  %b = call <8 x half> @llvm.sqrt.v8f16(<8 x half> %a)
1357  store <8 x half> %b, ptr %x
1358  ret void
1359}
1360
; llvm.sqrt on <6 x half> (non-power-of-two length), in place at %x.
; ZVFH expects vfsqrt.v at e16 with VL=6. ZVFHMIN expects the promoted
; sequence, with the widening convert and vfsqrt.v at VL=8 and the
; load/narrow/store at VL=6.
1361define void @sqrt_v6f16(ptr %x) {
1362; ZVFH-LABEL: sqrt_v6f16:
1363; ZVFH:       # %bb.0:
1364; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
1365; ZVFH-NEXT:    vle16.v v8, (a0)
1366; ZVFH-NEXT:    vfsqrt.v v8, v8
1367; ZVFH-NEXT:    vse16.v v8, (a0)
1368; ZVFH-NEXT:    ret
1369;
1370; ZVFHMIN-LABEL: sqrt_v6f16:
1371; ZVFHMIN:       # %bb.0:
1372; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
1373; ZVFHMIN-NEXT:    vle16.v v8, (a0)
1374; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
1375; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
1376; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
1377; ZVFHMIN-NEXT:    vfsqrt.v v8, v10
1378; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
1379; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
1380; ZVFHMIN-NEXT:    vse16.v v10, (a0)
1381; ZVFHMIN-NEXT:    ret
1382  %a = load <6 x half>, ptr %x
1383  %b = call <6 x half> @llvm.sqrt.v6f16(<6 x half> %a)
1384  store <6 x half> %b, ptr %x
1385  ret void
1386}
1387
; llvm.sqrt on <4 x float>, in place at %x.
; All RUN configurations expect a single vfsqrt.v at e32.
1388define void @sqrt_v4f32(ptr %x) {
1389; CHECK-LABEL: sqrt_v4f32:
1390; CHECK:       # %bb.0:
1391; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
1392; CHECK-NEXT:    vle32.v v8, (a0)
1393; CHECK-NEXT:    vfsqrt.v v8, v8
1394; CHECK-NEXT:    vse32.v v8, (a0)
1395; CHECK-NEXT:    ret
1396  %a = load <4 x float>, ptr %x
1397  %b = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
1398  store <4 x float> %b, ptr %x
1399  ret void
1400}
1401
; llvm.sqrt on <2 x double>, in place at %x.
; All RUN configurations expect a single vfsqrt.v at e64.
1402define void @sqrt_v2f64(ptr %x) {
1403; CHECK-LABEL: sqrt_v2f64:
1404; CHECK:       # %bb.0:
1405; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
1406; CHECK-NEXT:    vle64.v v8, (a0)
1407; CHECK-NEXT:    vfsqrt.v v8, v8
1408; CHECK-NEXT:    vse64.v v8, (a0)
1409; CHECK-NEXT:    ret
1410  %a = load <2 x double>, ptr %x
1411  %b = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %a)
1412  store <2 x double> %b, ptr %x
1413  ret void
1414}
1415
; llvm.fma(%a, %b, %c) on <8 x bfloat>, result stored back to %x.
; Expected lowering widens all three operands with vfwcvtbf16.f.f.v, performs
; vfmadd.vv at e32/m2, and narrows the result with vfncvtbf16.f.f.w.
1416define void @fma_v8bf16(ptr %x, ptr %y, ptr %z) {
1417; CHECK-LABEL: fma_v8bf16:
1418; CHECK:       # %bb.0:
1419; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
1420; CHECK-NEXT:    vle16.v v8, (a2)
1421; CHECK-NEXT:    vle16.v v9, (a0)
1422; CHECK-NEXT:    vle16.v v10, (a1)
1423; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
1424; CHECK-NEXT:    vfwcvtbf16.f.f.v v14, v9
1425; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
1426; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
1427; CHECK-NEXT:    vfmadd.vv v8, v14, v12
1428; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
1429; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
1430; CHECK-NEXT:    vse16.v v10, (a0)
1431; CHECK-NEXT:    ret
1432  %a = load <8 x bfloat>, ptr %x
1433  %b = load <8 x bfloat>, ptr %y
1434  %c = load <8 x bfloat>, ptr %z
1435  %d = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %c)
1436  store <8 x bfloat> %d, ptr %x
1437  ret void
1438}
1439
; llvm.fma(%a, %b, %c) on <6 x bfloat> (non-power-of-two length), result
; stored back to %x. Expected output loads, narrows and stores with VL=6,
; while the widening converts and vfmadd.vv run with VL=8.
1440define void @fma_v6bf16(ptr %x, ptr %y, ptr %z) {
1441; CHECK-LABEL: fma_v6bf16:
1442; CHECK:       # %bb.0:
1443; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
1444; CHECK-NEXT:    vle16.v v8, (a2)
1445; CHECK-NEXT:    vle16.v v9, (a0)
1446; CHECK-NEXT:    vle16.v v10, (a1)
1447; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
1448; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
1449; CHECK-NEXT:    vfwcvtbf16.f.f.v v14, v9
1450; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
1451; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
1452; CHECK-NEXT:    vfmadd.vv v8, v14, v12
1453; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
1454; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
1455; CHECK-NEXT:    vse16.v v10, (a0)
1456; CHECK-NEXT:    ret
1457  %a = load <6 x bfloat>, ptr %x
1458  %b = load <6 x bfloat>, ptr %y
1459  %c = load <6 x bfloat>, ptr %z
1460  %d = call <6 x bfloat> @llvm.fma.v6bf16(<6 x bfloat> %a, <6 x bfloat> %b, <6 x bfloat> %c)
1461  store <6 x bfloat> %d, ptr %x
1462  ret void
1463}
1464
; llvm.fma(%a, %b, %c) on <8 x half>, result stored back to %x.
; ZVFH expects a single vfmacc.vv at e16; ZVFHMIN expects all operands widened
; to e32, vfmadd.vv, then a narrowing convert.
1465define void @fma_v8f16(ptr %x, ptr %y, ptr %z) {
1466; ZVFH-LABEL: fma_v8f16:
1467; ZVFH:       # %bb.0:
1468; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
1469; ZVFH-NEXT:    vle16.v v8, (a0)
1470; ZVFH-NEXT:    vle16.v v9, (a1)
1471; ZVFH-NEXT:    vle16.v v10, (a2)
1472; ZVFH-NEXT:    vfmacc.vv v10, v8, v9
1473; ZVFH-NEXT:    vse16.v v10, (a0)
1474; ZVFH-NEXT:    ret
1475;
1476; ZVFHMIN-LABEL: fma_v8f16:
1477; ZVFHMIN:       # %bb.0:
1478; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
1479; ZVFHMIN-NEXT:    vle16.v v8, (a2)
1480; ZVFHMIN-NEXT:    vle16.v v9, (a0)
1481; ZVFHMIN-NEXT:    vle16.v v10, (a1)
1482; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
1483; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
1484; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
1485; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
1486; ZVFHMIN-NEXT:    vfmadd.vv v8, v14, v12
1487; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
1488; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
1489; ZVFHMIN-NEXT:    vse16.v v10, (a0)
1490; ZVFHMIN-NEXT:    ret
1491  %a = load <8 x half>, ptr %x
1492  %b = load <8 x half>, ptr %y
1493  %c = load <8 x half>, ptr %z
1494  %d = call <8 x half> @llvm.fma.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c)
1495  store <8 x half> %d, ptr %x
1496  ret void
1497}
1498
; llvm.fma(%a, %b, %c) on <6 x half> (non-power-of-two length), result stored
; back to %x. ZVFH expects vfmacc.vv with VL=6. ZVFHMIN expects the promoted
; sequence, with the widening converts and vfmadd.vv at VL=8 and the
; load/narrow/store at VL=6.
1499define void @fma_v6f16(ptr %x, ptr %y, ptr %z) {
1500; ZVFH-LABEL: fma_v6f16:
1501; ZVFH:       # %bb.0:
1502; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
1503; ZVFH-NEXT:    vle16.v v8, (a0)
1504; ZVFH-NEXT:    vle16.v v9, (a1)
1505; ZVFH-NEXT:    vle16.v v10, (a2)
1506; ZVFH-NEXT:    vfmacc.vv v10, v8, v9
1507; ZVFH-NEXT:    vse16.v v10, (a0)
1508; ZVFH-NEXT:    ret
1509;
1510; ZVFHMIN-LABEL: fma_v6f16:
1511; ZVFHMIN:       # %bb.0:
1512; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
1513; ZVFHMIN-NEXT:    vle16.v v8, (a2)
1514; ZVFHMIN-NEXT:    vle16.v v9, (a0)
1515; ZVFHMIN-NEXT:    vle16.v v10, (a1)
1516; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
1517; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
1518; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
1519; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
1520; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
1521; ZVFHMIN-NEXT:    vfmadd.vv v8, v14, v12
1522; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
1523; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
1524; ZVFHMIN-NEXT:    vse16.v v10, (a0)
1525; ZVFHMIN-NEXT:    ret
1526  %a = load <6 x half>, ptr %x
1527  %b = load <6 x half>, ptr %y
1528  %c = load <6 x half>, ptr %z
1529  %d = call <6 x half> @llvm.fma.v6f16(<6 x half> %a, <6 x half> %b, <6 x half> %c)
1530  store <6 x half> %d, ptr %x
1531  ret void
1532}
1533
; llvm.fma(%a, %b, %c) on <4 x float>, result stored back to %x.
; All RUN configurations expect a single vfmacc.vv accumulating into the
; register that holds %c.
1534define void @fma_v4f32(ptr %x, ptr %y, ptr %z) {
1535; CHECK-LABEL: fma_v4f32:
1536; CHECK:       # %bb.0:
1537; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
1538; CHECK-NEXT:    vle32.v v8, (a0)
1539; CHECK-NEXT:    vle32.v v9, (a1)
1540; CHECK-NEXT:    vle32.v v10, (a2)
1541; CHECK-NEXT:    vfmacc.vv v10, v8, v9
1542; CHECK-NEXT:    vse32.v v10, (a0)
1543; CHECK-NEXT:    ret
1544  %a = load <4 x float>, ptr %x
1545  %b = load <4 x float>, ptr %y
1546  %c = load <4 x float>, ptr %z
1547  %d = call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
1548  store <4 x float> %d, ptr %x
1549  ret void
1550}
1551
; llvm.fma(%a, %b, %c) on <2 x double>, result stored back to %x.
; All RUN configurations expect a single vfmacc.vv accumulating into the
; register that holds %c.
1552define void @fma_v2f64(ptr %x, ptr %y, ptr %z) {
1553; CHECK-LABEL: fma_v2f64:
1554; CHECK:       # %bb.0:
1555; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
1556; CHECK-NEXT:    vle64.v v8, (a0)
1557; CHECK-NEXT:    vle64.v v9, (a1)
1558; CHECK-NEXT:    vle64.v v10, (a2)
1559; CHECK-NEXT:    vfmacc.vv v10, v8, v9
1560; CHECK-NEXT:    vse64.v v10, (a0)
1561; CHECK-NEXT:    ret
1562  %a = load <2 x double>, ptr %x
1563  %b = load <2 x double>, ptr %y
1564  %c = load <2 x double>, ptr %z
1565  %d = call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c)
1566  store <2 x double> %d, ptr %x
1567  ret void
1568}
1569
; llvm.fma(%a, %b, fneg(%c)) on <8 x bfloat>, result stored back to %x.
; Expected lowering negates %c on the 16-bit lanes (vxor with 0x8000) before
; widening, then performs vfmadd.vv at e32 and narrows the result.
1570define void @fmsub_v8bf16(ptr %x, ptr %y, ptr %z) {
1571; CHECK-LABEL: fmsub_v8bf16:
1572; CHECK:       # %bb.0:
1573; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
1574; CHECK-NEXT:    vle16.v v8, (a2)
1575; CHECK-NEXT:    vle16.v v9, (a0)
1576; CHECK-NEXT:    vle16.v v10, (a1)
1577; CHECK-NEXT:    lui a1, 8
1578; CHECK-NEXT:    vxor.vx v8, v8, a1
1579; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
1580; CHECK-NEXT:    vfwcvtbf16.f.f.v v14, v8
1581; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
1582; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
1583; CHECK-NEXT:    vfmadd.vv v8, v12, v14
1584; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
1585; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
1586; CHECK-NEXT:    vse16.v v10, (a0)
1587; CHECK-NEXT:    ret
1588  %a = load <8 x bfloat>, ptr %x
1589  %b = load <8 x bfloat>, ptr %y
1590  %c = load <8 x bfloat>, ptr %z
1591  %neg = fneg <8 x bfloat> %c
1592  %d = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %neg)
1593  store <8 x bfloat> %d, ptr %x
1594  ret void
1595}
1596
; llvm.fma(%a, %b, fneg(%c)) on <6 x bfloat> (non-power-of-two length), result
; stored back to %x. Expected output loads, narrows and stores with VL=6; the
; sign-flip vxor, widening converts and vfmadd.vv run with VL=8.
1597define void @fmsub_v6bf16(ptr %x, ptr %y, ptr %z) {
1598; CHECK-LABEL: fmsub_v6bf16:
1599; CHECK:       # %bb.0:
1600; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
1601; CHECK-NEXT:    vle16.v v8, (a2)
1602; CHECK-NEXT:    vle16.v v9, (a0)
1603; CHECK-NEXT:    vle16.v v10, (a1)
1604; CHECK-NEXT:    lui a1, 8
1605; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
1606; CHECK-NEXT:    vxor.vx v8, v8, a1
1607; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
1608; CHECK-NEXT:    vfwcvtbf16.f.f.v v14, v8
1609; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
1610; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
1611; CHECK-NEXT:    vfmadd.vv v8, v12, v14
1612; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
1613; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
1614; CHECK-NEXT:    vse16.v v10, (a0)
1615; CHECK-NEXT:    ret
1616  %a = load <6 x bfloat>, ptr %x
1617  %b = load <6 x bfloat>, ptr %y
1618  %c = load <6 x bfloat>, ptr %z
1619  %neg = fneg <6 x bfloat> %c
1620  %d = call <6 x bfloat> @llvm.fma.v6bf16(<6 x bfloat> %a, <6 x bfloat> %b, <6 x bfloat> %neg)
1621  store <6 x bfloat> %d, ptr %x
1622  ret void
1623}
1624
; llvm.fma(%a, %b, fneg(%c)) on <8 x half>, result stored back to %x.
; ZVFH expects the negated addend folded into vfmsac.vv; ZVFHMIN expects an
; integer sign flip (vxor 0x8000), widening converts, vfmadd.vv at e32, and a
; narrowing convert.
1625define void @fmsub_v8f16(ptr %x, ptr %y, ptr %z) {
1626; ZVFH-LABEL: fmsub_v8f16:
1627; ZVFH:       # %bb.0:
1628; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
1629; ZVFH-NEXT:    vle16.v v8, (a0)
1630; ZVFH-NEXT:    vle16.v v9, (a1)
1631; ZVFH-NEXT:    vle16.v v10, (a2)
1632; ZVFH-NEXT:    vfmsac.vv v10, v8, v9
1633; ZVFH-NEXT:    vse16.v v10, (a0)
1634; ZVFH-NEXT:    ret
1635;
1636; ZVFHMIN-LABEL: fmsub_v8f16:
1637; ZVFHMIN:       # %bb.0:
1638; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
1639; ZVFHMIN-NEXT:    vle16.v v8, (a2)
1640; ZVFHMIN-NEXT:    vle16.v v9, (a0)
1641; ZVFHMIN-NEXT:    vle16.v v10, (a1)
1642; ZVFHMIN-NEXT:    lui a1, 8
1643; ZVFHMIN-NEXT:    vxor.vx v8, v8, a1
1644; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
1645; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8
1646; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
1647; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
1648; ZVFHMIN-NEXT:    vfmadd.vv v8, v12, v14
1649; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
1650; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
1651; ZVFHMIN-NEXT:    vse16.v v10, (a0)
1652; ZVFHMIN-NEXT:    ret
1653  %a = load <8 x half>, ptr %x
1654  %b = load <8 x half>, ptr %y
1655  %c = load <8 x half>, ptr %z
1656  %neg = fneg <8 x half> %c
1657  %d = call <8 x half> @llvm.fma.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %neg)
1658  store <8 x half> %d, ptr %x
1659  ret void
1660}
1661
; llvm.fma(%a, %b, fneg(%c)) on <6 x half> (non-power-of-two length), result
; stored back to %x. ZVFH expects vfmsac.vv with VL=6. ZVFHMIN expects the
; promoted sequence, with the sign-flip vxor, widening converts and vfmadd.vv
; at VL=8 and the load/narrow/store at VL=6.
1662define void @fmsub_v6f16(ptr %x, ptr %y, ptr %z) {
1663; ZVFH-LABEL: fmsub_v6f16:
1664; ZVFH:       # %bb.0:
1665; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
1666; ZVFH-NEXT:    vle16.v v8, (a0)
1667; ZVFH-NEXT:    vle16.v v9, (a1)
1668; ZVFH-NEXT:    vle16.v v10, (a2)
1669; ZVFH-NEXT:    vfmsac.vv v10, v8, v9
1670; ZVFH-NEXT:    vse16.v v10, (a0)
1671; ZVFH-NEXT:    ret
1672;
1673; ZVFHMIN-LABEL: fmsub_v6f16:
1674; ZVFHMIN:       # %bb.0:
1675; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
1676; ZVFHMIN-NEXT:    vle16.v v8, (a2)
1677; ZVFHMIN-NEXT:    vle16.v v9, (a0)
1678; ZVFHMIN-NEXT:    vle16.v v10, (a1)
1679; ZVFHMIN-NEXT:    lui a1, 8
1680; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
1681; ZVFHMIN-NEXT:    vxor.vx v8, v8, a1
1682; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
1683; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8
1684; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
1685; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
1686; ZVFHMIN-NEXT:    vfmadd.vv v8, v12, v14
1687; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
1688; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
1689; ZVFHMIN-NEXT:    vse16.v v10, (a0)
1690; ZVFHMIN-NEXT:    ret
1691  %a = load <6 x half>, ptr %x
1692  %b = load <6 x half>, ptr %y
1693  %c = load <6 x half>, ptr %z
1694  %neg = fneg <6 x half> %c
1695  %d = call <6 x half> @llvm.fma.v6f16(<6 x half> %a, <6 x half> %b, <6 x half> %neg)
1696  store <6 x half> %d, ptr %x
1697  ret void
1698}
1699
; llvm.fma(fneg(%a), %b, %c) on <4 x float>, result stored back to %x.
; All RUN configurations expect the negated multiplicand folded into
; vfnmsac.vv.
1700define void @fnmsub_v4f32(ptr %x, ptr %y, ptr %z) {
1701; CHECK-LABEL: fnmsub_v4f32:
1702; CHECK:       # %bb.0:
1703; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
1704; CHECK-NEXT:    vle32.v v8, (a0)
1705; CHECK-NEXT:    vle32.v v9, (a1)
1706; CHECK-NEXT:    vle32.v v10, (a2)
1707; CHECK-NEXT:    vfnmsac.vv v10, v8, v9
1708; CHECK-NEXT:    vse32.v v10, (a0)
1709; CHECK-NEXT:    ret
1710  %a = load <4 x float>, ptr %x
1711  %b = load <4 x float>, ptr %y
1712  %c = load <4 x float>, ptr %z
1713  %neg = fneg <4 x float> %a
1714  %d = call <4 x float> @llvm.fma.v4f32(<4 x float> %neg, <4 x float> %b, <4 x float> %c)
1715  store <4 x float> %d, ptr %x
1716  ret void
1717}
1718
; llvm.fma(%a, fneg(%b), fneg(%c)) on <2 x double>, result stored back to %x.
; All RUN configurations expect both negations folded into vfnmacc.vv.
1719define void @fnmadd_v2f64(ptr %x, ptr %y, ptr %z) {
1720; CHECK-LABEL: fnmadd_v2f64:
1721; CHECK:       # %bb.0:
1722; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
1723; CHECK-NEXT:    vle64.v v8, (a0)
1724; CHECK-NEXT:    vle64.v v9, (a1)
1725; CHECK-NEXT:    vle64.v v10, (a2)
1726; CHECK-NEXT:    vfnmacc.vv v10, v8, v9
1727; CHECK-NEXT:    vse64.v v10, (a0)
1728; CHECK-NEXT:    ret
1729  %a = load <2 x double>, ptr %x
1730  %b = load <2 x double>, ptr %y
1731  %c = load <2 x double>, ptr %z
1732  %neg = fneg <2 x double> %b
1733  %neg2 = fneg <2 x double> %c
1734  %d = call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %neg, <2 x double> %neg2)
1735  store <2 x double> %d, ptr %x
1736  ret void
1737}
1738
; fadd on <16 x bfloat> (LMUL=2 at e16), result stored back to %x.
; Expected lowering widens both operands to e32/m4 with vfwcvtbf16.f.f.v,
; adds with vfadd.vv, and narrows back with vfncvtbf16.f.f.w.
1739define void @fadd_v16bf16(ptr %x, ptr %y) {
1740; CHECK-LABEL: fadd_v16bf16:
1741; CHECK:       # %bb.0:
1742; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
1743; CHECK-NEXT:    vle16.v v8, (a1)
1744; CHECK-NEXT:    vle16.v v10, (a0)
1745; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
1746; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
1747; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
1748; CHECK-NEXT:    vfadd.vv v8, v16, v12
1749; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
1750; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v8
1751; CHECK-NEXT:    vse16.v v12, (a0)
1752; CHECK-NEXT:    ret
1753  %a = load <16 x bfloat>, ptr %x
1754  %b = load <16 x bfloat>, ptr %y
1755  %c = fadd <16 x bfloat> %a, %b
1756  store <16 x bfloat> %c, ptr %x
1757  ret void
1758}
1759
; fadd on <16 x half> (LMUL=2 at e16), result stored back to %x.
; ZVFH expects vfadd.vv directly at e16/m2; ZVFHMIN expects the operands
; widened to e32/m4, vfadd.vv, then a narrowing convert.
1760define void @fadd_v16f16(ptr %x, ptr %y) {
1761; ZVFH-LABEL: fadd_v16f16:
1762; ZVFH:       # %bb.0:
1763; ZVFH-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
1764; ZVFH-NEXT:    vle16.v v8, (a0)
1765; ZVFH-NEXT:    vle16.v v10, (a1)
1766; ZVFH-NEXT:    vfadd.vv v8, v8, v10
1767; ZVFH-NEXT:    vse16.v v8, (a0)
1768; ZVFH-NEXT:    ret
1769;
1770; ZVFHMIN-LABEL: fadd_v16f16:
1771; ZVFHMIN:       # %bb.0:
1772; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
1773; ZVFHMIN-NEXT:    vle16.v v8, (a1)
1774; ZVFHMIN-NEXT:    vle16.v v10, (a0)
1775; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
1776; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
1777; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
1778; ZVFHMIN-NEXT:    vfadd.vv v8, v16, v12
1779; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
1780; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v8
1781; ZVFHMIN-NEXT:    vse16.v v12, (a0)
1782; ZVFHMIN-NEXT:    ret
1783  %a = load <16 x half>, ptr %x
1784  %b = load <16 x half>, ptr %y
1785  %c = fadd <16 x half> %a, %b
1786  store <16 x half> %c, ptr %x
1787  ret void
1788}
1789
; fadd on <8 x float>, result stored back to %x.
; All RUN configurations expect a single vfadd.vv at e32/m2.
1790define void @fadd_v8f32(ptr %x, ptr %y) {
1791; CHECK-LABEL: fadd_v8f32:
1792; CHECK:       # %bb.0:
1793; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
1794; CHECK-NEXT:    vle32.v v8, (a0)
1795; CHECK-NEXT:    vle32.v v10, (a1)
1796; CHECK-NEXT:    vfadd.vv v8, v8, v10
1797; CHECK-NEXT:    vse32.v v8, (a0)
1798; CHECK-NEXT:    ret
1799  %a = load <8 x float>, ptr %x
1800  %b = load <8 x float>, ptr %y
1801  %c = fadd <8 x float> %a, %b
1802  store <8 x float> %c, ptr %x
1803  ret void
1804}
1805
; Element-wise fadd of two <4 x double> vectors loaded from %x and %y; the sum
; is stored back to %x. All RUN configurations share one expected sequence: a
; single vfadd.vv at e64/m2.
1806define void @fadd_v4f64(ptr %x, ptr %y) {
1807; CHECK-LABEL: fadd_v4f64:
1808; CHECK:       # %bb.0:
1809; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
1810; CHECK-NEXT:    vle64.v v8, (a0)
1811; CHECK-NEXT:    vle64.v v10, (a1)
1812; CHECK-NEXT:    vfadd.vv v8, v8, v10
1813; CHECK-NEXT:    vse64.v v8, (a0)
1814; CHECK-NEXT:    ret
1815  %a = load <4 x double>, ptr %x
1816  %b = load <4 x double>, ptr %y
1817  %c = fadd <4 x double> %a, %b
1818  store <4 x double> %c, ptr %x
1819  ret void
1820}
1821
; Element-wise fsub (%x - %y) of two <16 x bfloat> vectors; the result is stored
; back to %x. All RUN configurations share one expected sequence: both inputs
; are widened with vfwcvtbf16.f.f.v, subtracted at e32/m4, and narrowed with
; vfncvtbf16.f.f.w before the store.
1822define void @fsub_v16bf16(ptr %x, ptr %y) {
1823; CHECK-LABEL: fsub_v16bf16:
1824; CHECK:       # %bb.0:
1825; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
1826; CHECK-NEXT:    vle16.v v8, (a1)
1827; CHECK-NEXT:    vle16.v v10, (a0)
1828; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
1829; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
1830; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
1831; CHECK-NEXT:    vfsub.vv v8, v16, v12
1832; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
1833; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v8
1834; CHECK-NEXT:    vse16.v v12, (a0)
1835; CHECK-NEXT:    ret
1836  %a = load <16 x bfloat>, ptr %x
1837  %b = load <16 x bfloat>, ptr %y
1838  %c = fsub <16 x bfloat> %a, %b
1839  store <16 x bfloat> %c, ptr %x
1840  ret void
1841}
1842
; Element-wise fsub (%x - %y) of two <16 x half> vectors; the result is stored
; back to %x. With +zvfh the expected code is a direct e16/m2 vfsub.vv. With
; +zvfhmin both inputs are expected to be widened with vfwcvt.f.f.v, subtracted
; at e32/m4, and narrowed with vfncvt.f.f.w before the store.
1843define void @fsub_v16f16(ptr %x, ptr %y) {
1844; ZVFH-LABEL: fsub_v16f16:
1845; ZVFH:       # %bb.0:
1846; ZVFH-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
1847; ZVFH-NEXT:    vle16.v v8, (a0)
1848; ZVFH-NEXT:    vle16.v v10, (a1)
1849; ZVFH-NEXT:    vfsub.vv v8, v8, v10
1850; ZVFH-NEXT:    vse16.v v8, (a0)
1851; ZVFH-NEXT:    ret
1852;
1853; ZVFHMIN-LABEL: fsub_v16f16:
1854; ZVFHMIN:       # %bb.0:
1855; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
1856; ZVFHMIN-NEXT:    vle16.v v8, (a1)
1857; ZVFHMIN-NEXT:    vle16.v v10, (a0)
1858; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
1859; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
1860; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
1861; ZVFHMIN-NEXT:    vfsub.vv v8, v16, v12
1862; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
1863; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v8
1864; ZVFHMIN-NEXT:    vse16.v v12, (a0)
1865; ZVFHMIN-NEXT:    ret
1866  %a = load <16 x half>, ptr %x
1867  %b = load <16 x half>, ptr %y
1868  %c = fsub <16 x half> %a, %b
1869  store <16 x half> %c, ptr %x
1870  ret void
1871}
1872
; Element-wise fsub (%x - %y) of two <8 x float> vectors; the result is stored
; back to %x. All RUN configurations share one expected sequence: a single
; vfsub.vv at e32/m2.
1873define void @fsub_v8f32(ptr %x, ptr %y) {
1874; CHECK-LABEL: fsub_v8f32:
1875; CHECK:       # %bb.0:
1876; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
1877; CHECK-NEXT:    vle32.v v8, (a0)
1878; CHECK-NEXT:    vle32.v v10, (a1)
1879; CHECK-NEXT:    vfsub.vv v8, v8, v10
1880; CHECK-NEXT:    vse32.v v8, (a0)
1881; CHECK-NEXT:    ret
1882  %a = load <8 x float>, ptr %x
1883  %b = load <8 x float>, ptr %y
1884  %c = fsub <8 x float> %a, %b
1885  store <8 x float> %c, ptr %x
1886  ret void
1887}
1888
; Element-wise fsub (%x - %y) of two <4 x double> vectors; the result is stored
; back to %x. All RUN configurations share one expected sequence: a single
; vfsub.vv at e64/m2.
1889define void @fsub_v4f64(ptr %x, ptr %y) {
1890; CHECK-LABEL: fsub_v4f64:
1891; CHECK:       # %bb.0:
1892; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
1893; CHECK-NEXT:    vle64.v v8, (a0)
1894; CHECK-NEXT:    vle64.v v10, (a1)
1895; CHECK-NEXT:    vfsub.vv v8, v8, v10
1896; CHECK-NEXT:    vse64.v v8, (a0)
1897; CHECK-NEXT:    ret
1898  %a = load <4 x double>, ptr %x
1899  %b = load <4 x double>, ptr %y
1900  %c = fsub <4 x double> %a, %b
1901  store <4 x double> %c, ptr %x
1902  ret void
1903}
1904
; Element-wise fmul of two <16 x bfloat> vectors loaded from %x and %y; the
; product is stored back to %x. All RUN configurations share one expected
; sequence: both inputs are widened with vfwcvtbf16.f.f.v, multiplied at
; e32/m4, and narrowed with vfncvtbf16.f.f.w before the store.
1905define void @fmul_v16bf16(ptr %x, ptr %y) {
1906; CHECK-LABEL: fmul_v16bf16:
1907; CHECK:       # %bb.0:
1908; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
1909; CHECK-NEXT:    vle16.v v8, (a1)
1910; CHECK-NEXT:    vle16.v v10, (a0)
1911; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
1912; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
1913; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
1914; CHECK-NEXT:    vfmul.vv v8, v16, v12
1915; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
1916; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v8
1917; CHECK-NEXT:    vse16.v v12, (a0)
1918; CHECK-NEXT:    ret
1919  %a = load <16 x bfloat>, ptr %x
1920  %b = load <16 x bfloat>, ptr %y
1921  %c = fmul <16 x bfloat> %a, %b
1922  store <16 x bfloat> %c, ptr %x
1923  ret void
1924}
1925
; Element-wise fmul of two <16 x half> vectors loaded from %x and %y; the
; product is stored back to %x. With +zvfh the expected code is a direct e16/m2
; vfmul.vv. With +zvfhmin both inputs are expected to be widened with
; vfwcvt.f.f.v, multiplied at e32/m4, and narrowed with vfncvt.f.f.w.
1926define void @fmul_v16f16(ptr %x, ptr %y) {
1927; ZVFH-LABEL: fmul_v16f16:
1928; ZVFH:       # %bb.0:
1929; ZVFH-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
1930; ZVFH-NEXT:    vle16.v v8, (a0)
1931; ZVFH-NEXT:    vle16.v v10, (a1)
1932; ZVFH-NEXT:    vfmul.vv v8, v8, v10
1933; ZVFH-NEXT:    vse16.v v8, (a0)
1934; ZVFH-NEXT:    ret
1935;
1936; ZVFHMIN-LABEL: fmul_v16f16:
1937; ZVFHMIN:       # %bb.0:
1938; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
1939; ZVFHMIN-NEXT:    vle16.v v8, (a1)
1940; ZVFHMIN-NEXT:    vle16.v v10, (a0)
1941; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
1942; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
1943; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
1944; ZVFHMIN-NEXT:    vfmul.vv v8, v16, v12
1945; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
1946; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v8
1947; ZVFHMIN-NEXT:    vse16.v v12, (a0)
1948; ZVFHMIN-NEXT:    ret
1949  %a = load <16 x half>, ptr %x
1950  %b = load <16 x half>, ptr %y
1951  %c = fmul <16 x half> %a, %b
1952  store <16 x half> %c, ptr %x
1953  ret void
1954}
1955
; Element-wise fmul of two <8 x float> vectors loaded from %x and %y; the
; product is stored back to %x. All RUN configurations share one expected
; sequence: a single vfmul.vv at e32/m2.
1956define void @fmul_v8f32(ptr %x, ptr %y) {
1957; CHECK-LABEL: fmul_v8f32:
1958; CHECK:       # %bb.0:
1959; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
1960; CHECK-NEXT:    vle32.v v8, (a0)
1961; CHECK-NEXT:    vle32.v v10, (a1)
1962; CHECK-NEXT:    vfmul.vv v8, v8, v10
1963; CHECK-NEXT:    vse32.v v8, (a0)
1964; CHECK-NEXT:    ret
1965  %a = load <8 x float>, ptr %x
1966  %b = load <8 x float>, ptr %y
1967  %c = fmul <8 x float> %a, %b
1968  store <8 x float> %c, ptr %x
1969  ret void
1970}
1971
; Element-wise fmul of two <4 x double> vectors loaded from %x and %y; the
; product is stored back to %x. All RUN configurations share one expected
; sequence: a single vfmul.vv at e64/m2.
1972define void @fmul_v4f64(ptr %x, ptr %y) {
1973; CHECK-LABEL: fmul_v4f64:
1974; CHECK:       # %bb.0:
1975; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
1976; CHECK-NEXT:    vle64.v v8, (a0)
1977; CHECK-NEXT:    vle64.v v10, (a1)
1978; CHECK-NEXT:    vfmul.vv v8, v8, v10
1979; CHECK-NEXT:    vse64.v v8, (a0)
1980; CHECK-NEXT:    ret
1981  %a = load <4 x double>, ptr %x
1982  %b = load <4 x double>, ptr %y
1983  %c = fmul <4 x double> %a, %b
1984  store <4 x double> %c, ptr %x
1985  ret void
1986}
1987
; Element-wise fdiv (%x / %y) of two <16 x bfloat> vectors; the quotient is
; stored back to %x. All RUN configurations share one expected sequence: both
; inputs are widened with vfwcvtbf16.f.f.v, divided at e32/m4, and narrowed
; with vfncvtbf16.f.f.w before the store.
1988define void @fdiv_v16bf16(ptr %x, ptr %y) {
1989; CHECK-LABEL: fdiv_v16bf16:
1990; CHECK:       # %bb.0:
1991; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
1992; CHECK-NEXT:    vle16.v v8, (a1)
1993; CHECK-NEXT:    vle16.v v10, (a0)
1994; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
1995; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
1996; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
1997; CHECK-NEXT:    vfdiv.vv v8, v16, v12
1998; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
1999; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v8
2000; CHECK-NEXT:    vse16.v v12, (a0)
2001; CHECK-NEXT:    ret
2002  %a = load <16 x bfloat>, ptr %x
2003  %b = load <16 x bfloat>, ptr %y
2004  %c = fdiv <16 x bfloat> %a, %b
2005  store <16 x bfloat> %c, ptr %x
2006  ret void
2007}
2008
; Element-wise fdiv (%x / %y) of two <16 x half> vectors; the quotient is stored
; back to %x. With +zvfh the expected code is a direct e16/m2 vfdiv.vv. With
; +zvfhmin both inputs are expected to be widened with vfwcvt.f.f.v, divided at
; e32/m4, and narrowed with vfncvt.f.f.w before the store.
2009define void @fdiv_v16f16(ptr %x, ptr %y) {
2010; ZVFH-LABEL: fdiv_v16f16:
2011; ZVFH:       # %bb.0:
2012; ZVFH-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
2013; ZVFH-NEXT:    vle16.v v8, (a0)
2014; ZVFH-NEXT:    vle16.v v10, (a1)
2015; ZVFH-NEXT:    vfdiv.vv v8, v8, v10
2016; ZVFH-NEXT:    vse16.v v8, (a0)
2017; ZVFH-NEXT:    ret
2018;
2019; ZVFHMIN-LABEL: fdiv_v16f16:
2020; ZVFHMIN:       # %bb.0:
2021; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
2022; ZVFHMIN-NEXT:    vle16.v v8, (a1)
2023; ZVFHMIN-NEXT:    vle16.v v10, (a0)
2024; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
2025; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
2026; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
2027; ZVFHMIN-NEXT:    vfdiv.vv v8, v16, v12
2028; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
2029; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v8
2030; ZVFHMIN-NEXT:    vse16.v v12, (a0)
2031; ZVFHMIN-NEXT:    ret
2032  %a = load <16 x half>, ptr %x
2033  %b = load <16 x half>, ptr %y
2034  %c = fdiv <16 x half> %a, %b
2035  store <16 x half> %c, ptr %x
2036  ret void
2037}
2038
; Element-wise fdiv (%x / %y) of two <8 x float> vectors; the quotient is stored
; back to %x. All RUN configurations share one expected sequence: a single
; vfdiv.vv at e32/m2.
2039define void @fdiv_v8f32(ptr %x, ptr %y) {
2040; CHECK-LABEL: fdiv_v8f32:
2041; CHECK:       # %bb.0:
2042; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
2043; CHECK-NEXT:    vle32.v v8, (a0)
2044; CHECK-NEXT:    vle32.v v10, (a1)
2045; CHECK-NEXT:    vfdiv.vv v8, v8, v10
2046; CHECK-NEXT:    vse32.v v8, (a0)
2047; CHECK-NEXT:    ret
2048  %a = load <8 x float>, ptr %x
2049  %b = load <8 x float>, ptr %y
2050  %c = fdiv <8 x float> %a, %b
2051  store <8 x float> %c, ptr %x
2052  ret void
2053}
2054
; Element-wise fdiv (%x / %y) of two <4 x double> vectors; the quotient is
; stored back to %x. All RUN configurations share one expected sequence: a
; single vfdiv.vv at e64/m2.
2055define void @fdiv_v4f64(ptr %x, ptr %y) {
2056; CHECK-LABEL: fdiv_v4f64:
2057; CHECK:       # %bb.0:
2058; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
2059; CHECK-NEXT:    vle64.v v8, (a0)
2060; CHECK-NEXT:    vle64.v v10, (a1)
2061; CHECK-NEXT:    vfdiv.vv v8, v8, v10
2062; CHECK-NEXT:    vse64.v v8, (a0)
2063; CHECK-NEXT:    ret
2064  %a = load <4 x double>, ptr %x
2065  %b = load <4 x double>, ptr %y
2066  %c = fdiv <4 x double> %a, %b
2067  store <4 x double> %c, ptr %x
2068  ret void
2069}
2070
; fneg of a <16 x bfloat> vector loaded from %x, stored back in place. All RUN
; configurations share one expected sequence: no FP instruction is used; the
; vector is XORed with the scalar produced by "lui a1, 8" (0x8000, the top bit
; of each 16-bit element).
2071define void @fneg_v16bf16(ptr %x) {
2072; CHECK-LABEL: fneg_v16bf16:
2073; CHECK:       # %bb.0:
2074; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
2075; CHECK-NEXT:    vle16.v v8, (a0)
2076; CHECK-NEXT:    lui a1, 8
2077; CHECK-NEXT:    vxor.vx v8, v8, a1
2078; CHECK-NEXT:    vse16.v v8, (a0)
2079; CHECK-NEXT:    ret
2080  %a = load <16 x bfloat>, ptr %x
2081  %b = fneg <16 x bfloat> %a
2082  store <16 x bfloat> %b, ptr %x
2083  ret void
2084}
2085
; fneg of a <16 x half> vector loaded from %x, stored back in place. With +zvfh
; the expected code is vfneg.v at e16/m2. With +zvfhmin the expected code is an
; integer vxor.vx with the scalar produced by "lui a1, 8" (0x8000, the top bit
; of each 16-bit element), with no widening.
2086define void @fneg_v16f16(ptr %x) {
2087; ZVFH-LABEL: fneg_v16f16:
2088; ZVFH:       # %bb.0:
2089; ZVFH-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
2090; ZVFH-NEXT:    vle16.v v8, (a0)
2091; ZVFH-NEXT:    vfneg.v v8, v8
2092; ZVFH-NEXT:    vse16.v v8, (a0)
2093; ZVFH-NEXT:    ret
2094;
2095; ZVFHMIN-LABEL: fneg_v16f16:
2096; ZVFHMIN:       # %bb.0:
2097; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
2098; ZVFHMIN-NEXT:    vle16.v v8, (a0)
2099; ZVFHMIN-NEXT:    lui a1, 8
2100; ZVFHMIN-NEXT:    vxor.vx v8, v8, a1
2101; ZVFHMIN-NEXT:    vse16.v v8, (a0)
2102; ZVFHMIN-NEXT:    ret
2103  %a = load <16 x half>, ptr %x
2104  %b = fneg <16 x half> %a
2105  store <16 x half> %b, ptr %x
2106  ret void
2107}
2108
; fneg of an <8 x float> vector loaded from %x, stored back in place. All RUN
; configurations share one expected sequence: a single vfneg.v at e32/m2.
2109define void @fneg_v8f32(ptr %x) {
2110; CHECK-LABEL: fneg_v8f32:
2111; CHECK:       # %bb.0:
2112; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
2113; CHECK-NEXT:    vle32.v v8, (a0)
2114; CHECK-NEXT:    vfneg.v v8, v8
2115; CHECK-NEXT:    vse32.v v8, (a0)
2116; CHECK-NEXT:    ret
2117  %a = load <8 x float>, ptr %x
2118  %b = fneg <8 x float> %a
2119  store <8 x float> %b, ptr %x
2120  ret void
2121}
2122
; fneg of a <4 x double> vector loaded from %x, stored back in place. All RUN
; configurations share one expected sequence: a single vfneg.v at e64/m2.
2123define void @fneg_v4f64(ptr %x) {
2124; CHECK-LABEL: fneg_v4f64:
2125; CHECK:       # %bb.0:
2126; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
2127; CHECK-NEXT:    vle64.v v8, (a0)
2128; CHECK-NEXT:    vfneg.v v8, v8
2129; CHECK-NEXT:    vse64.v v8, (a0)
2130; CHECK-NEXT:    ret
2131  %a = load <4 x double>, ptr %x
2132  %b = fneg <4 x double> %a
2133  store <4 x double> %b, ptr %x
2134  ret void
2135}
2136
; llvm.fma on three <16 x bfloat> vectors loaded from %x, %y and %z; the result
; is stored back to %x. All RUN configurations share one expected sequence: all
; three inputs are widened with vfwcvtbf16.f.f.v, combined with one vfmadd.vv
; at e32/m4, and narrowed with vfncvtbf16.f.f.w before the store.
2137define void @fma_v16bf16(ptr %x, ptr %y, ptr %z) {
2138; CHECK-LABEL: fma_v16bf16:
2139; CHECK:       # %bb.0:
2140; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
2141; CHECK-NEXT:    vle16.v v8, (a2)
2142; CHECK-NEXT:    vle16.v v10, (a0)
2143; CHECK-NEXT:    vle16.v v12, (a1)
2144; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
2145; CHECK-NEXT:    vfwcvtbf16.f.f.v v20, v10
2146; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v12
2147; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
2148; CHECK-NEXT:    vfmadd.vv v8, v20, v16
2149; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
2150; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v8
2151; CHECK-NEXT:    vse16.v v12, (a0)
2152; CHECK-NEXT:    ret
2153  %a = load <16 x bfloat>, ptr %x
2154  %b = load <16 x bfloat>, ptr %y
2155  %c = load <16 x bfloat>, ptr %z
2156  %d = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %a, <16 x bfloat> %b, <16 x bfloat> %c)
2157  store <16 x bfloat> %d, ptr %x
2158  ret void
2159}
2160
; llvm.fma on three <16 x half> vectors loaded from %x, %y and %z; the result is
; stored back to %x. With +zvfh the expected code is one e16/m2 vfmacc.vv that
; accumulates into the vector loaded from (a2). With +zvfhmin all three inputs
; are expected to be widened with vfwcvt.f.f.v, combined with vfmadd.vv at
; e32/m4, and narrowed with vfncvt.f.f.w before the store.
2161define void @fma_v16f16(ptr %x, ptr %y, ptr %z) {
2162; ZVFH-LABEL: fma_v16f16:
2163; ZVFH:       # %bb.0:
2164; ZVFH-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
2165; ZVFH-NEXT:    vle16.v v8, (a0)
2166; ZVFH-NEXT:    vle16.v v10, (a1)
2167; ZVFH-NEXT:    vle16.v v12, (a2)
2168; ZVFH-NEXT:    vfmacc.vv v12, v8, v10
2169; ZVFH-NEXT:    vse16.v v12, (a0)
2170; ZVFH-NEXT:    ret
2171;
2172; ZVFHMIN-LABEL: fma_v16f16:
2173; ZVFHMIN:       # %bb.0:
2174; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
2175; ZVFHMIN-NEXT:    vle16.v v8, (a2)
2176; ZVFHMIN-NEXT:    vle16.v v10, (a0)
2177; ZVFHMIN-NEXT:    vle16.v v12, (a1)
2178; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
2179; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v10
2180; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
2181; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
2182; ZVFHMIN-NEXT:    vfmadd.vv v8, v20, v16
2183; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
2184; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v8
2185; ZVFHMIN-NEXT:    vse16.v v12, (a0)
2186; ZVFHMIN-NEXT:    ret
2187  %a = load <16 x half>, ptr %x
2188  %b = load <16 x half>, ptr %y
2189  %c = load <16 x half>, ptr %z
2190  %d = call <16 x half> @llvm.fma.v16f16(<16 x half> %a, <16 x half> %b, <16 x half> %c)
2191  store <16 x half> %d, ptr %x
2192  ret void
2193}
2194
; llvm.fma on three <8 x float> vectors loaded from %x, %y and %z; the result is
; stored back to %x. All RUN configurations share one expected sequence: one
; e32/m2 vfmacc.vv that accumulates into the vector loaded from (a2).
2195define void @fma_v8f32(ptr %x, ptr %y, ptr %z) {
2196; CHECK-LABEL: fma_v8f32:
2197; CHECK:       # %bb.0:
2198; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
2199; CHECK-NEXT:    vle32.v v8, (a0)
2200; CHECK-NEXT:    vle32.v v10, (a1)
2201; CHECK-NEXT:    vle32.v v12, (a2)
2202; CHECK-NEXT:    vfmacc.vv v12, v8, v10
2203; CHECK-NEXT:    vse32.v v12, (a0)
2204; CHECK-NEXT:    ret
2205  %a = load <8 x float>, ptr %x
2206  %b = load <8 x float>, ptr %y
2207  %c = load <8 x float>, ptr %z
2208  %d = call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c)
2209  store <8 x float> %d, ptr %x
2210  ret void
2211}
2212
; llvm.fma on three <4 x double> vectors loaded from %x, %y and %z; the result
; is stored back to %x. All RUN configurations share one expected sequence: one
; e64/m2 vfmacc.vv that accumulates into the vector loaded from (a2).
2213define void @fma_v4f64(ptr %x, ptr %y, ptr %z) {
2214; CHECK-LABEL: fma_v4f64:
2215; CHECK:       # %bb.0:
2216; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
2217; CHECK-NEXT:    vle64.v v8, (a0)
2218; CHECK-NEXT:    vle64.v v10, (a1)
2219; CHECK-NEXT:    vle64.v v12, (a2)
2220; CHECK-NEXT:    vfmacc.vv v12, v8, v10
2221; CHECK-NEXT:    vse64.v v12, (a0)
2222; CHECK-NEXT:    ret
2223  %a = load <4 x double>, ptr %x
2224  %b = load <4 x double>, ptr %y
2225  %c = load <4 x double>, ptr %z
2226  %d = call <4 x double> @llvm.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c)
2227  store <4 x double> %d, ptr %x
2228  ret void
2229}
2230
; Vector + splat(scalar) for <8 x bfloat>: %y is splatted via insertelement and
; a zero-mask shufflevector and used as the right-hand fadd operand. All RUN
; configurations share one expected sequence: the scalar is moved to a GPR
; (fmv.x.w), splatted with vmv.v.x, both vectors are widened with
; vfwcvtbf16.f.f.v, added at e32/m2, and narrowed with vfncvtbf16.f.f.w.
2231define void @fadd_vf_v8bf16(ptr %x, bfloat %y) {
2232; CHECK-LABEL: fadd_vf_v8bf16:
2233; CHECK:       # %bb.0:
2234; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
2235; CHECK-NEXT:    vle16.v v8, (a0)
2236; CHECK-NEXT:    fmv.x.w a1, fa0
2237; CHECK-NEXT:    vmv.v.x v9, a1
2238; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
2239; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
2240; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
2241; CHECK-NEXT:    vfadd.vv v8, v10, v12
2242; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
2243; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
2244; CHECK-NEXT:    vse16.v v10, (a0)
2245; CHECK-NEXT:    ret
2246  %a = load <8 x bfloat>, ptr %x
2247  %b = insertelement <8 x bfloat> poison, bfloat %y, i32 0
2248  %c = shufflevector <8 x bfloat> %b, <8 x bfloat> poison, <8 x i32> zeroinitializer
2249  %d = fadd <8 x bfloat> %a, %c
2250  store <8 x bfloat> %d, ptr %x
2251  ret void
2252}
2253
; Vector + splat(scalar) for the non-power-of-two type <6 x bfloat>; the splat
; of %y is the right-hand fadd operand. The expected sequence, shared by all
; RUN configurations, uses an AVL of 6 in vsetivli and otherwise follows the
; splat (fmv.x.w + vmv.v.x), widen, add at e32/m2, narrow pattern.
2254define void @fadd_vf_v6bf16(ptr %x, bfloat %y) {
2255; CHECK-LABEL: fadd_vf_v6bf16:
2256; CHECK:       # %bb.0:
2257; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
2258; CHECK-NEXT:    vle16.v v8, (a0)
2259; CHECK-NEXT:    fmv.x.w a1, fa0
2260; CHECK-NEXT:    vmv.v.x v9, a1
2261; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
2262; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
2263; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
2264; CHECK-NEXT:    vfadd.vv v8, v10, v12
2265; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
2266; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
2267; CHECK-NEXT:    vse16.v v10, (a0)
2268; CHECK-NEXT:    ret
2269  %a = load <6 x bfloat>, ptr %x
2270  %b = insertelement <6 x bfloat> poison, bfloat %y, i32 0
2271  %c = shufflevector <6 x bfloat> %b, <6 x bfloat> poison, <6 x i32> zeroinitializer
2272  %d = fadd <6 x bfloat> %a, %c
2273  store <6 x bfloat> %d, ptr %x
2274  ret void
2275}
2276
; Vector + splat(scalar) for <8 x half>; the splat of %y is the right-hand fadd
; operand. With +zvfh the splat is expected to fold into a single vfadd.vf using
; fa0. With +zvfhmin the scalar is expected to be moved to a GPR (fmv.x.w),
; splatted with vmv.v.x, and the add performed at e32/m2 between vfwcvt.f.f.v
; widening and vfncvt.f.f.w narrowing.
2277define void @fadd_vf_v8f16(ptr %x, half %y) {
2278; ZVFH-LABEL: fadd_vf_v8f16:
2279; ZVFH:       # %bb.0:
2280; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
2281; ZVFH-NEXT:    vle16.v v8, (a0)
2282; ZVFH-NEXT:    vfadd.vf v8, v8, fa0
2283; ZVFH-NEXT:    vse16.v v8, (a0)
2284; ZVFH-NEXT:    ret
2285;
2286; ZVFHMIN-LABEL: fadd_vf_v8f16:
2287; ZVFHMIN:       # %bb.0:
2288; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
2289; ZVFHMIN-NEXT:    vle16.v v8, (a0)
2290; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
2291; ZVFHMIN-NEXT:    vmv.v.x v9, a1
2292; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
2293; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
2294; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
2295; ZVFHMIN-NEXT:    vfadd.vv v8, v10, v12
2296; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
2297; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
2298; ZVFHMIN-NEXT:    vse16.v v10, (a0)
2299; ZVFHMIN-NEXT:    ret
2300  %a = load <8 x half>, ptr %x
2301  %b = insertelement <8 x half> poison, half %y, i32 0
2302  %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
2303  %d = fadd <8 x half> %a, %c
2304  store <8 x half> %d, ptr %x
2305  ret void
2306}
2307
; Vector + splat(scalar) for the non-power-of-two type <6 x half>; the splat of
; %y is the right-hand fadd operand. Both expected sequences use an AVL of 6.
; With +zvfh a single vfadd.vf is expected; with +zvfhmin the splat
; (fmv.x.w + vmv.v.x), widen, add at e32/m2, narrow pattern is expected.
2308define void @fadd_vf_v6f16(ptr %x, half %y) {
2309; ZVFH-LABEL: fadd_vf_v6f16:
2310; ZVFH:       # %bb.0:
2311; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
2312; ZVFH-NEXT:    vle16.v v8, (a0)
2313; ZVFH-NEXT:    vfadd.vf v8, v8, fa0
2314; ZVFH-NEXT:    vse16.v v8, (a0)
2315; ZVFH-NEXT:    ret
2316;
2317; ZVFHMIN-LABEL: fadd_vf_v6f16:
2318; ZVFHMIN:       # %bb.0:
2319; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
2320; ZVFHMIN-NEXT:    vle16.v v8, (a0)
2321; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
2322; ZVFHMIN-NEXT:    vmv.v.x v9, a1
2323; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
2324; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
2325; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
2326; ZVFHMIN-NEXT:    vfadd.vv v8, v10, v12
2327; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
2328; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
2329; ZVFHMIN-NEXT:    vse16.v v10, (a0)
2330; ZVFHMIN-NEXT:    ret
2331  %a = load <6 x half>, ptr %x
2332  %b = insertelement <6 x half> poison, half %y, i32 0
2333  %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
2334  %d = fadd <6 x half> %a, %c
2335  store <6 x half> %d, ptr %x
2336  ret void
2337}
2338
; Vector + splat(scalar) for <4 x float>; the splat of %y is the right-hand fadd
; operand. All RUN configurations share one expected sequence: the splat folds
; into a single vfadd.vf with fa0 at e32/m1.
2339define void @fadd_vf_v4f32(ptr %x, float %y) {
2340; CHECK-LABEL: fadd_vf_v4f32:
2341; CHECK:       # %bb.0:
2342; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
2343; CHECK-NEXT:    vle32.v v8, (a0)
2344; CHECK-NEXT:    vfadd.vf v8, v8, fa0
2345; CHECK-NEXT:    vse32.v v8, (a0)
2346; CHECK-NEXT:    ret
2347  %a = load <4 x float>, ptr %x
2348  %b = insertelement <4 x float> poison, float %y, i32 0
2349  %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
2350  %d = fadd <4 x float> %a, %c
2351  store <4 x float> %d, ptr %x
2352  ret void
2353}
2354
; Vector + splat(scalar) for <2 x double>; the splat of %y is the right-hand
; fadd operand. All RUN configurations share one expected sequence: the splat
; folds into a single vfadd.vf with fa0 at e64/m1.
2355define void @fadd_vf_v2f64(ptr %x, double %y) {
2356; CHECK-LABEL: fadd_vf_v2f64:
2357; CHECK:       # %bb.0:
2358; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
2359; CHECK-NEXT:    vle64.v v8, (a0)
2360; CHECK-NEXT:    vfadd.vf v8, v8, fa0
2361; CHECK-NEXT:    vse64.v v8, (a0)
2362; CHECK-NEXT:    ret
2363  %a = load <2 x double>, ptr %x
2364  %b = insertelement <2 x double> poison, double %y, i32 0
2365  %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer
2366  %d = fadd <2 x double> %a, %c
2367  store <2 x double> %d, ptr %x
2368  ret void
2369}
2370
; splat(scalar) + vector for <8 x bfloat>: same as the vf form but with the
; splat of %y as the left-hand fadd operand. In the expected sequence, shared
; by all RUN configurations, the widened splat (v12) is the first vfadd.vv
; source and the widened loaded vector (v10) is the second.
2371define void @fadd_fv_v8bf16(ptr %x, bfloat %y) {
2372; CHECK-LABEL: fadd_fv_v8bf16:
2373; CHECK:       # %bb.0:
2374; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
2375; CHECK-NEXT:    vle16.v v8, (a0)
2376; CHECK-NEXT:    fmv.x.w a1, fa0
2377; CHECK-NEXT:    vmv.v.x v9, a1
2378; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
2379; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
2380; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
2381; CHECK-NEXT:    vfadd.vv v8, v12, v10
2382; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
2383; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
2384; CHECK-NEXT:    vse16.v v10, (a0)
2385; CHECK-NEXT:    ret
2386  %a = load <8 x bfloat>, ptr %x
2387  %b = insertelement <8 x bfloat> poison, bfloat %y, i32 0
2388  %c = shufflevector <8 x bfloat> %b, <8 x bfloat> poison, <8 x i32> zeroinitializer
2389  %d = fadd <8 x bfloat> %c, %a
2390  store <8 x bfloat> %d, ptr %x
2391  ret void
2392}
2393
; splat(scalar) + vector for the non-power-of-two type <6 x bfloat>; the splat
; of %y is the left-hand fadd operand. The expected sequence, shared by all RUN
; configurations, uses an AVL of 6 and has the widened splat (v12) as the first
; vfadd.vv source.
2394define void @fadd_fv_v6bf16(ptr %x, bfloat %y) {
2395; CHECK-LABEL: fadd_fv_v6bf16:
2396; CHECK:       # %bb.0:
2397; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
2398; CHECK-NEXT:    vle16.v v8, (a0)
2399; CHECK-NEXT:    fmv.x.w a1, fa0
2400; CHECK-NEXT:    vmv.v.x v9, a1
2401; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
2402; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
2403; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
2404; CHECK-NEXT:    vfadd.vv v8, v12, v10
2405; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
2406; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
2407; CHECK-NEXT:    vse16.v v10, (a0)
2408; CHECK-NEXT:    ret
2409  %a = load <6 x bfloat>, ptr %x
2410  %b = insertelement <6 x bfloat> poison, bfloat %y, i32 0
2411  %c = shufflevector <6 x bfloat> %b, <6 x bfloat> poison, <6 x i32> zeroinitializer
2412  %d = fadd <6 x bfloat> %c, %a
2413  store <6 x bfloat> %d, ptr %x
2414  ret void
2415}
2416
; splat(scalar) + vector for <8 x half>; the splat of %y is the left-hand fadd
; operand. With +zvfh the expected code is still a single vfadd.vf with fa0.
; With +zvfhmin the splat (fmv.x.w + vmv.v.x), widen, add at e32/m2, narrow
; pattern is expected, with the widened splat (v12) as the first vfadd.vv source.
2417define void @fadd_fv_v8f16(ptr %x, half %y) {
2418; ZVFH-LABEL: fadd_fv_v8f16:
2419; ZVFH:       # %bb.0:
2420; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
2421; ZVFH-NEXT:    vle16.v v8, (a0)
2422; ZVFH-NEXT:    vfadd.vf v8, v8, fa0
2423; ZVFH-NEXT:    vse16.v v8, (a0)
2424; ZVFH-NEXT:    ret
2425;
2426; ZVFHMIN-LABEL: fadd_fv_v8f16:
2427; ZVFHMIN:       # %bb.0:
2428; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
2429; ZVFHMIN-NEXT:    vle16.v v8, (a0)
2430; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
2431; ZVFHMIN-NEXT:    vmv.v.x v9, a1
2432; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
2433; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
2434; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
2435; ZVFHMIN-NEXT:    vfadd.vv v8, v12, v10
2436; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
2437; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
2438; ZVFHMIN-NEXT:    vse16.v v10, (a0)
2439; ZVFHMIN-NEXT:    ret
2440  %a = load <8 x half>, ptr %x
2441  %b = insertelement <8 x half> poison, half %y, i32 0
2442  %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
2443  %d = fadd <8 x half> %c, %a
2444  store <8 x half> %d, ptr %x
2445  ret void
2446}
2447
; splat(scalar) + vector for the non-power-of-two type <6 x half>; the splat of
; %y is the left-hand fadd operand. Both expected sequences use an AVL of 6.
; With +zvfh a single vfadd.vf is expected; with +zvfhmin the splat, widen, add
; at e32/m2, narrow pattern is expected, with the widened splat (v12) as the
; first vfadd.vv source.
2448define void @fadd_fv_v6f16(ptr %x, half %y) {
2449; ZVFH-LABEL: fadd_fv_v6f16:
2450; ZVFH:       # %bb.0:
2451; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
2452; ZVFH-NEXT:    vle16.v v8, (a0)
2453; ZVFH-NEXT:    vfadd.vf v8, v8, fa0
2454; ZVFH-NEXT:    vse16.v v8, (a0)
2455; ZVFH-NEXT:    ret
2456;
2457; ZVFHMIN-LABEL: fadd_fv_v6f16:
2458; ZVFHMIN:       # %bb.0:
2459; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
2460; ZVFHMIN-NEXT:    vle16.v v8, (a0)
2461; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
2462; ZVFHMIN-NEXT:    vmv.v.x v9, a1
2463; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
2464; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
2465; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
2466; ZVFHMIN-NEXT:    vfadd.vv v8, v12, v10
2467; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
2468; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
2469; ZVFHMIN-NEXT:    vse16.v v10, (a0)
2470; ZVFHMIN-NEXT:    ret
2471  %a = load <6 x half>, ptr %x
2472  %b = insertelement <6 x half> poison, half %y, i32 0
2473  %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
2474  %d = fadd <6 x half> %c, %a
2475  store <6 x half> %d, ptr %x
2476  ret void
2477}
2478
; splat(scalar) + vector for <4 x float>; the splat of %y is the left-hand fadd
; operand. All RUN configurations share one expected sequence, identical to the
; vf form: a single vfadd.vf with fa0 at e32/m1.
2479define void @fadd_fv_v4f32(ptr %x, float %y) {
2480; CHECK-LABEL: fadd_fv_v4f32:
2481; CHECK:       # %bb.0:
2482; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
2483; CHECK-NEXT:    vle32.v v8, (a0)
2484; CHECK-NEXT:    vfadd.vf v8, v8, fa0
2485; CHECK-NEXT:    vse32.v v8, (a0)
2486; CHECK-NEXT:    ret
2487  %a = load <4 x float>, ptr %x
2488  %b = insertelement <4 x float> poison, float %y, i32 0
2489  %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
2490  %d = fadd <4 x float> %c, %a
2491  store <4 x float> %d, ptr %x
2492  ret void
2493}
2494
; splat(scalar) + vector for <2 x double>; the splat of %y is the left-hand fadd
; operand. All RUN configurations share one expected sequence, identical to the
; vf form: a single vfadd.vf with fa0 at e64/m1.
2495define void @fadd_fv_v2f64(ptr %x, double %y) {
2496; CHECK-LABEL: fadd_fv_v2f64:
2497; CHECK:       # %bb.0:
2498; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
2499; CHECK-NEXT:    vle64.v v8, (a0)
2500; CHECK-NEXT:    vfadd.vf v8, v8, fa0
2501; CHECK-NEXT:    vse64.v v8, (a0)
2502; CHECK-NEXT:    ret
2503  %a = load <2 x double>, ptr %x
2504  %b = insertelement <2 x double> poison, double %y, i32 0
2505  %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer
2506  %d = fadd <2 x double> %c, %a
2507  store <2 x double> %d, ptr %x
2508  ret void
2509}
2510
; Vector - splat(scalar) for <8 x bfloat>: the splat of %y is the subtrahend.
; All RUN configurations share one expected sequence: the scalar is moved to a
; GPR (fmv.x.w), splatted with vmv.v.x, both vectors are widened with
; vfwcvtbf16.f.f.v, subtracted at e32/m2 (loaded vector v10 minus splat v12),
; and narrowed with vfncvtbf16.f.f.w.
2511define void @fsub_vf_v8bf16(ptr %x, bfloat %y) {
2512; CHECK-LABEL: fsub_vf_v8bf16:
2513; CHECK:       # %bb.0:
2514; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
2515; CHECK-NEXT:    vle16.v v8, (a0)
2516; CHECK-NEXT:    fmv.x.w a1, fa0
2517; CHECK-NEXT:    vmv.v.x v9, a1
2518; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
2519; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
2520; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
2521; CHECK-NEXT:    vfsub.vv v8, v10, v12
2522; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
2523; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
2524; CHECK-NEXT:    vse16.v v10, (a0)
2525; CHECK-NEXT:    ret
2526  %a = load <8 x bfloat>, ptr %x
2527  %b = insertelement <8 x bfloat> poison, bfloat %y, i32 0
2528  %c = shufflevector <8 x bfloat> %b, <8 x bfloat> poison, <8 x i32> zeroinitializer
2529  %d = fsub <8 x bfloat> %a, %c
2530  store <8 x bfloat> %d, ptr %x
2531  ret void
2532}
2533
; Vector - splat(scalar) for the non-power-of-two type <6 x bfloat>; the splat
; of %y is the subtrahend. The expected sequence, shared by all RUN
; configurations, uses an AVL of 6 and otherwise follows the splat
; (fmv.x.w + vmv.v.x), widen, subtract at e32/m2, narrow pattern.
2534define void @fsub_vf_v6bf16(ptr %x, bfloat %y) {
2535; CHECK-LABEL: fsub_vf_v6bf16:
2536; CHECK:       # %bb.0:
2537; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
2538; CHECK-NEXT:    vle16.v v8, (a0)
2539; CHECK-NEXT:    fmv.x.w a1, fa0
2540; CHECK-NEXT:    vmv.v.x v9, a1
2541; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
2542; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
2543; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
2544; CHECK-NEXT:    vfsub.vv v8, v10, v12
2545; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
2546; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
2547; CHECK-NEXT:    vse16.v v10, (a0)
2548; CHECK-NEXT:    ret
2549  %a = load <6 x bfloat>, ptr %x
2550  %b = insertelement <6 x bfloat> poison, bfloat %y, i32 0
2551  %c = shufflevector <6 x bfloat> %b, <6 x bfloat> poison, <6 x i32> zeroinitializer
2552  %d = fsub <6 x bfloat> %a, %c
2553  store <6 x bfloat> %d, ptr %x
2554  ret void
2555}
2556
; Vector - splat(scalar) for <8 x half>; the splat of %y is the subtrahend. With
; +zvfh the splat is expected to fold into a single vfsub.vf using fa0. With
; +zvfhmin the scalar is expected to be moved to a GPR (fmv.x.w), splatted with
; vmv.v.x, and the subtract performed at e32/m2 between vfwcvt.f.f.v widening
; and vfncvt.f.f.w narrowing.
2557define void @fsub_vf_v8f16(ptr %x, half %y) {
2558; ZVFH-LABEL: fsub_vf_v8f16:
2559; ZVFH:       # %bb.0:
2560; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
2561; ZVFH-NEXT:    vle16.v v8, (a0)
2562; ZVFH-NEXT:    vfsub.vf v8, v8, fa0
2563; ZVFH-NEXT:    vse16.v v8, (a0)
2564; ZVFH-NEXT:    ret
2565;
2566; ZVFHMIN-LABEL: fsub_vf_v8f16:
2567; ZVFHMIN:       # %bb.0:
2568; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
2569; ZVFHMIN-NEXT:    vle16.v v8, (a0)
2570; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
2571; ZVFHMIN-NEXT:    vmv.v.x v9, a1
2572; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
2573; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
2574; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
2575; ZVFHMIN-NEXT:    vfsub.vv v8, v10, v12
2576; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
2577; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
2578; ZVFHMIN-NEXT:    vse16.v v10, (a0)
2579; ZVFHMIN-NEXT:    ret
2580  %a = load <8 x half>, ptr %x
2581  %b = insertelement <8 x half> poison, half %y, i32 0
2582  %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
2583  %d = fsub <8 x half> %a, %c
2584  store <8 x half> %d, ptr %x
2585  ret void
2586}
2587
; Vector - splat(scalar) for the non-power-of-two type <6 x half>; the splat of
; %y is the subtrahend. Both expected sequences use an AVL of 6. With +zvfh a
; single vfsub.vf is expected; with +zvfhmin the splat (fmv.x.w + vmv.v.x),
; widen, subtract at e32/m2, narrow pattern is expected.
2588define void @fsub_vf_v6f16(ptr %x, half %y) {
2589; ZVFH-LABEL: fsub_vf_v6f16:
2590; ZVFH:       # %bb.0:
2591; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
2592; ZVFH-NEXT:    vle16.v v8, (a0)
2593; ZVFH-NEXT:    vfsub.vf v8, v8, fa0
2594; ZVFH-NEXT:    vse16.v v8, (a0)
2595; ZVFH-NEXT:    ret
2596;
2597; ZVFHMIN-LABEL: fsub_vf_v6f16:
2598; ZVFHMIN:       # %bb.0:
2599; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
2600; ZVFHMIN-NEXT:    vle16.v v8, (a0)
2601; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
2602; ZVFHMIN-NEXT:    vmv.v.x v9, a1
2603; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
2604; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
2605; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
2606; ZVFHMIN-NEXT:    vfsub.vv v8, v10, v12
2607; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
2608; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
2609; ZVFHMIN-NEXT:    vse16.v v10, (a0)
2610; ZVFHMIN-NEXT:    ret
2611  %a = load <6 x half>, ptr %x
2612  %b = insertelement <6 x half> poison, half %y, i32 0
2613  %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
2614  %d = fsub <6 x half> %a, %c
2615  store <6 x half> %d, ptr %x
2616  ret void
2617}
2618
; Vector - splat(scalar) for <4 x float>; the splat of %y is the subtrahend. All
; RUN configurations share one expected sequence: the splat folds into a single
; vfsub.vf with fa0 at e32/m1.
2619define void @fsub_vf_v4f32(ptr %x, float %y) {
2620; CHECK-LABEL: fsub_vf_v4f32:
2621; CHECK:       # %bb.0:
2622; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
2623; CHECK-NEXT:    vle32.v v8, (a0)
2624; CHECK-NEXT:    vfsub.vf v8, v8, fa0
2625; CHECK-NEXT:    vse32.v v8, (a0)
2626; CHECK-NEXT:    ret
2627  %a = load <4 x float>, ptr %x
2628  %b = insertelement <4 x float> poison, float %y, i32 0
2629  %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
2630  %d = fsub <4 x float> %a, %c
2631  store <4 x float> %d, ptr %x
2632  ret void
2633}
2634
; Vector - splat(scalar) for <2 x double>; the splat of %y is the subtrahend.
; All RUN configurations share one expected sequence: the splat folds into a
; single vfsub.vf with fa0 at e64/m1.
2635define void @fsub_vf_v2f64(ptr %x, double %y) {
2636; CHECK-LABEL: fsub_vf_v2f64:
2637; CHECK:       # %bb.0:
2638; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
2639; CHECK-NEXT:    vle64.v v8, (a0)
2640; CHECK-NEXT:    vfsub.vf v8, v8, fa0
2641; CHECK-NEXT:    vse64.v v8, (a0)
2642; CHECK-NEXT:    ret
2643  %a = load <2 x double>, ptr %x
2644  %b = insertelement <2 x double> poison, double %y, i32 0
2645  %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer
2646  %d = fsub <2 x double> %a, %c
2647  store <2 x double> %d, ptr %x
2648  ret void
2649}
2650
; splat(scalar) - vector for <8 x bfloat>: the splat of %y is the minuend and
; the loaded vector is the subtrahend. In the expected sequence, shared by all
; RUN configurations, the vfsub.vv sources are swapped relative to the vf form:
; widened splat (v12) minus widened loaded vector (v10).
2651define void @fsub_fv_v8bf16(ptr %x, bfloat %y) {
2652; CHECK-LABEL: fsub_fv_v8bf16:
2653; CHECK:       # %bb.0:
2654; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
2655; CHECK-NEXT:    vle16.v v8, (a0)
2656; CHECK-NEXT:    fmv.x.w a1, fa0
2657; CHECK-NEXT:    vmv.v.x v9, a1
2658; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
2659; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
2660; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
2661; CHECK-NEXT:    vfsub.vv v8, v12, v10
2662; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
2663; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
2664; CHECK-NEXT:    vse16.v v10, (a0)
2665; CHECK-NEXT:    ret
2666  %a = load <8 x bfloat>, ptr %x
2667  %b = insertelement <8 x bfloat> poison, bfloat %y, i32 0
2668  %c = shufflevector <8 x bfloat> %b, <8 x bfloat> poison, <8 x i32> zeroinitializer
2669  %d = fsub <8 x bfloat> %c, %a
2670  store <8 x bfloat> %d, ptr %x
2671  ret void
2672}
2673
; fsub, scalar splat minus vector, on the non-power-of-two type <6 x bfloat>.
; The expected sequence matches the 8-element bfloat variant (GPR splat, widen
; with vfwcvtbf16.f.f.v, vfsub.vv at e32/m2, narrow with vfncvtbf16.f.f.w)
; except that the vector length is set to 6.
2674define void @fsub_fv_v6bf16(ptr %x, bfloat %y) {
2675; CHECK-LABEL: fsub_fv_v6bf16:
2676; CHECK:       # %bb.0:
2677; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
2678; CHECK-NEXT:    vle16.v v8, (a0)
2679; CHECK-NEXT:    fmv.x.w a1, fa0
2680; CHECK-NEXT:    vmv.v.x v9, a1
2681; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
2682; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
2683; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
2684; CHECK-NEXT:    vfsub.vv v8, v12, v10
2685; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
2686; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
2687; CHECK-NEXT:    vse16.v v10, (a0)
2688; CHECK-NEXT:    ret
2689  %a = load <6 x bfloat>, ptr %x
2690  %b = insertelement <6 x bfloat> poison, bfloat %y, i32 0
2691  %c = shufflevector <6 x bfloat> %b, <6 x bfloat> poison, <6 x i32> zeroinitializer
2692  %d = fsub <6 x bfloat> %c, %a
2693  store <6 x bfloat> %d, ptr %x
2694  ret void
2695}
2696
; fsub, scalar splat minus vector, on <8 x half>. Under the ZVFH prefix (+zvfh
; runs) a single reverse subtract, vfrsub.vf, is expected. Under the ZVFHMIN
; prefix (+zvfhmin runs) the scalar is splatted through a GPR (fmv.x.w,
; vmv.v.x), both operands are widened with vfwcvt.f.f.v, subtracted at e32/m2
; with vfsub.vv and narrowed again with vfncvt.f.f.w.
2697define void @fsub_fv_v8f16(ptr %x, half %y) {
2698; ZVFH-LABEL: fsub_fv_v8f16:
2699; ZVFH:       # %bb.0:
2700; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
2701; ZVFH-NEXT:    vle16.v v8, (a0)
2702; ZVFH-NEXT:    vfrsub.vf v8, v8, fa0
2703; ZVFH-NEXT:    vse16.v v8, (a0)
2704; ZVFH-NEXT:    ret
2705;
2706; ZVFHMIN-LABEL: fsub_fv_v8f16:
2707; ZVFHMIN:       # %bb.0:
2708; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
2709; ZVFHMIN-NEXT:    vle16.v v8, (a0)
2710; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
2711; ZVFHMIN-NEXT:    vmv.v.x v9, a1
2712; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
2713; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
2714; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
2715; ZVFHMIN-NEXT:    vfsub.vv v8, v12, v10
2716; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
2717; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
2718; ZVFHMIN-NEXT:    vse16.v v10, (a0)
2719; ZVFHMIN-NEXT:    ret
2720  %a = load <8 x half>, ptr %x
2721  %b = insertelement <8 x half> poison, half %y, i32 0
2722  %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
2723  %d = fsub <8 x half> %c, %a
2724  store <8 x half> %d, ptr %x
2725  ret void
2726}
2727
; fsub, scalar splat minus vector, on the non-power-of-two type <6 x half>.
; Expectations mirror the 8-element half variant with the vector length set to
; 6: one vfrsub.vf under the ZVFH prefix, and the GPR-splat / widen / vfsub.vv /
; narrow sequence under the ZVFHMIN prefix.
2728define void @fsub_fv_v6f16(ptr %x, half %y) {
2729; ZVFH-LABEL: fsub_fv_v6f16:
2730; ZVFH:       # %bb.0:
2731; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
2732; ZVFH-NEXT:    vle16.v v8, (a0)
2733; ZVFH-NEXT:    vfrsub.vf v8, v8, fa0
2734; ZVFH-NEXT:    vse16.v v8, (a0)
2735; ZVFH-NEXT:    ret
2736;
2737; ZVFHMIN-LABEL: fsub_fv_v6f16:
2738; ZVFHMIN:       # %bb.0:
2739; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
2740; ZVFHMIN-NEXT:    vle16.v v8, (a0)
2741; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
2742; ZVFHMIN-NEXT:    vmv.v.x v9, a1
2743; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
2744; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
2745; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
2746; ZVFHMIN-NEXT:    vfsub.vv v8, v12, v10
2747; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
2748; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
2749; ZVFHMIN-NEXT:    vse16.v v10, (a0)
2750; ZVFHMIN-NEXT:    ret
2751  %a = load <6 x half>, ptr %x
2752  %b = insertelement <6 x half> poison, half %y, i32 0
2753  %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
2754  %d = fsub <6 x half> %c, %a
2755  store <6 x half> %d, ptr %x
2756  ret void
2757}
2758
; fsub, scalar splat minus vector, on <4 x float>: the splat of %y is the first
; fsub operand and the loaded vector the second. The common assertions expect
; the reverse-subtract form, a single vfrsub.vf with fa0 as the scalar.
2759define void @fsub_fv_v4f32(ptr %x, float %y) {
2760; CHECK-LABEL: fsub_fv_v4f32:
2761; CHECK:       # %bb.0:
2762; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
2763; CHECK-NEXT:    vle32.v v8, (a0)
2764; CHECK-NEXT:    vfrsub.vf v8, v8, fa0
2765; CHECK-NEXT:    vse32.v v8, (a0)
2766; CHECK-NEXT:    ret
2767  %a = load <4 x float>, ptr %x
2768  %b = insertelement <4 x float> poison, float %y, i32 0
2769  %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
2770  %d = fsub <4 x float> %c, %a
2771  store <4 x float> %d, ptr %x
2772  ret void
2773}
2774
; fsub, scalar splat minus vector, on <2 x double>. The common assertions
; expect e64/m1 configuration and a single vfrsub.vf with fa0 as the scalar.
2775define void @fsub_fv_v2f64(ptr %x, double %y) {
2776; CHECK-LABEL: fsub_fv_v2f64:
2777; CHECK:       # %bb.0:
2778; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
2779; CHECK-NEXT:    vle64.v v8, (a0)
2780; CHECK-NEXT:    vfrsub.vf v8, v8, fa0
2781; CHECK-NEXT:    vse64.v v8, (a0)
2782; CHECK-NEXT:    ret
2783  %a = load <2 x double>, ptr %x
2784  %b = insertelement <2 x double> poison, double %y, i32 0
2785  %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer
2786  %d = fsub <2 x double> %c, %a
2787  store <2 x double> %d, ptr %x
2788  ret void
2789}
2790
; fmul, vector times scalar splat, on <8 x bfloat>. The expected code splats
; the scalar through a GPR (fmv.x.w, vmv.v.x), widens both operands with
; vfwcvtbf16.f.f.v, multiplies at e32/m2 with vfmul.vv (widened vector as the
; first source) and narrows with vfncvtbf16.f.f.w before storing back to %x.
2791define void @fmul_vf_v8bf16(ptr %x, bfloat %y) {
2792; CHECK-LABEL: fmul_vf_v8bf16:
2793; CHECK:       # %bb.0:
2794; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
2795; CHECK-NEXT:    vle16.v v8, (a0)
2796; CHECK-NEXT:    fmv.x.w a1, fa0
2797; CHECK-NEXT:    vmv.v.x v9, a1
2798; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
2799; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
2800; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
2801; CHECK-NEXT:    vfmul.vv v8, v10, v12
2802; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
2803; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
2804; CHECK-NEXT:    vse16.v v10, (a0)
2805; CHECK-NEXT:    ret
2806  %a = load <8 x bfloat>, ptr %x
2807  %b = insertelement <8 x bfloat> poison, bfloat %y, i32 0
2808  %c = shufflevector <8 x bfloat> %b, <8 x bfloat> poison, <8 x i32> zeroinitializer
2809  %d = fmul <8 x bfloat> %a, %c
2810  store <8 x bfloat> %d, ptr %x
2811  ret void
2812}
2813
; fmul, vector times scalar splat, on the non-power-of-two type <6 x bfloat>.
; Same expected sequence as the 8-element bfloat variant (GPR splat, widen,
; vfmul.vv at e32/m2, narrow) with the vector length set to 6.
2814define void @fmul_vf_v6bf16(ptr %x, bfloat %y) {
2815; CHECK-LABEL: fmul_vf_v6bf16:
2816; CHECK:       # %bb.0:
2817; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
2818; CHECK-NEXT:    vle16.v v8, (a0)
2819; CHECK-NEXT:    fmv.x.w a1, fa0
2820; CHECK-NEXT:    vmv.v.x v9, a1
2821; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
2822; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
2823; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
2824; CHECK-NEXT:    vfmul.vv v8, v10, v12
2825; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
2826; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
2827; CHECK-NEXT:    vse16.v v10, (a0)
2828; CHECK-NEXT:    ret
2829  %a = load <6 x bfloat>, ptr %x
2830  %b = insertelement <6 x bfloat> poison, bfloat %y, i32 0
2831  %c = shufflevector <6 x bfloat> %b, <6 x bfloat> poison, <6 x i32> zeroinitializer
2832  %d = fmul <6 x bfloat> %a, %c
2833  store <6 x bfloat> %d, ptr %x
2834  ret void
2835}
2836
; fmul, vector times scalar splat, on <8 x half>. Under the ZVFH prefix (+zvfh
; runs) a single vfmul.vf is expected. Under the ZVFHMIN prefix (+zvfhmin runs)
; the scalar is splatted through a GPR, both operands are widened with
; vfwcvt.f.f.v, multiplied at e32/m2 with vfmul.vv and narrowed with
; vfncvt.f.f.w.
2837define void @fmul_vf_v8f16(ptr %x, half %y) {
2838; ZVFH-LABEL: fmul_vf_v8f16:
2839; ZVFH:       # %bb.0:
2840; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
2841; ZVFH-NEXT:    vle16.v v8, (a0)
2842; ZVFH-NEXT:    vfmul.vf v8, v8, fa0
2843; ZVFH-NEXT:    vse16.v v8, (a0)
2844; ZVFH-NEXT:    ret
2845;
2846; ZVFHMIN-LABEL: fmul_vf_v8f16:
2847; ZVFHMIN:       # %bb.0:
2848; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
2849; ZVFHMIN-NEXT:    vle16.v v8, (a0)
2850; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
2851; ZVFHMIN-NEXT:    vmv.v.x v9, a1
2852; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
2853; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
2854; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
2855; ZVFHMIN-NEXT:    vfmul.vv v8, v10, v12
2856; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
2857; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
2858; ZVFHMIN-NEXT:    vse16.v v10, (a0)
2859; ZVFHMIN-NEXT:    ret
2860  %a = load <8 x half>, ptr %x
2861  %b = insertelement <8 x half> poison, half %y, i32 0
2862  %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
2863  %d = fmul <8 x half> %a, %c
2864  store <8 x half> %d, ptr %x
2865  ret void
2866}
2867
; fmul, vector times scalar splat, on the non-power-of-two type <6 x half>.
; Expectations mirror the 8-element half variant with the vector length set to
; 6: one vfmul.vf under the ZVFH prefix, and the GPR-splat / widen / vfmul.vv /
; narrow sequence under the ZVFHMIN prefix.
2868define void @fmul_vf_v6f16(ptr %x, half %y) {
2869; ZVFH-LABEL: fmul_vf_v6f16:
2870; ZVFH:       # %bb.0:
2871; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
2872; ZVFH-NEXT:    vle16.v v8, (a0)
2873; ZVFH-NEXT:    vfmul.vf v8, v8, fa0
2874; ZVFH-NEXT:    vse16.v v8, (a0)
2875; ZVFH-NEXT:    ret
2876;
2877; ZVFHMIN-LABEL: fmul_vf_v6f16:
2878; ZVFHMIN:       # %bb.0:
2879; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
2880; ZVFHMIN-NEXT:    vle16.v v8, (a0)
2881; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
2882; ZVFHMIN-NEXT:    vmv.v.x v9, a1
2883; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
2884; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
2885; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
2886; ZVFHMIN-NEXT:    vfmul.vv v8, v10, v12
2887; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
2888; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
2889; ZVFHMIN-NEXT:    vse16.v v10, (a0)
2890; ZVFHMIN-NEXT:    ret
2891  %a = load <6 x half>, ptr %x
2892  %b = insertelement <6 x half> poison, half %y, i32 0
2893  %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
2894  %d = fmul <6 x half> %a, %c
2895  store <6 x half> %d, ptr %x
2896  ret void
2897}
2898
; fmul, vector times scalar splat, on <4 x float>: loads from %x, multiplies by
; a splat of %y and stores back to %x. The common assertions expect a single
; vfmul.vf with fa0 as the scalar operand.
2899define void @fmul_vf_v4f32(ptr %x, float %y) {
2900; CHECK-LABEL: fmul_vf_v4f32:
2901; CHECK:       # %bb.0:
2902; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
2903; CHECK-NEXT:    vle32.v v8, (a0)
2904; CHECK-NEXT:    vfmul.vf v8, v8, fa0
2905; CHECK-NEXT:    vse32.v v8, (a0)
2906; CHECK-NEXT:    ret
2907  %a = load <4 x float>, ptr %x
2908  %b = insertelement <4 x float> poison, float %y, i32 0
2909  %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
2910  %d = fmul <4 x float> %a, %c
2911  store <4 x float> %d, ptr %x
2912  ret void
2913}
2914
; fmul, vector times scalar splat, on <2 x double>. The common assertions
; expect e64/m1 configuration and a single vfmul.vf with fa0 as the scalar.
2915define void @fmul_vf_v2f64(ptr %x, double %y) {
2916; CHECK-LABEL: fmul_vf_v2f64:
2917; CHECK:       # %bb.0:
2918; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
2919; CHECK-NEXT:    vle64.v v8, (a0)
2920; CHECK-NEXT:    vfmul.vf v8, v8, fa0
2921; CHECK-NEXT:    vse64.v v8, (a0)
2922; CHECK-NEXT:    ret
2923  %a = load <2 x double>, ptr %x
2924  %b = insertelement <2 x double> poison, double %y, i32 0
2925  %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer
2926  %d = fmul <2 x double> %a, %c
2927  store <2 x double> %d, ptr %x
2928  ret void
2929}
2930
; fmul, scalar splat times vector, on <8 x bfloat> (operands commuted relative
; to the vector-times-splat form: the splat is the first fmul operand). The
; expected code is the GPR-splat / vfwcvtbf16.f.f.v / vfmul.vv /
; vfncvtbf16.f.f.w sequence, with the widened splat as the first vfmul.vv
; source.
2931define void @fmul_fv_v8bf16(ptr %x, bfloat %y) {
2932; CHECK-LABEL: fmul_fv_v8bf16:
2933; CHECK:       # %bb.0:
2934; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
2935; CHECK-NEXT:    vle16.v v8, (a0)
2936; CHECK-NEXT:    fmv.x.w a1, fa0
2937; CHECK-NEXT:    vmv.v.x v9, a1
2938; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
2939; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
2940; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
2941; CHECK-NEXT:    vfmul.vv v8, v12, v10
2942; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
2943; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
2944; CHECK-NEXT:    vse16.v v10, (a0)
2945; CHECK-NEXT:    ret
2946  %a = load <8 x bfloat>, ptr %x
2947  %b = insertelement <8 x bfloat> poison, bfloat %y, i32 0
2948  %c = shufflevector <8 x bfloat> %b, <8 x bfloat> poison, <8 x i32> zeroinitializer
2949  %d = fmul <8 x bfloat> %c, %a
2950  store <8 x bfloat> %d, ptr %x
2951  ret void
2952}
2953
; fmul, scalar splat times vector, on the non-power-of-two type <6 x bfloat>.
; Same expected sequence as the 8-element commuted bfloat variant, with the
; vector length set to 6.
2954define void @fmul_fv_v6bf16(ptr %x, bfloat %y) {
2955; CHECK-LABEL: fmul_fv_v6bf16:
2956; CHECK:       # %bb.0:
2957; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
2958; CHECK-NEXT:    vle16.v v8, (a0)
2959; CHECK-NEXT:    fmv.x.w a1, fa0
2960; CHECK-NEXT:    vmv.v.x v9, a1
2961; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
2962; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
2963; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
2964; CHECK-NEXT:    vfmul.vv v8, v12, v10
2965; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
2966; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
2967; CHECK-NEXT:    vse16.v v10, (a0)
2968; CHECK-NEXT:    ret
2969  %a = load <6 x bfloat>, ptr %x
2970  %b = insertelement <6 x bfloat> poison, bfloat %y, i32 0
2971  %c = shufflevector <6 x bfloat> %b, <6 x bfloat> poison, <6 x i32> zeroinitializer
2972  %d = fmul <6 x bfloat> %c, %a
2973  store <6 x bfloat> %d, ptr %x
2974  ret void
2975}
2976
; fmul, scalar splat times vector, on <8 x half>. Under the ZVFH prefix the
; commuted multiply is still expected to become a single vfmul.vf. Under the
; ZVFHMIN prefix the GPR-splat / vfwcvt.f.f.v / vfmul.vv / vfncvt.f.f.w sequence
; is expected, with the widened splat as the first vfmul.vv source.
2977define void @fmul_fv_v8f16(ptr %x, half %y) {
2978; ZVFH-LABEL: fmul_fv_v8f16:
2979; ZVFH:       # %bb.0:
2980; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
2981; ZVFH-NEXT:    vle16.v v8, (a0)
2982; ZVFH-NEXT:    vfmul.vf v8, v8, fa0
2983; ZVFH-NEXT:    vse16.v v8, (a0)
2984; ZVFH-NEXT:    ret
2985;
2986; ZVFHMIN-LABEL: fmul_fv_v8f16:
2987; ZVFHMIN:       # %bb.0:
2988; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
2989; ZVFHMIN-NEXT:    vle16.v v8, (a0)
2990; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
2991; ZVFHMIN-NEXT:    vmv.v.x v9, a1
2992; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
2993; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
2994; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
2995; ZVFHMIN-NEXT:    vfmul.vv v8, v12, v10
2996; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
2997; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
2998; ZVFHMIN-NEXT:    vse16.v v10, (a0)
2999; ZVFHMIN-NEXT:    ret
3000  %a = load <8 x half>, ptr %x
3001  %b = insertelement <8 x half> poison, half %y, i32 0
3002  %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
3003  %d = fmul <8 x half> %c, %a
3004  store <8 x half> %d, ptr %x
3005  ret void
3006}
3007
; fmul, scalar splat times vector, on the non-power-of-two type <6 x half>.
; Expectations mirror the 8-element commuted half variant with the vector
; length set to 6.
3008define void @fmul_fv_v6f16(ptr %x, half %y) {
3009; ZVFH-LABEL: fmul_fv_v6f16:
3010; ZVFH:       # %bb.0:
3011; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
3012; ZVFH-NEXT:    vle16.v v8, (a0)
3013; ZVFH-NEXT:    vfmul.vf v8, v8, fa0
3014; ZVFH-NEXT:    vse16.v v8, (a0)
3015; ZVFH-NEXT:    ret
3016;
3017; ZVFHMIN-LABEL: fmul_fv_v6f16:
3018; ZVFHMIN:       # %bb.0:
3019; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
3020; ZVFHMIN-NEXT:    vle16.v v8, (a0)
3021; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
3022; ZVFHMIN-NEXT:    vmv.v.x v9, a1
3023; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
3024; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
3025; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
3026; ZVFHMIN-NEXT:    vfmul.vv v8, v12, v10
3027; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
3028; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
3029; ZVFHMIN-NEXT:    vse16.v v10, (a0)
3030; ZVFHMIN-NEXT:    ret
3031  %a = load <6 x half>, ptr %x
3032  %b = insertelement <6 x half> poison, half %y, i32 0
3033  %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
3034  %d = fmul <6 x half> %c, %a
3035  store <6 x half> %d, ptr %x
3036  ret void
3037}
3038
; fmul, scalar splat times vector, on <4 x float>. Although the splat is the
; first fmul operand, the common assertions expect the same single vfmul.vf as
; the vector-times-splat form.
3039define void @fmul_fv_v4f32(ptr %x, float %y) {
3040; CHECK-LABEL: fmul_fv_v4f32:
3041; CHECK:       # %bb.0:
3042; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
3043; CHECK-NEXT:    vle32.v v8, (a0)
3044; CHECK-NEXT:    vfmul.vf v8, v8, fa0
3045; CHECK-NEXT:    vse32.v v8, (a0)
3046; CHECK-NEXT:    ret
3047  %a = load <4 x float>, ptr %x
3048  %b = insertelement <4 x float> poison, float %y, i32 0
3049  %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
3050  %d = fmul <4 x float> %c, %a
3051  store <4 x float> %d, ptr %x
3052  ret void
3053}
3054
; fmul, scalar splat times vector, on <2 x double>. The common assertions
; expect e64/m1 configuration and a single vfmul.vf with fa0 as the scalar.
3055define void @fmul_fv_v2f64(ptr %x, double %y) {
3056; CHECK-LABEL: fmul_fv_v2f64:
3057; CHECK:       # %bb.0:
3058; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
3059; CHECK-NEXT:    vle64.v v8, (a0)
3060; CHECK-NEXT:    vfmul.vf v8, v8, fa0
3061; CHECK-NEXT:    vse64.v v8, (a0)
3062; CHECK-NEXT:    ret
3063  %a = load <2 x double>, ptr %x
3064  %b = insertelement <2 x double> poison, double %y, i32 0
3065  %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer
3066  %d = fmul <2 x double> %c, %a
3067  store <2 x double> %d, ptr %x
3068  ret void
3069}
3070
; fdiv, vector divided by scalar splat, on <8 x bfloat>. The expected code
; splats the scalar through a GPR (fmv.x.w, vmv.v.x), widens both operands with
; vfwcvtbf16.f.f.v, divides at e32/m2 with vfdiv.vv (widened vector as the
; first source) and narrows with vfncvtbf16.f.f.w before storing back to %x.
3071define void @fdiv_vf_v8bf16(ptr %x, bfloat %y) {
3072; CHECK-LABEL: fdiv_vf_v8bf16:
3073; CHECK:       # %bb.0:
3074; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
3075; CHECK-NEXT:    vle16.v v8, (a0)
3076; CHECK-NEXT:    fmv.x.w a1, fa0
3077; CHECK-NEXT:    vmv.v.x v9, a1
3078; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
3079; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
3080; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
3081; CHECK-NEXT:    vfdiv.vv v8, v10, v12
3082; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
3083; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
3084; CHECK-NEXT:    vse16.v v10, (a0)
3085; CHECK-NEXT:    ret
3086  %a = load <8 x bfloat>, ptr %x
3087  %b = insertelement <8 x bfloat> poison, bfloat %y, i32 0
3088  %c = shufflevector <8 x bfloat> %b, <8 x bfloat> poison, <8 x i32> zeroinitializer
3089  %d = fdiv <8 x bfloat> %a, %c
3090  store <8 x bfloat> %d, ptr %x
3091  ret void
3092}
3093
; fdiv, vector divided by scalar splat, on the non-power-of-two type
; <6 x bfloat>. Same expected sequence as the 8-element bfloat variant (GPR
; splat, widen, vfdiv.vv at e32/m2, narrow) with the vector length set to 6.
3094define void @fdiv_vf_v6bf16(ptr %x, bfloat %y) {
3095; CHECK-LABEL: fdiv_vf_v6bf16:
3096; CHECK:       # %bb.0:
3097; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
3098; CHECK-NEXT:    vle16.v v8, (a0)
3099; CHECK-NEXT:    fmv.x.w a1, fa0
3100; CHECK-NEXT:    vmv.v.x v9, a1
3101; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
3102; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
3103; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
3104; CHECK-NEXT:    vfdiv.vv v8, v10, v12
3105; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
3106; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
3107; CHECK-NEXT:    vse16.v v10, (a0)
3108; CHECK-NEXT:    ret
3109  %a = load <6 x bfloat>, ptr %x
3110  %b = insertelement <6 x bfloat> poison, bfloat %y, i32 0
3111  %c = shufflevector <6 x bfloat> %b, <6 x bfloat> poison, <6 x i32> zeroinitializer
3112  %d = fdiv <6 x bfloat> %a, %c
3113  store <6 x bfloat> %d, ptr %x
3114  ret void
3115}
3116
; fdiv, vector divided by scalar splat, on <8 x half>. Under the ZVFH prefix
; (+zvfh runs) a single vfdiv.vf is expected. Under the ZVFHMIN prefix
; (+zvfhmin runs) the scalar is splatted through a GPR, both operands are
; widened with vfwcvt.f.f.v, divided at e32/m2 with vfdiv.vv and narrowed with
; vfncvt.f.f.w.
3117define void @fdiv_vf_v8f16(ptr %x, half %y) {
3118; ZVFH-LABEL: fdiv_vf_v8f16:
3119; ZVFH:       # %bb.0:
3120; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
3121; ZVFH-NEXT:    vle16.v v8, (a0)
3122; ZVFH-NEXT:    vfdiv.vf v8, v8, fa0
3123; ZVFH-NEXT:    vse16.v v8, (a0)
3124; ZVFH-NEXT:    ret
3125;
3126; ZVFHMIN-LABEL: fdiv_vf_v8f16:
3127; ZVFHMIN:       # %bb.0:
3128; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
3129; ZVFHMIN-NEXT:    vle16.v v8, (a0)
3130; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
3131; ZVFHMIN-NEXT:    vmv.v.x v9, a1
3132; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
3133; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
3134; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
3135; ZVFHMIN-NEXT:    vfdiv.vv v8, v10, v12
3136; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
3137; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
3138; ZVFHMIN-NEXT:    vse16.v v10, (a0)
3139; ZVFHMIN-NEXT:    ret
3140  %a = load <8 x half>, ptr %x
3141  %b = insertelement <8 x half> poison, half %y, i32 0
3142  %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
3143  %d = fdiv <8 x half> %a, %c
3144  store <8 x half> %d, ptr %x
3145  ret void
3146}
3147
; fdiv, vector divided by scalar splat, on the non-power-of-two type
; <6 x half>. Expectations mirror the 8-element half variant with the vector
; length set to 6: one vfdiv.vf under the ZVFH prefix, and the GPR-splat /
; widen / vfdiv.vv / narrow sequence under the ZVFHMIN prefix.
3148define void @fdiv_vf_v6f16(ptr %x, half %y) {
3149; ZVFH-LABEL: fdiv_vf_v6f16:
3150; ZVFH:       # %bb.0:
3151; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
3152; ZVFH-NEXT:    vle16.v v8, (a0)
3153; ZVFH-NEXT:    vfdiv.vf v8, v8, fa0
3154; ZVFH-NEXT:    vse16.v v8, (a0)
3155; ZVFH-NEXT:    ret
3156;
3157; ZVFHMIN-LABEL: fdiv_vf_v6f16:
3158; ZVFHMIN:       # %bb.0:
3159; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
3160; ZVFHMIN-NEXT:    vle16.v v8, (a0)
3161; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
3162; ZVFHMIN-NEXT:    vmv.v.x v9, a1
3163; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
3164; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
3165; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
3166; ZVFHMIN-NEXT:    vfdiv.vv v8, v10, v12
3167; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
3168; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
3169; ZVFHMIN-NEXT:    vse16.v v10, (a0)
3170; ZVFHMIN-NEXT:    ret
3171  %a = load <6 x half>, ptr %x
3172  %b = insertelement <6 x half> poison, half %y, i32 0
3173  %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
3174  %d = fdiv <6 x half> %a, %c
3175  store <6 x half> %d, ptr %x
3176  ret void
3177}
3178
; fdiv, vector divided by scalar splat, on <4 x float>: loads from %x, divides
; by a splat of %y and stores back to %x. The common assertions expect a single
; vfdiv.vf with fa0 as the scalar operand.
3179define void @fdiv_vf_v4f32(ptr %x, float %y) {
3180; CHECK-LABEL: fdiv_vf_v4f32:
3181; CHECK:       # %bb.0:
3182; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
3183; CHECK-NEXT:    vle32.v v8, (a0)
3184; CHECK-NEXT:    vfdiv.vf v8, v8, fa0
3185; CHECK-NEXT:    vse32.v v8, (a0)
3186; CHECK-NEXT:    ret
3187  %a = load <4 x float>, ptr %x
3188  %b = insertelement <4 x float> poison, float %y, i32 0
3189  %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
3190  %d = fdiv <4 x float> %a, %c
3191  store <4 x float> %d, ptr %x
3192  ret void
3193}
3194
; fdiv, vector divided by scalar splat, on <2 x double>. The common assertions
; expect e64/m1 configuration and a single vfdiv.vf with fa0 as the scalar.
3195define void @fdiv_vf_v2f64(ptr %x, double %y) {
3196; CHECK-LABEL: fdiv_vf_v2f64:
3197; CHECK:       # %bb.0:
3198; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
3199; CHECK-NEXT:    vle64.v v8, (a0)
3200; CHECK-NEXT:    vfdiv.vf v8, v8, fa0
3201; CHECK-NEXT:    vse64.v v8, (a0)
3202; CHECK-NEXT:    ret
3203  %a = load <2 x double>, ptr %x
3204  %b = insertelement <2 x double> poison, double %y, i32 0
3205  %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer
3206  %d = fdiv <2 x double> %a, %c
3207  store <2 x double> %d, ptr %x
3208  ret void
3209}
3210
; fdiv, scalar splat divided by vector, on <8 x bfloat> (the splat is the
; dividend). The expected code is the GPR-splat / vfwcvtbf16.f.f.v / vfdiv.vv /
; vfncvtbf16.f.f.w sequence, with the widened splat as the first vfdiv.vv
; source and the widened loaded vector as the second.
3211define void @fdiv_fv_v8bf16(ptr %x, bfloat %y) {
3212; CHECK-LABEL: fdiv_fv_v8bf16:
3213; CHECK:       # %bb.0:
3214; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
3215; CHECK-NEXT:    vle16.v v8, (a0)
3216; CHECK-NEXT:    fmv.x.w a1, fa0
3217; CHECK-NEXT:    vmv.v.x v9, a1
3218; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
3219; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
3220; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
3221; CHECK-NEXT:    vfdiv.vv v8, v12, v10
3222; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
3223; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
3224; CHECK-NEXT:    vse16.v v10, (a0)
3225; CHECK-NEXT:    ret
3226  %a = load <8 x bfloat>, ptr %x
3227  %b = insertelement <8 x bfloat> poison, bfloat %y, i32 0
3228  %c = shufflevector <8 x bfloat> %b, <8 x bfloat> poison, <8 x i32> zeroinitializer
3229  %d = fdiv <8 x bfloat> %c, %a
3230  store <8 x bfloat> %d, ptr %x
3231  ret void
3232}
3233
; fdiv, scalar splat divided by vector, on the non-power-of-two type
; <6 x bfloat>. Same expected sequence as the 8-element bfloat variant, with
; the vector length set to 6.
3234define void @fdiv_fv_v6bf16(ptr %x, bfloat %y) {
3235; CHECK-LABEL: fdiv_fv_v6bf16:
3236; CHECK:       # %bb.0:
3237; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
3238; CHECK-NEXT:    vle16.v v8, (a0)
3239; CHECK-NEXT:    fmv.x.w a1, fa0
3240; CHECK-NEXT:    vmv.v.x v9, a1
3241; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
3242; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
3243; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
3244; CHECK-NEXT:    vfdiv.vv v8, v12, v10
3245; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
3246; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
3247; CHECK-NEXT:    vse16.v v10, (a0)
3248; CHECK-NEXT:    ret
3249  %a = load <6 x bfloat>, ptr %x
3250  %b = insertelement <6 x bfloat> poison, bfloat %y, i32 0
3251  %c = shufflevector <6 x bfloat> %b, <6 x bfloat> poison, <6 x i32> zeroinitializer
3252  %d = fdiv <6 x bfloat> %c, %a
3253  store <6 x bfloat> %d, ptr %x
3254  ret void
3255}
3256
; fdiv, scalar splat divided by vector, on <8 x half>. Under the ZVFH prefix
; (+zvfh runs) a single reverse divide, vfrdiv.vf, is expected. Under the
; ZVFHMIN prefix (+zvfhmin runs) the GPR-splat / vfwcvt.f.f.v / vfdiv.vv /
; vfncvt.f.f.w sequence is expected, with the widened splat as the first
; vfdiv.vv source.
3257define void @fdiv_fv_v8f16(ptr %x, half %y) {
3258; ZVFH-LABEL: fdiv_fv_v8f16:
3259; ZVFH:       # %bb.0:
3260; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
3261; ZVFH-NEXT:    vle16.v v8, (a0)
3262; ZVFH-NEXT:    vfrdiv.vf v8, v8, fa0
3263; ZVFH-NEXT:    vse16.v v8, (a0)
3264; ZVFH-NEXT:    ret
3265;
3266; ZVFHMIN-LABEL: fdiv_fv_v8f16:
3267; ZVFHMIN:       # %bb.0:
3268; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
3269; ZVFHMIN-NEXT:    vle16.v v8, (a0)
3270; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
3271; ZVFHMIN-NEXT:    vmv.v.x v9, a1
3272; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
3273; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
3274; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
3275; ZVFHMIN-NEXT:    vfdiv.vv v8, v12, v10
3276; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
3277; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
3278; ZVFHMIN-NEXT:    vse16.v v10, (a0)
3279; ZVFHMIN-NEXT:    ret
3280  %a = load <8 x half>, ptr %x
3281  %b = insertelement <8 x half> poison, half %y, i32 0
3282  %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
3283  %d = fdiv <8 x half> %c, %a
3284  store <8 x half> %d, ptr %x
3285  ret void
3286}
3287
; fdiv, scalar splat divided by vector, on the non-power-of-two type
; <6 x half>. Expectations mirror the 8-element half variant with the vector
; length set to 6: one vfrdiv.vf under the ZVFH prefix, and the GPR-splat /
; widen / vfdiv.vv / narrow sequence under the ZVFHMIN prefix.
3288define void @fdiv_fv_v6f16(ptr %x, half %y) {
3289; ZVFH-LABEL: fdiv_fv_v6f16:
3290; ZVFH:       # %bb.0:
3291; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
3292; ZVFH-NEXT:    vle16.v v8, (a0)
3293; ZVFH-NEXT:    vfrdiv.vf v8, v8, fa0
3294; ZVFH-NEXT:    vse16.v v8, (a0)
3295; ZVFH-NEXT:    ret
3296;
3297; ZVFHMIN-LABEL: fdiv_fv_v6f16:
3298; ZVFHMIN:       # %bb.0:
3299; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
3300; ZVFHMIN-NEXT:    vle16.v v8, (a0)
3301; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
3302; ZVFHMIN-NEXT:    vmv.v.x v9, a1
3303; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
3304; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
3305; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
3306; ZVFHMIN-NEXT:    vfdiv.vv v8, v12, v10
3307; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
3308; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
3309; ZVFHMIN-NEXT:    vse16.v v10, (a0)
3310; ZVFHMIN-NEXT:    ret
3311  %a = load <6 x half>, ptr %x
3312  %b = insertelement <6 x half> poison, half %y, i32 0
3313  %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
3314  %d = fdiv <6 x half> %c, %a
3315  store <6 x half> %d, ptr %x
3316  ret void
3317}
3318
; fdiv, scalar splat divided by vector, on <4 x float>: the splat of %y is the
; dividend and the loaded vector the divisor. The common assertions expect the
; reverse-divide form, a single vfrdiv.vf with fa0 as the scalar.
3319define void @fdiv_fv_v4f32(ptr %x, float %y) {
3320; CHECK-LABEL: fdiv_fv_v4f32:
3321; CHECK:       # %bb.0:
3322; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
3323; CHECK-NEXT:    vle32.v v8, (a0)
3324; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0
3325; CHECK-NEXT:    vse32.v v8, (a0)
3326; CHECK-NEXT:    ret
3327  %a = load <4 x float>, ptr %x
3328  %b = insertelement <4 x float> poison, float %y, i32 0
3329  %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
3330  %d = fdiv <4 x float> %c, %a
3331  store <4 x float> %d, ptr %x
3332  ret void
3333}
3334
; fdiv, scalar splat divided by vector, on <2 x double>. The common assertions
; expect e64/m1 configuration and a single vfrdiv.vf with fa0 as the scalar.
3335define void @fdiv_fv_v2f64(ptr %x, double %y) {
3336; CHECK-LABEL: fdiv_fv_v2f64:
3337; CHECK:       # %bb.0:
3338; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
3339; CHECK-NEXT:    vle64.v v8, (a0)
3340; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0
3341; CHECK-NEXT:    vse64.v v8, (a0)
3342; CHECK-NEXT:    ret
3343  %a = load <2 x double>, ptr %x
3344  %b = insertelement <2 x double> poison, double %y, i32 0
3345  %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer
3346  %d = fdiv <2 x double> %c, %a
3347  store <2 x double> %d, ptr %x
3348  ret void
3349}
3350
; llvm.fma on <8 x bfloat> with the scalar splat as the second multiplicand:
; computes fma(load %x, splat(%z), load %y) and stores the result to %x. The
; expected code splats the scalar through a GPR (fmv.x.w, vmv.v.x), widens all
; three operands with vfwcvtbf16.f.f.v, performs vfmadd.vv at e32/m2 and
; narrows with vfncvtbf16.f.f.w.
3351define void @fma_vf_v8bf16(ptr %x, ptr %y, bfloat %z) {
3352; CHECK-LABEL: fma_vf_v8bf16:
3353; CHECK:       # %bb.0:
3354; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
3355; CHECK-NEXT:    vle16.v v8, (a1)
3356; CHECK-NEXT:    vle16.v v9, (a0)
3357; CHECK-NEXT:    fmv.x.w a1, fa0
3358; CHECK-NEXT:    vmv.v.x v10, a1
3359; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
3360; CHECK-NEXT:    vfwcvtbf16.f.f.v v14, v9
3361; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
3362; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
3363; CHECK-NEXT:    vfmadd.vv v8, v14, v12
3364; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
3365; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
3366; CHECK-NEXT:    vse16.v v10, (a0)
3367; CHECK-NEXT:    ret
3368  %a = load <8 x bfloat>, ptr %x
3369  %b = load <8 x bfloat>, ptr %y
3370  %c = insertelement <8 x bfloat> poison, bfloat %z, i32 0
3371  %d = shufflevector <8 x bfloat> %c, <8 x bfloat> poison, <8 x i32> zeroinitializer
3372  %e = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %a, <8 x bfloat> %d, <8 x bfloat> %b)
3373  store <8 x bfloat> %e, ptr %x
3374  ret void
3375}
3376
; llvm.fma on the non-power-of-two type <6 x bfloat> with the scalar splat as
; the second multiplicand: fma(load %x, splat(%z), load %y), stored to %x.
; Unlike the 8-element variant, the expected code loads with a vector length of
; 6, switches to a vector length of 8 for the splat, widening conversions and
; vfmadd.vv, and switches back to 6 for the narrowing conversion and store.
3377define void @fma_vf_v6bf16(ptr %x, ptr %y, bfloat %z) {
3378; CHECK-LABEL: fma_vf_v6bf16:
3379; CHECK:       # %bb.0:
3380; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
3381; CHECK-NEXT:    vle16.v v8, (a1)
3382; CHECK-NEXT:    vle16.v v9, (a0)
3383; CHECK-NEXT:    fmv.x.w a1, fa0
3384; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
3385; CHECK-NEXT:    vmv.v.x v10, a1
3386; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
3387; CHECK-NEXT:    vfwcvtbf16.f.f.v v14, v9
3388; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
3389; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
3390; CHECK-NEXT:    vfmadd.vv v8, v14, v12
3391; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
3392; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
3393; CHECK-NEXT:    vse16.v v10, (a0)
3394; CHECK-NEXT:    ret
3395  %a = load <6 x bfloat>, ptr %x
3396  %b = load <6 x bfloat>, ptr %y
3397  %c = insertelement <6 x bfloat> poison, bfloat %z, i32 0
3398  %d = shufflevector <6 x bfloat> %c, <6 x bfloat> poison, <6 x i32> zeroinitializer
3399  %e = call <6 x bfloat> @llvm.fma.v6bf16(<6 x bfloat> %a, <6 x bfloat> %d, <6 x bfloat> %b)
3400  store <6 x bfloat> %e, ptr %x
3401  ret void
3402}
3403
; llvm.fma on <8 x half> with the scalar splat as the second multiplicand:
; fma(load %x, splat(%z), load %y), stored to %x. Under the ZVFH prefix (+zvfh
; runs) a single vfmacc.vf is expected, accumulating into the register loaded
; from %y. Under the ZVFHMIN prefix (+zvfhmin runs) the scalar is splatted
; through a GPR, all three operands are widened with vfwcvt.f.f.v, combined
; with vfmadd.vv at e32/m2 and narrowed with vfncvt.f.f.w.
3404define void @fma_vf_v8f16(ptr %x, ptr %y, half %z) {
3405; ZVFH-LABEL: fma_vf_v8f16:
3406; ZVFH:       # %bb.0:
3407; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
3408; ZVFH-NEXT:    vle16.v v8, (a0)
3409; ZVFH-NEXT:    vle16.v v9, (a1)
3410; ZVFH-NEXT:    vfmacc.vf v9, fa0, v8
3411; ZVFH-NEXT:    vse16.v v9, (a0)
3412; ZVFH-NEXT:    ret
3413;
3414; ZVFHMIN-LABEL: fma_vf_v8f16:
3415; ZVFHMIN:       # %bb.0:
3416; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
3417; ZVFHMIN-NEXT:    vle16.v v8, (a1)
3418; ZVFHMIN-NEXT:    vle16.v v9, (a0)
3419; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
3420; ZVFHMIN-NEXT:    vmv.v.x v10, a1
3421; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
3422; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
3423; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
3424; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
3425; ZVFHMIN-NEXT:    vfmadd.vv v8, v14, v12
3426; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
3427; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
3428; ZVFHMIN-NEXT:    vse16.v v10, (a0)
3429; ZVFHMIN-NEXT:    ret
3430  %a = load <8 x half>, ptr %x
3431  %b = load <8 x half>, ptr %y
3432  %c = insertelement <8 x half> poison, half %z, i32 0
3433  %d = shufflevector <8 x half> %c, <8 x half> poison, <8 x i32> zeroinitializer
3434  %e = call <8 x half> @llvm.fma.v8f16(<8 x half> %a, <8 x half> %d, <8 x half> %b)
3435  store <8 x half> %e, ptr %x
3436  ret void
3437}
3438
; llvm.fma on the non-power-of-two type <6 x half> with the scalar splat as the
; second multiplicand: fma(load %x, splat(%z), load %y), stored to %x. Under
; the ZVFH prefix a single vfmacc.vf at a vector length of 6 is expected. Under
; the ZVFHMIN prefix the expected code loads with a vector length of 6,
; switches to 8 for the splat, widening conversions and vfmadd.vv, and switches
; back to 6 for the narrowing conversion and store.
3439define void @fma_vf_v6f16(ptr %x, ptr %y, half %z) {
3440; ZVFH-LABEL: fma_vf_v6f16:
3441; ZVFH:       # %bb.0:
3442; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
3443; ZVFH-NEXT:    vle16.v v8, (a0)
3444; ZVFH-NEXT:    vle16.v v9, (a1)
3445; ZVFH-NEXT:    vfmacc.vf v9, fa0, v8
3446; ZVFH-NEXT:    vse16.v v9, (a0)
3447; ZVFH-NEXT:    ret
3448;
3449; ZVFHMIN-LABEL: fma_vf_v6f16:
3450; ZVFHMIN:       # %bb.0:
3451; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
3452; ZVFHMIN-NEXT:    vle16.v v8, (a1)
3453; ZVFHMIN-NEXT:    vle16.v v9, (a0)
3454; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
3455; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
3456; ZVFHMIN-NEXT:    vmv.v.x v10, a1
3457; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
3458; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
3459; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
3460; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
3461; ZVFHMIN-NEXT:    vfmadd.vv v8, v14, v12
3462; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
3463; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
3464; ZVFHMIN-NEXT:    vse16.v v10, (a0)
3465; ZVFHMIN-NEXT:    ret
3466  %a = load <6 x half>, ptr %x
3467  %b = load <6 x half>, ptr %y
3468  %c = insertelement <6 x half> poison, half %z, i32 0
3469  %d = shufflevector <6 x half> %c, <6 x half> poison, <6 x i32> zeroinitializer
3470  %e = call <6 x half> @llvm.fma.v6f16(<6 x half> %a, <6 x half> %d, <6 x half> %b)
3471  store <6 x half> %e, ptr %x
3472  ret void
3473}
3474
; llvm.fma on <4 x float> with the scalar splat as the second multiplicand:
; fma(load %x, splat(%z), load %y), stored to %x. The common assertions expect
; a single vfmacc.vf that accumulates into the register loaded from %y, with
; fa0 as the scalar multiplicand.
3475define void @fma_vf_v4f32(ptr %x, ptr %y, float %z) {
3476; CHECK-LABEL: fma_vf_v4f32:
3477; CHECK:       # %bb.0:
3478; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
3479; CHECK-NEXT:    vle32.v v8, (a0)
3480; CHECK-NEXT:    vle32.v v9, (a1)
3481; CHECK-NEXT:    vfmacc.vf v9, fa0, v8
3482; CHECK-NEXT:    vse32.v v9, (a0)
3483; CHECK-NEXT:    ret
3484  %a = load <4 x float>, ptr %x
3485  %b = load <4 x float>, ptr %y
3486  %c = insertelement <4 x float> poison, float %z, i32 0
3487  %d = shufflevector <4 x float> %c, <4 x float> poison, <4 x i32> zeroinitializer
3488  %e = call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %d, <4 x float> %b)
3489  store <4 x float> %e, ptr %x
3490  ret void
3491}
3492
; fma(%a, splat(%z), %b) on <2 x double>: the splatted scalar is the second
; multiplicand. Every RUN configuration expects a single e64 vfmacc.vf that
; accumulates into the vector loaded from %y; the result is stored through %x.
3493define void @fma_vf_v2f64(ptr %x, ptr %y, double %z) {
3494; CHECK-LABEL: fma_vf_v2f64:
3495; CHECK:       # %bb.0:
3496; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
3497; CHECK-NEXT:    vle64.v v8, (a0)
3498; CHECK-NEXT:    vle64.v v9, (a1)
3499; CHECK-NEXT:    vfmacc.vf v9, fa0, v8
3500; CHECK-NEXT:    vse64.v v9, (a0)
3501; CHECK-NEXT:    ret
3502  %a = load <2 x double>, ptr %x
3503  %b = load <2 x double>, ptr %y
3504  %c = insertelement <2 x double> poison, double %z, i32 0
3505  %d = shufflevector <2 x double> %c, <2 x double> poison, <2 x i32> zeroinitializer
3506  %e = call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %d, <2 x double> %b)
3507  store <2 x double> %e, ptr %x
3508  ret void
3509}
3510
; fma(splat(%z), %a, %b) on <8 x bfloat>: the splatted scalar is the first
; multiplicand. The expected code moves the scalar to a GPR (fmv.x.w), splats
; it with vmv.v.x, widens all three operands with vfwcvtbf16.f.f.v, performs
; vfmadd.vv at e32/m2, and narrows the result with vfncvtbf16.f.f.w before the
; store through %x.
3511define void @fma_fv_v8bf16(ptr %x, ptr %y, bfloat %z) {
3512; CHECK-LABEL: fma_fv_v8bf16:
3513; CHECK:       # %bb.0:
3514; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
3515; CHECK-NEXT:    vle16.v v8, (a1)
3516; CHECK-NEXT:    vle16.v v9, (a0)
3517; CHECK-NEXT:    fmv.x.w a1, fa0
3518; CHECK-NEXT:    vmv.v.x v10, a1
3519; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
3520; CHECK-NEXT:    vfwcvtbf16.f.f.v v14, v9
3521; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
3522; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
3523; CHECK-NEXT:    vfmadd.vv v8, v14, v12
3524; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
3525; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
3526; CHECK-NEXT:    vse16.v v10, (a0)
3527; CHECK-NEXT:    ret
3528  %a = load <8 x bfloat>, ptr %x
3529  %b = load <8 x bfloat>, ptr %y
3530  %c = insertelement <8 x bfloat> poison, bfloat %z, i32 0
3531  %d = shufflevector <8 x bfloat> %c, <8 x bfloat> poison, <8 x i32> zeroinitializer
3532  %e = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %d, <8 x bfloat> %a, <8 x bfloat> %b)
3533  store <8 x bfloat> %e, ptr %x
3534  ret void
3535}
3536
; fma(splat(%z), %a, %b) on the non-power-of-two type <6 x bfloat>. Same
; widen / vfmadd.vv / narrow sequence as the 8-element bf16 case, but the
; expected code toggles VL: the loads and the final narrowing + store run at
; VL=6, while the splat, the widening conversions and the fma run at VL=8.
3537define void @fma_fv_v6bf16(ptr %x, ptr %y, bfloat %z) {
3538; CHECK-LABEL: fma_fv_v6bf16:
3539; CHECK:       # %bb.0:
3540; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
3541; CHECK-NEXT:    vle16.v v8, (a1)
3542; CHECK-NEXT:    vle16.v v9, (a0)
3543; CHECK-NEXT:    fmv.x.w a1, fa0
3544; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
3545; CHECK-NEXT:    vmv.v.x v10, a1
3546; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
3547; CHECK-NEXT:    vfwcvtbf16.f.f.v v14, v9
3548; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
3549; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
3550; CHECK-NEXT:    vfmadd.vv v8, v14, v12
3551; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
3552; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
3553; CHECK-NEXT:    vse16.v v10, (a0)
3554; CHECK-NEXT:    ret
3555  %a = load <6 x bfloat>, ptr %x
3556  %b = load <6 x bfloat>, ptr %y
3557  %c = insertelement <6 x bfloat> poison, bfloat %z, i32 0
3558  %d = shufflevector <6 x bfloat> %c, <6 x bfloat> poison, <6 x i32> zeroinitializer
3559  %e = call <6 x bfloat> @llvm.fma.v6bf16(<6 x bfloat> %d, <6 x bfloat> %a, <6 x bfloat> %b)
3560  store <6 x bfloat> %e, ptr %x
3561  ret void
3562}
3563
; fma(splat(%z), %a, %b) on <8 x half>: the splatted scalar is the first
; multiplicand. With +zvfh the expected code is a single e16 vfmacc.vf. With
; +zvfhmin the scalar is splatted via fmv.x.w + vmv.v.x, the operands are
; widened with vfwcvt.f.f.v, combined with vfmadd.vv at e32/m2, and the result
; is narrowed with vfncvt.f.f.w.
3564define void @fma_fv_v8f16(ptr %x, ptr %y, half %z) {
3565; ZVFH-LABEL: fma_fv_v8f16:
3566; ZVFH:       # %bb.0:
3567; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
3568; ZVFH-NEXT:    vle16.v v8, (a0)
3569; ZVFH-NEXT:    vle16.v v9, (a1)
3570; ZVFH-NEXT:    vfmacc.vf v9, fa0, v8
3571; ZVFH-NEXT:    vse16.v v9, (a0)
3572; ZVFH-NEXT:    ret
3573;
3574; ZVFHMIN-LABEL: fma_fv_v8f16:
3575; ZVFHMIN:       # %bb.0:
3576; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
3577; ZVFHMIN-NEXT:    vle16.v v8, (a1)
3578; ZVFHMIN-NEXT:    vle16.v v9, (a0)
3579; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
3580; ZVFHMIN-NEXT:    vmv.v.x v10, a1
3581; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
3582; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
3583; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
3584; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
3585; ZVFHMIN-NEXT:    vfmadd.vv v8, v14, v12
3586; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
3587; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
3588; ZVFHMIN-NEXT:    vse16.v v10, (a0)
3589; ZVFHMIN-NEXT:    ret
3590  %a = load <8 x half>, ptr %x
3591  %b = load <8 x half>, ptr %y
3592  %c = insertelement <8 x half> poison, half %z, i32 0
3593  %d = shufflevector <8 x half> %c, <8 x half> poison, <8 x i32> zeroinitializer
3594  %e = call <8 x half> @llvm.fma.v8f16(<8 x half> %d, <8 x half> %a, <8 x half> %b)
3595  store <8 x half> %e, ptr %x
3596  ret void
3597}
3598
; fma(splat(%z), %a, %b) on the non-power-of-two type <6 x half>. With +zvfh
; the expected code is a single vfmacc.vf at VL=6. With +zvfhmin the loads and
; the final narrowing + store run at VL=6, while the splat, the vfwcvt.f.f.v
; widening and the e32 vfmadd.vv run at VL=8.
3599define void @fma_fv_v6f16(ptr %x, ptr %y, half %z) {
3600; ZVFH-LABEL: fma_fv_v6f16:
3601; ZVFH:       # %bb.0:
3602; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
3603; ZVFH-NEXT:    vle16.v v8, (a0)
3604; ZVFH-NEXT:    vle16.v v9, (a1)
3605; ZVFH-NEXT:    vfmacc.vf v9, fa0, v8
3606; ZVFH-NEXT:    vse16.v v9, (a0)
3607; ZVFH-NEXT:    ret
3608;
3609; ZVFHMIN-LABEL: fma_fv_v6f16:
3610; ZVFHMIN:       # %bb.0:
3611; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
3612; ZVFHMIN-NEXT:    vle16.v v8, (a1)
3613; ZVFHMIN-NEXT:    vle16.v v9, (a0)
3614; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
3615; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
3616; ZVFHMIN-NEXT:    vmv.v.x v10, a1
3617; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
3618; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
3619; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
3620; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
3621; ZVFHMIN-NEXT:    vfmadd.vv v8, v14, v12
3622; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
3623; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
3624; ZVFHMIN-NEXT:    vse16.v v10, (a0)
3625; ZVFHMIN-NEXT:    ret
3626  %a = load <6 x half>, ptr %x
3627  %b = load <6 x half>, ptr %y
3628  %c = insertelement <6 x half> poison, half %z, i32 0
3629  %d = shufflevector <6 x half> %c, <6 x half> poison, <6 x i32> zeroinitializer
3630  %e = call <6 x half> @llvm.fma.v6f16(<6 x half> %d, <6 x half> %a, <6 x half> %b)
3631  store <6 x half> %e, ptr %x
3632  ret void
3633}
3634
; fma(splat(%z), %a, %b) on <4 x float>: same as the "vf" variant but with the
; splat as the first multiplicand. The expected code is still a single
; vfmacc.vf accumulating into the vector loaded from %y.
3635define void @fma_fv_v4f32(ptr %x, ptr %y, float %z) {
3636; CHECK-LABEL: fma_fv_v4f32:
3637; CHECK:       # %bb.0:
3638; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
3639; CHECK-NEXT:    vle32.v v8, (a0)
3640; CHECK-NEXT:    vle32.v v9, (a1)
3641; CHECK-NEXT:    vfmacc.vf v9, fa0, v8
3642; CHECK-NEXT:    vse32.v v9, (a0)
3643; CHECK-NEXT:    ret
3644  %a = load <4 x float>, ptr %x
3645  %b = load <4 x float>, ptr %y
3646  %c = insertelement <4 x float> poison, float %z, i32 0
3647  %d = shufflevector <4 x float> %c, <4 x float> poison, <4 x i32> zeroinitializer
3648  %e = call <4 x float> @llvm.fma.v4f32(<4 x float> %d, <4 x float> %a, <4 x float> %b)
3649  store <4 x float> %e, ptr %x
3650  ret void
3651}
3652
; fma(splat(%z), %a, %b) on <2 x double>: the splat is the first multiplicand.
; The expected code is a single e64 vfmacc.vf accumulating into the vector
; loaded from %y, with the result stored through %x.
3653define void @fma_fv_v2f64(ptr %x, ptr %y, double %z) {
3654; CHECK-LABEL: fma_fv_v2f64:
3655; CHECK:       # %bb.0:
3656; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
3657; CHECK-NEXT:    vle64.v v8, (a0)
3658; CHECK-NEXT:    vle64.v v9, (a1)
3659; CHECK-NEXT:    vfmacc.vf v9, fa0, v8
3660; CHECK-NEXT:    vse64.v v9, (a0)
3661; CHECK-NEXT:    ret
3662  %a = load <2 x double>, ptr %x
3663  %b = load <2 x double>, ptr %y
3664  %c = insertelement <2 x double> poison, double %z, i32 0
3665  %d = shufflevector <2 x double> %c, <2 x double> poison, <2 x i32> zeroinitializer
3666  %e = call <2 x double> @llvm.fma.v2f64(<2 x double> %d, <2 x double> %a, <2 x double> %b)
3667  store <2 x double> %e, ptr %x
3668  ret void
3669}
3670
; fma(%a, splat(%z), -%b) on <8 x bfloat>. The expected code negates the
; vector loaded from %y in the 16-bit domain with vxor.vx against 0x8000
; (lui a1, 8, i.e. the sign bit of each element), then widens all operands
; with vfwcvtbf16.f.f.v, performs vfmadd.vv at e32/m2, and narrows the result
; with vfncvtbf16.f.f.w.
3671define void @fmsub_vf_v8bf16(ptr %x, ptr %y, bfloat %z) {
3672; CHECK-LABEL: fmsub_vf_v8bf16:
3673; CHECK:       # %bb.0:
3674; CHECK-NEXT:    fmv.x.w a2, fa0
3675; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
3676; CHECK-NEXT:    vle16.v v8, (a1)
3677; CHECK-NEXT:    vle16.v v9, (a0)
3678; CHECK-NEXT:    lui a1, 8
3679; CHECK-NEXT:    vmv.v.x v10, a2
3680; CHECK-NEXT:    vxor.vx v8, v8, a1
3681; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
3682; CHECK-NEXT:    vfwcvtbf16.f.f.v v14, v8
3683; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
3684; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
3685; CHECK-NEXT:    vfmadd.vv v8, v12, v14
3686; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
3687; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
3688; CHECK-NEXT:    vse16.v v10, (a0)
3689; CHECK-NEXT:    ret
3690  %a = load <8 x bfloat>, ptr %x
3691  %b = load <8 x bfloat>, ptr %y
3692  %c = insertelement <8 x bfloat> poison, bfloat %z, i32 0
3693  %d = shufflevector <8 x bfloat> %c, <8 x bfloat> poison, <8 x i32> zeroinitializer
3694  %neg = fneg <8 x bfloat> %b
3695  %e = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %a, <8 x bfloat> %d, <8 x bfloat> %neg)
3696  store <8 x bfloat> %e, ptr %x
3697  ret void
3698}
3699
; fma(%a, splat(%z), -%b) on the non-power-of-two type <6 x bfloat>. Same
; sign-bit vxor.vx + widen / vfmadd.vv / narrow sequence as the 8-element bf16
; case, but the loads and the final narrowing + store run at VL=6 while the
; splat, negation, widening and fma run at VL=8.
3700define void @fmsub_vf_v6bf16(ptr %x, ptr %y, bfloat %z) {
3701; CHECK-LABEL: fmsub_vf_v6bf16:
3702; CHECK:       # %bb.0:
3703; CHECK-NEXT:    fmv.x.w a2, fa0
3704; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
3705; CHECK-NEXT:    vle16.v v8, (a1)
3706; CHECK-NEXT:    vle16.v v9, (a0)
3707; CHECK-NEXT:    lui a1, 8
3708; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
3709; CHECK-NEXT:    vmv.v.x v10, a2
3710; CHECK-NEXT:    vxor.vx v8, v8, a1
3711; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
3712; CHECK-NEXT:    vfwcvtbf16.f.f.v v14, v8
3713; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
3714; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
3715; CHECK-NEXT:    vfmadd.vv v8, v12, v14
3716; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
3717; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
3718; CHECK-NEXT:    vse16.v v10, (a0)
3719; CHECK-NEXT:    ret
3720  %a = load <6 x bfloat>, ptr %x
3721  %b = load <6 x bfloat>, ptr %y
3722  %c = insertelement <6 x bfloat> poison, bfloat %z, i32 0
3723  %d = shufflevector <6 x bfloat> %c, <6 x bfloat> poison, <6 x i32> zeroinitializer
3724  %neg = fneg <6 x bfloat> %b
3725  %e = call <6 x bfloat> @llvm.fma.v6bf16(<6 x bfloat> %a, <6 x bfloat> %d, <6 x bfloat> %neg)
3726  store <6 x bfloat> %e, ptr %x
3727  ret void
3728}
3729
; fma(%a, splat(%z), -%b) on <8 x half>. With +zvfh the negated addend is
; folded into a single vfmsac.vf. With +zvfhmin the addend is negated with
; vxor.vx against 0x8000 (lui a1, 8), the operands are widened with
; vfwcvt.f.f.v, combined with vfmadd.vv at e32/m2, and the result is narrowed
; with vfncvt.f.f.w.
3730define void @fmsub_vf_v8f16(ptr %x, ptr %y, half %z) {
3731; ZVFH-LABEL: fmsub_vf_v8f16:
3732; ZVFH:       # %bb.0:
3733; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
3734; ZVFH-NEXT:    vle16.v v8, (a0)
3735; ZVFH-NEXT:    vle16.v v9, (a1)
3736; ZVFH-NEXT:    vfmsac.vf v9, fa0, v8
3737; ZVFH-NEXT:    vse16.v v9, (a0)
3738; ZVFH-NEXT:    ret
3739;
3740; ZVFHMIN-LABEL: fmsub_vf_v8f16:
3741; ZVFHMIN:       # %bb.0:
3742; ZVFHMIN-NEXT:    fmv.x.w a2, fa0
3743; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
3744; ZVFHMIN-NEXT:    vle16.v v8, (a1)
3745; ZVFHMIN-NEXT:    vle16.v v9, (a0)
3746; ZVFHMIN-NEXT:    lui a1, 8
3747; ZVFHMIN-NEXT:    vmv.v.x v10, a2
3748; ZVFHMIN-NEXT:    vxor.vx v8, v8, a1
3749; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
3750; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8
3751; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
3752; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
3753; ZVFHMIN-NEXT:    vfmadd.vv v8, v12, v14
3754; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
3755; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
3756; ZVFHMIN-NEXT:    vse16.v v10, (a0)
3757; ZVFHMIN-NEXT:    ret
3758  %a = load <8 x half>, ptr %x
3759  %b = load <8 x half>, ptr %y
3760  %c = insertelement <8 x half> poison, half %z, i32 0
3761  %d = shufflevector <8 x half> %c, <8 x half> poison, <8 x i32> zeroinitializer
3762  %neg = fneg <8 x half> %b
3763  %e = call <8 x half> @llvm.fma.v8f16(<8 x half> %a, <8 x half> %d, <8 x half> %neg)
3764  store <8 x half> %e, ptr %x
3765  ret void
3766}
3767
; fma(%a, splat(%z), -%b) on the non-power-of-two type <6 x half>. With +zvfh
; the expected code is a single vfmsac.vf at VL=6. With +zvfhmin the loads and
; the final narrowing + store run at VL=6, while the splat, the sign-bit
; vxor.vx negation, the widening and the e32 vfmadd.vv run at VL=8.
3768define void @fmsub_vf_v6f16(ptr %x, ptr %y, half %z) {
3769; ZVFH-LABEL: fmsub_vf_v6f16:
3770; ZVFH:       # %bb.0:
3771; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
3772; ZVFH-NEXT:    vle16.v v8, (a0)
3773; ZVFH-NEXT:    vle16.v v9, (a1)
3774; ZVFH-NEXT:    vfmsac.vf v9, fa0, v8
3775; ZVFH-NEXT:    vse16.v v9, (a0)
3776; ZVFH-NEXT:    ret
3777;
3778; ZVFHMIN-LABEL: fmsub_vf_v6f16:
3779; ZVFHMIN:       # %bb.0:
3780; ZVFHMIN-NEXT:    fmv.x.w a2, fa0
3781; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
3782; ZVFHMIN-NEXT:    vle16.v v8, (a1)
3783; ZVFHMIN-NEXT:    vle16.v v9, (a0)
3784; ZVFHMIN-NEXT:    lui a1, 8
3785; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
3786; ZVFHMIN-NEXT:    vmv.v.x v10, a2
3787; ZVFHMIN-NEXT:    vxor.vx v8, v8, a1
3788; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
3789; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8
3790; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
3791; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
3792; ZVFHMIN-NEXT:    vfmadd.vv v8, v12, v14
3793; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
3794; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
3795; ZVFHMIN-NEXT:    vse16.v v10, (a0)
3796; ZVFHMIN-NEXT:    ret
3797  %a = load <6 x half>, ptr %x
3798  %b = load <6 x half>, ptr %y
3799  %c = insertelement <6 x half> poison, half %z, i32 0
3800  %d = shufflevector <6 x half> %c, <6 x half> poison, <6 x i32> zeroinitializer
3801  %neg = fneg <6 x half> %b
3802  %e = call <6 x half> @llvm.fma.v6f16(<6 x half> %a, <6 x half> %d, <6 x half> %neg)
3803  store <6 x half> %e, ptr %x
3804  ret void
3805}
3806
; fma(-%a, splat(%z), %b) on <4 x float>: the vector multiplicand is negated
; and the splat is the second multiplicand. The expected code folds both the
; fneg and the splat into a single vfnmsac.vf.
3807define void @fnmsub_vf_v4f32(ptr %x, ptr %y, float %z) {
3808; CHECK-LABEL: fnmsub_vf_v4f32:
3809; CHECK:       # %bb.0:
3810; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
3811; CHECK-NEXT:    vle32.v v8, (a0)
3812; CHECK-NEXT:    vle32.v v9, (a1)
3813; CHECK-NEXT:    vfnmsac.vf v9, fa0, v8
3814; CHECK-NEXT:    vse32.v v9, (a0)
3815; CHECK-NEXT:    ret
3816  %a = load <4 x float>, ptr %x
3817  %b = load <4 x float>, ptr %y
3818  %c = insertelement <4 x float> poison, float %z, i32 0
3819  %d = shufflevector <4 x float> %c, <4 x float> poison, <4 x i32> zeroinitializer
3820  %neg = fneg <4 x float> %a
3821  %e = call <4 x float> @llvm.fma.v4f32(<4 x float> %neg, <4 x float> %d, <4 x float> %b)
3822  store <4 x float> %e, ptr %x
3823  ret void
3824}
3825
; fma(-%a, splat(%z), -%b) on <2 x double>: both the vector multiplicand and
; the addend are negated. The expected code folds both fnegs and the splat
; into a single vfnmacc.vf.
3826define void @fnmadd_vf_v2f64(ptr %x, ptr %y, double %z) {
3827; CHECK-LABEL: fnmadd_vf_v2f64:
3828; CHECK:       # %bb.0:
3829; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
3830; CHECK-NEXT:    vle64.v v8, (a0)
3831; CHECK-NEXT:    vle64.v v9, (a1)
3832; CHECK-NEXT:    vfnmacc.vf v9, fa0, v8
3833; CHECK-NEXT:    vse64.v v9, (a0)
3834; CHECK-NEXT:    ret
3835  %a = load <2 x double>, ptr %x
3836  %b = load <2 x double>, ptr %y
3837  %c = insertelement <2 x double> poison, double %z, i32 0
3838  %d = shufflevector <2 x double> %c, <2 x double> poison, <2 x i32> zeroinitializer
3839  %neg = fneg <2 x double> %a
3840  %neg2 = fneg <2 x double> %b
3841  %e = call <2 x double> @llvm.fma.v2f64(<2 x double> %neg, <2 x double> %d, <2 x double> %neg2)
3842  store <2 x double> %e, ptr %x
3843  ret void
3844}
3845
; fma(-splat(%z), %a, %b) on <4 x float>: here the fneg is applied to the
; splatted scalar, which is the first multiplicand. The expected code is the
; same single vfnmsac.vf as when the vector multiplicand is negated.
3846define void @fnmsub_fv_v4f32(ptr %x, ptr %y, float %z) {
3847; CHECK-LABEL: fnmsub_fv_v4f32:
3848; CHECK:       # %bb.0:
3849; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
3850; CHECK-NEXT:    vle32.v v8, (a0)
3851; CHECK-NEXT:    vle32.v v9, (a1)
3852; CHECK-NEXT:    vfnmsac.vf v9, fa0, v8
3853; CHECK-NEXT:    vse32.v v9, (a0)
3854; CHECK-NEXT:    ret
3855  %a = load <4 x float>, ptr %x
3856  %b = load <4 x float>, ptr %y
3857  %c = insertelement <4 x float> poison, float %z, i32 0
3858  %d = shufflevector <4 x float> %c, <4 x float> poison, <4 x i32> zeroinitializer
3859  %neg = fneg <4 x float> %d
3860  %e = call <4 x float> @llvm.fma.v4f32(<4 x float> %neg, <4 x float> %a, <4 x float> %b)
3861  store <4 x float> %e, ptr %x
3862  ret void
3863}
3864
; fma(-splat(%z), %a, -%b) on <2 x double>: the fneg is applied to the
; splatted first multiplicand and to the addend. The expected code folds
; everything into a single vfnmacc.vf.
3865define void @fnmadd_fv_v2f64(ptr %x, ptr %y, double %z) {
3866; CHECK-LABEL: fnmadd_fv_v2f64:
3867; CHECK:       # %bb.0:
3868; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
3869; CHECK-NEXT:    vle64.v v8, (a0)
3870; CHECK-NEXT:    vle64.v v9, (a1)
3871; CHECK-NEXT:    vfnmacc.vf v9, fa0, v8
3872; CHECK-NEXT:    vse64.v v9, (a0)
3873; CHECK-NEXT:    ret
3874  %a = load <2 x double>, ptr %x
3875  %b = load <2 x double>, ptr %y
3876  %c = insertelement <2 x double> poison, double %z, i32 0
3877  %d = shufflevector <2 x double> %c, <2 x double> poison, <2 x i32> zeroinitializer
3878  %neg = fneg <2 x double> %d
3879  %neg2 = fneg <2 x double> %b
3880  %e = call <2 x double> @llvm.fma.v2f64(<2 x double> %neg, <2 x double> %a, <2 x double> %neg2)
3881  store <2 x double> %e, ptr %x
3882  ret void
3883}
3884
; llvm.trunc on <8 x bfloat>, performed in place on %x. The expected code
; widens to e32 with vfwcvtbf16.f.f.v, builds a mask of lanes whose magnitude
; is below the f32 threshold 0x4B000000 (lui a1, 307200, i.e. 2^23), converts
; only those lanes with vfcvt.rtz.x.f.v / vfcvt.f.x.v, re-applies the original
; sign with a masked vfsgnj.vv under the mask-undisturbed (mu) policy, and
; narrows the result with vfncvtbf16.f.f.w.
3885define void @trunc_v8bf16(ptr %x) {
3886; CHECK-LABEL: trunc_v8bf16:
3887; CHECK:       # %bb.0:
3888; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
3889; CHECK-NEXT:    vle16.v v8, (a0)
3890; CHECK-NEXT:    lui a1, 307200
3891; CHECK-NEXT:    fmv.w.x fa5, a1
3892; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
3893; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
3894; CHECK-NEXT:    vfabs.v v8, v10
3895; CHECK-NEXT:    vmflt.vf v0, v8, fa5
3896; CHECK-NEXT:    vfcvt.rtz.x.f.v v8, v10, v0.t
3897; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
3898; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
3899; CHECK-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
3900; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
3901; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
3902; CHECK-NEXT:    vse16.v v8, (a0)
3903; CHECK-NEXT:    ret
3904  %a = load <8 x bfloat>, ptr %x
3905  %b = call <8 x bfloat> @llvm.trunc.v8bf16(<8 x bfloat> %a)
3906  store <8 x bfloat> %b, ptr %x
3907  ret void
3908}
3909
; llvm.trunc on the non-power-of-two type <6 x bfloat>, in place on %x. Same
; widen / mask / round-toward-zero convert / sign-restore / narrow sequence as
; the 8-element bf16 case, but with VL toggling: the load runs at VL=6, the
; widening and the mask computation at VL=8, and the masked conversions,
; vfsgnj.vv, narrowing and store at VL=6.
3910define void @trunc_v6bf16(ptr %x) {
3911; CHECK-LABEL: trunc_v6bf16:
3912; CHECK:       # %bb.0:
3913; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
3914; CHECK-NEXT:    vle16.v v8, (a0)
3915; CHECK-NEXT:    lui a1, 307200
3916; CHECK-NEXT:    fmv.w.x fa5, a1
3917; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
3918; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
3919; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
3920; CHECK-NEXT:    vfabs.v v8, v10
3921; CHECK-NEXT:    vmflt.vf v0, v8, fa5
3922; CHECK-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
3923; CHECK-NEXT:    vfcvt.rtz.x.f.v v8, v10, v0.t
3924; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
3925; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
3926; CHECK-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
3927; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
3928; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
3929; CHECK-NEXT:    vse16.v v8, (a0)
3930; CHECK-NEXT:    ret
3931  %a = load <6 x bfloat>, ptr %x
3932  %b = call <6 x bfloat> @llvm.trunc.v6bf16(<6 x bfloat> %a)
3933  store <6 x bfloat> %b, ptr %x
3934  ret void
3935}
3936
; llvm.trunc on <8 x half>, in place on %x. With +zvfh the whole sequence
; stays at e16: the magnitude threshold is loaded from a constant-pool entry
; (flh), and lanes below it go through masked vfcvt.rtz.x.f.v / vfcvt.f.x.v
; followed by a masked vfsgnj.vv under the mu policy. With +zvfhmin the input
; is widened with vfwcvt.f.f.v, the same sequence runs at e32/m2 against the
; f32 threshold 0x4B000000 (lui a1, 307200), and the result is narrowed with
; vfncvt.f.f.w.
3937define void @trunc_v8f16(ptr %x) {
3938; ZVFH-LABEL: trunc_v8f16:
3939; ZVFH:       # %bb.0:
3940; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
3941; ZVFH-NEXT:    vle16.v v8, (a0)
3942; ZVFH-NEXT:    lui a1, %hi(.LCPI171_0)
3943; ZVFH-NEXT:    flh fa5, %lo(.LCPI171_0)(a1)
3944; ZVFH-NEXT:    vfabs.v v9, v8
3945; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
3946; ZVFH-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
3947; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
3948; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
3949; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
3950; ZVFH-NEXT:    vse16.v v8, (a0)
3951; ZVFH-NEXT:    ret
3952;
3953; ZVFHMIN-LABEL: trunc_v8f16:
3954; ZVFHMIN:       # %bb.0:
3955; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
3956; ZVFHMIN-NEXT:    vle16.v v8, (a0)
3957; ZVFHMIN-NEXT:    lui a1, 307200
3958; ZVFHMIN-NEXT:    fmv.w.x fa5, a1
3959; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
3960; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
3961; ZVFHMIN-NEXT:    vfabs.v v8, v10
3962; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
3963; ZVFHMIN-NEXT:    vfcvt.rtz.x.f.v v8, v10, v0.t
3964; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
3965; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
3966; ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
3967; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
3968; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
3969; ZVFHMIN-NEXT:    vse16.v v8, (a0)
3970; ZVFHMIN-NEXT:    ret
3971  %a = load <8 x half>, ptr %x
3972  %b = call <8 x half> @llvm.trunc.v8f16(<8 x half> %a)
3973  store <8 x half> %b, ptr %x
3974  ret void
3975}
3976
; llvm.trunc on the non-power-of-two type <6 x half>, in place on %x. Both
; configurations follow the same steps as the 8-element half case, but with VL
; toggling: the load runs at VL=6, the mask computation (and, with +zvfhmin,
; the vfwcvt.f.f.v widening) at VL=8, and the masked conversions, vfsgnj.vv
; and store (plus the vfncvt.f.f.w narrowing with +zvfhmin) at VL=6.
3977define void @trunc_v6f16(ptr %x) {
3978; ZVFH-LABEL: trunc_v6f16:
3979; ZVFH:       # %bb.0:
3980; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
3981; ZVFH-NEXT:    vle16.v v8, (a0)
3982; ZVFH-NEXT:    lui a1, %hi(.LCPI172_0)
3983; ZVFH-NEXT:    flh fa5, %lo(.LCPI172_0)(a1)
3984; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
3985; ZVFH-NEXT:    vfabs.v v9, v8
3986; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
3987; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
3988; ZVFH-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
3989; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
3990; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
3991; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
3992; ZVFH-NEXT:    vse16.v v8, (a0)
3993; ZVFH-NEXT:    ret
3994;
3995; ZVFHMIN-LABEL: trunc_v6f16:
3996; ZVFHMIN:       # %bb.0:
3997; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
3998; ZVFHMIN-NEXT:    vle16.v v8, (a0)
3999; ZVFHMIN-NEXT:    lui a1, 307200
4000; ZVFHMIN-NEXT:    fmv.w.x fa5, a1
4001; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
4002; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
4003; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
4004; ZVFHMIN-NEXT:    vfabs.v v8, v10
4005; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
4006; ZVFHMIN-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
4007; ZVFHMIN-NEXT:    vfcvt.rtz.x.f.v v8, v10, v0.t
4008; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
4009; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
4010; ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
4011; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
4012; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
4013; ZVFHMIN-NEXT:    vse16.v v8, (a0)
4014; ZVFHMIN-NEXT:    ret
4015  %a = load <6 x half>, ptr %x
4016  %b = call <6 x half> @llvm.trunc.v6f16(<6 x half> %a)
4017  store <6 x half> %b, ptr %x
4018  ret void
4019}
4020
; llvm.trunc on <4 x float>, in place on %x. The expected code masks lanes
; whose magnitude is below 0x4B000000 (lui a1, 307200, i.e. 2^23), converts
; only those lanes with vfcvt.rtz.x.f.v / vfcvt.f.x.v, and re-applies the
; original sign with a masked vfsgnj.vv under the mu policy.
4021define void @trunc_v4f32(ptr %x) {
4022; CHECK-LABEL: trunc_v4f32:
4023; CHECK:       # %bb.0:
4024; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
4025; CHECK-NEXT:    vle32.v v8, (a0)
4026; CHECK-NEXT:    lui a1, 307200
4027; CHECK-NEXT:    fmv.w.x fa5, a1
4028; CHECK-NEXT:    vfabs.v v9, v8
4029; CHECK-NEXT:    vmflt.vf v0, v9, fa5
4030; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
4031; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
4032; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
4033; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
4034; CHECK-NEXT:    vse32.v v8, (a0)
4035; CHECK-NEXT:    ret
4036  %a = load <4 x float>, ptr %x
4037  %b = call <4 x float> @llvm.trunc.v4f32(<4 x float> %a)
4038  store <4 x float> %b, ptr %x
4039  ret void
4040}
4041
; llvm.trunc on <2 x double>, in place on %x. Same mask / round-toward-zero
; convert / sign-restore sequence as the f32 case at e64, except that the
; magnitude threshold is loaded from a constant-pool entry with fld.
4042define void @trunc_v2f64(ptr %x) {
4043; CHECK-LABEL: trunc_v2f64:
4044; CHECK:       # %bb.0:
4045; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
4046; CHECK-NEXT:    vle64.v v8, (a0)
4047; CHECK-NEXT:    lui a1, %hi(.LCPI174_0)
4048; CHECK-NEXT:    fld fa5, %lo(.LCPI174_0)(a1)
4049; CHECK-NEXT:    vfabs.v v9, v8
4050; CHECK-NEXT:    vmflt.vf v0, v9, fa5
4051; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
4052; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
4053; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
4054; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
4055; CHECK-NEXT:    vse64.v v8, (a0)
4056; CHECK-NEXT:    ret
4057  %a = load <2 x double>, ptr %x
4058  %b = call <2 x double> @llvm.trunc.v2f64(<2 x double> %a)
4059  store <2 x double> %b, ptr %x
4060  ret void
4061}
4062
; llvm.ceil on <8 x bfloat>, in place on %x. The expected code widens to e32
; with vfwcvtbf16.f.f.v, masks lanes whose magnitude is below 0x4B000000
; (lui a1, 307200), and converts them with vfcvt.x.f.v while frm is
; temporarily set to 3 (fsrmi saves the old mode in a1, fsrm restores it; 3 is
; round-up in the RISC-V frm encoding). The lanes are converted back with
; vfcvt.f.x.v, sign-restored by a masked vfsgnj.vv under the mu policy, and
; narrowed with vfncvtbf16.f.f.w.
4063define void @ceil_v8bf16(ptr %x) {
4064; CHECK-LABEL: ceil_v8bf16:
4065; CHECK:       # %bb.0:
4066; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
4067; CHECK-NEXT:    vle16.v v8, (a0)
4068; CHECK-NEXT:    lui a1, 307200
4069; CHECK-NEXT:    fmv.w.x fa5, a1
4070; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
4071; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
4072; CHECK-NEXT:    vfabs.v v8, v10
4073; CHECK-NEXT:    vmflt.vf v0, v8, fa5
4074; CHECK-NEXT:    fsrmi a1, 3
4075; CHECK-NEXT:    vfcvt.x.f.v v8, v10, v0.t
4076; CHECK-NEXT:    fsrm a1
4077; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
4078; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
4079; CHECK-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
4080; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
4081; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
4082; CHECK-NEXT:    vse16.v v8, (a0)
4083; CHECK-NEXT:    ret
4084  %a = load <8 x bfloat>, ptr %x
4085  %b = call <8 x bfloat> @llvm.ceil.v8bf16(<8 x bfloat> %a)
4086  store <8 x bfloat> %b, ptr %x
4087  ret void
4088}
4089
; llvm.ceil on the non-power-of-two type <6 x bfloat>, in place on %x. Same
; widen / mask / frm=3 convert / sign-restore / narrow sequence as the
; 8-element bf16 case, but with VL toggling: the load runs at VL=6, the
; widening and mask computation at VL=8, and the masked conversions onward at
; VL=6. The vsetivli that switches back to VL=6 sits between fsrmi and the
; vfcvt.x.f.v it guards.
4090define void @ceil_v6bf16(ptr %x) {
4091; CHECK-LABEL: ceil_v6bf16:
4092; CHECK:       # %bb.0:
4093; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
4094; CHECK-NEXT:    vle16.v v8, (a0)
4095; CHECK-NEXT:    lui a1, 307200
4096; CHECK-NEXT:    fmv.w.x fa5, a1
4097; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
4098; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
4099; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
4100; CHECK-NEXT:    vfabs.v v8, v10
4101; CHECK-NEXT:    vmflt.vf v0, v8, fa5
4102; CHECK-NEXT:    fsrmi a1, 3
4103; CHECK-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
4104; CHECK-NEXT:    vfcvt.x.f.v v8, v10, v0.t
4105; CHECK-NEXT:    fsrm a1
4106; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
4107; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
4108; CHECK-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
4109; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
4110; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
4111; CHECK-NEXT:    vse16.v v8, (a0)
4112; CHECK-NEXT:    ret
4113  %a = load <6 x bfloat>, ptr %x
4114  %b = call <6 x bfloat> @llvm.ceil.v6bf16(<6 x bfloat> %a)
4115  store <6 x bfloat> %b, ptr %x
4116  ret void
4117}
4118
; llvm.ceil on <8 x half>, in place on %x. With +zvfh the sequence stays at
; e16: the threshold comes from a constant-pool entry (flh), and masked lanes
; are converted with vfcvt.x.f.v while frm is temporarily set to 3 (round-up
; in the RISC-V frm encoding), then converted back and sign-restored with a
; masked vfsgnj.vv. With +zvfhmin the input is widened with vfwcvt.f.f.v, the
; same sequence runs at e32/m2 against 0x4B000000 (lui a1, 307200), and the
; result is narrowed with vfncvt.f.f.w.
4119define void @ceil_v8f16(ptr %x) {
4120; ZVFH-LABEL: ceil_v8f16:
4121; ZVFH:       # %bb.0:
4122; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
4123; ZVFH-NEXT:    vle16.v v8, (a0)
4124; ZVFH-NEXT:    lui a1, %hi(.LCPI177_0)
4125; ZVFH-NEXT:    flh fa5, %lo(.LCPI177_0)(a1)
4126; ZVFH-NEXT:    vfabs.v v9, v8
4127; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
4128; ZVFH-NEXT:    fsrmi a1, 3
4129; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
4130; ZVFH-NEXT:    fsrm a1
4131; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
4132; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
4133; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
4134; ZVFH-NEXT:    vse16.v v8, (a0)
4135; ZVFH-NEXT:    ret
4136;
4137; ZVFHMIN-LABEL: ceil_v8f16:
4138; ZVFHMIN:       # %bb.0:
4139; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
4140; ZVFHMIN-NEXT:    vle16.v v8, (a0)
4141; ZVFHMIN-NEXT:    lui a1, 307200
4142; ZVFHMIN-NEXT:    fmv.w.x fa5, a1
4143; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
4144; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
4145; ZVFHMIN-NEXT:    vfabs.v v8, v10
4146; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
4147; ZVFHMIN-NEXT:    fsrmi a1, 3
4148; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
4149; ZVFHMIN-NEXT:    fsrm a1
4150; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
4151; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
4152; ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
4153; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
4154; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
4155; ZVFHMIN-NEXT:    vse16.v v8, (a0)
4156; ZVFHMIN-NEXT:    ret
4157  %a = load <8 x half>, ptr %x
4158  %b = call <8 x half> @llvm.ceil.v8f16(<8 x half> %a)
4159  store <8 x half> %b, ptr %x
4160  ret void
4161}
4162
; llvm.ceil on the non-power-of-two type <6 x half>, in place on %x. Both
; configurations follow the same steps as the 8-element half case, but with VL
; toggling: the load runs at VL=6, the mask computation (and, with +zvfhmin,
; the widening) at VL=8, and the frm=3 conversion onward at VL=6. The vsetivli
; that switches back to VL=6 sits between fsrmi and the vfcvt.x.f.v it guards.
4163define void @ceil_v6f16(ptr %x) {
4164; ZVFH-LABEL: ceil_v6f16:
4165; ZVFH:       # %bb.0:
4166; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
4167; ZVFH-NEXT:    vle16.v v8, (a0)
4168; ZVFH-NEXT:    lui a1, %hi(.LCPI178_0)
4169; ZVFH-NEXT:    flh fa5, %lo(.LCPI178_0)(a1)
4170; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
4171; ZVFH-NEXT:    vfabs.v v9, v8
4172; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
4173; ZVFH-NEXT:    fsrmi a1, 3
4174; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
4175; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
4176; ZVFH-NEXT:    fsrm a1
4177; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
4178; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
4179; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
4180; ZVFH-NEXT:    vse16.v v8, (a0)
4181; ZVFH-NEXT:    ret
4182;
4183; ZVFHMIN-LABEL: ceil_v6f16:
4184; ZVFHMIN:       # %bb.0:
4185; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
4186; ZVFHMIN-NEXT:    vle16.v v8, (a0)
4187; ZVFHMIN-NEXT:    lui a1, 307200
4188; ZVFHMIN-NEXT:    fmv.w.x fa5, a1
4189; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
4190; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
4191; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
4192; ZVFHMIN-NEXT:    vfabs.v v8, v10
4193; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
4194; ZVFHMIN-NEXT:    fsrmi a1, 3
4195; ZVFHMIN-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
4196; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
4197; ZVFHMIN-NEXT:    fsrm a1
4198; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
4199; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
4200; ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
4201; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
4202; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
4203; ZVFHMIN-NEXT:    vse16.v v8, (a0)
4204; ZVFHMIN-NEXT:    ret
4205  %a = load <6 x half>, ptr %x
4206  %b = call <6 x half> @llvm.ceil.v6f16(<6 x half> %a)
4207  store <6 x half> %b, ptr %x
4208  ret void
4209}
4210
; llvm.ceil on <4 x float>, in place on %x. The expected code masks lanes
; whose magnitude is below 0x4B000000 (lui a1, 307200, i.e. 2^23), converts
; them with vfcvt.x.f.v while frm is temporarily set to 3 (round-up in the
; RISC-V frm encoding; fsrmi saves the old mode in a1 and fsrm restores it),
; converts back with vfcvt.f.x.v, and re-applies the original sign with a
; masked vfsgnj.vv under the mu policy.
4211define void @ceil_v4f32(ptr %x) {
4212; CHECK-LABEL: ceil_v4f32:
4213; CHECK:       # %bb.0:
4214; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
4215; CHECK-NEXT:    vle32.v v8, (a0)
4216; CHECK-NEXT:    lui a1, 307200
4217; CHECK-NEXT:    fmv.w.x fa5, a1
4218; CHECK-NEXT:    vfabs.v v9, v8
4219; CHECK-NEXT:    vmflt.vf v0, v9, fa5
4220; CHECK-NEXT:    fsrmi a1, 3
4221; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
4222; CHECK-NEXT:    fsrm a1
4223; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
4224; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
4225; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
4226; CHECK-NEXT:    vse32.v v8, (a0)
4227; CHECK-NEXT:    ret
4228  %a = load <4 x float>, ptr %x
4229  %b = call <4 x float> @llvm.ceil.v4f32(<4 x float> %a)
4230  store <4 x float> %b, ptr %x
4231  ret void
4232}
4233
; llvm.ceil on <2 x double>, in place on %x. Same mask / frm=3 convert /
; sign-restore sequence as the f32 case at e64, except that the magnitude
; threshold is loaded from a constant-pool entry with fld.
4234define void @ceil_v2f64(ptr %x) {
4235; CHECK-LABEL: ceil_v2f64:
4236; CHECK:       # %bb.0:
4237; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
4238; CHECK-NEXT:    vle64.v v8, (a0)
4239; CHECK-NEXT:    lui a1, %hi(.LCPI180_0)
4240; CHECK-NEXT:    fld fa5, %lo(.LCPI180_0)(a1)
4241; CHECK-NEXT:    vfabs.v v9, v8
4242; CHECK-NEXT:    vmflt.vf v0, v9, fa5
4243; CHECK-NEXT:    fsrmi a1, 3
4244; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
4245; CHECK-NEXT:    fsrm a1
4246; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
4247; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
4248; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
4249; CHECK-NEXT:    vse64.v v8, (a0)
4250; CHECK-NEXT:    ret
4251  %a = load <2 x double>, ptr %x
4252  %b = call <2 x double> @llvm.ceil.v2f64(<2 x double> %a)
4253  store <2 x double> %b, ptr %x
4254  ret void
4255}
4256
; llvm.floor on <8 x bfloat>, in place on %x. Identical in shape to the bf16
; ceil expectation, except that frm is temporarily set to 2 (round-down in the
; RISC-V frm encoding) around vfcvt.x.f.v: widen with vfwcvtbf16.f.f.v, mask
; lanes below 0x4B000000 (lui a1, 307200), convert, convert back, restore the
; sign with a masked vfsgnj.vv, and narrow with vfncvtbf16.f.f.w.
4257define void @floor_v8bf16(ptr %x) {
4258; CHECK-LABEL: floor_v8bf16:
4259; CHECK:       # %bb.0:
4260; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
4261; CHECK-NEXT:    vle16.v v8, (a0)
4262; CHECK-NEXT:    lui a1, 307200
4263; CHECK-NEXT:    fmv.w.x fa5, a1
4264; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
4265; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
4266; CHECK-NEXT:    vfabs.v v8, v10
4267; CHECK-NEXT:    vmflt.vf v0, v8, fa5
4268; CHECK-NEXT:    fsrmi a1, 2
4269; CHECK-NEXT:    vfcvt.x.f.v v8, v10, v0.t
4270; CHECK-NEXT:    fsrm a1
4271; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
4272; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
4273; CHECK-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
4274; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
4275; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
4276; CHECK-NEXT:    vse16.v v8, (a0)
4277; CHECK-NEXT:    ret
4278  %a = load <8 x bfloat>, ptr %x
4279  %b = call <8 x bfloat> @llvm.floor.v8bf16(<8 x bfloat> %a)
4280  store <8 x bfloat> %b, ptr %x
4281  ret void
4282}
4283
; llvm.floor on the non-power-of-two type <6 x bfloat>, in place on %x. Same
; sequence as the 8-element bf16 floor case (frm temporarily set to 2), but
; with VL toggling: the load runs at VL=6, the widening and mask computation
; at VL=8, and the masked conversions, vfsgnj.vv, narrowing and store at VL=6.
4284define void @floor_v6bf16(ptr %x) {
4285; CHECK-LABEL: floor_v6bf16:
4286; CHECK:       # %bb.0:
4287; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
4288; CHECK-NEXT:    vle16.v v8, (a0)
4289; CHECK-NEXT:    lui a1, 307200
4290; CHECK-NEXT:    fmv.w.x fa5, a1
4291; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
4292; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
4293; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
4294; CHECK-NEXT:    vfabs.v v8, v10
4295; CHECK-NEXT:    vmflt.vf v0, v8, fa5
4296; CHECK-NEXT:    fsrmi a1, 2
4297; CHECK-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
4298; CHECK-NEXT:    vfcvt.x.f.v v8, v10, v0.t
4299; CHECK-NEXT:    fsrm a1
4300; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
4301; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
4302; CHECK-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
4303; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
4304; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
4305; CHECK-NEXT:    vse16.v v8, (a0)
4306; CHECK-NEXT:    ret
4307  %a = load <6 x bfloat>, ptr %x
4308  %b = call <6 x bfloat> @llvm.floor.v6bf16(<6 x bfloat> %a)
4309  store <6 x bfloat> %b, ptr %x
4310  ret void
4311}
4312
; floor on <8 x half>: load from %x, call llvm.floor.v8f16, store back to %x.
; Under the ZVFH prefix the rounding runs directly at e16, with the threshold
; loaded by flh from a constant-pool entry. Under the ZVFHMIN prefix the input
; is widened to f32 (vfwcvt.f.f.v), rounded at e32 and narrowed back
; (vfncvt.f.f.w). Both variants bracket vfcvt.x.f.v with fsrmi a1, 2 / fsrm a1.
4313define void @floor_v8f16(ptr %x) {
4314; ZVFH-LABEL: floor_v8f16:
4315; ZVFH:       # %bb.0:
4316; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
4317; ZVFH-NEXT:    vle16.v v8, (a0)
4318; ZVFH-NEXT:    lui a1, %hi(.LCPI183_0)
4319; ZVFH-NEXT:    flh fa5, %lo(.LCPI183_0)(a1)
4320; ZVFH-NEXT:    vfabs.v v9, v8
4321; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
4322; ZVFH-NEXT:    fsrmi a1, 2
4323; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
4324; ZVFH-NEXT:    fsrm a1
4325; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
4326; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
4327; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
4328; ZVFH-NEXT:    vse16.v v8, (a0)
4329; ZVFH-NEXT:    ret
4330;
4331; ZVFHMIN-LABEL: floor_v8f16:
4332; ZVFHMIN:       # %bb.0:
4333; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
4334; ZVFHMIN-NEXT:    vle16.v v8, (a0)
4335; ZVFHMIN-NEXT:    lui a1, 307200
4336; ZVFHMIN-NEXT:    fmv.w.x fa5, a1
4337; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
4338; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
4339; ZVFHMIN-NEXT:    vfabs.v v8, v10
4340; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
4341; ZVFHMIN-NEXT:    fsrmi a1, 2
4342; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
4343; ZVFHMIN-NEXT:    fsrm a1
4344; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
4345; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
4346; ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
4347; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
4348; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
4349; ZVFHMIN-NEXT:    vse16.v v8, (a0)
4350; ZVFHMIN-NEXT:    ret
4351  %a = load <8 x half>, ptr %x
4352  %b = call <8 x half> @llvm.floor.v8f16(<8 x half> %a)
4353  store <8 x half> %b, ptr %x
4354  ret void
4355}
4356
; floor on <6 x half>: load from %x, call llvm.floor.v6f16, store back to %x.
; Both prefix variants load with VL=6, switch to VL=8 for the magnitude test
; (vfabs.v / vmflt.vf, plus the widening convert under ZVFHMIN), and switch
; back to VL=6 before the masked vfcvt pair executed with frm=2 (RDN).
4357define void @floor_v6f16(ptr %x) {
4358; ZVFH-LABEL: floor_v6f16:
4359; ZVFH:       # %bb.0:
4360; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
4361; ZVFH-NEXT:    vle16.v v8, (a0)
4362; ZVFH-NEXT:    lui a1, %hi(.LCPI184_0)
4363; ZVFH-NEXT:    flh fa5, %lo(.LCPI184_0)(a1)
4364; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
4365; ZVFH-NEXT:    vfabs.v v9, v8
4366; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
4367; ZVFH-NEXT:    fsrmi a1, 2
4368; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
4369; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
4370; ZVFH-NEXT:    fsrm a1
4371; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
4372; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
4373; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
4374; ZVFH-NEXT:    vse16.v v8, (a0)
4375; ZVFH-NEXT:    ret
4376;
4377; ZVFHMIN-LABEL: floor_v6f16:
4378; ZVFHMIN:       # %bb.0:
4379; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
4380; ZVFHMIN-NEXT:    vle16.v v8, (a0)
4381; ZVFHMIN-NEXT:    lui a1, 307200
4382; ZVFHMIN-NEXT:    fmv.w.x fa5, a1
4383; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
4384; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
4385; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
4386; ZVFHMIN-NEXT:    vfabs.v v8, v10
4387; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
4388; ZVFHMIN-NEXT:    fsrmi a1, 2
4389; ZVFHMIN-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
4390; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
4391; ZVFHMIN-NEXT:    fsrm a1
4392; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
4393; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
4394; ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
4395; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
4396; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
4397; ZVFHMIN-NEXT:    vse16.v v8, (a0)
4398; ZVFHMIN-NEXT:    ret
4399  %a = load <6 x half>, ptr %x
4400  %b = call <6 x half> @llvm.floor.v6f16(<6 x half> %a)
4401  store <6 x half> %b, ptr %x
4402  ret void
4403}
4404
; floor on <4 x float>: load from %x, call llvm.floor.v4f32, store back to %x.
; No widening is expected; the whole sequence runs at e32/m1. The threshold is
; materialised with lui/fmv.w.x (0x4B000000, i.e. 2^23 as f32) and the masked
; float->int->float round trip runs with frm set to 2 (RDN) by fsrmi.
4405define void @floor_v4f32(ptr %x) {
4406; CHECK-LABEL: floor_v4f32:
4407; CHECK:       # %bb.0:
4408; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
4409; CHECK-NEXT:    vle32.v v8, (a0)
4410; CHECK-NEXT:    lui a1, 307200
4411; CHECK-NEXT:    fmv.w.x fa5, a1
4412; CHECK-NEXT:    vfabs.v v9, v8
4413; CHECK-NEXT:    vmflt.vf v0, v9, fa5
4414; CHECK-NEXT:    fsrmi a1, 2
4415; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
4416; CHECK-NEXT:    fsrm a1
4417; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
4418; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
4419; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
4420; CHECK-NEXT:    vse32.v v8, (a0)
4421; CHECK-NEXT:    ret
4422  %a = load <4 x float>, ptr %x
4423  %b = call <4 x float> @llvm.floor.v4f32(<4 x float> %a)
4424  store <4 x float> %b, ptr %x
4425  ret void
4426}
4427
; floor on <2 x double>: load from %x, call llvm.floor.v2f64, store back to %x.
; The sequence runs at e64/m1; the comparison threshold is loaded with fld from
; constant-pool entry .LCPI186_0 (its value is not visible in these checks).
; The masked conversions execute with frm set to 2 (RDN) by fsrmi.
4428define void @floor_v2f64(ptr %x) {
4429; CHECK-LABEL: floor_v2f64:
4430; CHECK:       # %bb.0:
4431; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
4432; CHECK-NEXT:    vle64.v v8, (a0)
4433; CHECK-NEXT:    lui a1, %hi(.LCPI186_0)
4434; CHECK-NEXT:    fld fa5, %lo(.LCPI186_0)(a1)
4435; CHECK-NEXT:    vfabs.v v9, v8
4436; CHECK-NEXT:    vmflt.vf v0, v9, fa5
4437; CHECK-NEXT:    fsrmi a1, 2
4438; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
4439; CHECK-NEXT:    fsrm a1
4440; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
4441; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
4442; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
4443; CHECK-NEXT:    vse64.v v8, (a0)
4444; CHECK-NEXT:    ret
4445  %a = load <2 x double>, ptr %x
4446  %b = call <2 x double> @llvm.floor.v2f64(<2 x double> %a)
4447  store <2 x double> %b, ptr %x
4448  ret void
4449}
4450
; round on <8 x bfloat>: load from %x, call llvm.round.v8bf16, store back to
; %x. Same widen-to-f32 / masked-convert / narrow shape as the floor test, but
; the expected static rounding mode is 4 (RMM, round to nearest with ties to
; max magnitude, in the RISC-V frm encoding).
4451define void @round_v8bf16(ptr %x) {
4452; CHECK-LABEL: round_v8bf16:
4453; CHECK:       # %bb.0:
4454; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
4455; CHECK-NEXT:    vle16.v v8, (a0)
4456; CHECK-NEXT:    lui a1, 307200
4457; CHECK-NEXT:    fmv.w.x fa5, a1
4458; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
4459; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
4460; CHECK-NEXT:    vfabs.v v8, v10
4461; CHECK-NEXT:    vmflt.vf v0, v8, fa5
4462; CHECK-NEXT:    fsrmi a1, 4
4463; CHECK-NEXT:    vfcvt.x.f.v v8, v10, v0.t
4464; CHECK-NEXT:    fsrm a1
4465; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
4466; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
4467; CHECK-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
4468; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
4469; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
4470; CHECK-NEXT:    vse16.v v8, (a0)
4471; CHECK-NEXT:    ret
4472  %a = load <8 x bfloat>, ptr %x
4473  %b = call <8 x bfloat> @llvm.round.v8bf16(<8 x bfloat> %a)
4474  store <8 x bfloat> %b, ptr %x
4475  ret void
4476}
4477
; round on <6 x bfloat>: load from %x, call llvm.round.v6bf16, store back to
; %x. The expected code loads with VL=6, uses VL=8 for the widening convert and
; the magnitude test, then returns to VL=6 for the masked conversions, which
; run with frm set to 4 (RMM) by fsrmi.
4478define void @round_v6bf16(ptr %x) {
4479; CHECK-LABEL: round_v6bf16:
4480; CHECK:       # %bb.0:
4481; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
4482; CHECK-NEXT:    vle16.v v8, (a0)
4483; CHECK-NEXT:    lui a1, 307200
4484; CHECK-NEXT:    fmv.w.x fa5, a1
4485; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
4486; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
4487; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
4488; CHECK-NEXT:    vfabs.v v8, v10
4489; CHECK-NEXT:    vmflt.vf v0, v8, fa5
4490; CHECK-NEXT:    fsrmi a1, 4
4491; CHECK-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
4492; CHECK-NEXT:    vfcvt.x.f.v v8, v10, v0.t
4493; CHECK-NEXT:    fsrm a1
4494; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
4495; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
4496; CHECK-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
4497; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
4498; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
4499; CHECK-NEXT:    vse16.v v8, (a0)
4500; CHECK-NEXT:    ret
4501  %a = load <6 x bfloat>, ptr %x
4502  %b = call <6 x bfloat> @llvm.round.v6bf16(<6 x bfloat> %a)
4503  store <6 x bfloat> %b, ptr %x
4504  ret void
4505}
4506
; round on <8 x half>: load from %x, call llvm.round.v8f16, store back to %x.
; Under the ZVFH prefix the rounding runs directly at e16 with a constant-pool
; threshold (flh). Under the ZVFHMIN prefix the input is widened to f32, rounded
; at e32 and narrowed back. Both variants use fsrmi a1, 4 (RMM) around
; vfcvt.x.f.v.
4507define void @round_v8f16(ptr %x) {
4508; ZVFH-LABEL: round_v8f16:
4509; ZVFH:       # %bb.0:
4510; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
4511; ZVFH-NEXT:    vle16.v v8, (a0)
4512; ZVFH-NEXT:    lui a1, %hi(.LCPI189_0)
4513; ZVFH-NEXT:    flh fa5, %lo(.LCPI189_0)(a1)
4514; ZVFH-NEXT:    vfabs.v v9, v8
4515; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
4516; ZVFH-NEXT:    fsrmi a1, 4
4517; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
4518; ZVFH-NEXT:    fsrm a1
4519; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
4520; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
4521; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
4522; ZVFH-NEXT:    vse16.v v8, (a0)
4523; ZVFH-NEXT:    ret
4524;
4525; ZVFHMIN-LABEL: round_v8f16:
4526; ZVFHMIN:       # %bb.0:
4527; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
4528; ZVFHMIN-NEXT:    vle16.v v8, (a0)
4529; ZVFHMIN-NEXT:    lui a1, 307200
4530; ZVFHMIN-NEXT:    fmv.w.x fa5, a1
4531; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
4532; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
4533; ZVFHMIN-NEXT:    vfabs.v v8, v10
4534; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
4535; ZVFHMIN-NEXT:    fsrmi a1, 4
4536; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
4537; ZVFHMIN-NEXT:    fsrm a1
4538; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
4539; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
4540; ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
4541; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
4542; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
4543; ZVFHMIN-NEXT:    vse16.v v8, (a0)
4544; ZVFHMIN-NEXT:    ret
4545  %a = load <8 x half>, ptr %x
4546  %b = call <8 x half> @llvm.round.v8f16(<8 x half> %a)
4547  store <8 x half> %b, ptr %x
4548  ret void
4549}
4550
; round on <6 x half>: load from %x, call llvm.round.v6f16, store back to %x.
; Both prefix variants load with VL=6, use VL=8 for the magnitude test (and for
; the widening convert under ZVFHMIN), then return to VL=6 for the masked
; conversions executed with frm=4 (RMM).
4551define void @round_v6f16(ptr %x) {
4552; ZVFH-LABEL: round_v6f16:
4553; ZVFH:       # %bb.0:
4554; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
4555; ZVFH-NEXT:    vle16.v v8, (a0)
4556; ZVFH-NEXT:    lui a1, %hi(.LCPI190_0)
4557; ZVFH-NEXT:    flh fa5, %lo(.LCPI190_0)(a1)
4558; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
4559; ZVFH-NEXT:    vfabs.v v9, v8
4560; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
4561; ZVFH-NEXT:    fsrmi a1, 4
4562; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
4563; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
4564; ZVFH-NEXT:    fsrm a1
4565; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
4566; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
4567; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
4568; ZVFH-NEXT:    vse16.v v8, (a0)
4569; ZVFH-NEXT:    ret
4570;
4571; ZVFHMIN-LABEL: round_v6f16:
4572; ZVFHMIN:       # %bb.0:
4573; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
4574; ZVFHMIN-NEXT:    vle16.v v8, (a0)
4575; ZVFHMIN-NEXT:    lui a1, 307200
4576; ZVFHMIN-NEXT:    fmv.w.x fa5, a1
4577; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
4578; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
4579; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
4580; ZVFHMIN-NEXT:    vfabs.v v8, v10
4581; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
4582; ZVFHMIN-NEXT:    fsrmi a1, 4
4583; ZVFHMIN-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
4584; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
4585; ZVFHMIN-NEXT:    fsrm a1
4586; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
4587; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
4588; ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
4589; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
4590; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
4591; ZVFHMIN-NEXT:    vse16.v v8, (a0)
4592; ZVFHMIN-NEXT:    ret
4593  %a = load <6 x half>, ptr %x
4594  %b = call <6 x half> @llvm.round.v6f16(<6 x half> %a)
4595  store <6 x half> %b, ptr %x
4596  ret void
4597}
4598
; round on <4 x float>: load from %x, call llvm.round.v4f32, store back to %x.
; The sequence runs at e32/m1 with no widening; the threshold comes from
; lui/fmv.w.x and the masked conversions run with frm set to 4 (RMM) by fsrmi.
4599define void @round_v4f32(ptr %x) {
4600; CHECK-LABEL: round_v4f32:
4601; CHECK:       # %bb.0:
4602; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
4603; CHECK-NEXT:    vle32.v v8, (a0)
4604; CHECK-NEXT:    lui a1, 307200
4605; CHECK-NEXT:    fmv.w.x fa5, a1
4606; CHECK-NEXT:    vfabs.v v9, v8
4607; CHECK-NEXT:    vmflt.vf v0, v9, fa5
4608; CHECK-NEXT:    fsrmi a1, 4
4609; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
4610; CHECK-NEXT:    fsrm a1
4611; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
4612; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
4613; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
4614; CHECK-NEXT:    vse32.v v8, (a0)
4615; CHECK-NEXT:    ret
4616  %a = load <4 x float>, ptr %x
4617  %b = call <4 x float> @llvm.round.v4f32(<4 x float> %a)
4618  store <4 x float> %b, ptr %x
4619  ret void
4620}
4621
; round on <2 x double>: load from %x, call llvm.round.v2f64, store back to %x.
; The sequence runs at e64/m1; the threshold is loaded with fld from
; constant-pool entry .LCPI192_0 and the masked conversions run with frm set
; to 4 (RMM) by fsrmi.
4622define void @round_v2f64(ptr %x) {
4623; CHECK-LABEL: round_v2f64:
4624; CHECK:       # %bb.0:
4625; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
4626; CHECK-NEXT:    vle64.v v8, (a0)
4627; CHECK-NEXT:    lui a1, %hi(.LCPI192_0)
4628; CHECK-NEXT:    fld fa5, %lo(.LCPI192_0)(a1)
4629; CHECK-NEXT:    vfabs.v v9, v8
4630; CHECK-NEXT:    vmflt.vf v0, v9, fa5
4631; CHECK-NEXT:    fsrmi a1, 4
4632; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
4633; CHECK-NEXT:    fsrm a1
4634; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
4635; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
4636; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
4637; CHECK-NEXT:    vse64.v v8, (a0)
4638; CHECK-NEXT:    ret
4639  %a = load <2 x double>, ptr %x
4640  %b = call <2 x double> @llvm.round.v2f64(<2 x double> %a)
4641  store <2 x double> %b, ptr %x
4642  ret void
4643}
4644
; rint on <8 x bfloat>: load from %x, call llvm.rint.v8bf16, store back to %x.
; The expected code widens to f32, performs the masked vfcvt.x.f.v /
; vfcvt.f.x.v pair and narrows back. Unlike the floor/round tests, no
; fsrmi/fsrm pair is expected, so the conversions run under whatever rounding
; mode is already in frm.
4645define void @rint_v8bf16(ptr %x) {
4646; CHECK-LABEL: rint_v8bf16:
4647; CHECK:       # %bb.0:
4648; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
4649; CHECK-NEXT:    vle16.v v8, (a0)
4650; CHECK-NEXT:    lui a1, 307200
4651; CHECK-NEXT:    fmv.w.x fa5, a1
4652; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
4653; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
4654; CHECK-NEXT:    vfabs.v v8, v10
4655; CHECK-NEXT:    vmflt.vf v0, v8, fa5
4656; CHECK-NEXT:    vfcvt.x.f.v v8, v10, v0.t
4657; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
4658; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
4659; CHECK-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
4660; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
4661; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
4662; CHECK-NEXT:    vse16.v v8, (a0)
4663; CHECK-NEXT:    ret
4664  %a = load <8 x bfloat>, ptr %x
4665  %b = call <8 x bfloat> @llvm.rint.v8bf16(<8 x bfloat> %a)
4666  store <8 x bfloat> %b, ptr %x
4667  ret void
4668}
4669
; rint on <8 x half>: load from %x, call llvm.rint.v8f16, store back to %x.
; Under the ZVFH prefix the masked conversion pair runs directly at e16; under
; the ZVFHMIN prefix the input is widened to f32 and narrowed afterwards.
; Neither variant expects a write to frm.
4670define void @rint_v8f16(ptr %x) {
4671; ZVFH-LABEL: rint_v8f16:
4672; ZVFH:       # %bb.0:
4673; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
4674; ZVFH-NEXT:    vle16.v v8, (a0)
4675; ZVFH-NEXT:    lui a1, %hi(.LCPI194_0)
4676; ZVFH-NEXT:    flh fa5, %lo(.LCPI194_0)(a1)
4677; ZVFH-NEXT:    vfabs.v v9, v8
4678; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
4679; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
4680; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
4681; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
4682; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
4683; ZVFH-NEXT:    vse16.v v8, (a0)
4684; ZVFH-NEXT:    ret
4685;
4686; ZVFHMIN-LABEL: rint_v8f16:
4687; ZVFHMIN:       # %bb.0:
4688; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
4689; ZVFHMIN-NEXT:    vle16.v v8, (a0)
4690; ZVFHMIN-NEXT:    lui a1, 307200
4691; ZVFHMIN-NEXT:    fmv.w.x fa5, a1
4692; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
4693; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
4694; ZVFHMIN-NEXT:    vfabs.v v8, v10
4695; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
4696; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
4697; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
4698; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
4699; ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
4700; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
4701; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
4702; ZVFHMIN-NEXT:    vse16.v v8, (a0)
4703; ZVFHMIN-NEXT:    ret
4704  %a = load <8 x half>, ptr %x
4705  %b = call <8 x half> @llvm.rint.v8f16(<8 x half> %a)
4706  store <8 x half> %b, ptr %x
4707  ret void
4708}
4709
; rint on <4 x float>: load from %x, call llvm.rint.v4f32, store back to %x.
; The sequence runs at e32/m1: threshold via lui/fmv.w.x, mask from
; vfabs.v + vmflt.vf, masked convert pair, then a masked vfsgnj.vv. No frm
; write is expected.
4710define void @rint_v4f32(ptr %x) {
4711; CHECK-LABEL: rint_v4f32:
4712; CHECK:       # %bb.0:
4713; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
4714; CHECK-NEXT:    vle32.v v8, (a0)
4715; CHECK-NEXT:    lui a1, 307200
4716; CHECK-NEXT:    fmv.w.x fa5, a1
4717; CHECK-NEXT:    vfabs.v v9, v8
4718; CHECK-NEXT:    vmflt.vf v0, v9, fa5
4719; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
4720; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
4721; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
4722; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
4723; CHECK-NEXT:    vse32.v v8, (a0)
4724; CHECK-NEXT:    ret
4725  %a = load <4 x float>, ptr %x
4726  %b = call <4 x float> @llvm.rint.v4f32(<4 x float> %a)
4727  store <4 x float> %b, ptr %x
4728  ret void
4729}
4730
; rint on <2 x double>: load from %x, call llvm.rint.v2f64, store back to %x.
; The sequence runs at e64/m1 with the threshold loaded by fld from
; constant-pool entry .LCPI196_0. No frm write is expected.
4731define void @rint_v2f64(ptr %x) {
4732; CHECK-LABEL: rint_v2f64:
4733; CHECK:       # %bb.0:
4734; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
4735; CHECK-NEXT:    vle64.v v8, (a0)
4736; CHECK-NEXT:    lui a1, %hi(.LCPI196_0)
4737; CHECK-NEXT:    fld fa5, %lo(.LCPI196_0)(a1)
4738; CHECK-NEXT:    vfabs.v v9, v8
4739; CHECK-NEXT:    vmflt.vf v0, v9, fa5
4740; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
4741; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
4742; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
4743; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
4744; CHECK-NEXT:    vse64.v v8, (a0)
4745; CHECK-NEXT:    ret
4746  %a = load <2 x double>, ptr %x
4747  %b = call <2 x double> @llvm.rint.v2f64(<2 x double> %a)
4748  store <2 x double> %b, ptr %x
4749  ret void
4750}
4751
; nearbyint on <8 x bfloat>: load from %x, call llvm.nearbyint.v8bf16, store
; back to %x. Same widen/convert/narrow shape as the rint test, except that the
; masked conversion pair is bracketed by frflags a1 / fsflags a1, which reads
; fflags before the conversions and writes the saved value back afterwards.
4752define void @nearbyint_v8bf16(ptr %x) {
4753; CHECK-LABEL: nearbyint_v8bf16:
4754; CHECK:       # %bb.0:
4755; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
4756; CHECK-NEXT:    vle16.v v8, (a0)
4757; CHECK-NEXT:    lui a1, 307200
4758; CHECK-NEXT:    fmv.w.x fa5, a1
4759; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
4760; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
4761; CHECK-NEXT:    vfabs.v v8, v10
4762; CHECK-NEXT:    vmflt.vf v0, v8, fa5
4763; CHECK-NEXT:    frflags a1
4764; CHECK-NEXT:    vfcvt.x.f.v v8, v10, v0.t
4765; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
4766; CHECK-NEXT:    fsflags a1
4767; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
4768; CHECK-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
4769; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
4770; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
4771; CHECK-NEXT:    vse16.v v8, (a0)
4772; CHECK-NEXT:    ret
4773  %a = load <8 x bfloat>, ptr %x
4774  %b = call <8 x bfloat> @llvm.nearbyint.v8bf16(<8 x bfloat> %a)
4775  store <8 x bfloat> %b, ptr %x
4776  ret void
4777}
4778
; nearbyint on <8 x half>: load from %x, call llvm.nearbyint.v8f16, store back
; to %x. Under the ZVFH prefix the conversions run directly at e16; under the
; ZVFHMIN prefix the input is widened to f32 and narrowed afterwards. In both
; variants frflags a1 / fsflags a1 surround the masked conversion pair.
4779define void @nearbyint_v8f16(ptr %x) {
4780; ZVFH-LABEL: nearbyint_v8f16:
4781; ZVFH:       # %bb.0:
4782; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
4783; ZVFH-NEXT:    vle16.v v8, (a0)
4784; ZVFH-NEXT:    lui a1, %hi(.LCPI198_0)
4785; ZVFH-NEXT:    flh fa5, %lo(.LCPI198_0)(a1)
4786; ZVFH-NEXT:    vfabs.v v9, v8
4787; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
4788; ZVFH-NEXT:    frflags a1
4789; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
4790; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
4791; ZVFH-NEXT:    fsflags a1
4792; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
4793; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
4794; ZVFH-NEXT:    vse16.v v8, (a0)
4795; ZVFH-NEXT:    ret
4796;
4797; ZVFHMIN-LABEL: nearbyint_v8f16:
4798; ZVFHMIN:       # %bb.0:
4799; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
4800; ZVFHMIN-NEXT:    vle16.v v8, (a0)
4801; ZVFHMIN-NEXT:    lui a1, 307200
4802; ZVFHMIN-NEXT:    fmv.w.x fa5, a1
4803; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
4804; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
4805; ZVFHMIN-NEXT:    vfabs.v v8, v10
4806; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
4807; ZVFHMIN-NEXT:    frflags a1
4808; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
4809; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
4810; ZVFHMIN-NEXT:    fsflags a1
4811; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
4812; ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
4813; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
4814; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
4815; ZVFHMIN-NEXT:    vse16.v v8, (a0)
4816; ZVFHMIN-NEXT:    ret
4817  %a = load <8 x half>, ptr %x
4818  %b = call <8 x half> @llvm.nearbyint.v8f16(<8 x half> %a)
4819  store <8 x half> %b, ptr %x
4820  ret void
4821}
4822
; nearbyint on <4 x float>: load from %x, call llvm.nearbyint.v4f32, store back
; to %x. The sequence runs at e32/m1; frflags a1 / fsflags a1 surround the
; masked vfcvt.x.f.v / vfcvt.f.x.v pair so fflags is written back with the
; value read before the conversions.
4823define void @nearbyint_v4f32(ptr %x) {
4824; CHECK-LABEL: nearbyint_v4f32:
4825; CHECK:       # %bb.0:
4826; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
4827; CHECK-NEXT:    vle32.v v8, (a0)
4828; CHECK-NEXT:    lui a1, 307200
4829; CHECK-NEXT:    fmv.w.x fa5, a1
4830; CHECK-NEXT:    vfabs.v v9, v8
4831; CHECK-NEXT:    vmflt.vf v0, v9, fa5
4832; CHECK-NEXT:    frflags a1
4833; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
4834; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
4835; CHECK-NEXT:    fsflags a1
4836; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
4837; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
4838; CHECK-NEXT:    vse32.v v8, (a0)
4839; CHECK-NEXT:    ret
4840  %a = load <4 x float>, ptr %x
4841  %b = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %a)
4842  store <4 x float> %b, ptr %x
4843  ret void
4844}
4845
; nearbyint on <2 x double>: load from %x, call llvm.nearbyint.v2f64, store
; back to %x. The sequence runs at e64/m1 with the threshold loaded by fld from
; constant-pool entry .LCPI200_0; frflags a1 / fsflags a1 surround the masked
; conversion pair.
4846define void @nearbyint_v2f64(ptr %x) {
4847; CHECK-LABEL: nearbyint_v2f64:
4848; CHECK:       # %bb.0:
4849; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
4850; CHECK-NEXT:    vle64.v v8, (a0)
4851; CHECK-NEXT:    lui a1, %hi(.LCPI200_0)
4852; CHECK-NEXT:    fld fa5, %lo(.LCPI200_0)(a1)
4853; CHECK-NEXT:    vfabs.v v9, v8
4854; CHECK-NEXT:    vmflt.vf v0, v9, fa5
4855; CHECK-NEXT:    frflags a1
4856; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
4857; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
4858; CHECK-NEXT:    fsflags a1
4859; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
4860; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
4861; CHECK-NEXT:    vse64.v v8, (a0)
4862; CHECK-NEXT:    ret
4863  %a = load <2 x double>, ptr %x
4864  %b = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %a)
4865  store <2 x double> %b, ptr %x
4866  ret void
4867}
4868
; fmuladd on <8 x bfloat>: loads %x, %y and %z, computes
; llvm.fmuladd(%a, %b, %c) and stores the result to %x.
; The expected code is an unfused sequence: both multiplicands are widened to
; f32, multiplied with vfmul.vv, narrowed back to bf16, widened again together
; with the addend, added with vfadd.vv and narrowed for the store.
4869define void @fmuladd_v8bf16(ptr %x, ptr %y, ptr %z) {
4870; CHECK-LABEL: fmuladd_v8bf16:
4871; CHECK:       # %bb.0:
4872; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
4873; CHECK-NEXT:    vle16.v v8, (a1)
4874; CHECK-NEXT:    vle16.v v9, (a0)
4875; CHECK-NEXT:    vle16.v v10, (a2)
4876; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
4877; CHECK-NEXT:    vfwcvtbf16.f.f.v v14, v9
4878; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
4879; CHECK-NEXT:    vfmul.vv v8, v14, v12
4880; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
4881; CHECK-NEXT:    vfncvtbf16.f.f.w v11, v8
4882; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v11
4883; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v10
4884; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
4885; CHECK-NEXT:    vfadd.vv v8, v8, v12
4886; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
4887; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
4888; CHECK-NEXT:    vse16.v v10, (a0)
4889; CHECK-NEXT:    ret
4890  %a = load <8 x bfloat>, ptr %x
4891  %b = load <8 x bfloat>, ptr %y
4892  %c = load <8 x bfloat>, ptr %z
4893  %d = call <8 x bfloat> @llvm.fmuladd.v8bf16(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %c)
4894  store <8 x bfloat> %d, ptr %x
4895  ret void
4896}
4897
; fmuladd on <6 x bfloat>: loads %x, %y and %z, computes
; llvm.fmuladd(%a, %b, %c) and stores the result to %x.
; The expected code matches the 8-element bf16 case (widen, vfmul.vv, narrow,
; widen, vfadd.vv, narrow) but keeps VL=6 throughout; every later vsetvli uses
; the "zero, zero" form.
4898define void @fmuladd_v6bf16(ptr %x, ptr %y, ptr %z) {
4899; CHECK-LABEL: fmuladd_v6bf16:
4900; CHECK:       # %bb.0:
4901; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
4902; CHECK-NEXT:    vle16.v v8, (a1)
4903; CHECK-NEXT:    vle16.v v9, (a0)
4904; CHECK-NEXT:    vle16.v v10, (a2)
4905; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
4906; CHECK-NEXT:    vfwcvtbf16.f.f.v v14, v9
4907; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
4908; CHECK-NEXT:    vfmul.vv v8, v14, v12
4909; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
4910; CHECK-NEXT:    vfncvtbf16.f.f.w v11, v8
4911; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v11
4912; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v10
4913; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
4914; CHECK-NEXT:    vfadd.vv v8, v8, v12
4915; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
4916; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
4917; CHECK-NEXT:    vse16.v v10, (a0)
4918; CHECK-NEXT:    ret
4919  %a = load <6 x bfloat>, ptr %x
4920  %b = load <6 x bfloat>, ptr %y
4921  %c = load <6 x bfloat>, ptr %z
4922  %d = call <6 x bfloat> @llvm.fmuladd.v6bf16(<6 x bfloat> %a, <6 x bfloat> %b, <6 x bfloat> %c)
4923  store <6 x bfloat> %d, ptr %x
4924  ret void
4925}
4926
; fmuladd on <8 x half>: loads %x, %y and %z, computes
; llvm.fmuladd(%a, %b, %c) and stores the result to %x.
; Under the ZVFH prefix a single vfmacc.vv at e16 is expected. Under the
; ZVFHMIN prefix the expected code is unfused: widen to f32, vfmul.vv, narrow
; to f16, widen again with the addend, vfadd.vv, narrow.
4927define void @fmuladd_v8f16(ptr %x, ptr %y, ptr %z) {
4928; ZVFH-LABEL: fmuladd_v8f16:
4929; ZVFH:       # %bb.0:
4930; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
4931; ZVFH-NEXT:    vle16.v v8, (a0)
4932; ZVFH-NEXT:    vle16.v v9, (a1)
4933; ZVFH-NEXT:    vle16.v v10, (a2)
4934; ZVFH-NEXT:    vfmacc.vv v10, v8, v9
4935; ZVFH-NEXT:    vse16.v v10, (a0)
4936; ZVFH-NEXT:    ret
4937;
4938; ZVFHMIN-LABEL: fmuladd_v8f16:
4939; ZVFHMIN:       # %bb.0:
4940; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
4941; ZVFHMIN-NEXT:    vle16.v v8, (a1)
4942; ZVFHMIN-NEXT:    vle16.v v9, (a0)
4943; ZVFHMIN-NEXT:    vle16.v v10, (a2)
4944; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
4945; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
4946; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
4947; ZVFHMIN-NEXT:    vfmul.vv v8, v14, v12
4948; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
4949; ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v8
4950; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11
4951; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
4952; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
4953; ZVFHMIN-NEXT:    vfadd.vv v8, v8, v12
4954; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
4955; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
4956; ZVFHMIN-NEXT:    vse16.v v10, (a0)
4957; ZVFHMIN-NEXT:    ret
4958  %a = load <8 x half>, ptr %x
4959  %b = load <8 x half>, ptr %y
4960  %c = load <8 x half>, ptr %z
4961  %d = call <8 x half> @llvm.fmuladd.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c)
4962  store <8 x half> %d, ptr %x
4963  ret void
4964}
4965
; fmuladd on <6 x half>: loads %x, %y and %z, computes
; llvm.fmuladd(%a, %b, %c) and stores the result to %x.
; Expectations mirror the 8-element f16 case with VL=6: a single vfmacc.vv
; under the ZVFH prefix, and the unfused widen/vfmul.vv/narrow/widen/vfadd.vv/
; narrow sequence under the ZVFHMIN prefix.
4966define void @fmuladd_v6f16(ptr %x, ptr %y, ptr %z) {
4967; ZVFH-LABEL: fmuladd_v6f16:
4968; ZVFH:       # %bb.0:
4969; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
4970; ZVFH-NEXT:    vle16.v v8, (a0)
4971; ZVFH-NEXT:    vle16.v v9, (a1)
4972; ZVFH-NEXT:    vle16.v v10, (a2)
4973; ZVFH-NEXT:    vfmacc.vv v10, v8, v9
4974; ZVFH-NEXT:    vse16.v v10, (a0)
4975; ZVFH-NEXT:    ret
4976;
4977; ZVFHMIN-LABEL: fmuladd_v6f16:
4978; ZVFHMIN:       # %bb.0:
4979; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
4980; ZVFHMIN-NEXT:    vle16.v v8, (a1)
4981; ZVFHMIN-NEXT:    vle16.v v9, (a0)
4982; ZVFHMIN-NEXT:    vle16.v v10, (a2)
4983; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
4984; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
4985; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
4986; ZVFHMIN-NEXT:    vfmul.vv v8, v14, v12
4987; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
4988; ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v8
4989; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11
4990; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
4991; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
4992; ZVFHMIN-NEXT:    vfadd.vv v8, v8, v12
4993; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
4994; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
4995; ZVFHMIN-NEXT:    vse16.v v10, (a0)
4996; ZVFHMIN-NEXT:    ret
4997  %a = load <6 x half>, ptr %x
4998  %b = load <6 x half>, ptr %y
4999  %c = load <6 x half>, ptr %z
5000  %d = call <6 x half> @llvm.fmuladd.v6f16(<6 x half> %a, <6 x half> %b, <6 x half> %c)
5001  store <6 x half> %d, ptr %x
5002  ret void
5003}
5004
; fmuladd on <4 x float>: loads %x, %y and %z, computes
; llvm.fmuladd(%a, %b, %c) and stores the result to %x.
; All run lines expect a single vfmacc.vv at e32/m1 accumulating into the
; register loaded from %z.
5005define void @fmuladd_v4f32(ptr %x, ptr %y, ptr %z) {
5006; CHECK-LABEL: fmuladd_v4f32:
5007; CHECK:       # %bb.0:
5008; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
5009; CHECK-NEXT:    vle32.v v8, (a0)
5010; CHECK-NEXT:    vle32.v v9, (a1)
5011; CHECK-NEXT:    vle32.v v10, (a2)
5012; CHECK-NEXT:    vfmacc.vv v10, v8, v9
5013; CHECK-NEXT:    vse32.v v10, (a0)
5014; CHECK-NEXT:    ret
5015  %a = load <4 x float>, ptr %x
5016  %b = load <4 x float>, ptr %y
5017  %c = load <4 x float>, ptr %z
5018  %d = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
5019  store <4 x float> %d, ptr %x
5020  ret void
5021}
5022
; fmuladd on <2 x double>: loads %x, %y and %z, computes
; llvm.fmuladd(%a, %b, %c) and stores the result to %x.
; All run lines expect a single vfmacc.vv at e64/m1 accumulating into the
; register loaded from %z.
5023define void @fmuladd_v2f64(ptr %x, ptr %y, ptr %z) {
5024; CHECK-LABEL: fmuladd_v2f64:
5025; CHECK:       # %bb.0:
5026; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
5027; CHECK-NEXT:    vle64.v v8, (a0)
5028; CHECK-NEXT:    vle64.v v9, (a1)
5029; CHECK-NEXT:    vle64.v v10, (a2)
5030; CHECK-NEXT:    vfmacc.vv v10, v8, v9
5031; CHECK-NEXT:    vse64.v v10, (a0)
5032; CHECK-NEXT:    ret
5033  %a = load <2 x double>, ptr %x
5034  %b = load <2 x double>, ptr %y
5035  %c = load <2 x double>, ptr %z
5036  %d = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c)
5037  store <2 x double> %d, ptr %x
5038  ret void
5039}
5040
; fmuladd with a negated addend on <8 x bfloat>: %c is negated with fneg before
; being passed to llvm.fmuladd, and the result is stored to %x.
; The expected code has no separate negate instruction: after the widened
; vfmul.vv and a round trip through bf16, the addend is applied with vfsub.vv
; and the result narrowed for the store.
5041define void @fmsub_fmuladd_v8bf16(ptr %x, ptr %y, ptr %z) {
5042; CHECK-LABEL: fmsub_fmuladd_v8bf16:
5043; CHECK:       # %bb.0:
5044; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
5045; CHECK-NEXT:    vle16.v v8, (a1)
5046; CHECK-NEXT:    vle16.v v9, (a0)
5047; CHECK-NEXT:    vle16.v v10, (a2)
5048; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
5049; CHECK-NEXT:    vfwcvtbf16.f.f.v v14, v9
5050; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
5051; CHECK-NEXT:    vfmul.vv v8, v14, v12
5052; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
5053; CHECK-NEXT:    vfncvtbf16.f.f.w v11, v8
5054; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v11
5055; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v10
5056; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
5057; CHECK-NEXT:    vfsub.vv v8, v8, v12
5058; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
5059; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
5060; CHECK-NEXT:    vse16.v v10, (a0)
5061; CHECK-NEXT:    ret
5062  %a = load <8 x bfloat>, ptr %x
5063  %b = load <8 x bfloat>, ptr %y
5064  %c = load <8 x bfloat>, ptr %z
5065  %neg = fneg <8 x bfloat> %c
5066  %d = call <8 x bfloat> @llvm.fmuladd.v8bf16(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %neg)
5067  store <8 x bfloat> %d, ptr %x
5068  ret void
5069}
5070
; fmuladd with a negated addend on <6 x bfloat>: %c is negated with fneg before
; being passed to llvm.fmuladd, and the result is stored to %x.
; Expectations mirror the 8-element bf16 case with VL=6: widened vfmul.vv,
; round trip through bf16, then vfsub.vv with the widened addend.
5071define void @fmsub_fmuladd_v6bf16(ptr %x, ptr %y, ptr %z) {
5072; CHECK-LABEL: fmsub_fmuladd_v6bf16:
5073; CHECK:       # %bb.0:
5074; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
5075; CHECK-NEXT:    vle16.v v8, (a1)
5076; CHECK-NEXT:    vle16.v v9, (a0)
5077; CHECK-NEXT:    vle16.v v10, (a2)
5078; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
5079; CHECK-NEXT:    vfwcvtbf16.f.f.v v14, v9
5080; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
5081; CHECK-NEXT:    vfmul.vv v8, v14, v12
5082; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
5083; CHECK-NEXT:    vfncvtbf16.f.f.w v11, v8
5084; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v11
5085; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v10
5086; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
5087; CHECK-NEXT:    vfsub.vv v8, v8, v12
5088; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
5089; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
5090; CHECK-NEXT:    vse16.v v10, (a0)
5091; CHECK-NEXT:    ret
5092  %a = load <6 x bfloat>, ptr %x
5093  %b = load <6 x bfloat>, ptr %y
5094  %c = load <6 x bfloat>, ptr %z
5095  %neg = fneg <6 x bfloat> %c
5096  %d = call <6 x bfloat> @llvm.fmuladd.v6bf16(<6 x bfloat> %a, <6 x bfloat> %b, <6 x bfloat> %neg)
5097  store <6 x bfloat> %d, ptr %x
5098  ret void
5099}
5100
; fmuladd with a negated addend on <8 x half>: %c is negated with fneg before
; being passed to llvm.fmuladd, and the result is stored to %x.
; Under the ZVFH prefix a single vfmsac.vv at e16 is expected. Under the
; ZVFHMIN prefix the expected code is unfused: widen to f32, vfmul.vv, narrow
; to f16, widen again with the addend, vfsub.vv, narrow.
5101define void @fmsub_fmuladd_v8f16(ptr %x, ptr %y, ptr %z) {
5102; ZVFH-LABEL: fmsub_fmuladd_v8f16:
5103; ZVFH:       # %bb.0:
5104; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
5105; ZVFH-NEXT:    vle16.v v8, (a0)
5106; ZVFH-NEXT:    vle16.v v9, (a1)
5107; ZVFH-NEXT:    vle16.v v10, (a2)
5108; ZVFH-NEXT:    vfmsac.vv v10, v8, v9
5109; ZVFH-NEXT:    vse16.v v10, (a0)
5110; ZVFH-NEXT:    ret
5111;
5112; ZVFHMIN-LABEL: fmsub_fmuladd_v8f16:
5113; ZVFHMIN:       # %bb.0:
5114; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
5115; ZVFHMIN-NEXT:    vle16.v v8, (a1)
5116; ZVFHMIN-NEXT:    vle16.v v9, (a0)
5117; ZVFHMIN-NEXT:    vle16.v v10, (a2)
5118; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
5119; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
5120; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
5121; ZVFHMIN-NEXT:    vfmul.vv v8, v14, v12
5122; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
5123; ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v8
5124; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11
5125; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
5126; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
5127; ZVFHMIN-NEXT:    vfsub.vv v8, v8, v12
5128; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
5129; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
5130; ZVFHMIN-NEXT:    vse16.v v10, (a0)
5131; ZVFHMIN-NEXT:    ret
5132  %a = load <8 x half>, ptr %x
5133  %b = load <8 x half>, ptr %y
5134  %c = load <8 x half>, ptr %z
5135  %neg = fneg <8 x half> %c
5136  %d = call <8 x half> @llvm.fmuladd.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %neg)
5137  store <8 x half> %d, ptr %x
5138  ret void
5139}
5140
; Non-power-of-two variant of the f16 fmsub test: loads <6 x half> a, b, c
; from %x, %y, %z, negates c, calls fmuladd(a, b, -c) and stores to %x.
; Expected output uses AVL 6 and differs per RUN configuration:
;   - the +zvfh runs expect a single fused vfmsac.vv at e16;
;   - the +zvfhmin runs expect the operands widened to e32 with a separate
;     vfmul.vv and vfsub.vv, narrowing back to f16 after each step.
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
5141define void @fmsub_fmuladd_v6f16(ptr %x, ptr %y, ptr %z) {
5142; ZVFH-LABEL: fmsub_fmuladd_v6f16:
5143; ZVFH:       # %bb.0:
5144; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
5145; ZVFH-NEXT:    vle16.v v8, (a0)
5146; ZVFH-NEXT:    vle16.v v9, (a1)
5147; ZVFH-NEXT:    vle16.v v10, (a2)
5148; ZVFH-NEXT:    vfmsac.vv v10, v8, v9
5149; ZVFH-NEXT:    vse16.v v10, (a0)
5150; ZVFH-NEXT:    ret
5151;
5152; ZVFHMIN-LABEL: fmsub_fmuladd_v6f16:
5153; ZVFHMIN:       # %bb.0:
5154; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
5155; ZVFHMIN-NEXT:    vle16.v v8, (a1)
5156; ZVFHMIN-NEXT:    vle16.v v9, (a0)
5157; ZVFHMIN-NEXT:    vle16.v v10, (a2)
5158; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
5159; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
5160; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
5161; ZVFHMIN-NEXT:    vfmul.vv v8, v14, v12
5162; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
5163; ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v8
5164; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11
5165; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
5166; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
5167; ZVFHMIN-NEXT:    vfsub.vv v8, v8, v12
5168; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
5169; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
5170; ZVFHMIN-NEXT:    vse16.v v10, (a0)
5171; ZVFHMIN-NEXT:    ret
5172  %a = load <6 x half>, ptr %x
5173  %b = load <6 x half>, ptr %y
5174  %c = load <6 x half>, ptr %z
5175  %neg = fneg <6 x half> %c
5176  %d = call <6 x half> @llvm.fmuladd.v6f16(<6 x half> %a, <6 x half> %b, <6 x half> %neg)
5177  store <6 x half> %d, ptr %x
5178  ret void
5179}
5180
; fnmsub via llvm.fmuladd on <4 x float>: loads a, b, c from %x, %y, %z,
; negates the first multiplicand a, calls fmuladd(-a, b, c) and stores the
; result back to %x.
; All four RUN configurations expect the fneg and fmuladd to be selected as a
; single vfnmsac.vv accumulating into the register holding c.
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
5181define void @fnmsub_fmuladd_v4f32(ptr %x, ptr %y, ptr %z) {
5182; CHECK-LABEL: fnmsub_fmuladd_v4f32:
5183; CHECK:       # %bb.0:
5184; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
5185; CHECK-NEXT:    vle32.v v8, (a0)
5186; CHECK-NEXT:    vle32.v v9, (a1)
5187; CHECK-NEXT:    vle32.v v10, (a2)
5188; CHECK-NEXT:    vfnmsac.vv v10, v8, v9
5189; CHECK-NEXT:    vse32.v v10, (a0)
5190; CHECK-NEXT:    ret
5191  %a = load <4 x float>, ptr %x
5192  %b = load <4 x float>, ptr %y
5193  %c = load <4 x float>, ptr %z
5194  %neg = fneg <4 x float> %a
5195  %d = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %neg, <4 x float> %b, <4 x float> %c)
5196  store <4 x float> %d, ptr %x
5197  ret void
5198}
5199
; fnmadd via llvm.fmuladd on <2 x double>: loads a, b, c from %x, %y, %z,
; negates both the second multiplicand b and the addend c, calls
; fmuladd(a, -b, -c) and stores the result back to %x.
; All four RUN configurations expect both fnegs and the fmuladd to be selected
; as a single vfnmacc.vv accumulating into the register holding c.
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
5200define void @fnmadd_fmuladd_v2f64(ptr %x, ptr %y, ptr %z) {
5201; CHECK-LABEL: fnmadd_fmuladd_v2f64:
5202; CHECK:       # %bb.0:
5203; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
5204; CHECK-NEXT:    vle64.v v8, (a0)
5205; CHECK-NEXT:    vle64.v v9, (a1)
5206; CHECK-NEXT:    vle64.v v10, (a2)
5207; CHECK-NEXT:    vfnmacc.vv v10, v8, v9
5208; CHECK-NEXT:    vse64.v v10, (a0)
5209; CHECK-NEXT:    ret
5210  %a = load <2 x double>, ptr %x
5211  %b = load <2 x double>, ptr %y
5212  %c = load <2 x double>, ptr %z
5213  %neg = fneg <2 x double> %b
5214  %neg2 = fneg <2 x double> %c
5215  %d = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> %a, <2 x double> %neg, <2 x double> %neg2)
5216  store <2 x double> %d, ptr %x
5217  ret void
5218}
5219