; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v,+m -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v,+m -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+v,+m -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+v,+m -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN

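; Note on the expected lowering (descriptive only, summarizing the CHECK lines
; below): llvm.vp.maximum.* has NaN-propagating semantics, while vfmax.vv
; returns the non-NaN operand when exactly one input is NaN. The lowering
; therefore compares each source with itself (vmfeq.vv) and uses vmerge.vvm to
; exchange NaN lanes between the operands, so a NaN in either source reaches
; vfmax.vv in both operands and propagates to the result. Under ZVFH the f16
; tests are handled directly at e16; under ZVFHMIN they are widened to f32 with
; vfwcvt.f.f.v and narrowed back with vfncvt.f.f.w.
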
declare <2 x half> @llvm.vp.maximum.v2f16(<2 x half>, <2 x half>, <2 x i1>, i32)

define <2 x half> @vfmax_vv_v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmax_vv_v2f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT:    vmv1r.v v10, v0
; ZVFH-NEXT:    vmfeq.vv v0, v8, v8, v0.t
; ZVFH-NEXT:    vmerge.vvm v11, v8, v9, v0
; ZVFH-NEXT:    vmv1r.v v0, v10
; ZVFH-NEXT:    vmfeq.vv v0, v9, v9, v0.t
; ZVFH-NEXT:    vmerge.vvm v8, v9, v8, v0
; ZVFH-NEXT:    vmv1r.v v0, v10
; ZVFH-NEXT:    vfmax.vv v8, v8, v11, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmax_vv_v2f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vmv1r.v v10, v0
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v9, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vmfeq.vv v0, v9, v9, v0.t
; ZVFHMIN-NEXT:    vmerge.vvm v8, v9, v11, v0
; ZVFHMIN-NEXT:    vmv1r.v v0, v10
; ZVFHMIN-NEXT:    vmfeq.vv v0, v11, v11, v0.t
; ZVFHMIN-NEXT:    vmerge.vvm v9, v11, v9, v0
; ZVFHMIN-NEXT:    vmv1r.v v0, v10
; ZVFHMIN-NEXT:    vfmax.vv v9, v9, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
; ZVFHMIN-NEXT:    ret
  %v = call <2 x half> @llvm.vp.maximum.v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> %m, i32 %evl)
  ret <2 x half> %v
}

define <2 x half> @vfmax_vv_v2f16_unmasked(<2 x half> %va, <2 x half> %vb, i32 zeroext %evl) {
; ZVFH-LABEL: vfmax_vv_v2f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT:    vmfeq.vv v0, v8, v8
; ZVFH-NEXT:    vmerge.vvm v10, v8, v9, v0
; ZVFH-NEXT:    vmfeq.vv v0, v9, v9
; ZVFH-NEXT:    vmerge.vvm v8, v9, v8, v0
; ZVFH-NEXT:    vfmax.vv v8, v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmax_vv_v2f16_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vmfeq.vv v0, v10, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vmerge.vvm v9, v10, v8, v0
; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v10, v0
; ZVFHMIN-NEXT:    vfmax.vv v9, v8, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    ret
  %v = call <2 x half> @llvm.vp.maximum.v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x half> %v
}

declare <4 x half> @llvm.vp.maximum.v4f16(<4 x half>, <4 x half>, <4 x i1>, i32)

define <4 x half> @vfmax_vv_v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmax_vv_v4f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT:    vmv1r.v v10, v0
; ZVFH-NEXT:    vmfeq.vv v0, v8, v8, v0.t
; ZVFH-NEXT:    vmerge.vvm v11, v8, v9, v0
; ZVFH-NEXT:    vmv1r.v v0, v10
; ZVFH-NEXT:    vmfeq.vv v0, v9, v9, v0.t
; ZVFH-NEXT:    vmerge.vvm v8, v9, v8, v0
; ZVFH-NEXT:    vmv1r.v v0, v10
; ZVFH-NEXT:    vfmax.vv v8, v8, v11, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmax_vv_v4f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vmv1r.v v10, v0
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v9, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vmfeq.vv v0, v9, v9, v0.t
; ZVFHMIN-NEXT:    vmerge.vvm v8, v9, v11, v0
; ZVFHMIN-NEXT:    vmv1r.v v0, v10
; ZVFHMIN-NEXT:    vmfeq.vv v0, v11, v11, v0.t
; ZVFHMIN-NEXT:    vmerge.vvm v9, v11, v9, v0
; ZVFHMIN-NEXT:    vmv1r.v v0, v10
; ZVFHMIN-NEXT:    vfmax.vv v9, v9, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
; ZVFHMIN-NEXT:    ret
  %v = call <4 x half> @llvm.vp.maximum.v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> %m, i32 %evl)
  ret <4 x half> %v
}

define <4 x half> @vfmax_vv_v4f16_unmasked(<4 x half> %va, <4 x half> %vb, i32 zeroext %evl) {
; ZVFH-LABEL: vfmax_vv_v4f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT:    vmfeq.vv v0, v8, v8
; ZVFH-NEXT:    vmerge.vvm v10, v8, v9, v0
; ZVFH-NEXT:    vmfeq.vv v0, v9, v9
; ZVFH-NEXT:    vmerge.vvm v8, v9, v8, v0
; ZVFH-NEXT:    vfmax.vv v8, v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmax_vv_v4f16_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vmfeq.vv v0, v10, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vmerge.vvm v9, v10, v8, v0
; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v10, v0
; ZVFHMIN-NEXT:    vfmax.vv v9, v8, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    ret
  %v = call <4 x half> @llvm.vp.maximum.v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x half> %v
}

declare <8 x half> @llvm.vp.maximum.v8f16(<8 x half>, <8 x half>, <8 x i1>, i32)

define <8 x half> @vfmax_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmax_vv_v8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT:    vmv1r.v v10, v0
; ZVFH-NEXT:    vmfeq.vv v0, v8, v8, v0.t
; ZVFH-NEXT:    vmerge.vvm v11, v8, v9, v0
; ZVFH-NEXT:    vmv1r.v v0, v10
; ZVFH-NEXT:    vmfeq.vv v0, v9, v9, v0.t
; ZVFH-NEXT:    vmerge.vvm v8, v9, v8, v0
; ZVFH-NEXT:    vmv1r.v v0, v10
; ZVFH-NEXT:    vfmax.vv v8, v8, v11, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmax_vv_v8f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vmv1r.v v10, v0
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vmfeq.vv v8, v14, v14, v0.t
; ZVFHMIN-NEXT:    vmv1r.v v0, v8
; ZVFHMIN-NEXT:    vmerge.vvm v16, v14, v12, v0
; ZVFHMIN-NEXT:    vmv1r.v v0, v10
; ZVFHMIN-NEXT:    vmfeq.vv v8, v12, v12, v0.t
; ZVFHMIN-NEXT:    vmv1r.v v0, v8
; ZVFHMIN-NEXT:    vmerge.vvm v8, v12, v14, v0
; ZVFHMIN-NEXT:    vmv1r.v v0, v10
; ZVFHMIN-NEXT:    vfmax.vv v12, v8, v16, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
; ZVFHMIN-NEXT:    ret
  %v = call <8 x half> @llvm.vp.maximum.v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i32 %evl)
  ret <8 x half> %v
}

define <8 x half> @vfmax_vv_v8f16_unmasked(<8 x half> %va, <8 x half> %vb, i32 zeroext %evl) {
; ZVFH-LABEL: vfmax_vv_v8f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT:    vmfeq.vv v0, v8, v8
; ZVFH-NEXT:    vmerge.vvm v10, v8, v9, v0
; ZVFH-NEXT:    vmfeq.vv v0, v9, v9
; ZVFH-NEXT:    vmerge.vvm v8, v9, v8, v0
; ZVFH-NEXT:    vfmax.vv v8, v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmax_vv_v8f16_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vmfeq.vv v0, v10, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vmerge.vvm v8, v10, v12, v0
; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v12
; ZVFHMIN-NEXT:    vmerge.vvm v10, v12, v10, v0
; ZVFHMIN-NEXT:    vfmax.vv v10, v10, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT:    ret
  %v = call <8 x half> @llvm.vp.maximum.v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x half> %v
}

declare <16 x half> @llvm.vp.maximum.v16f16(<16 x half>, <16 x half>, <16 x i1>, i32)

define <16 x half> @vfmax_vv_v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmax_vv_v16f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT:    vmv1r.v v12, v0
; ZVFH-NEXT:    vmfeq.vv v13, v8, v8, v0.t
; ZVFH-NEXT:    vmv1r.v v0, v13
; ZVFH-NEXT:    vmerge.vvm v14, v8, v10, v0
; ZVFH-NEXT:    vmv1r.v v0, v12
; ZVFH-NEXT:    vmfeq.vv v13, v10, v10, v0.t
; ZVFH-NEXT:    vmv1r.v v0, v13
; ZVFH-NEXT:    vmerge.vvm v8, v10, v8, v0
; ZVFH-NEXT:    vmv1r.v v0, v12
; ZVFH-NEXT:    vfmax.vv v8, v8, v14, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmax_vv_v16f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vmv1r.v v12, v0
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vmfeq.vv v8, v20, v20, v0.t
; ZVFHMIN-NEXT:    vmv1r.v v0, v8
; ZVFHMIN-NEXT:    vmerge.vvm v24, v20, v16, v0
; ZVFHMIN-NEXT:    vmv1r.v v0, v12
; ZVFHMIN-NEXT:    vmfeq.vv v8, v16, v16, v0.t
; ZVFHMIN-NEXT:    vmv1r.v v0, v8
; ZVFHMIN-NEXT:    vmerge.vvm v8, v16, v20, v0
; ZVFHMIN-NEXT:    vmv1r.v v0, v12
; ZVFHMIN-NEXT:    vfmax.vv v16, v8, v24, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16, v0.t
; ZVFHMIN-NEXT:    ret
  %v = call <16 x half> @llvm.vp.maximum.v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> %m, i32 %evl)
  ret <16 x half> %v
}

define <16 x half> @vfmax_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %vb, i32 zeroext %evl) {
; ZVFH-LABEL: vfmax_vv_v16f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT:    vmfeq.vv v0, v8, v8
; ZVFH-NEXT:    vmerge.vvm v12, v8, v10, v0
; ZVFH-NEXT:    vmfeq.vv v0, v10, v10
; ZVFH-NEXT:    vmerge.vvm v8, v10, v8, v0
; ZVFH-NEXT:    vfmax.vv v8, v8, v12
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmax_vv_v16f16_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v12
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vmerge.vvm v8, v12, v16, v0
; ZVFHMIN-NEXT:    vmfeq.vv v0, v16, v16
; ZVFHMIN-NEXT:    vmerge.vvm v12, v16, v12, v0
; ZVFHMIN-NEXT:    vfmax.vv v12, v12, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT:    ret
  %v = call <16 x half> @llvm.vp.maximum.v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x half> %v
}

declare <2 x float> @llvm.vp.maximum.v2f32(<2 x float>, <2 x float>, <2 x i1>, i32)

define <2 x float> @vfmax_vv_v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vmv1r.v v10, v0
; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
; CHECK-NEXT:    vmerge.vvm v11, v8, v9, v0
; CHECK-NEXT:    vmv1r.v v0, v10
; CHECK-NEXT:    vmfeq.vv v0, v9, v9, v0.t
; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT:    vmv1r.v v0, v10
; CHECK-NEXT:    vfmax.vv v8, v8, v11, v0.t
; CHECK-NEXT:    ret
  %v = call <2 x float> @llvm.vp.maximum.v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> %m, i32 %evl)
  ret <2 x float> %v
}

define <2 x float> @vfmax_vv_v2f32_unmasked(<2 x float> %va, <2 x float> %vb, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_v2f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vmfeq.vv v0, v8, v8
; CHECK-NEXT:    vmerge.vvm v10, v8, v9, v0
; CHECK-NEXT:    vmfeq.vv v0, v9, v9
; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT:    vfmax.vv v8, v8, v10
; CHECK-NEXT:    ret
  %v = call <2 x float> @llvm.vp.maximum.v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x float> %v
}

declare <4 x float> @llvm.vp.maximum.v4f32(<4 x float>, <4 x float>, <4 x i1>, i32)

define <4 x float> @vfmax_vv_v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vmv1r.v v10, v0
; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
; CHECK-NEXT:    vmerge.vvm v11, v8, v9, v0
; CHECK-NEXT:    vmv1r.v v0, v10
; CHECK-NEXT:    vmfeq.vv v0, v9, v9, v0.t
; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT:    vmv1r.v v0, v10
; CHECK-NEXT:    vfmax.vv v8, v8, v11, v0.t
; CHECK-NEXT:    ret
  %v = call <4 x float> @llvm.vp.maximum.v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> %m, i32 %evl)
  ret <4 x float> %v
}

define <4 x float> @vfmax_vv_v4f32_unmasked(<4 x float> %va, <4 x float> %vb, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_v4f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vmfeq.vv v0, v8, v8
; CHECK-NEXT:    vmerge.vvm v10, v8, v9, v0
; CHECK-NEXT:    vmfeq.vv v0, v9, v9
; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT:    vfmax.vv v8, v8, v10
; CHECK-NEXT:    ret
  %v = call <4 x float> @llvm.vp.maximum.v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x float> %v
}

declare <8 x float> @llvm.vp.maximum.v8f32(<8 x float>, <8 x float>, <8 x i1>, i32)

define <8 x float> @vfmax_vv_v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vmv1r.v v12, v0
; CHECK-NEXT:    vmfeq.vv v13, v8, v8, v0.t
; CHECK-NEXT:    vmv1r.v v0, v13
; CHECK-NEXT:    vmerge.vvm v14, v8, v10, v0
; CHECK-NEXT:    vmv1r.v v0, v12
; CHECK-NEXT:    vmfeq.vv v13, v10, v10, v0.t
; CHECK-NEXT:    vmv1r.v v0, v13
; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
; CHECK-NEXT:    vmv1r.v v0, v12
; CHECK-NEXT:    vfmax.vv v8, v8, v14, v0.t
; CHECK-NEXT:    ret
  %v = call <8 x float> @llvm.vp.maximum.v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m, i32 %evl)
  ret <8 x float> %v
}

define <8 x float> @vfmax_vv_v8f32_unmasked(<8 x float> %va, <8 x float> %vb, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_v8f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vmfeq.vv v0, v8, v8
; CHECK-NEXT:    vmerge.vvm v12, v8, v10, v0
; CHECK-NEXT:    vmfeq.vv v0, v10, v10
; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
; CHECK-NEXT:    vfmax.vv v8, v8, v12
; CHECK-NEXT:    ret
  %v = call <8 x float> @llvm.vp.maximum.v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x float> %v
}

declare <16 x float> @llvm.vp.maximum.v16f32(<16 x float>, <16 x float>, <16 x i1>, i32)

define <16 x float> @vfmax_vv_v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_v16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vmv1r.v v16, v0
; CHECK-NEXT:    vmfeq.vv v17, v8, v8, v0.t
; CHECK-NEXT:    vmv1r.v v0, v17
; CHECK-NEXT:    vmerge.vvm v20, v8, v12, v0
; CHECK-NEXT:    vmv1r.v v0, v16
; CHECK-NEXT:    vmfeq.vv v17, v12, v12, v0.t
; CHECK-NEXT:    vmv1r.v v0, v17
; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
; CHECK-NEXT:    vmv1r.v v0, v16
; CHECK-NEXT:    vfmax.vv v8, v8, v20, v0.t
; CHECK-NEXT:    ret
  %v = call <16 x float> @llvm.vp.maximum.v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> %m, i32 %evl)
  ret <16 x float> %v
}

define <16 x float> @vfmax_vv_v16f32_unmasked(<16 x float> %va, <16 x float> %vb, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_v16f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vmfeq.vv v0, v8, v8
; CHECK-NEXT:    vmerge.vvm v16, v8, v12, v0
; CHECK-NEXT:    vmfeq.vv v0, v12, v12
; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
; CHECK-NEXT:    vfmax.vv v8, v8, v16
; CHECK-NEXT:    ret
  %v = call <16 x float> @llvm.vp.maximum.v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x float> %v
}

declare <2 x double> @llvm.vp.maximum.v2f64(<2 x double>, <2 x double>, <2 x i1>, i32)

define <2 x double> @vfmax_vv_v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vmv1r.v v10, v0
; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
; CHECK-NEXT:    vmerge.vvm v11, v8, v9, v0
; CHECK-NEXT:    vmv1r.v v0, v10
; CHECK-NEXT:    vmfeq.vv v0, v9, v9, v0.t
; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT:    vmv1r.v v0, v10
; CHECK-NEXT:    vfmax.vv v8, v8, v11, v0.t
; CHECK-NEXT:    ret
  %v = call <2 x double> @llvm.vp.maximum.v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> %m, i32 %evl)
  ret <2 x double> %v
}

define <2 x double> @vfmax_vv_v2f64_unmasked(<2 x double> %va, <2 x double> %vb, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_v2f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vmfeq.vv v0, v8, v8
; CHECK-NEXT:    vmerge.vvm v10, v8, v9, v0
; CHECK-NEXT:    vmfeq.vv v0, v9, v9
; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT:    vfmax.vv v8, v8, v10
; CHECK-NEXT:    ret
  %v = call <2 x double> @llvm.vp.maximum.v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x double> %v
}

declare <4 x double> @llvm.vp.maximum.v4f64(<4 x double>, <4 x double>, <4 x i1>, i32)

define <4 x double> @vfmax_vv_v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT:    vmv1r.v v12, v0
; CHECK-NEXT:    vmfeq.vv v13, v8, v8, v0.t
; CHECK-NEXT:    vmv1r.v v0, v13
; CHECK-NEXT:    vmerge.vvm v14, v8, v10, v0
; CHECK-NEXT:    vmv1r.v v0, v12
; CHECK-NEXT:    vmfeq.vv v13, v10, v10, v0.t
; CHECK-NEXT:    vmv1r.v v0, v13
; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
; CHECK-NEXT:    vmv1r.v v0, v12
; CHECK-NEXT:    vfmax.vv v8, v8, v14, v0.t
; CHECK-NEXT:    ret
  %v = call <4 x double> @llvm.vp.maximum.v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> %m, i32 %evl)
  ret <4 x double> %v
}

define <4 x double> @vfmax_vv_v4f64_unmasked(<4 x double> %va, <4 x double> %vb, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_v4f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT:    vmfeq.vv v0, v8, v8
; CHECK-NEXT:    vmerge.vvm v12, v8, v10, v0
; CHECK-NEXT:    vmfeq.vv v0, v10, v10
; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
; CHECK-NEXT:    vfmax.vv v8, v8, v12
; CHECK-NEXT:    ret
  %v = call <4 x double> @llvm.vp.maximum.v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x double> %v
}

declare <8 x double> @llvm.vp.maximum.v8f64(<8 x double>, <8 x double>, <8 x i1>, i32)

define <8 x double> @vfmax_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_v8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT:    vmv1r.v v16, v0
; CHECK-NEXT:    vmfeq.vv v17, v8, v8, v0.t
; CHECK-NEXT:    vmv1r.v v0, v17
; CHECK-NEXT:    vmerge.vvm v20, v8, v12, v0
; CHECK-NEXT:    vmv1r.v v0, v16
; CHECK-NEXT:    vmfeq.vv v17, v12, v12, v0.t
; CHECK-NEXT:    vmv1r.v v0, v17
; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
; CHECK-NEXT:    vmv1r.v v0, v16
; CHECK-NEXT:    vfmax.vv v8, v8, v20, v0.t
; CHECK-NEXT:    ret
  %v = call <8 x double> @llvm.vp.maximum.v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> %m, i32 %evl)
  ret <8 x double> %v
}

define <8 x double> @vfmax_vv_v8f64_unmasked(<8 x double> %va, <8 x double> %vb, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_v8f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT:    vmfeq.vv v0, v8, v8
; CHECK-NEXT:    vmerge.vvm v16, v8, v12, v0
; CHECK-NEXT:    vmfeq.vv v0, v12, v12
; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
; CHECK-NEXT:    vfmax.vv v8, v8, v16
; CHECK-NEXT:    ret
  %v = call <8 x double> @llvm.vp.maximum.v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x double> %v
}

declare <16 x double> @llvm.vp.maximum.v16f64(<16 x double>, <16 x double>, <16 x i1>, i32)

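; Descriptive note: for v16f64 each operand fills a whole LMUL=8 register
; group, and the expected masked code below spills the first vmerge result to a
; vlenb-sized stack slot (vs8r.v / vl8r.v "Unknown-size Folded Spill") and
; reloads it before the final masked vfmax.vv, presumably to relieve register
; pressure at m8.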
define <16 x double> @vfmax_vv_v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_v16f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    sub sp, sp, a1
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vmv1r.v v7, v0
; CHECK-NEXT:    vmfeq.vv v25, v8, v8, v0.t
; CHECK-NEXT:    vmv1r.v v0, v25
; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vmv1r.v v0, v7
; CHECK-NEXT:    vmfeq.vv v25, v16, v16, v0.t
; CHECK-NEXT:    vmv1r.v v0, v25
; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
; CHECK-NEXT:    vmv1r.v v0, v7
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vfmax.vv v8, v8, v16, v0.t
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %v = call <16 x double> @llvm.vp.maximum.v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> %m, i32 %evl)
  ret <16 x double> %v
}

define <16 x double> @vfmax_vv_v16f64_unmasked(<16 x double> %va, <16 x double> %vb, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_v16f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vmfeq.vv v0, v8, v8
; CHECK-NEXT:    vmfeq.vv v7, v16, v16
; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
; CHECK-NEXT:    vmv1r.v v0, v7
; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
; CHECK-NEXT:    vfmax.vv v8, v8, v24
; CHECK-NEXT:    ret
  %v = call <16 x double> @llvm.vp.maximum.v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x double> %v
}

declare <32 x double> @llvm.vp.maximum.v32f64(<32 x double>, <32 x double>, <32 x i1>, i32)

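; Descriptive note: v32f64 exceeds the available register groups, so in the
; code below %va arrives in two m8 groups (v8, v16), %vb is passed indirectly
; and reloaded with vle64.v from (a0) and (a0)+128, and the EVL in a2 is split
; at 16; each 16-element half then runs the same compare/merge/vfmax sequence,
; with the m8 temporaries spilled to vlenb-scaled stack slots.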
define <32 x double> @vfmax_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_v32f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 5
; CHECK-NEXT:    sub sp, sp, a1
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vmv1r.v v25, v0
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 4
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    li a3, 24
; CHECK-NEXT:    mul a1, a1, a3
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a1, a0, 128
; CHECK-NEXT:    vle64.v v16, (a1)
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    vle64.v v16, (a0)
; CHECK-NEXT:    li a1, 16
; CHECK-NEXT:    mv a0, a2
; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT:    vslidedown.vi v7, v0, 2
; CHECK-NEXT:    bltu a2, a1, .LBB24_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    li a0, 16
; CHECK-NEXT:  .LBB24_2:
; CHECK-NEXT:    vmv1r.v v0, v25
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    li a3, 24
; CHECK-NEXT:    mul a1, a1, a3
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vmfeq.vv v26, v8, v8, v0.t
; CHECK-NEXT:    vmv1r.v v0, v26
; CHECK-NEXT:    vmv8r.v v8, v16
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    li a1, 24
; CHECK-NEXT:    mul a0, a0, a1
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vmerge.vvm v16, v16, v8, v0
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vmv1r.v v0, v25
; CHECK-NEXT:    vmfeq.vv v26, v8, v8, v0.t
; CHECK-NEXT:    vmv1r.v v0, v26
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    li a1, 24
; CHECK-NEXT:    mul a0, a0, a1
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vmerge.vvm v8, v8, v16, v0
; CHECK-NEXT:    vmv1r.v v0, v25
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vfmax.vv v8, v8, v16, v0.t
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    li a1, 24
; CHECK-NEXT:    mul a0, a0, a1
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a0, a2, -16
; CHECK-NEXT:    sltu a1, a2, a0
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    and a0, a1, a0
; CHECK-NEXT:    vmv1r.v v0, v7
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 4
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vmfeq.vv v25, v16, v16, v0.t
; CHECK-NEXT:    vmv1r.v v0, v25
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vmerge.vvm v24, v16, v8, v0
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vmv1r.v v0, v7
; CHECK-NEXT:    vmfeq.vv v25, v8, v8, v0.t
; CHECK-NEXT:    vmv1r.v v0, v25
; CHECK-NEXT:    vmerge.vvm v8, v8, v16, v0
; CHECK-NEXT:    vmv1r.v v0, v7
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vfmax.vv v16, v8, v16, v0.t
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    li a1, 24
; CHECK-NEXT:    mul a0, a0, a1
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 5
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %v = call <32 x double> @llvm.vp.maximum.v32f64(<32 x double> %va, <32 x double> %vb, <32 x i1> %m, i32 %evl)
  ret <32 x double> %v
}

define <32 x double> @vfmax_vv_v32f64_unmasked(<32 x double> %va, <32 x double> %vb, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_v32f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    li a3, 24
; CHECK-NEXT:    mul a1, a1, a3
; CHECK-NEXT:    sub sp, sp, a1
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 4
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a1, a0, 128
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v16, (a1)
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    vle64.v v24, (a0)
; CHECK-NEXT:    li a1, 16
; CHECK-NEXT:    mv a0, a2
; CHECK-NEXT:    bltu a2, a1, .LBB25_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    li a0, 16
; CHECK-NEXT:  .LBB25_2:
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vmfeq.vv v0, v8, v8
; CHECK-NEXT:    vmfeq.vv v7, v24, v24
; CHECK-NEXT:    vmerge.vvm v16, v8, v24, v0
; CHECK-NEXT:    vmv1r.v v0, v7
; CHECK-NEXT:    vmerge.vvm v8, v24, v8, v0
; CHECK-NEXT:    vfmax.vv v8, v8, v16
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a0, a2, -16
; CHECK-NEXT:    sltu a1, a2, a0
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    and a0, a1, a0
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 4
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vmfeq.vv v0, v16, v16
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vmfeq.vv v7, v8, v8
; CHECK-NEXT:    vmerge.vvm v24, v16, v8, v0
; CHECK-NEXT:    vmv1r.v v0, v7
; CHECK-NEXT:    vmerge.vvm v16, v8, v16, v0
; CHECK-NEXT:    vfmax.vv v16, v16, v24
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    li a1, 24
; CHECK-NEXT:    mul a0, a0, a1
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %v = call <32 x double> @llvm.vp.maximum.v32f64(<32 x double> %va, <32 x double> %vb, <32 x i1> splat (i1 true), i32 %evl)
  ret <32 x double> %v
}
