; xref: /llvm-project/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-minmax.ll (revision b24af43fdfa1b1242b7cb77540462212227c57c4)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -aarch64-sve-vector-bits-min=256  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_EQ_256
; RUN: llc -aarch64-sve-vector-bits-min=512  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512

target triple = "aarch64-unknown-linux-gnu"

;
; FMAXNM
;

; Don't use SVE for 64-bit vectors.
define <4 x half> @fmaxnm_v4f16(<4 x half> %op1, <4 x half> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fmaxnm_v4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmaxnm v0.4h, v0.4h, v1.4h
; CHECK-NEXT:    ret
  %res = call <4 x half> @llvm.maxnum.v4f16(<4 x half> %op1, <4 x half> %op2)
  ret <4 x half> %res
}

; Don't use SVE for 128-bit vectors.
define <8 x half> @fmaxnm_v8f16(<8 x half> %op1, <8 x half> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fmaxnm_v8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmaxnm v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %res = call <8 x half> @llvm.maxnum.v8f16(<8 x half> %op1, <8 x half> %op2)
  ret <8 x half> %res
}
define void @fmaxnm_v16f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fmaxnm_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fmaxnm z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <16 x half>, ptr %a
  %op2 = load <16 x half>, ptr %b
  %res = call <16 x half> @llvm.maxnum.v16f16(<16 x half> %op1, <16 x half> %op2)
  store <16 x half> %res, ptr %a
  ret void
}
define void @fmaxnm_v32f16(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-LABEL: fmaxnm_v32f16:
; VBITS_EQ_256:       // %bb.0:
; VBITS_EQ_256-NEXT:    ptrue p0.h, vl16
; VBITS_EQ_256-NEXT:    mov x8, #16 // =0x10
; VBITS_EQ_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_EQ_256-NEXT:    ld1h { z1.h }, p0/z, [x1, x8, lsl #1]
; VBITS_EQ_256-NEXT:    ld1h { z2.h }, p0/z, [x0]
; VBITS_EQ_256-NEXT:    ld1h { z3.h }, p0/z, [x1]
; VBITS_EQ_256-NEXT:    fmaxnm z0.h, p0/m, z0.h, z1.h
; VBITS_EQ_256-NEXT:    movprfx z1, z2
; VBITS_EQ_256-NEXT:    fmaxnm z1.h, p0/m, z1.h, z3.h
; VBITS_EQ_256-NEXT:    st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_EQ_256-NEXT:    st1h { z1.h }, p0, [x0]
; VBITS_EQ_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fmaxnm_v32f16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1h { z1.h }, p0/z, [x1]
; VBITS_GE_512-NEXT:    fmaxnm z0.h, p0/m, z0.h, z1.h
; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <32 x half>, ptr %a
  %op2 = load <32 x half>, ptr %b
  %res = call <32 x half> @llvm.maxnum.v32f16(<32 x half> %op1, <32 x half> %op2)
  store <32 x half> %res, ptr %a
  ret void
}
define void @fmaxnm_v64f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fmaxnm_v64f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl64
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fmaxnm z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <64 x half>, ptr %a
  %op2 = load <64 x half>, ptr %b
  %res = call <64 x half> @llvm.maxnum.v64f16(<64 x half> %op1, <64 x half> %op2)
  store <64 x half> %res, ptr %a
  ret void
}
define void @fmaxnm_v128f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fmaxnm_v128f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl128
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fmaxnm z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <128 x half>, ptr %a
  %op2 = load <128 x half>, ptr %b
  %res = call <128 x half> @llvm.maxnum.v128f16(<128 x half> %op1, <128 x half> %op2)
  store <128 x half> %res, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <2 x float> @fmaxnm_v2f32(<2 x float> %op1, <2 x float> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fmaxnm_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmaxnm v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    ret
  %res = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %op1, <2 x float> %op2)
  ret <2 x float> %res
}

; Don't use SVE for 128-bit vectors.
define <4 x float> @fmaxnm_v4f32(<4 x float> %op1, <4 x float> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fmaxnm_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmaxnm v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
  %res = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %op1, <4 x float> %op2)
  ret <4 x float> %res
}
define void @fmaxnm_v8f32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fmaxnm_v8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    fmaxnm z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <8 x float>, ptr %a
  %op2 = load <8 x float>, ptr %b
  %res = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %op1, <8 x float> %op2)
  store <8 x float> %res, ptr %a
  ret void
}
define void @fmaxnm_v16f32(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-LABEL: fmaxnm_v16f32:
; VBITS_EQ_256:       // %bb.0:
; VBITS_EQ_256-NEXT:    ptrue p0.s, vl8
; VBITS_EQ_256-NEXT:    mov x8, #8 // =0x8
; VBITS_EQ_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_EQ_256-NEXT:    ld1w { z1.s }, p0/z, [x1, x8, lsl #2]
; VBITS_EQ_256-NEXT:    ld1w { z2.s }, p0/z, [x0]
; VBITS_EQ_256-NEXT:    ld1w { z3.s }, p0/z, [x1]
; VBITS_EQ_256-NEXT:    fmaxnm z0.s, p0/m, z0.s, z1.s
; VBITS_EQ_256-NEXT:    movprfx z1, z2
; VBITS_EQ_256-NEXT:    fmaxnm z1.s, p0/m, z1.s, z3.s
; VBITS_EQ_256-NEXT:    st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_EQ_256-NEXT:    st1w { z1.s }, p0, [x0]
; VBITS_EQ_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fmaxnm_v16f32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1w { z1.s }, p0/z, [x1]
; VBITS_GE_512-NEXT:    fmaxnm z0.s, p0/m, z0.s, z1.s
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <16 x float>, ptr %a
  %op2 = load <16 x float>, ptr %b
  %res = call <16 x float> @llvm.maxnum.v16f32(<16 x float> %op1, <16 x float> %op2)
  store <16 x float> %res, ptr %a
  ret void
}
define void @fmaxnm_v32f32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fmaxnm_v32f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    fmaxnm z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <32 x float>, ptr %a
  %op2 = load <32 x float>, ptr %b
  %res = call <32 x float> @llvm.maxnum.v32f32(<32 x float> %op1, <32 x float> %op2)
  store <32 x float> %res, ptr %a
  ret void
}
define void @fmaxnm_v64f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fmaxnm_v64f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    fmaxnm z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <64 x float>, ptr %a
  %op2 = load <64 x float>, ptr %b
  %res = call <64 x float> @llvm.maxnum.v64f32(<64 x float> %op1, <64 x float> %op2)
  store <64 x float> %res, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <1 x double> @fmaxnm_v1f64(<1 x double> %op1, <1 x double> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fmaxnm_v1f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmaxnm d0, d0, d1
; CHECK-NEXT:    ret
  %res = call <1 x double> @llvm.maxnum.v1f64(<1 x double> %op1, <1 x double> %op2)
  ret <1 x double> %res
}

; Don't use SVE for 128-bit vectors.
define <2 x double> @fmaxnm_v2f64(<2 x double> %op1, <2 x double> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fmaxnm_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmaxnm v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    ret
  %res = call <2 x double> @llvm.maxnum.v2f64(<2 x double> %op1, <2 x double> %op2)
  ret <2 x double> %res
}
define void @fmaxnm_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fmaxnm_v4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    fmaxnm z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <4 x double>, ptr %a
  %op2 = load <4 x double>, ptr %b
  %res = call <4 x double> @llvm.maxnum.v4f64(<4 x double> %op1, <4 x double> %op2)
  store <4 x double> %res, ptr %a
  ret void
}
define void @fmaxnm_v8f64(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-LABEL: fmaxnm_v8f64:
; VBITS_EQ_256:       // %bb.0:
; VBITS_EQ_256-NEXT:    ptrue p0.d, vl4
; VBITS_EQ_256-NEXT:    mov x8, #4 // =0x4
; VBITS_EQ_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_EQ_256-NEXT:    ld1d { z1.d }, p0/z, [x1, x8, lsl #3]
; VBITS_EQ_256-NEXT:    ld1d { z2.d }, p0/z, [x0]
; VBITS_EQ_256-NEXT:    ld1d { z3.d }, p0/z, [x1]
; VBITS_EQ_256-NEXT:    fmaxnm z0.d, p0/m, z0.d, z1.d
; VBITS_EQ_256-NEXT:    movprfx z1, z2
; VBITS_EQ_256-NEXT:    fmaxnm z1.d, p0/m, z1.d, z3.d
; VBITS_EQ_256-NEXT:    st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_EQ_256-NEXT:    st1d { z1.d }, p0, [x0]
; VBITS_EQ_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fmaxnm_v8f64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1d { z1.d }, p0/z, [x1]
; VBITS_GE_512-NEXT:    fmaxnm z0.d, p0/m, z0.d, z1.d
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <8 x double>, ptr %a
  %op2 = load <8 x double>, ptr %b
  %res = call <8 x double> @llvm.maxnum.v8f64(<8 x double> %op1, <8 x double> %op2)
  store <8 x double> %res, ptr %a
  ret void
}
define void @fmaxnm_v16f64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fmaxnm_v16f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    fmaxnm z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <16 x double>, ptr %a
  %op2 = load <16 x double>, ptr %b
  %res = call <16 x double> @llvm.maxnum.v16f64(<16 x double> %op1, <16 x double> %op2)
  store <16 x double> %res, ptr %a
  ret void
}
define void @fmaxnm_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fmaxnm_v32f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    fmaxnm z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <32 x double>, ptr %a
  %op2 = load <32 x double>, ptr %b
  %res = call <32 x double> @llvm.maxnum.v32f64(<32 x double> %op1, <32 x double> %op2)
  store <32 x double> %res, ptr %a
  ret void
}

;
; FMINNM
;

; Don't use SVE for 64-bit vectors.
define <4 x half> @fminnm_v4f16(<4 x half> %op1, <4 x half> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fminnm_v4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fminnm v0.4h, v0.4h, v1.4h
; CHECK-NEXT:    ret
  %res = call <4 x half> @llvm.minnum.v4f16(<4 x half> %op1, <4 x half> %op2)
  ret <4 x half> %res
}

; Don't use SVE for 128-bit vectors.
define <8 x half> @fminnm_v8f16(<8 x half> %op1, <8 x half> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fminnm_v8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fminnm v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %res = call <8 x half> @llvm.minnum.v8f16(<8 x half> %op1, <8 x half> %op2)
  ret <8 x half> %res
}
define void @fminnm_v16f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fminnm_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fminnm z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <16 x half>, ptr %a
  %op2 = load <16 x half>, ptr %b
  %res = call <16 x half> @llvm.minnum.v16f16(<16 x half> %op1, <16 x half> %op2)
  store <16 x half> %res, ptr %a
  ret void
}
define void @fminnm_v32f16(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-LABEL: fminnm_v32f16:
; VBITS_EQ_256:       // %bb.0:
; VBITS_EQ_256-NEXT:    ptrue p0.h, vl16
; VBITS_EQ_256-NEXT:    mov x8, #16 // =0x10
; VBITS_EQ_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_EQ_256-NEXT:    ld1h { z1.h }, p0/z, [x1, x8, lsl #1]
; VBITS_EQ_256-NEXT:    ld1h { z2.h }, p0/z, [x0]
; VBITS_EQ_256-NEXT:    ld1h { z3.h }, p0/z, [x1]
; VBITS_EQ_256-NEXT:    fminnm z0.h, p0/m, z0.h, z1.h
; VBITS_EQ_256-NEXT:    movprfx z1, z2
; VBITS_EQ_256-NEXT:    fminnm z1.h, p0/m, z1.h, z3.h
; VBITS_EQ_256-NEXT:    st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_EQ_256-NEXT:    st1h { z1.h }, p0, [x0]
; VBITS_EQ_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fminnm_v32f16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1h { z1.h }, p0/z, [x1]
; VBITS_GE_512-NEXT:    fminnm z0.h, p0/m, z0.h, z1.h
; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <32 x half>, ptr %a
  %op2 = load <32 x half>, ptr %b
  %res = call <32 x half> @llvm.minnum.v32f16(<32 x half> %op1, <32 x half> %op2)
  store <32 x half> %res, ptr %a
  ret void
}
define void @fminnm_v64f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fminnm_v64f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl64
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fminnm z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <64 x half>, ptr %a
  %op2 = load <64 x half>, ptr %b
  %res = call <64 x half> @llvm.minnum.v64f16(<64 x half> %op1, <64 x half> %op2)
  store <64 x half> %res, ptr %a
  ret void
}
define void @fminnm_v128f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fminnm_v128f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl128
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fminnm z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <128 x half>, ptr %a
  %op2 = load <128 x half>, ptr %b
  %res = call <128 x half> @llvm.minnum.v128f16(<128 x half> %op1, <128 x half> %op2)
  store <128 x half> %res, ptr %a
  ret void
}
411
412; Don't use SVE for 64-bit vectors.
define <2 x float> @fminnm_v2f32(<2 x float> %op1, <2 x float> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fminnm_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fminnm v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    ret
  %res = call <2 x float> @llvm.minnum.v2f32(<2 x float> %op1, <2 x float> %op2)
  ret <2 x float> %res
}

; Don't use SVE for 128-bit vectors.
define <4 x float> @fminnm_v4f32(<4 x float> %op1, <4 x float> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fminnm_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fminnm v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
  %res = call <4 x float> @llvm.minnum.v4f32(<4 x float> %op1, <4 x float> %op2)
  ret <4 x float> %res
}
431
define void @fminnm_v8f32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fminnm_v8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    fminnm z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <8 x float>, ptr %a
  %op2 = load <8 x float>, ptr %b
  %res = call <8 x float> @llvm.minnum.v8f32(<8 x float> %op1, <8 x float> %op2)
  store <8 x float> %res, ptr %a
  ret void
}
447
define void @fminnm_v16f32(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-LABEL: fminnm_v16f32:
; VBITS_EQ_256:       // %bb.0:
; VBITS_EQ_256-NEXT:    ptrue p0.s, vl8
; VBITS_EQ_256-NEXT:    mov x8, #8 // =0x8
; VBITS_EQ_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_EQ_256-NEXT:    ld1w { z1.s }, p0/z, [x1, x8, lsl #2]
; VBITS_EQ_256-NEXT:    ld1w { z2.s }, p0/z, [x0]
; VBITS_EQ_256-NEXT:    ld1w { z3.s }, p0/z, [x1]
; VBITS_EQ_256-NEXT:    fminnm z0.s, p0/m, z0.s, z1.s
; VBITS_EQ_256-NEXT:    movprfx z1, z2
; VBITS_EQ_256-NEXT:    fminnm z1.s, p0/m, z1.s, z3.s
; VBITS_EQ_256-NEXT:    st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_EQ_256-NEXT:    st1w { z1.s }, p0, [x0]
; VBITS_EQ_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fminnm_v16f32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1w { z1.s }, p0/z, [x1]
; VBITS_GE_512-NEXT:    fminnm z0.s, p0/m, z0.s, z1.s
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <16 x float>, ptr %a
  %op2 = load <16 x float>, ptr %b
  %res = call <16 x float> @llvm.minnum.v16f32(<16 x float> %op1, <16 x float> %op2)
  store <16 x float> %res, ptr %a
  ret void
}
define void @fminnm_v32f32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fminnm_v32f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    fminnm z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <32 x float>, ptr %a
  %op2 = load <32 x float>, ptr %b
  %res = call <32 x float> @llvm.minnum.v32f32(<32 x float> %op1, <32 x float> %op2)
  store <32 x float> %res, ptr %a
  ret void
}
define void @fminnm_v64f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fminnm_v64f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    fminnm z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <64 x float>, ptr %a
  %op2 = load <64 x float>, ptr %b
  %res = call <64 x float> @llvm.minnum.v64f32(<64 x float> %op1, <64 x float> %op2)
  store <64 x float> %res, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <1 x double> @fminnm_v1f64(<1 x double> %op1, <1 x double> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fminnm_v1f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fminnm d0, d0, d1
; CHECK-NEXT:    ret
  %res = call <1 x double> @llvm.minnum.v1f64(<1 x double> %op1, <1 x double> %op2)
  ret <1 x double> %res
}
520
521; Don't use SVE for 128-bit vectors.
define <2 x double> @fminnm_v2f64(<2 x double> %op1, <2 x double> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fminnm_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fminnm v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    ret
  %res = call <2 x double> @llvm.minnum.v2f64(<2 x double> %op1, <2 x double> %op2)
  ret <2 x double> %res
}
530
define void @fminnm_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fminnm_v4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    fminnm z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <4 x double>, ptr %a
  %op2 = load <4 x double>, ptr %b
  %res = call <4 x double> @llvm.minnum.v4f64(<4 x double> %op1, <4 x double> %op2)
  store <4 x double> %res, ptr %a
  ret void
}
define void @fminnm_v8f64(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-LABEL: fminnm_v8f64:
; VBITS_EQ_256:       // %bb.0:
; VBITS_EQ_256-NEXT:    ptrue p0.d, vl4
; VBITS_EQ_256-NEXT:    mov x8, #4 // =0x4
; VBITS_EQ_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_EQ_256-NEXT:    ld1d { z1.d }, p0/z, [x1, x8, lsl #3]
; VBITS_EQ_256-NEXT:    ld1d { z2.d }, p0/z, [x0]
; VBITS_EQ_256-NEXT:    ld1d { z3.d }, p0/z, [x1]
; VBITS_EQ_256-NEXT:    fminnm z0.d, p0/m, z0.d, z1.d
; VBITS_EQ_256-NEXT:    movprfx z1, z2
; VBITS_EQ_256-NEXT:    fminnm z1.d, p0/m, z1.d, z3.d
; VBITS_EQ_256-NEXT:    st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_EQ_256-NEXT:    st1d { z1.d }, p0, [x0]
; VBITS_EQ_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fminnm_v8f64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1d { z1.d }, p0/z, [x1]
; VBITS_GE_512-NEXT:    fminnm z0.d, p0/m, z0.d, z1.d
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <8 x double>, ptr %a
  %op2 = load <8 x double>, ptr %b
  %res = call <8 x double> @llvm.minnum.v8f64(<8 x double> %op1, <8 x double> %op2)
  store <8 x double> %res, ptr %a
  ret void
}
define void @fminnm_v16f64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fminnm_v16f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    fminnm z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <16 x double>, ptr %a
  %op2 = load <16 x double>, ptr %b
  %res = call <16 x double> @llvm.minnum.v16f64(<16 x double> %op1, <16 x double> %op2)
  store <16 x double> %res, ptr %a
  ret void
}
define void @fminnm_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fminnm_v32f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    fminnm z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <32 x double>, ptr %a
  %op2 = load <32 x double>, ptr %b
  %res = call <32 x double> @llvm.minnum.v32f64(<32 x double> %op1, <32 x double> %op2)
  store <32 x double> %res, ptr %a
  ret void
}

;
; FMAX
;

; Don't use SVE for 64-bit vectors.
define <4 x half> @fmax_v4f16(<4 x half> %op1, <4 x half> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fmax_v4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmax v0.4h, v0.4h, v1.4h
; CHECK-NEXT:    ret
  %res = call <4 x half> @llvm.maximum.v4f16(<4 x half> %op1, <4 x half> %op2)
  ret <4 x half> %res
}

; Don't use SVE for 128-bit vectors.
define <8 x half> @fmax_v8f16(<8 x half> %op1, <8 x half> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fmax_v8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmax v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %res = call <8 x half> @llvm.maximum.v8f16(<8 x half> %op1, <8 x half> %op2)
  ret <8 x half> %res
}
define void @fmax_v16f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fmax_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fmax z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <16 x half>, ptr %a
  %op2 = load <16 x half>, ptr %b
  %res = call <16 x half> @llvm.maximum.v16f16(<16 x half> %op1, <16 x half> %op2)
  store <16 x half> %res, ptr %a
  ret void
}
define void @fmax_v32f16(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-LABEL: fmax_v32f16:
; VBITS_EQ_256:       // %bb.0:
; VBITS_EQ_256-NEXT:    ptrue p0.h, vl16
; VBITS_EQ_256-NEXT:    mov x8, #16 // =0x10
; VBITS_EQ_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_EQ_256-NEXT:    ld1h { z1.h }, p0/z, [x1, x8, lsl #1]
; VBITS_EQ_256-NEXT:    ld1h { z2.h }, p0/z, [x0]
; VBITS_EQ_256-NEXT:    ld1h { z3.h }, p0/z, [x1]
; VBITS_EQ_256-NEXT:    fmax z0.h, p0/m, z0.h, z1.h
; VBITS_EQ_256-NEXT:    movprfx z1, z2
; VBITS_EQ_256-NEXT:    fmax z1.h, p0/m, z1.h, z3.h
; VBITS_EQ_256-NEXT:    st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_EQ_256-NEXT:    st1h { z1.h }, p0, [x0]
; VBITS_EQ_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fmax_v32f16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1h { z1.h }, p0/z, [x1]
; VBITS_GE_512-NEXT:    fmax z0.h, p0/m, z0.h, z1.h
; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <32 x half>, ptr %a
  %op2 = load <32 x half>, ptr %b
  %res = call <32 x half> @llvm.maximum.v32f16(<32 x half> %op1, <32 x half> %op2)
  store <32 x half> %res, ptr %a
  ret void
}
define void @fmax_v64f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fmax_v64f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl64
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fmax z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <64 x half>, ptr %a
  %op2 = load <64 x half>, ptr %b
  %res = call <64 x half> @llvm.maximum.v64f16(<64 x half> %op1, <64 x half> %op2)
  store <64 x half> %res, ptr %a
  ret void
}
define void @fmax_v128f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fmax_v128f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl128
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fmax z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <128 x half>, ptr %a
  %op2 = load <128 x half>, ptr %b
  %res = call <128 x half> @llvm.maximum.v128f16(<128 x half> %op1, <128 x half> %op2)
  store <128 x half> %res, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <2 x float> @fmax_v2f32(<2 x float> %op1, <2 x float> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fmax_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmax v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    ret
  %res = call <2 x float> @llvm.maximum.v2f32(<2 x float> %op1, <2 x float> %op2)
  ret <2 x float> %res
}
722
723; Don't use SVE for 128-bit vectors.
define <4 x float> @fmax_v4f32(<4 x float> %op1, <4 x float> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fmax_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmax v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
  %res = call <4 x float> @llvm.maximum.v4f32(<4 x float> %op1, <4 x float> %op2)
  ret <4 x float> %res
}
732
define void @fmax_v8f32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fmax_v8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    fmax z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <8 x float>, ptr %a
  %op2 = load <8 x float>, ptr %b
  %res = call <8 x float> @llvm.maximum.v8f32(<8 x float> %op1, <8 x float> %op2)
  store <8 x float> %res, ptr %a
  ret void
}
define void @fmax_v16f32(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-LABEL: fmax_v16f32:
; VBITS_EQ_256:       // %bb.0:
; VBITS_EQ_256-NEXT:    ptrue p0.s, vl8
; VBITS_EQ_256-NEXT:    mov x8, #8 // =0x8
; VBITS_EQ_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_EQ_256-NEXT:    ld1w { z1.s }, p0/z, [x1, x8, lsl #2]
; VBITS_EQ_256-NEXT:    ld1w { z2.s }, p0/z, [x0]
; VBITS_EQ_256-NEXT:    ld1w { z3.s }, p0/z, [x1]
; VBITS_EQ_256-NEXT:    fmax z0.s, p0/m, z0.s, z1.s
; VBITS_EQ_256-NEXT:    movprfx z1, z2
; VBITS_EQ_256-NEXT:    fmax z1.s, p0/m, z1.s, z3.s
; VBITS_EQ_256-NEXT:    st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_EQ_256-NEXT:    st1w { z1.s }, p0, [x0]
; VBITS_EQ_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fmax_v16f32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1w { z1.s }, p0/z, [x1]
; VBITS_GE_512-NEXT:    fmax z0.s, p0/m, z0.s, z1.s
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <16 x float>, ptr %a
  %op2 = load <16 x float>, ptr %b
  %res = call <16 x float> @llvm.maximum.v16f32(<16 x float> %op1, <16 x float> %op2)
  store <16 x float> %res, ptr %a
  ret void
}
define void @fmax_v32f32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fmax_v32f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    fmax z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <32 x float>, ptr %a
  %op2 = load <32 x float>, ptr %b
  %res = call <32 x float> @llvm.maximum.v32f32(<32 x float> %op1, <32 x float> %op2)
  store <32 x float> %res, ptr %a
  ret void
}
define void @fmax_v64f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fmax_v64f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    fmax z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <64 x float>, ptr %a
  %op2 = load <64 x float>, ptr %b
  %res = call <64 x float> @llvm.maximum.v64f32(<64 x float> %op1, <64 x float> %op2)
  store <64 x float> %res, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <1 x double> @fmax_v1f64(<1 x double> %op1, <1 x double> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fmax_v1f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmax d0, d0, d1
; CHECK-NEXT:    ret
  %res = call <1 x double> @llvm.maximum.v1f64(<1 x double> %op1, <1 x double> %op2)
  ret <1 x double> %res
}
821
822; Don't use SVE for 128-bit vectors.
define <2 x double> @fmax_v2f64(<2 x double> %op1, <2 x double> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fmax_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmax v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    ret
; Expect NEON fmax for the 128-bit vector; SVE is not used at this size.
  %res = call <2 x double> @llvm.maximum.v2f64(<2 x double> %op1, <2 x double> %op2)
  ret <2 x double> %res
}
831
define void @fmax_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fmax_v4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    fmax z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
; With vscale_range(2,0) the 256-bit <4 x double> fits one SVE register, so a single predicated FMAX is expected.
  %op1 = load <4 x double>, ptr %a
  %op2 = load <4 x double>, ptr %b
  %res = call <4 x double> @llvm.maximum.v4f64(<4 x double> %op1, <4 x double> %op2)
  store <4 x double> %res, ptr %a
  ret void
}
847
define void @fmax_v8f64(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-LABEL: fmax_v8f64:
; VBITS_EQ_256:       // %bb.0:
; VBITS_EQ_256-NEXT:    ptrue p0.d, vl4
; VBITS_EQ_256-NEXT:    mov x8, #4 // =0x4
; VBITS_EQ_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_EQ_256-NEXT:    ld1d { z1.d }, p0/z, [x1, x8, lsl #3]
; VBITS_EQ_256-NEXT:    ld1d { z2.d }, p0/z, [x0]
; VBITS_EQ_256-NEXT:    ld1d { z3.d }, p0/z, [x1]
; VBITS_EQ_256-NEXT:    fmax z0.d, p0/m, z0.d, z1.d
; VBITS_EQ_256-NEXT:    movprfx z1, z2
; VBITS_EQ_256-NEXT:    fmax z1.d, p0/m, z1.d, z3.d
; VBITS_EQ_256-NEXT:    st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_EQ_256-NEXT:    st1d { z1.d }, p0, [x0]
; VBITS_EQ_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fmax_v8f64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1d { z1.d }, p0/z, [x1]
; VBITS_GE_512-NEXT:    fmax z0.d, p0/m, z0.d, z1.d
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
; 512-bit operation: at VBITS=256 it is split into two 256-bit halves, at VBITS>=512 a single predicated FMAX suffices.
  %op1 = load <8 x double>, ptr %a
  %op2 = load <8 x double>, ptr %b
  %res = call <8 x double> @llvm.maximum.v8f64(<8 x double> %op1, <8 x double> %op2)
  store <8 x double> %res, ptr %a
  ret void
}
878
define void @fmax_v16f64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fmax_v16f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    fmax z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
; With vscale_range(8,0) the 1024-bit <16 x double> fits one SVE register, so a single predicated FMAX is expected.
  %op1 = load <16 x double>, ptr %a
  %op2 = load <16 x double>, ptr %b
  %res = call <16 x double> @llvm.maximum.v16f64(<16 x double> %op1, <16 x double> %op2)
  store <16 x double> %res, ptr %a
  ret void
}
894
define void @fmax_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fmax_v32f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    fmax z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
; With vscale_range(16,0) the 2048-bit <32 x double> fits one SVE register, so a single predicated FMAX is expected.
  %op1 = load <32 x double>, ptr %a
  %op2 = load <32 x double>, ptr %b
  %res = call <32 x double> @llvm.maximum.v32f64(<32 x double> %op1, <32 x double> %op2)
  store <32 x double> %res, ptr %a
  ret void
}
910
911;
912; FMIN
913;
914
915; Don't use SVE for 64-bit vectors.
define <4 x half> @fmin_v4f16(<4 x half> %op1, <4 x half> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fmin_v4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmin v0.4h, v0.4h, v1.4h
; CHECK-NEXT:    ret
; Expect NEON fmin for the 64-bit vector; SVE is not used at this size.
  %res = call <4 x half> @llvm.minimum.v4f16(<4 x half> %op1, <4 x half> %op2)
  ret <4 x half> %res
}
924
925; Don't use SVE for 128-bit vectors.
define <8 x half> @fmin_v8f16(<8 x half> %op1, <8 x half> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fmin_v8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmin v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
; Expect NEON fmin for the 128-bit vector; SVE is not used at this size.
  %res = call <8 x half> @llvm.minimum.v8f16(<8 x half> %op1, <8 x half> %op2)
  ret <8 x half> %res
}
934
define void @fmin_v16f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fmin_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fmin z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
; With vscale_range(2,0) the 256-bit <16 x half> fits one SVE register, so a single predicated FMIN is expected.
  %op1 = load <16 x half>, ptr %a
  %op2 = load <16 x half>, ptr %b
  %res = call <16 x half> @llvm.minimum.v16f16(<16 x half> %op1, <16 x half> %op2)
  store <16 x half> %res, ptr %a
  ret void
}
950
define void @fmin_v32f16(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-LABEL: fmin_v32f16:
; VBITS_EQ_256:       // %bb.0:
; VBITS_EQ_256-NEXT:    ptrue p0.h, vl16
; VBITS_EQ_256-NEXT:    mov x8, #16 // =0x10
; VBITS_EQ_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_EQ_256-NEXT:    ld1h { z1.h }, p0/z, [x1, x8, lsl #1]
; VBITS_EQ_256-NEXT:    ld1h { z2.h }, p0/z, [x0]
; VBITS_EQ_256-NEXT:    ld1h { z3.h }, p0/z, [x1]
; VBITS_EQ_256-NEXT:    fmin z0.h, p0/m, z0.h, z1.h
; VBITS_EQ_256-NEXT:    movprfx z1, z2
; VBITS_EQ_256-NEXT:    fmin z1.h, p0/m, z1.h, z3.h
; VBITS_EQ_256-NEXT:    st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_EQ_256-NEXT:    st1h { z1.h }, p0, [x0]
; VBITS_EQ_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fmin_v32f16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1h { z1.h }, p0/z, [x1]
; VBITS_GE_512-NEXT:    fmin z0.h, p0/m, z0.h, z1.h
; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
; 512-bit operation: at VBITS=256 it is split into two 256-bit halves, at VBITS>=512 a single predicated FMIN suffices.
  %op1 = load <32 x half>, ptr %a
  %op2 = load <32 x half>, ptr %b
  %res = call <32 x half> @llvm.minimum.v32f16(<32 x half> %op1, <32 x half> %op2)
  store <32 x half> %res, ptr %a
  ret void
}
981
define void @fmin_v64f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fmin_v64f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl64
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fmin z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
; With vscale_range(8,0) the 1024-bit <64 x half> fits one SVE register, so a single predicated FMIN is expected.
  %op1 = load <64 x half>, ptr %a
  %op2 = load <64 x half>, ptr %b
  %res = call <64 x half> @llvm.minimum.v64f16(<64 x half> %op1, <64 x half> %op2)
  store <64 x half> %res, ptr %a
  ret void
}
997
define void @fmin_v128f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fmin_v128f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl128
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fmin z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
; With vscale_range(16,0) the 2048-bit <128 x half> fits one SVE register, so a single predicated FMIN is expected.
  %op1 = load <128 x half>, ptr %a
  %op2 = load <128 x half>, ptr %b
  %res = call <128 x half> @llvm.minimum.v128f16(<128 x half> %op1, <128 x half> %op2)
  store <128 x half> %res, ptr %a
  ret void
}
1013
1014; Don't use SVE for 64-bit vectors.
define <2 x float> @fmin_v2f32(<2 x float> %op1, <2 x float> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fmin_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmin v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    ret
; Expect NEON fmin for the 64-bit vector; SVE is not used at this size.
  %res = call <2 x float> @llvm.minimum.v2f32(<2 x float> %op1, <2 x float> %op2)
  ret <2 x float> %res
}
1023
1024; Don't use SVE for 128-bit vectors.
define <4 x float> @fmin_v4f32(<4 x float> %op1, <4 x float> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fmin_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmin v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
; Expect NEON fmin for the 128-bit vector; SVE is not used at this size.
  %res = call <4 x float> @llvm.minimum.v4f32(<4 x float> %op1, <4 x float> %op2)
  ret <4 x float> %res
}
1033
define void @fmin_v8f32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fmin_v8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    fmin z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
; With vscale_range(2,0) the 256-bit <8 x float> fits one SVE register, so a single predicated FMIN is expected.
  %op1 = load <8 x float>, ptr %a
  %op2 = load <8 x float>, ptr %b
  %res = call <8 x float> @llvm.minimum.v8f32(<8 x float> %op1, <8 x float> %op2)
  store <8 x float> %res, ptr %a
  ret void
}
1049
define void @fmin_v16f32(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-LABEL: fmin_v16f32:
; VBITS_EQ_256:       // %bb.0:
; VBITS_EQ_256-NEXT:    ptrue p0.s, vl8
; VBITS_EQ_256-NEXT:    mov x8, #8 // =0x8
; VBITS_EQ_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_EQ_256-NEXT:    ld1w { z1.s }, p0/z, [x1, x8, lsl #2]
; VBITS_EQ_256-NEXT:    ld1w { z2.s }, p0/z, [x0]
; VBITS_EQ_256-NEXT:    ld1w { z3.s }, p0/z, [x1]
; VBITS_EQ_256-NEXT:    fmin z0.s, p0/m, z0.s, z1.s
; VBITS_EQ_256-NEXT:    movprfx z1, z2
; VBITS_EQ_256-NEXT:    fmin z1.s, p0/m, z1.s, z3.s
; VBITS_EQ_256-NEXT:    st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_EQ_256-NEXT:    st1w { z1.s }, p0, [x0]
; VBITS_EQ_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fmin_v16f32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1w { z1.s }, p0/z, [x1]
; VBITS_GE_512-NEXT:    fmin z0.s, p0/m, z0.s, z1.s
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
; 512-bit operation: at VBITS=256 it is split into two 256-bit halves, at VBITS>=512 a single predicated FMIN suffices.
  %op1 = load <16 x float>, ptr %a
  %op2 = load <16 x float>, ptr %b
  %res = call <16 x float> @llvm.minimum.v16f32(<16 x float> %op1, <16 x float> %op2)
  store <16 x float> %res, ptr %a
  ret void
}
1080
define void @fmin_v32f32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fmin_v32f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    fmin z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
; With vscale_range(8,0) the 1024-bit <32 x float> fits one SVE register, so a single predicated FMIN is expected.
  %op1 = load <32 x float>, ptr %a
  %op2 = load <32 x float>, ptr %b
  %res = call <32 x float> @llvm.minimum.v32f32(<32 x float> %op1, <32 x float> %op2)
  store <32 x float> %res, ptr %a
  ret void
}
1096
define void @fmin_v64f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fmin_v64f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    fmin z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
; With vscale_range(16,0) the 2048-bit <64 x float> fits one SVE register, so a single predicated FMIN is expected.
  %op1 = load <64 x float>, ptr %a
  %op2 = load <64 x float>, ptr %b
  %res = call <64 x float> @llvm.minimum.v64f32(<64 x float> %op1, <64 x float> %op2)
  store <64 x float> %res, ptr %a
  ret void
}
1112
1113; Don't use SVE for 64-bit vectors.
define <1 x double> @fmin_v1f64(<1 x double> %op1, <1 x double> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fmin_v1f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmin d0, d0, d1
; CHECK-NEXT:    ret
; Expect scalar/NEON fmin for the 64-bit vector; SVE is not used at this size.
  %res = call <1 x double> @llvm.minimum.v1f64(<1 x double> %op1, <1 x double> %op2)
  ret <1 x double> %res
}
1122
1123; Don't use SVE for 128-bit vectors.
define <2 x double> @fmin_v2f64(<2 x double> %op1, <2 x double> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fmin_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmin v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    ret
; Expect NEON fmin for the 128-bit vector; SVE is not used at this size.
  %res = call <2 x double> @llvm.minimum.v2f64(<2 x double> %op1, <2 x double> %op2)
  ret <2 x double> %res
}
1132
define void @fmin_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fmin_v4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    fmin z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
; With vscale_range(2,0) the 256-bit <4 x double> fits one SVE register, so a single predicated FMIN is expected.
  %op1 = load <4 x double>, ptr %a
  %op2 = load <4 x double>, ptr %b
  %res = call <4 x double> @llvm.minimum.v4f64(<4 x double> %op1, <4 x double> %op2)
  store <4 x double> %res, ptr %a
  ret void
}
1148
define void @fmin_v8f64(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-LABEL: fmin_v8f64:
; VBITS_EQ_256:       // %bb.0:
; VBITS_EQ_256-NEXT:    ptrue p0.d, vl4
; VBITS_EQ_256-NEXT:    mov x8, #4 // =0x4
; VBITS_EQ_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_EQ_256-NEXT:    ld1d { z1.d }, p0/z, [x1, x8, lsl #3]
; VBITS_EQ_256-NEXT:    ld1d { z2.d }, p0/z, [x0]
; VBITS_EQ_256-NEXT:    ld1d { z3.d }, p0/z, [x1]
; VBITS_EQ_256-NEXT:    fmin z0.d, p0/m, z0.d, z1.d
; VBITS_EQ_256-NEXT:    movprfx z1, z2
; VBITS_EQ_256-NEXT:    fmin z1.d, p0/m, z1.d, z3.d
; VBITS_EQ_256-NEXT:    st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_EQ_256-NEXT:    st1d { z1.d }, p0, [x0]
; VBITS_EQ_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fmin_v8f64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1d { z1.d }, p0/z, [x1]
; VBITS_GE_512-NEXT:    fmin z0.d, p0/m, z0.d, z1.d
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
; 512-bit operation: at VBITS=256 it is split into two 256-bit halves, at VBITS>=512 a single predicated FMIN suffices.
  %op1 = load <8 x double>, ptr %a
  %op2 = load <8 x double>, ptr %b
  %res = call <8 x double> @llvm.minimum.v8f64(<8 x double> %op1, <8 x double> %op2)
  store <8 x double> %res, ptr %a
  ret void
}
1179
define void @fmin_v16f64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fmin_v16f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    fmin z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
; With vscale_range(8,0) the 1024-bit <16 x double> fits one SVE register, so a single predicated FMIN is expected.
  %op1 = load <16 x double>, ptr %a
  %op2 = load <16 x double>, ptr %b
  %res = call <16 x double> @llvm.minimum.v16f64(<16 x double> %op1, <16 x double> %op2)
  store <16 x double> %res, ptr %a
  ret void
}
1195
define void @fmin_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fmin_v32f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    fmin z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
; With vscale_range(16,0) the 2048-bit <32 x double> fits one SVE register, so a single predicated FMIN is expected.
  %op1 = load <32 x double>, ptr %a
  %op2 = load <32 x double>, ptr %b
  %res = call <32 x double> @llvm.minimum.v32f64(<32 x double> %op1, <32 x double> %op2)
  store <32 x double> %res, ptr %a
  ret void
}
1211
1212attributes #0 = { "target-features"="+sve" }
1213
1214declare <4 x half> @llvm.minnum.v4f16(<4 x half>, <4 x half>)
1215declare <8 x half> @llvm.minnum.v8f16(<8 x half>, <8 x half>)
1216declare <16 x half> @llvm.minnum.v16f16(<16 x half>, <16 x half>)
1217declare <32 x half> @llvm.minnum.v32f16(<32 x half>, <32 x half>)
1218declare <64 x half> @llvm.minnum.v64f16(<64 x half>, <64 x half>)
1219declare <128 x half> @llvm.minnum.v128f16(<128 x half>, <128 x half>)
1220declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>)
1221declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>)
1222declare <8 x float> @llvm.minnum.v8f32(<8 x float>, <8 x float>)
1223declare <16 x float> @llvm.minnum.v16f32(<16 x float>, <16 x float>)
1224declare <32 x float> @llvm.minnum.v32f32(<32 x float>, <32 x float>)
1225declare <64 x float> @llvm.minnum.v64f32(<64 x float>, <64 x float>)
1226declare <1 x double> @llvm.minnum.v1f64(<1 x double>, <1 x double>)
1227declare <2 x double> @llvm.minnum.v2f64(<2 x double>, <2 x double>)
1228declare <4 x double> @llvm.minnum.v4f64(<4 x double>, <4 x double>)
1229declare <8 x double> @llvm.minnum.v8f64(<8 x double>, <8 x double>)
1230declare <16 x double> @llvm.minnum.v16f64(<16 x double>, <16 x double>)
1231declare <32 x double> @llvm.minnum.v32f64(<32 x double>, <32 x double>)
1232
1233declare <4 x half> @llvm.maxnum.v4f16(<4 x half>, <4 x half>)
1234declare <8 x half> @llvm.maxnum.v8f16(<8 x half>, <8 x half>)
1235declare <16 x half> @llvm.maxnum.v16f16(<16 x half>, <16 x half>)
1236declare <32 x half> @llvm.maxnum.v32f16(<32 x half>, <32 x half>)
1237declare <64 x half> @llvm.maxnum.v64f16(<64 x half>, <64 x half>)
1238declare <128 x half> @llvm.maxnum.v128f16(<128 x half>, <128 x half>)
1239declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>)
1240declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>)
1241declare <8 x float> @llvm.maxnum.v8f32(<8 x float>, <8 x float>)
1242declare <16 x float> @llvm.maxnum.v16f32(<16 x float>, <16 x float>)
1243declare <32 x float> @llvm.maxnum.v32f32(<32 x float>, <32 x float>)
1244declare <64 x float> @llvm.maxnum.v64f32(<64 x float>, <64 x float>)
1245declare <1 x double> @llvm.maxnum.v1f64(<1 x double>, <1 x double>)
1246declare <2 x double> @llvm.maxnum.v2f64(<2 x double>, <2 x double>)
1247declare <4 x double> @llvm.maxnum.v4f64(<4 x double>, <4 x double>)
1248declare <8 x double> @llvm.maxnum.v8f64(<8 x double>, <8 x double>)
1249declare <16 x double> @llvm.maxnum.v16f64(<16 x double>, <16 x double>)
1250declare <32 x double> @llvm.maxnum.v32f64(<32 x double>, <32 x double>)
1251
1252declare <4 x half> @llvm.minimum.v4f16(<4 x half>, <4 x half>)
1253declare <8 x half> @llvm.minimum.v8f16(<8 x half>, <8 x half>)
1254declare <16 x half> @llvm.minimum.v16f16(<16 x half>, <16 x half>)
1255declare <32 x half> @llvm.minimum.v32f16(<32 x half>, <32 x half>)
1256declare <64 x half> @llvm.minimum.v64f16(<64 x half>, <64 x half>)
1257declare <128 x half> @llvm.minimum.v128f16(<128 x half>, <128 x half>)
1258declare <2 x float> @llvm.minimum.v2f32(<2 x float>, <2 x float>)
1259declare <4 x float> @llvm.minimum.v4f32(<4 x float>, <4 x float>)
1260declare <8 x float> @llvm.minimum.v8f32(<8 x float>, <8 x float>)
1261declare <16 x float> @llvm.minimum.v16f32(<16 x float>, <16 x float>)
1262declare <32 x float> @llvm.minimum.v32f32(<32 x float>, <32 x float>)
1263declare <64 x float> @llvm.minimum.v64f32(<64 x float>, <64 x float>)
1264declare <1 x double> @llvm.minimum.v1f64(<1 x double>, <1 x double>)
1265declare <2 x double> @llvm.minimum.v2f64(<2 x double>, <2 x double>)
1266declare <4 x double> @llvm.minimum.v4f64(<4 x double>, <4 x double>)
1267declare <8 x double> @llvm.minimum.v8f64(<8 x double>, <8 x double>)
1268declare <16 x double> @llvm.minimum.v16f64(<16 x double>, <16 x double>)
1269declare <32 x double> @llvm.minimum.v32f64(<32 x double>, <32 x double>)
1270
1271declare <4 x half> @llvm.maximum.v4f16(<4 x half>, <4 x half>)
1272declare <8 x half> @llvm.maximum.v8f16(<8 x half>, <8 x half>)
1273declare <16 x half> @llvm.maximum.v16f16(<16 x half>, <16 x half>)
1274declare <32 x half> @llvm.maximum.v32f16(<32 x half>, <32 x half>)
1275declare <64 x half> @llvm.maximum.v64f16(<64 x half>, <64 x half>)
1276declare <128 x half> @llvm.maximum.v128f16(<128 x half>, <128 x half>)
1277declare <2 x float> @llvm.maximum.v2f32(<2 x float>, <2 x float>)
1278declare <4 x float> @llvm.maximum.v4f32(<4 x float>, <4 x float>)
1279declare <8 x float> @llvm.maximum.v8f32(<8 x float>, <8 x float>)
1280declare <16 x float> @llvm.maximum.v16f32(<16 x float>, <16 x float>)
1281declare <32 x float> @llvm.maximum.v32f32(<32 x float>, <32 x float>)
1282declare <64 x float> @llvm.maximum.v64f32(<64 x float>, <64 x float>)
1283declare <1 x double> @llvm.maximum.v1f64(<1 x double>, <1 x double>)
1284declare <2 x double> @llvm.maximum.v2f64(<2 x double>, <2 x double>)
1285declare <4 x double> @llvm.maximum.v4f64(<4 x double>, <4 x double>)
1286declare <8 x double> @llvm.maximum.v8f64(<8 x double>, <8 x double>)
1287declare <16 x double> @llvm.maximum.v16f64(<16 x double>, <16 x double>)
1288declare <32 x double> @llvm.maximum.v32f64(<32 x double>, <32 x double>)
1289