xref: /llvm-project/llvm/test/CodeGen/AArch64/arm64-vmax.ll (revision 1c87d5c4fc55cdd67bc879d918480148e64016be)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
3; RUN: llc < %s -global-isel -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
4
; Loads two <8 x i8> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.smax.v8i8, and expects one smax.8b after the two loads.
define <8 x i8> @smax_8b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: smax_8b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    smax.8b v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <8 x i8>, ptr %A
  %rhs = load <8 x i8>, ptr %B
  %res = call <8 x i8> @llvm.aarch64.neon.smax.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
  ret <8 x i8> %res
}
17
; Loads two <16 x i8> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.smax.v16i8, and expects one smax.16b after the two loads.
define <16 x i8> @smax_16b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: smax_16b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    smax.16b v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <16 x i8>, ptr %A
  %rhs = load <16 x i8>, ptr %B
  %res = call <16 x i8> @llvm.aarch64.neon.smax.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
  ret <16 x i8> %res
}
30
; Loads two <4 x i16> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.smax.v4i16, and expects one smax.4h after the two loads.
define <4 x i16> @smax_4h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: smax_4h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    smax.4h v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <4 x i16>, ptr %A
  %rhs = load <4 x i16>, ptr %B
  %res = call <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
  ret <4 x i16> %res
}
43
; Loads two <8 x i16> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.smax.v8i16, and expects one smax.8h after the two loads.
define <8 x i16> @smax_8h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: smax_8h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    smax.8h v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <8 x i16>, ptr %A
  %rhs = load <8 x i16>, ptr %B
  %res = call <8 x i16> @llvm.aarch64.neon.smax.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
  ret <8 x i16> %res
}
56
; Loads two <2 x i32> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.smax.v2i32, and expects one smax.2s after the two loads.
define <2 x i32> @smax_2s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: smax_2s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    smax.2s v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <2 x i32>, ptr %A
  %rhs = load <2 x i32>, ptr %B
  %res = call <2 x i32> @llvm.aarch64.neon.smax.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
  ret <2 x i32> %res
}
69
; Loads two <4 x i32> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.smax.v4i32, and expects one smax.4s after the two loads.
define <4 x i32> @smax_4s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: smax_4s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    smax.4s v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <4 x i32>, ptr %A
  %rhs = load <4 x i32>, ptr %B
  %res = call <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
  ret <4 x i32> %res
}
82
83declare <8 x i8> @llvm.aarch64.neon.smax.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
84declare <16 x i8> @llvm.aarch64.neon.smax.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
85declare <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
86declare <8 x i16> @llvm.aarch64.neon.smax.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
87declare <2 x i32> @llvm.aarch64.neon.smax.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
88declare <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
89
; Loads two <8 x i8> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.umax.v8i8, and expects one umax.8b after the two loads.
define <8 x i8> @umax_8b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: umax_8b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    umax.8b v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <8 x i8>, ptr %A
  %rhs = load <8 x i8>, ptr %B
  %res = call <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
  ret <8 x i8> %res
}
102
; Loads two <16 x i8> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.umax.v16i8, and expects one umax.16b after the two loads.
define <16 x i8> @umax_16b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: umax_16b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    umax.16b v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <16 x i8>, ptr %A
  %rhs = load <16 x i8>, ptr %B
  %res = call <16 x i8> @llvm.aarch64.neon.umax.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
  ret <16 x i8> %res
}
115
; Loads two <4 x i16> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.umax.v4i16, and expects one umax.4h after the two loads.
define <4 x i16> @umax_4h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: umax_4h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    umax.4h v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <4 x i16>, ptr %A
  %rhs = load <4 x i16>, ptr %B
  %res = call <4 x i16> @llvm.aarch64.neon.umax.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
  ret <4 x i16> %res
}
128
; Loads two <8 x i16> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.umax.v8i16, and expects one umax.8h after the two loads.
define <8 x i16> @umax_8h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: umax_8h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    umax.8h v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <8 x i16>, ptr %A
  %rhs = load <8 x i16>, ptr %B
  %res = call <8 x i16> @llvm.aarch64.neon.umax.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
  ret <8 x i16> %res
}
141
; Loads two <2 x i32> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.umax.v2i32, and expects one umax.2s after the two loads.
define <2 x i32> @umax_2s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: umax_2s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    umax.2s v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <2 x i32>, ptr %A
  %rhs = load <2 x i32>, ptr %B
  %res = call <2 x i32> @llvm.aarch64.neon.umax.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
  ret <2 x i32> %res
}
154
; Loads two <4 x i32> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.umax.v4i32, and expects one umax.4s after the two loads.
define <4 x i32> @umax_4s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: umax_4s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    umax.4s v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <4 x i32>, ptr %A
  %rhs = load <4 x i32>, ptr %B
  %res = call <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
  ret <4 x i32> %res
}
167
168declare <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
169declare <16 x i8> @llvm.aarch64.neon.umax.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
170declare <4 x i16> @llvm.aarch64.neon.umax.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
171declare <8 x i16> @llvm.aarch64.neon.umax.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
172declare <2 x i32> @llvm.aarch64.neon.umax.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
173declare <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
174
; Loads two <8 x i8> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.smin.v8i8, and expects one smin.8b after the two loads.
define <8 x i8> @smin_8b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: smin_8b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    smin.8b v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <8 x i8>, ptr %A
  %rhs = load <8 x i8>, ptr %B
  %res = call <8 x i8> @llvm.aarch64.neon.smin.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
  ret <8 x i8> %res
}
187
; Loads two <16 x i8> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.smin.v16i8, and expects one smin.16b after the two loads.
define <16 x i8> @smin_16b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: smin_16b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    smin.16b v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <16 x i8>, ptr %A
  %rhs = load <16 x i8>, ptr %B
  %res = call <16 x i8> @llvm.aarch64.neon.smin.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
  ret <16 x i8> %res
}
200
; Loads two <4 x i16> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.smin.v4i16, and expects one smin.4h after the two loads.
define <4 x i16> @smin_4h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: smin_4h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    smin.4h v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <4 x i16>, ptr %A
  %rhs = load <4 x i16>, ptr %B
  %res = call <4 x i16> @llvm.aarch64.neon.smin.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
  ret <4 x i16> %res
}
213
; Loads two <8 x i16> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.smin.v8i16, and expects one smin.8h after the two loads.
define <8 x i16> @smin_8h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: smin_8h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    smin.8h v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <8 x i16>, ptr %A
  %rhs = load <8 x i16>, ptr %B
  %res = call <8 x i16> @llvm.aarch64.neon.smin.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
  ret <8 x i16> %res
}
226
; Loads two <2 x i32> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.smin.v2i32, and expects one smin.2s after the two loads.
define <2 x i32> @smin_2s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: smin_2s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    smin.2s v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <2 x i32>, ptr %A
  %rhs = load <2 x i32>, ptr %B
  %res = call <2 x i32> @llvm.aarch64.neon.smin.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
  ret <2 x i32> %res
}
239
; Loads two <4 x i32> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.smin.v4i32, and expects one smin.4s after the two loads.
define <4 x i32> @smin_4s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: smin_4s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    smin.4s v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <4 x i32>, ptr %A
  %rhs = load <4 x i32>, ptr %B
  %res = call <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
  ret <4 x i32> %res
}
252
253declare <8 x i8> @llvm.aarch64.neon.smin.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
254declare <16 x i8> @llvm.aarch64.neon.smin.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
255declare <4 x i16> @llvm.aarch64.neon.smin.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
256declare <8 x i16> @llvm.aarch64.neon.smin.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
257declare <2 x i32> @llvm.aarch64.neon.smin.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
258declare <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
259
; Loads two <8 x i8> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.umin.v8i8, and expects one umin.8b after the two loads.
define <8 x i8> @umin_8b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: umin_8b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    umin.8b v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <8 x i8>, ptr %A
  %rhs = load <8 x i8>, ptr %B
  %res = call <8 x i8> @llvm.aarch64.neon.umin.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
  ret <8 x i8> %res
}
272
; Loads two <16 x i8> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.umin.v16i8, and expects one umin.16b after the two loads.
define <16 x i8> @umin_16b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: umin_16b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    umin.16b v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <16 x i8>, ptr %A
  %rhs = load <16 x i8>, ptr %B
  %res = call <16 x i8> @llvm.aarch64.neon.umin.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
  ret <16 x i8> %res
}
285
; Loads two <4 x i16> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.umin.v4i16, and expects one umin.4h after the two loads.
define <4 x i16> @umin_4h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: umin_4h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    umin.4h v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <4 x i16>, ptr %A
  %rhs = load <4 x i16>, ptr %B
  %res = call <4 x i16> @llvm.aarch64.neon.umin.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
  ret <4 x i16> %res
}
298
; Loads two <8 x i16> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.umin.v8i16, and expects one umin.8h after the two loads.
define <8 x i16> @umin_8h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: umin_8h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    umin.8h v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <8 x i16>, ptr %A
  %rhs = load <8 x i16>, ptr %B
  %res = call <8 x i16> @llvm.aarch64.neon.umin.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
  ret <8 x i16> %res
}
311
; Loads two <2 x i32> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.umin.v2i32, and expects one umin.2s after the two loads.
define <2 x i32> @umin_2s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: umin_2s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    umin.2s v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <2 x i32>, ptr %A
  %rhs = load <2 x i32>, ptr %B
  %res = call <2 x i32> @llvm.aarch64.neon.umin.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
  ret <2 x i32> %res
}
324
; Loads two <4 x i32> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.umin.v4i32, and expects one umin.4s after the two loads.
define <4 x i32> @umin_4s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: umin_4s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    umin.4s v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <4 x i32>, ptr %A
  %rhs = load <4 x i32>, ptr %B
  %res = call <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
  ret <4 x i32> %res
}
337
338declare <8 x i8> @llvm.aarch64.neon.umin.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
339declare <16 x i8> @llvm.aarch64.neon.umin.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
340declare <4 x i16> @llvm.aarch64.neon.umin.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
341declare <8 x i16> @llvm.aarch64.neon.umin.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
342declare <2 x i32> @llvm.aarch64.neon.umin.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
343declare <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
344
345; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
346
; Loads two <8 x i8> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.smaxp.v8i8, and expects one smaxp.8b after the two loads.
define <8 x i8> @smaxp_8b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: smaxp_8b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    smaxp.8b v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <8 x i8>, ptr %A
  %rhs = load <8 x i8>, ptr %B
  %res = call <8 x i8> @llvm.aarch64.neon.smaxp.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
  ret <8 x i8> %res
}
359
; Loads two <16 x i8> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.smaxp.v16i8, and expects one smaxp.16b after the two loads.
define <16 x i8> @smaxp_16b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: smaxp_16b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    smaxp.16b v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <16 x i8>, ptr %A
  %rhs = load <16 x i8>, ptr %B
  %res = call <16 x i8> @llvm.aarch64.neon.smaxp.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
  ret <16 x i8> %res
}
372
; Loads two <4 x i16> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.smaxp.v4i16, and expects one smaxp.4h after the two loads.
define <4 x i16> @smaxp_4h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: smaxp_4h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    smaxp.4h v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <4 x i16>, ptr %A
  %rhs = load <4 x i16>, ptr %B
  %res = call <4 x i16> @llvm.aarch64.neon.smaxp.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
  ret <4 x i16> %res
}
385
; Loads two <8 x i16> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.smaxp.v8i16, and expects one smaxp.8h after the two loads.
define <8 x i16> @smaxp_8h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: smaxp_8h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    smaxp.8h v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <8 x i16>, ptr %A
  %rhs = load <8 x i16>, ptr %B
  %res = call <8 x i16> @llvm.aarch64.neon.smaxp.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
  ret <8 x i16> %res
}
398
; Loads two <2 x i32> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.smaxp.v2i32, and expects one smaxp.2s after the two loads.
define <2 x i32> @smaxp_2s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: smaxp_2s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    smaxp.2s v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <2 x i32>, ptr %A
  %rhs = load <2 x i32>, ptr %B
  %res = call <2 x i32> @llvm.aarch64.neon.smaxp.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
  ret <2 x i32> %res
}
411
; Loads two <4 x i32> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.smaxp.v4i32, and expects one smaxp.4s after the two loads.
define <4 x i32> @smaxp_4s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: smaxp_4s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    smaxp.4s v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <4 x i32>, ptr %A
  %rhs = load <4 x i32>, ptr %B
  %res = call <4 x i32> @llvm.aarch64.neon.smaxp.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
  ret <4 x i32> %res
}
424
425declare <8 x i8> @llvm.aarch64.neon.smaxp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
426declare <16 x i8> @llvm.aarch64.neon.smaxp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
427declare <4 x i16> @llvm.aarch64.neon.smaxp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
428declare <8 x i16> @llvm.aarch64.neon.smaxp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
429declare <2 x i32> @llvm.aarch64.neon.smaxp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
430declare <4 x i32> @llvm.aarch64.neon.smaxp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
431
; Loads two <8 x i8> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.umaxp.v8i8, and expects one umaxp.8b after the two loads.
define <8 x i8> @umaxp_8b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: umaxp_8b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    umaxp.8b v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <8 x i8>, ptr %A
  %rhs = load <8 x i8>, ptr %B
  %res = call <8 x i8> @llvm.aarch64.neon.umaxp.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
  ret <8 x i8> %res
}
444
; Loads two <16 x i8> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.umaxp.v16i8, and expects one umaxp.16b after the two loads.
define <16 x i8> @umaxp_16b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: umaxp_16b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    umaxp.16b v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <16 x i8>, ptr %A
  %rhs = load <16 x i8>, ptr %B
  %res = call <16 x i8> @llvm.aarch64.neon.umaxp.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
  ret <16 x i8> %res
}
457
; Loads two <4 x i16> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.umaxp.v4i16, and expects one umaxp.4h after the two loads.
define <4 x i16> @umaxp_4h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: umaxp_4h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    umaxp.4h v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <4 x i16>, ptr %A
  %rhs = load <4 x i16>, ptr %B
  %res = call <4 x i16> @llvm.aarch64.neon.umaxp.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
  ret <4 x i16> %res
}
470
; Loads two <8 x i16> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.umaxp.v8i16, and expects one umaxp.8h after the two loads.
define <8 x i16> @umaxp_8h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: umaxp_8h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    umaxp.8h v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <8 x i16>, ptr %A
  %rhs = load <8 x i16>, ptr %B
  %res = call <8 x i16> @llvm.aarch64.neon.umaxp.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
  ret <8 x i16> %res
}
483
; Loads two <2 x i32> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.umaxp.v2i32, and expects one umaxp.2s after the two loads.
define <2 x i32> @umaxp_2s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: umaxp_2s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    umaxp.2s v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <2 x i32>, ptr %A
  %rhs = load <2 x i32>, ptr %B
  %res = call <2 x i32> @llvm.aarch64.neon.umaxp.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
  ret <2 x i32> %res
}
496
; Loads two <4 x i32> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.umaxp.v4i32, and expects one umaxp.4s after the two loads.
define <4 x i32> @umaxp_4s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: umaxp_4s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    umaxp.4s v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <4 x i32>, ptr %A
  %rhs = load <4 x i32>, ptr %B
  %res = call <4 x i32> @llvm.aarch64.neon.umaxp.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
  ret <4 x i32> %res
}
509
510declare <8 x i8> @llvm.aarch64.neon.umaxp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
511declare <16 x i8> @llvm.aarch64.neon.umaxp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
512declare <4 x i16> @llvm.aarch64.neon.umaxp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
513declare <8 x i16> @llvm.aarch64.neon.umaxp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
514declare <2 x i32> @llvm.aarch64.neon.umaxp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
515declare <4 x i32> @llvm.aarch64.neon.umaxp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
516
517; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
518
; Loads two <8 x i8> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.sminp.v8i8, and expects one sminp.8b after the two loads.
define <8 x i8> @sminp_8b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sminp_8b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    sminp.8b v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <8 x i8>, ptr %A
  %rhs = load <8 x i8>, ptr %B
  %res = call <8 x i8> @llvm.aarch64.neon.sminp.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
  ret <8 x i8> %res
}
531
; Loads two <16 x i8> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.sminp.v16i8, and expects one sminp.16b after the two loads.
define <16 x i8> @sminp_16b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sminp_16b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    sminp.16b v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <16 x i8>, ptr %A
  %rhs = load <16 x i8>, ptr %B
  %res = call <16 x i8> @llvm.aarch64.neon.sminp.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
  ret <16 x i8> %res
}
544
; Loads two <4 x i16> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.sminp.v4i16, and expects one sminp.4h after the two loads.
define <4 x i16> @sminp_4h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sminp_4h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    sminp.4h v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <4 x i16>, ptr %A
  %rhs = load <4 x i16>, ptr %B
  %res = call <4 x i16> @llvm.aarch64.neon.sminp.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
  ret <4 x i16> %res
}
557
; Loads two <8 x i16> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.sminp.v8i16, and expects one sminp.8h after the two loads.
define <8 x i16> @sminp_8h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sminp_8h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    sminp.8h v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <8 x i16>, ptr %A
  %rhs = load <8 x i16>, ptr %B
  %res = call <8 x i16> @llvm.aarch64.neon.sminp.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
  ret <8 x i16> %res
}
570
; Loads two <2 x i32> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.sminp.v2i32, and expects one sminp.2s after the two loads.
define <2 x i32> @sminp_2s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sminp_2s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    sminp.2s v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <2 x i32>, ptr %A
  %rhs = load <2 x i32>, ptr %B
  %res = call <2 x i32> @llvm.aarch64.neon.sminp.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
  ret <2 x i32> %res
}
583
; Loads two <4 x i32> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.sminp.v4i32, and expects one sminp.4s after the two loads.
define <4 x i32> @sminp_4s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sminp_4s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    sminp.4s v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <4 x i32>, ptr %A
  %rhs = load <4 x i32>, ptr %B
  %res = call <4 x i32> @llvm.aarch64.neon.sminp.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
  ret <4 x i32> %res
}
596
597declare <8 x i8> @llvm.aarch64.neon.sminp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
598declare <16 x i8> @llvm.aarch64.neon.sminp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
599declare <4 x i16> @llvm.aarch64.neon.sminp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
600declare <8 x i16> @llvm.aarch64.neon.sminp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
601declare <2 x i32> @llvm.aarch64.neon.sminp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
602declare <4 x i32> @llvm.aarch64.neon.sminp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
603
; Loads two <8 x i8> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.uminp.v8i8, and expects one uminp.8b after the two loads.
define <8 x i8> @uminp_8b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: uminp_8b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    uminp.8b v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <8 x i8>, ptr %A
  %rhs = load <8 x i8>, ptr %B
  %res = call <8 x i8> @llvm.aarch64.neon.uminp.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
  ret <8 x i8> %res
}
616
; Loads two <16 x i8> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.uminp.v16i8, and expects one uminp.16b after the two loads.
define <16 x i8> @uminp_16b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: uminp_16b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    uminp.16b v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <16 x i8>, ptr %A
  %rhs = load <16 x i8>, ptr %B
  %res = call <16 x i8> @llvm.aarch64.neon.uminp.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
  ret <16 x i8> %res
}
629
; Loads two <4 x i16> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.uminp.v4i16, and expects one uminp.4h after the two loads.
define <4 x i16> @uminp_4h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: uminp_4h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    uminp.4h v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <4 x i16>, ptr %A
  %rhs = load <4 x i16>, ptr %B
  %res = call <4 x i16> @llvm.aarch64.neon.uminp.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
  ret <4 x i16> %res
}
642
; Loads two <8 x i16> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.uminp.v8i16, and expects one uminp.8h after the two loads.
define <8 x i16> @uminp_8h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: uminp_8h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    uminp.8h v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <8 x i16>, ptr %A
  %rhs = load <8 x i16>, ptr %B
  %res = call <8 x i16> @llvm.aarch64.neon.uminp.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
  ret <8 x i16> %res
}
655
; Loads two <2 x i32> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.uminp.v2i32, and expects one uminp.2s after the two loads.
define <2 x i32> @uminp_2s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: uminp_2s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    uminp.2s v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <2 x i32>, ptr %A
  %rhs = load <2 x i32>, ptr %B
  %res = call <2 x i32> @llvm.aarch64.neon.uminp.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
  ret <2 x i32> %res
}
668
; Loads two <4 x i32> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.uminp.v4i32, and expects one uminp.4s after the two loads.
define <4 x i32> @uminp_4s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: uminp_4s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    uminp.4s v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <4 x i32>, ptr %A
  %rhs = load <4 x i32>, ptr %B
  %res = call <4 x i32> @llvm.aarch64.neon.uminp.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
  ret <4 x i32> %res
}
681
682declare <8 x i8> @llvm.aarch64.neon.uminp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
683declare <16 x i8> @llvm.aarch64.neon.uminp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
684declare <4 x i16> @llvm.aarch64.neon.uminp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
685declare <8 x i16> @llvm.aarch64.neon.uminp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
686declare <2 x i32> @llvm.aarch64.neon.uminp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
687declare <4 x i32> @llvm.aarch64.neon.uminp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
688
; Loads two <2 x float> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.fmax.v2f32, and expects one fmax.2s after the two loads.
define <2 x float> @fmax_2s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: fmax_2s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    fmax.2s v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <2 x float>, ptr %A
  %rhs = load <2 x float>, ptr %B
  %res = call <2 x float> @llvm.aarch64.neon.fmax.v2f32(<2 x float> %lhs, <2 x float> %rhs)
  ret <2 x float> %res
}
701
; Loads two <4 x float> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.fmax.v4f32, and expects one fmax.4s after the two loads.
define <4 x float> @fmax_4s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: fmax_4s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    fmax.4s v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <4 x float>, ptr %A
  %rhs = load <4 x float>, ptr %B
  %res = call <4 x float> @llvm.aarch64.neon.fmax.v4f32(<4 x float> %lhs, <4 x float> %rhs)
  ret <4 x float> %res
}
714
; Loads two <2 x double> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.fmax.v2f64, and expects one fmax.2d after the two loads.
define <2 x double> @fmax_2d(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: fmax_2d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    fmax.2d v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <2 x double>, ptr %A
  %rhs = load <2 x double>, ptr %B
  %res = call <2 x double> @llvm.aarch64.neon.fmax.v2f64(<2 x double> %lhs, <2 x double> %rhs)
  ret <2 x double> %res
}
727
728declare <2 x float> @llvm.aarch64.neon.fmax.v2f32(<2 x float>, <2 x float>) nounwind readnone
729declare <4 x float> @llvm.aarch64.neon.fmax.v4f32(<4 x float>, <4 x float>) nounwind readnone
730declare <2 x double> @llvm.aarch64.neon.fmax.v2f64(<2 x double>, <2 x double>) nounwind readnone
731
; Loads two <2 x float> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.fmaxp.v2f32, and expects one fmaxp.2s after the two loads.
define <2 x float> @fmaxp_2s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: fmaxp_2s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    fmaxp.2s v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <2 x float>, ptr %A
  %rhs = load <2 x float>, ptr %B
  %res = call <2 x float> @llvm.aarch64.neon.fmaxp.v2f32(<2 x float> %lhs, <2 x float> %rhs)
  ret <2 x float> %res
}
744
; Loads two <4 x float> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.fmaxp.v4f32, and expects one fmaxp.4s after the two loads.
define <4 x float> @fmaxp_4s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: fmaxp_4s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    fmaxp.4s v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <4 x float>, ptr %A
  %rhs = load <4 x float>, ptr %B
  %res = call <4 x float> @llvm.aarch64.neon.fmaxp.v4f32(<4 x float> %lhs, <4 x float> %rhs)
  ret <4 x float> %res
}
757
; Loads two <2 x double> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.fmaxp.v2f64, and expects one fmaxp.2d after the two loads.
define <2 x double> @fmaxp_2d(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: fmaxp_2d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    fmaxp.2d v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <2 x double>, ptr %A
  %rhs = load <2 x double>, ptr %B
  %res = call <2 x double> @llvm.aarch64.neon.fmaxp.v2f64(<2 x double> %lhs, <2 x double> %rhs)
  ret <2 x double> %res
}
770
771declare <2 x float> @llvm.aarch64.neon.fmaxp.v2f32(<2 x float>, <2 x float>) nounwind readnone
772declare <4 x float> @llvm.aarch64.neon.fmaxp.v4f32(<4 x float>, <4 x float>) nounwind readnone
773declare <2 x double> @llvm.aarch64.neon.fmaxp.v2f64(<2 x double>, <2 x double>) nounwind readnone
774
; Loads two <2 x float> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.fmin.v2f32, and expects one fmin.2s after the two loads.
define <2 x float> @fmin_2s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: fmin_2s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    fmin.2s v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <2 x float>, ptr %A
  %rhs = load <2 x float>, ptr %B
  %res = call <2 x float> @llvm.aarch64.neon.fmin.v2f32(<2 x float> %lhs, <2 x float> %rhs)
  ret <2 x float> %res
}
787
; Loads two <4 x float> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.fmin.v4f32, and expects one fmin.4s after the two loads.
define <4 x float> @fmin_4s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: fmin_4s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    fmin.4s v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <4 x float>, ptr %A
  %rhs = load <4 x float>, ptr %B
  %res = call <4 x float> @llvm.aarch64.neon.fmin.v4f32(<4 x float> %lhs, <4 x float> %rhs)
  ret <4 x float> %res
}
800
; Loads two <2 x double> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.fmin.v2f64, and expects one fmin.2d after the two loads.
define <2 x double> @fmin_2d(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: fmin_2d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    fmin.2d v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <2 x double>, ptr %A
  %rhs = load <2 x double>, ptr %B
  %res = call <2 x double> @llvm.aarch64.neon.fmin.v2f64(<2 x double> %lhs, <2 x double> %rhs)
  ret <2 x double> %res
}
813
814declare <2 x float> @llvm.aarch64.neon.fmin.v2f32(<2 x float>, <2 x float>) nounwind readnone
815declare <4 x float> @llvm.aarch64.neon.fmin.v4f32(<4 x float>, <4 x float>) nounwind readnone
816declare <2 x double> @llvm.aarch64.neon.fmin.v2f64(<2 x double>, <2 x double>) nounwind readnone
817
; Loads two <2 x float> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.fminp.v2f32, and expects one fminp.2s after the two loads.
define <2 x float> @fminp_2s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: fminp_2s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    fminp.2s v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <2 x float>, ptr %A
  %rhs = load <2 x float>, ptr %B
  %res = call <2 x float> @llvm.aarch64.neon.fminp.v2f32(<2 x float> %lhs, <2 x float> %rhs)
  ret <2 x float> %res
}
830
; Loads two <4 x float> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.fminp.v4f32, and expects one fminp.4s after the two loads.
define <4 x float> @fminp_4s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: fminp_4s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    fminp.4s v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <4 x float>, ptr %A
  %rhs = load <4 x float>, ptr %B
  %res = call <4 x float> @llvm.aarch64.neon.fminp.v4f32(<4 x float> %lhs, <4 x float> %rhs)
  ret <4 x float> %res
}
843
; Loads two <2 x double> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.fminp.v2f64, and expects one fminp.2d after the two loads.
define <2 x double> @fminp_2d(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: fminp_2d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    fminp.2d v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <2 x double>, ptr %A
  %rhs = load <2 x double>, ptr %B
  %res = call <2 x double> @llvm.aarch64.neon.fminp.v2f64(<2 x double> %lhs, <2 x double> %rhs)
  ret <2 x double> %res
}
856
857declare <2 x float> @llvm.aarch64.neon.fminp.v2f32(<2 x float>, <2 x float>) nounwind readnone
858declare <4 x float> @llvm.aarch64.neon.fminp.v4f32(<4 x float>, <4 x float>) nounwind readnone
859declare <2 x double> @llvm.aarch64.neon.fminp.v2f64(<2 x double>, <2 x double>) nounwind readnone
860
; Loads two <2 x float> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.fminnmp.v2f32, and expects one fminnmp.2s after the two loads.
define <2 x float> @fminnmp_2s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: fminnmp_2s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    fminnmp.2s v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <2 x float>, ptr %A
  %rhs = load <2 x float>, ptr %B
  %res = call <2 x float> @llvm.aarch64.neon.fminnmp.v2f32(<2 x float> %lhs, <2 x float> %rhs)
  ret <2 x float> %res
}
873
; Loads two <4 x float> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.fminnmp.v4f32, and expects one fminnmp.4s after the two loads.
define <4 x float> @fminnmp_4s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: fminnmp_4s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    fminnmp.4s v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <4 x float>, ptr %A
  %rhs = load <4 x float>, ptr %B
  %res = call <4 x float> @llvm.aarch64.neon.fminnmp.v4f32(<4 x float> %lhs, <4 x float> %rhs)
  ret <4 x float> %res
}
886
; Loads two <2 x double> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.fminnmp.v2f64, and expects one fminnmp.2d after the two loads.
define <2 x double> @fminnmp_2d(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: fminnmp_2d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    fminnmp.2d v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <2 x double>, ptr %A
  %rhs = load <2 x double>, ptr %B
  %res = call <2 x double> @llvm.aarch64.neon.fminnmp.v2f64(<2 x double> %lhs, <2 x double> %rhs)
  ret <2 x double> %res
}
899
900declare <2 x float> @llvm.aarch64.neon.fminnmp.v2f32(<2 x float>, <2 x float>) nounwind readnone
901declare <4 x float> @llvm.aarch64.neon.fminnmp.v4f32(<4 x float>, <4 x float>) nounwind readnone
902declare <2 x double> @llvm.aarch64.neon.fminnmp.v2f64(<2 x double>, <2 x double>) nounwind readnone
903
; Loads two <2 x float> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.fmaxnmp.v2f32, and expects one fmaxnmp.2s after the two loads.
define <2 x float> @fmaxnmp_2s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: fmaxnmp_2s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    fmaxnmp.2s v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <2 x float>, ptr %A
  %rhs = load <2 x float>, ptr %B
  %res = call <2 x float> @llvm.aarch64.neon.fmaxnmp.v2f32(<2 x float> %lhs, <2 x float> %rhs)
  ret <2 x float> %res
}
916
; Loads two <4 x float> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.fmaxnmp.v4f32, and expects one fmaxnmp.4s after the two loads.
define <4 x float> @fmaxnmp_4s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: fmaxnmp_4s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    fmaxnmp.4s v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <4 x float>, ptr %A
  %rhs = load <4 x float>, ptr %B
  %res = call <4 x float> @llvm.aarch64.neon.fmaxnmp.v4f32(<4 x float> %lhs, <4 x float> %rhs)
  ret <4 x float> %res
}
929
; Loads two <2 x double> vectors from %A and %B, passes them to
; @llvm.aarch64.neon.fmaxnmp.v2f64, and expects one fmaxnmp.2d after the two loads.
define <2 x double> @fmaxnmp_2d(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: fmaxnmp_2d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    fmaxnmp.2d v0, v0, v1
; CHECK-NEXT:    ret
  %lhs = load <2 x double>, ptr %A
  %rhs = load <2 x double>, ptr %B
  %res = call <2 x double> @llvm.aarch64.neon.fmaxnmp.v2f64(<2 x double> %lhs, <2 x double> %rhs)
  ret <2 x double> %res
}
942
943declare <2 x float> @llvm.aarch64.neon.fmaxnmp.v2f32(<2 x float>, <2 x float>) nounwind readnone
944declare <4 x float> @llvm.aarch64.neon.fmaxnmp.v4f32(<4 x float>, <4 x float>) nounwind readnone
945declare <2 x double> @llvm.aarch64.neon.fmaxnmp.v2f64(<2 x double>, <2 x double>) nounwind readnone
946