xref: /llvm-project/llvm/test/CodeGen/Thumb2/mve-minmaxi.ll (revision e0ed0333f0fed2e73f805afd58b61176a87aa3ad)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve --verify-machineinstrs %s -o - | FileCheck %s
3
; Scalar signed max on i8 (llvm.smax.i8). The expected code sign-extends both
; operands with sxtb, then selects between r0 and r1 with cmp + csel gt.
4declare i8 @llvm.smax.i8(i8 %a, i8 %b) readnone
5
6define arm_aapcs_vfpcc i8 @smaxi8(i8 %a, i8 %b) {
7; CHECK-LABEL: smaxi8:
8; CHECK:       @ %bb.0:
9; CHECK-NEXT:    sxtb r1, r1
10; CHECK-NEXT:    sxtb r0, r0
11; CHECK-NEXT:    cmp r0, r1
12; CHECK-NEXT:    csel r0, r0, r1, gt
13; CHECK-NEXT:    bx lr
14  %c = call i8 @llvm.smax.i8(i8 %a, i8 %b)
15  ret i8 %c
16}
17
; Scalar signed max on i16 (llvm.smax.i16). Same shape as the i8 case, but the
; expected sign extension is sxth.
18declare i16 @llvm.smax.i16(i16 %a, i16 %b) readnone
19
20define arm_aapcs_vfpcc i16 @smaxi16(i16 %a, i16 %b) {
21; CHECK-LABEL: smaxi16:
22; CHECK:       @ %bb.0:
23; CHECK-NEXT:    sxth r1, r1
24; CHECK-NEXT:    sxth r0, r0
25; CHECK-NEXT:    cmp r0, r1
26; CHECK-NEXT:    csel r0, r0, r1, gt
27; CHECK-NEXT:    bx lr
28  %c = call i16 @llvm.smax.i16(i16 %a, i16 %b)
29  ret i16 %c
30}
31
; Scalar signed max on i32 (llvm.smax.i32). No extension is expected: just
; cmp followed by csel with the gt condition.
32declare i32 @llvm.smax.i32(i32 %a, i32 %b) readnone
33
34define arm_aapcs_vfpcc i32 @smaxi32(i32 %a, i32 %b) {
35; CHECK-LABEL: smaxi32:
36; CHECK:       @ %bb.0:
37; CHECK-NEXT:    cmp r0, r1
38; CHECK-NEXT:    csel r0, r0, r1, gt
39; CHECK-NEXT:    bx lr
40  %c = call i32 @llvm.smax.i32(i32 %a, i32 %b)
41  ret i32 %c
42}
43
; Scalar signed max on i64 (llvm.smax.i64). The expected code does a 64-bit
; compare as subs.w/sbcs.w of (r2:r3 - r0:r1) into scratch r12, then selects
; the low and high words separately with csel lt.
44declare i64 @llvm.smax.i64(i64 %a, i64 %b) readnone
45
46define arm_aapcs_vfpcc i64 @smaxi64(i64 %a, i64 %b) {
47; CHECK-LABEL: smaxi64:
48; CHECK:       @ %bb.0:
49; CHECK-NEXT:    subs.w r12, r2, r0
50; CHECK-NEXT:    sbcs.w r12, r3, r1
51; CHECK-NEXT:    csel r0, r0, r2, lt
52; CHECK-NEXT:    csel r1, r1, r3, lt
53; CHECK-NEXT:    bx lr
54  %c = call i64 @llvm.smax.i64(i64 %a, i64 %b)
55  ret i64 %c
56}
57
; Signed max on <8 x i8> (llvm.smax.v8i8). The expected code widens both
; operands with vmovlb.s8 and then uses a single vmax.s16.
58declare <8 x i8> @llvm.smax.v8i8(<8 x i8> %a, <8 x i8> %b) readnone
59
60define arm_aapcs_vfpcc <8 x i8> @smax8i8(<8 x i8> %a, <8 x i8> %b) {
61; CHECK-LABEL: smax8i8:
62; CHECK:       @ %bb.0:
63; CHECK-NEXT:    vmovlb.s8 q1, q1
64; CHECK-NEXT:    vmovlb.s8 q0, q0
65; CHECK-NEXT:    vmax.s16 q0, q0, q1
66; CHECK-NEXT:    bx lr
67  %c = call <8 x i8> @llvm.smax.v8i8(<8 x i8> %a, <8 x i8> %b)
68  ret <8 x i8> %c
69}
70
; Signed max on <16 x i8> (llvm.smax.v16i8). Expected to lower to one
; vmax.s8 on q0/q1.
71declare <16 x i8> @llvm.smax.v16i8(<16 x i8> %a, <16 x i8> %b) readnone
72
73define arm_aapcs_vfpcc <16 x i8> @smax16i8(<16 x i8> %a, <16 x i8> %b) {
74; CHECK-LABEL: smax16i8:
75; CHECK:       @ %bb.0:
76; CHECK-NEXT:    vmax.s8 q0, q0, q1
77; CHECK-NEXT:    bx lr
78  %c = call <16 x i8> @llvm.smax.v16i8(<16 x i8> %a, <16 x i8> %b)
79  ret <16 x i8> %c
80}
81
; Signed max on <32 x i8> (llvm.smax.v32i8), with the result stored through %p.
; The expected code uses two vmax.s8 (q1/q3 and q0/q2) and two vstrw.32
; stores, to [r0, #16] and [r0].
82declare <32 x i8> @llvm.smax.v32i8(<32 x i8> %a, <32 x i8> %b) readnone
83
84define arm_aapcs_vfpcc void @smax32i8(<32 x i8> %a, <32 x i8> %b, ptr %p) {
85; CHECK-LABEL: smax32i8:
86; CHECK:       @ %bb.0:
87; CHECK-NEXT:    vmax.s8 q1, q1, q3
88; CHECK-NEXT:    vmax.s8 q0, q0, q2
89; CHECK-NEXT:    vstrw.32 q1, [r0, #16]
90; CHECK-NEXT:    vstrw.32 q0, [r0]
91; CHECK-NEXT:    bx lr
92  %c = call <32 x i8> @llvm.smax.v32i8(<32 x i8> %a, <32 x i8> %b)
93  store <32 x i8> %c, ptr %p
94  ret void
95}
96
; Signed max on <4 x i16> (llvm.smax.v4i16). The expected code widens both
; operands with vmovlb.s16 and then uses a single vmax.s32.
97declare <4 x i16> @llvm.smax.v4i16(<4 x i16> %a, <4 x i16> %b) readnone
98
99define arm_aapcs_vfpcc <4 x i16> @smax4i16(<4 x i16> %a, <4 x i16> %b) {
100; CHECK-LABEL: smax4i16:
101; CHECK:       @ %bb.0:
102; CHECK-NEXT:    vmovlb.s16 q1, q1
103; CHECK-NEXT:    vmovlb.s16 q0, q0
104; CHECK-NEXT:    vmax.s32 q0, q0, q1
105; CHECK-NEXT:    bx lr
106  %c = call <4 x i16> @llvm.smax.v4i16(<4 x i16> %a, <4 x i16> %b)
107  ret <4 x i16> %c
108}
109
; Signed max on <8 x i16> (llvm.smax.v8i16). Expected to lower to one
; vmax.s16 on q0/q1.
110declare <8 x i16> @llvm.smax.v8i16(<8 x i16> %a, <8 x i16> %b) readnone
111
112define arm_aapcs_vfpcc <8 x i16> @smax8i16(<8 x i16> %a, <8 x i16> %b) {
113; CHECK-LABEL: smax8i16:
114; CHECK:       @ %bb.0:
115; CHECK-NEXT:    vmax.s16 q0, q0, q1
116; CHECK-NEXT:    bx lr
117  %c = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %a, <8 x i16> %b)
118  ret <8 x i16> %c
119}
120
; Signed max on <16 x i16> (llvm.smax.v16i16), with the result stored through
; %p. The expected code uses two vmax.s16 and two vstrw.32 stores, to
; [r0, #16] and [r0].
121declare <16 x i16> @llvm.smax.v16i16(<16 x i16> %a, <16 x i16> %b) readnone
122
123define arm_aapcs_vfpcc void @smax16i16(<16 x i16> %a, <16 x i16> %b, ptr %p) {
124; CHECK-LABEL: smax16i16:
125; CHECK:       @ %bb.0:
126; CHECK-NEXT:    vmax.s16 q1, q1, q3
127; CHECK-NEXT:    vmax.s16 q0, q0, q2
128; CHECK-NEXT:    vstrw.32 q1, [r0, #16]
129; CHECK-NEXT:    vstrw.32 q0, [r0]
130; CHECK-NEXT:    bx lr
131  %c = call <16 x i16> @llvm.smax.v16i16(<16 x i16> %a, <16 x i16> %b)
132  store <16 x i16> %c, ptr %p
133  ret void
134}
135
; Signed max on <2 x i32> (llvm.smax.v2i32). The expected output contains no
; vmax instruction. Lanes s0/s2/s4/s6 are moved to GPRs and written back into
; q0/q1 together with their asr #31 words. Each lane pair is compared with
; subs/sbcs and turned into a mask with csetm lt. The two masks are packed
; into r3 with bfi, moved to p0 with vmsr, and the result is chosen by vpsel.
; r4, r5, r7 and lr are pushed on entry and restored by the final pop.
136declare <2 x i32> @llvm.smax.v2i32(<2 x i32> %a, <2 x i32> %b) readnone
137
138define arm_aapcs_vfpcc <2 x i32> @smax2i32(<2 x i32> %a, <2 x i32> %b) {
139; CHECK-LABEL: smax2i32:
140; CHECK:       @ %bb.0:
141; CHECK-NEXT:    .save {r4, r5, r7, lr}
142; CHECK-NEXT:    push {r4, r5, r7, lr}
143; CHECK-NEXT:    vmov r1, s4
144; CHECK-NEXT:    vmov r3, s0
145; CHECK-NEXT:    vmov r0, s6
146; CHECK-NEXT:    vmov r2, s2
147; CHECK-NEXT:    vmov q1[2], q1[0], r1, r0
148; CHECK-NEXT:    vmov q0[2], q0[0], r3, r2
149; CHECK-NEXT:    asr.w lr, r1, #31
150; CHECK-NEXT:    subs r1, r1, r3
151; CHECK-NEXT:    sbcs.w r1, lr, r3, asr #31
152; CHECK-NEXT:    asr.w r5, r3, #31
153; CHECK-NEXT:    asr.w r12, r0, #31
154; CHECK-NEXT:    csetm r1, lt
155; CHECK-NEXT:    subs r0, r0, r2
156; CHECK-NEXT:    mov.w r3, #0
157; CHECK-NEXT:    sbcs.w r0, r12, r2, asr #31
158; CHECK-NEXT:    bfi r3, r1, #0, #8
159; CHECK-NEXT:    csetm r0, lt
160; CHECK-NEXT:    asrs r4, r2, #31
161; CHECK-NEXT:    bfi r3, r0, #8, #8
162; CHECK-NEXT:    vmov q1[3], q1[1], lr, r12
163; CHECK-NEXT:    vmov q0[3], q0[1], r5, r4
164; CHECK-NEXT:    vmsr p0, r3
165; CHECK-NEXT:    vpsel q0, q0, q1
166; CHECK-NEXT:    pop {r4, r5, r7, pc}
167  %c = call <2 x i32> @llvm.smax.v2i32(<2 x i32> %a, <2 x i32> %b)
168  ret <2 x i32> %c
169}
170
; Signed max on <4 x i32> (llvm.smax.v4i32). Expected to lower to one
; vmax.s32 on q0/q1.
171declare <4 x i32> @llvm.smax.v4i32(<4 x i32> %a, <4 x i32> %b) readnone
172
173define arm_aapcs_vfpcc <4 x i32> @smax4i32(<4 x i32> %a, <4 x i32> %b) {
174; CHECK-LABEL: smax4i32:
175; CHECK:       @ %bb.0:
176; CHECK-NEXT:    vmax.s32 q0, q0, q1
177; CHECK-NEXT:    bx lr
178  %c = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %a, <4 x i32> %b)
179  ret <4 x i32> %c
180}
181
; Signed max on <8 x i32> (llvm.smax.v8i32), with the result stored through %p.
; The expected code uses two vmax.s32 and two vstrw.32 stores, to [r0, #16]
; and [r0].
182declare <8 x i32> @llvm.smax.v8i32(<8 x i32> %a, <8 x i32> %b) readnone
183
184define arm_aapcs_vfpcc void @smax8i32(<8 x i32> %a, <8 x i32> %b, ptr %p) {
185; CHECK-LABEL: smax8i32:
186; CHECK:       @ %bb.0:
187; CHECK-NEXT:    vmax.s32 q1, q1, q3
188; CHECK-NEXT:    vmax.s32 q0, q0, q2
189; CHECK-NEXT:    vstrw.32 q1, [r0, #16]
190; CHECK-NEXT:    vstrw.32 q0, [r0]
191; CHECK-NEXT:    bx lr
192  %c = call <8 x i32>@llvm.smax.v8i32(<8 x i32> %a, <8 x i32> %b)
193  store <8 x i32> %c, ptr %p
194  ret void
195}
196
; Signed max on <1 x i64> (llvm.smax.v1i64). The expected code is the scalar
; i64 sequence (subs.w/sbcs.w + two csel lt) on r0-r3, wrapped in an 8-byte
; stack adjustment (sub sp / add sp).
197declare <1 x i64> @llvm.smax.v1i64(<1 x i64> %a, <1 x i64> %b) readnone
198
199define arm_aapcs_vfpcc <1 x i64> @smax1i64(<1 x i64> %a, <1 x i64> %b) {
200; CHECK-LABEL: smax1i64:
201; CHECK:       @ %bb.0:
202; CHECK-NEXT:    .pad #8
203; CHECK-NEXT:    sub sp, #8
204; CHECK-NEXT:    subs.w r12, r2, r0
205; CHECK-NEXT:    sbcs.w r12, r3, r1
206; CHECK-NEXT:    csel r0, r0, r2, lt
207; CHECK-NEXT:    csel r1, r1, r3, lt
208; CHECK-NEXT:    add sp, #8
209; CHECK-NEXT:    bx lr
210  %c = call <1 x i64> @llvm.smax.v1i64(<1 x i64> %a, <1 x i64> %b)
211  ret <1 x i64> %c
212}
213
; Signed max on <2 x i64> (llvm.smax.v2i64). The expected output has no vmax.
; Each 64-bit lane is moved to GPR pairs (d0/d2, then d1/d3) and compared as
; (b - a) with subs/sbcs, giving a csetm lt mask. The masks are packed into
; r1 with bfi at bit offsets 0 and 8, moved to p0, and vpsel picks q0 or q1.
214declare <2 x i64> @llvm.smax.v2i64(<2 x i64> %a, <2 x i64> %b) readnone
215
216define arm_aapcs_vfpcc <2 x i64> @smax2i64(<2 x i64> %a, <2 x i64> %b) {
217; CHECK-LABEL: smax2i64:
218; CHECK:       @ %bb.0:
219; CHECK-NEXT:    vmov r0, r1, d0
220; CHECK-NEXT:    vmov r2, r3, d2
221; CHECK-NEXT:    subs r0, r2, r0
222; CHECK-NEXT:    sbcs.w r0, r3, r1
223; CHECK-NEXT:    mov.w r1, #0
224; CHECK-NEXT:    csetm r0, lt
225; CHECK-NEXT:    vmov r3, r2, d3
226; CHECK-NEXT:    bfi r1, r0, #0, #8
227; CHECK-NEXT:    vmov r0, r12, d1
228; CHECK-NEXT:    subs r0, r3, r0
229; CHECK-NEXT:    sbcs.w r0, r2, r12
230; CHECK-NEXT:    csetm r0, lt
231; CHECK-NEXT:    bfi r1, r0, #8, #8
232; CHECK-NEXT:    vmsr p0, r1
233; CHECK-NEXT:    vpsel q0, q0, q1
234; CHECK-NEXT:    bx lr
235  %c = call <2 x i64> @llvm.smax.v2i64(<2 x i64> %a, <2 x i64> %b)
236  ret <2 x i64> %c
237}
238
; Signed max on <4 x i64> (llvm.smax.v4i64), with the result stored through %p.
; The expected code handles the two 128-bit halves one after the other. For
; q1/q3 and then q0/q2, each 64-bit lane is compared with subs/sbcs +
; csetm lt, the masks are packed with bfi and moved to p0, and vpsel picks
; the result. The halves are stored with vstrw.32 to [r0, #16] and [r0].
; r4 and lr are pushed on entry and restored by the final pop.
239declare <4 x i64> @llvm.smax.v4i64(<4 x i64> %a, <4 x i64> %b) readnone
240
241define arm_aapcs_vfpcc void @smax4i64(<4 x i64> %a, <4 x i64> %b, ptr %p) {
242; CHECK-LABEL: smax4i64:
243; CHECK:       @ %bb.0:
244; CHECK-NEXT:    .save {r4, lr}
245; CHECK-NEXT:    push {r4, lr}
246; CHECK-NEXT:    vmov r1, r12, d2
247; CHECK-NEXT:    vmov r3, r2, d6
248; CHECK-NEXT:    subs r1, r3, r1
249; CHECK-NEXT:    mov.w r3, #0
250; CHECK-NEXT:    sbcs.w r1, r2, r12
251; CHECK-NEXT:    vmov lr, r12, d3
252; CHECK-NEXT:    csetm r2, lt
253; CHECK-NEXT:    movs r1, #0
254; CHECK-NEXT:    bfi r3, r2, #0, #8
255; CHECK-NEXT:    vmov r2, r4, d7
256; CHECK-NEXT:    subs.w r2, r2, lr
257; CHECK-NEXT:    sbcs.w r2, r4, r12
258; CHECK-NEXT:    csetm r2, lt
259; CHECK-NEXT:    bfi r3, r2, #8, #8
260; CHECK-NEXT:    vmov r2, r12, d0
261; CHECK-NEXT:    vmsr p0, r3
262; CHECK-NEXT:    vmov r4, r3, d4
263; CHECK-NEXT:    vpsel q1, q1, q3
264; CHECK-NEXT:    vstrw.32 q1, [r0, #16]
265; CHECK-NEXT:    subs r2, r4, r2
266; CHECK-NEXT:    sbcs.w r2, r3, r12
267; CHECK-NEXT:    vmov r4, r3, d5
268; CHECK-NEXT:    csetm r2, lt
269; CHECK-NEXT:    bfi r1, r2, #0, #8
270; CHECK-NEXT:    vmov r2, r12, d1
271; CHECK-NEXT:    subs r2, r4, r2
272; CHECK-NEXT:    sbcs.w r2, r3, r12
273; CHECK-NEXT:    csetm r2, lt
274; CHECK-NEXT:    bfi r1, r2, #8, #8
275; CHECK-NEXT:    vmsr p0, r1
276; CHECK-NEXT:    vpsel q0, q0, q2
277; CHECK-NEXT:    vstrw.32 q0, [r0]
278; CHECK-NEXT:    pop {r4, pc}
279  %c = call <4 x i64> @llvm.smax.v4i64(<4 x i64> %a, <4 x i64> %b)
280  store <4 x i64> %c, ptr %p
281  ret void
282}
283
; Scalar unsigned max on i8 (llvm.umax.i8). The expected code zero-extends
; both operands with uxtb, then selects with cmp + csel hi.
284declare i8 @llvm.umax.i8(i8 %a, i8 %b) readnone
285
286define arm_aapcs_vfpcc i8 @umaxi8(i8 %a, i8 %b) {
287; CHECK-LABEL: umaxi8:
288; CHECK:       @ %bb.0:
289; CHECK-NEXT:    uxtb r1, r1
290; CHECK-NEXT:    uxtb r0, r0
291; CHECK-NEXT:    cmp r0, r1
292; CHECK-NEXT:    csel r0, r0, r1, hi
293; CHECK-NEXT:    bx lr
294  %c = call i8 @llvm.umax.i8(i8 %a, i8 %b)
295  ret i8 %c
296}
297
; Scalar unsigned max on i16 (llvm.umax.i16). Same shape as the i8 case, but
; the expected zero extension is uxth.
298declare i16 @llvm.umax.i16(i16 %a, i16 %b) readnone
299
300define arm_aapcs_vfpcc i16 @umaxi16(i16 %a, i16 %b) {
301; CHECK-LABEL: umaxi16:
302; CHECK:       @ %bb.0:
303; CHECK-NEXT:    uxth r1, r1
304; CHECK-NEXT:    uxth r0, r0
305; CHECK-NEXT:    cmp r0, r1
306; CHECK-NEXT:    csel r0, r0, r1, hi
307; CHECK-NEXT:    bx lr
308  %c = call i16 @llvm.umax.i16(i16 %a, i16 %b)
309  ret i16 %c
310}
311
; Scalar unsigned max on i32 (llvm.umax.i32). No extension is expected: just
; cmp followed by csel with the hi condition.
312declare i32 @llvm.umax.i32(i32 %a, i32 %b) readnone
313
314define arm_aapcs_vfpcc i32 @umaxi32(i32 %a, i32 %b) {
315; CHECK-LABEL: umaxi32:
316; CHECK:       @ %bb.0:
317; CHECK-NEXT:    cmp r0, r1
318; CHECK-NEXT:    csel r0, r0, r1, hi
319; CHECK-NEXT:    bx lr
320  %c = call i32 @llvm.umax.i32(i32 %a, i32 %b)
321  ret i32 %c
322}
323
; Scalar unsigned max on i64 (llvm.umax.i64). The expected code does a 64-bit
; compare as subs.w/sbcs.w of (r2:r3 - r0:r1) into scratch r12, then selects
; the low and high words separately with csel lo.
324declare i64 @llvm.umax.i64(i64 %a, i64 %b) readnone
325
326define arm_aapcs_vfpcc i64 @umaxi64(i64 %a, i64 %b) {
327; CHECK-LABEL: umaxi64:
328; CHECK:       @ %bb.0:
329; CHECK-NEXT:    subs.w r12, r2, r0
330; CHECK-NEXT:    sbcs.w r12, r3, r1
331; CHECK-NEXT:    csel r0, r0, r2, lo
332; CHECK-NEXT:    csel r1, r1, r3, lo
333; CHECK-NEXT:    bx lr
334  %c = call i64 @llvm.umax.i64(i64 %a, i64 %b)
335  ret i64 %c
336}
337
; Unsigned max on <8 x i8> (llvm.umax.v8i8). The expected code widens both
; operands with vmovlb.u8 and then uses a single vmax.u16.
338declare <8 x i8> @llvm.umax.v8i8(<8 x i8> %a, <8 x i8> %b) readnone
339
340define arm_aapcs_vfpcc <8 x i8> @umax8i8(<8 x i8> %a, <8 x i8> %b) {
341; CHECK-LABEL: umax8i8:
342; CHECK:       @ %bb.0:
343; CHECK-NEXT:    vmovlb.u8 q1, q1
344; CHECK-NEXT:    vmovlb.u8 q0, q0
345; CHECK-NEXT:    vmax.u16 q0, q0, q1
346; CHECK-NEXT:    bx lr
347  %c = call <8 x i8> @llvm.umax.v8i8(<8 x i8> %a, <8 x i8> %b)
348  ret <8 x i8> %c
349}
350
; Unsigned max on <16 x i8> (llvm.umax.v16i8). Expected to lower to one
; vmax.u8 on q0/q1.
351declare <16 x i8> @llvm.umax.v16i8(<16 x i8> %a, <16 x i8> %b) readnone
352
353define arm_aapcs_vfpcc <16 x i8> @umax16i8(<16 x i8> %a, <16 x i8> %b) {
354; CHECK-LABEL: umax16i8:
355; CHECK:       @ %bb.0:
356; CHECK-NEXT:    vmax.u8 q0, q0, q1
357; CHECK-NEXT:    bx lr
358  %c = call <16 x i8> @llvm.umax.v16i8(<16 x i8> %a, <16 x i8> %b)
359  ret <16 x i8> %c
360}
361
; Unsigned max on <32 x i8> (llvm.umax.v32i8), with the result stored through
; %p. The expected code uses two vmax.u8 and two vstrw.32 stores, to
; [r0, #16] and [r0].
362declare <32 x i8> @llvm.umax.v32i8(<32 x i8> %a, <32 x i8> %b) readnone
363
364define arm_aapcs_vfpcc void @umax32i8(<32 x i8> %a, <32 x i8> %b, ptr %p) {
365; CHECK-LABEL: umax32i8:
366; CHECK:       @ %bb.0:
367; CHECK-NEXT:    vmax.u8 q1, q1, q3
368; CHECK-NEXT:    vmax.u8 q0, q0, q2
369; CHECK-NEXT:    vstrw.32 q1, [r0, #16]
370; CHECK-NEXT:    vstrw.32 q0, [r0]
371; CHECK-NEXT:    bx lr
372  %c = call <32 x i8> @llvm.umax.v32i8(<32 x i8> %a, <32 x i8> %b)
373  store <32 x i8> %c, ptr %p
374  ret void
375}
376
; Unsigned max on <4 x i16> (llvm.umax.v4i16). The expected code widens both
; operands with vmovlb.u16 and then uses a single vmax.u32.
377declare <4 x i16> @llvm.umax.v4i16(<4 x i16> %a, <4 x i16> %b) readnone
378
379define arm_aapcs_vfpcc <4 x i16> @umax4i16(<4 x i16> %a, <4 x i16> %b) {
380; CHECK-LABEL: umax4i16:
381; CHECK:       @ %bb.0:
382; CHECK-NEXT:    vmovlb.u16 q1, q1
383; CHECK-NEXT:    vmovlb.u16 q0, q0
384; CHECK-NEXT:    vmax.u32 q0, q0, q1
385; CHECK-NEXT:    bx lr
386  %c = call <4 x i16> @llvm.umax.v4i16(<4 x i16> %a, <4 x i16> %b)
387  ret <4 x i16> %c
388}
389
; Unsigned max on <8 x i16> (llvm.umax.v8i16). Expected to lower to one
; vmax.u16 on q0/q1.
390declare <8 x i16> @llvm.umax.v8i16(<8 x i16> %a, <8 x i16> %b) readnone
391
392define arm_aapcs_vfpcc <8 x i16> @umax8i16(<8 x i16> %a, <8 x i16> %b) {
393; CHECK-LABEL: umax8i16:
394; CHECK:       @ %bb.0:
395; CHECK-NEXT:    vmax.u16 q0, q0, q1
396; CHECK-NEXT:    bx lr
397  %c = call <8 x i16> @llvm.umax.v8i16(<8 x i16> %a, <8 x i16> %b)
398  ret <8 x i16> %c
399}
400
; Unsigned max on <16 x i16> (llvm.umax.v16i16), with the result stored
; through %p. The expected code uses two vmax.u16 and two vstrw.32 stores, to
; [r0, #16] and [r0].
401declare <16 x i16> @llvm.umax.v16i16(<16 x i16> %a, <16 x i16> %b) readnone
402
403define arm_aapcs_vfpcc void @umax16i16(<16 x i16> %a, <16 x i16> %b, ptr %p) {
404; CHECK-LABEL: umax16i16:
405; CHECK:       @ %bb.0:
406; CHECK-NEXT:    vmax.u16 q1, q1, q3
407; CHECK-NEXT:    vmax.u16 q0, q0, q2
408; CHECK-NEXT:    vstrw.32 q1, [r0, #16]
409; CHECK-NEXT:    vstrw.32 q0, [r0]
410; CHECK-NEXT:    bx lr
411  %c = call <16 x i16> @llvm.umax.v16i16(<16 x i16> %a, <16 x i16> %b)
412  store <16 x i16> %c, ptr %p
413  ret void
414}
415
; Unsigned max on <2 x i32> (llvm.umax.v2i32). The expected output contains no
; vmax instruction. Both operands are first masked with vand against a
; vmov.i64 #0xffffffff constant. Each 64-bit lane is then compared as (b - a)
; with subs/sbcs, giving a csetm lo mask. The masks are packed into r1 with
; bfi, moved to p0, and vpsel picks q0 or q1.
416declare <2 x i32> @llvm.umax.v2i32(<2 x i32> %a, <2 x i32> %b) readnone
417
418define arm_aapcs_vfpcc <2 x i32> @umax2i32(<2 x i32> %a, <2 x i32> %b) {
419; CHECK-LABEL: umax2i32:
420; CHECK:       @ %bb.0:
421; CHECK-NEXT:    vmov.i64 q2, #0xffffffff
422; CHECK-NEXT:    vand q0, q0, q2
423; CHECK-NEXT:    vand q1, q1, q2
424; CHECK-NEXT:    vmov r0, r1, d0
425; CHECK-NEXT:    vmov r2, r3, d2
426; CHECK-NEXT:    subs r0, r2, r0
427; CHECK-NEXT:    sbcs.w r0, r3, r1
428; CHECK-NEXT:    mov.w r1, #0
429; CHECK-NEXT:    csetm r0, lo
430; CHECK-NEXT:    vmov r3, r2, d3
431; CHECK-NEXT:    bfi r1, r0, #0, #8
432; CHECK-NEXT:    vmov r0, r12, d1
433; CHECK-NEXT:    subs r0, r3, r0
434; CHECK-NEXT:    sbcs.w r0, r2, r12
435; CHECK-NEXT:    csetm r0, lo
436; CHECK-NEXT:    bfi r1, r0, #8, #8
437; CHECK-NEXT:    vmsr p0, r1
438; CHECK-NEXT:    vpsel q0, q0, q1
439; CHECK-NEXT:    bx lr
440  %c = call <2 x i32> @llvm.umax.v2i32(<2 x i32> %a, <2 x i32> %b)
441  ret <2 x i32> %c
442}
443
; Unsigned max on <4 x i32> (llvm.umax.v4i32). Expected to lower to one
; vmax.u32 on q0/q1.
444declare <4 x i32> @llvm.umax.v4i32(<4 x i32> %a, <4 x i32> %b) readnone
445
446define arm_aapcs_vfpcc <4 x i32> @umax4i32(<4 x i32> %a, <4 x i32> %b) {
447; CHECK-LABEL: umax4i32:
448; CHECK:       @ %bb.0:
449; CHECK-NEXT:    vmax.u32 q0, q0, q1
450; CHECK-NEXT:    bx lr
451  %c = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %a, <4 x i32> %b)
452  ret <4 x i32> %c
453}
454
; Unsigned max on <8 x i32> (llvm.umax.v8i32), with the result stored through
; %p. The expected code uses two vmax.u32 and two vstrw.32 stores, to
; [r0, #16] and [r0].
455declare <8 x i32> @llvm.umax.v8i32(<8 x i32> %a, <8 x i32> %b) readnone
456
457define arm_aapcs_vfpcc void @umax8i32(<8 x i32> %a, <8 x i32> %b, ptr %p) {
458; CHECK-LABEL: umax8i32:
459; CHECK:       @ %bb.0:
460; CHECK-NEXT:    vmax.u32 q1, q1, q3
461; CHECK-NEXT:    vmax.u32 q0, q0, q2
462; CHECK-NEXT:    vstrw.32 q1, [r0, #16]
463; CHECK-NEXT:    vstrw.32 q0, [r0]
464; CHECK-NEXT:    bx lr
465  %c = call <8 x i32>@llvm.umax.v8i32(<8 x i32> %a, <8 x i32> %b)
466  store <8 x i32> %c, ptr %p
467  ret void
468}
469
; Unsigned max on <1 x i64> (llvm.umax.v1i64). The expected code is the scalar
; i64 sequence (subs.w/sbcs.w + two csel lo) on r0-r3, wrapped in an 8-byte
; stack adjustment (sub sp / add sp).
470declare <1 x i64> @llvm.umax.v1i64(<1 x i64> %a, <1 x i64> %b) readnone
471
472define arm_aapcs_vfpcc <1 x i64> @umax1i64(<1 x i64> %a, <1 x i64> %b) {
473; CHECK-LABEL: umax1i64:
474; CHECK:       @ %bb.0:
475; CHECK-NEXT:    .pad #8
476; CHECK-NEXT:    sub sp, #8
477; CHECK-NEXT:    subs.w r12, r2, r0
478; CHECK-NEXT:    sbcs.w r12, r3, r1
479; CHECK-NEXT:    csel r0, r0, r2, lo
480; CHECK-NEXT:    csel r1, r1, r3, lo
481; CHECK-NEXT:    add sp, #8
482; CHECK-NEXT:    bx lr
483  %c = call <1 x i64> @llvm.umax.v1i64(<1 x i64> %a, <1 x i64> %b)
484  ret <1 x i64> %c
485}
486
; Unsigned max on <2 x i64> (llvm.umax.v2i64). The expected output has no
; vmax. Each 64-bit lane is moved to GPR pairs (d0/d2, then d1/d3) and
; compared as (b - a) with subs/sbcs, giving a csetm lo mask. The masks are
; packed into r1 with bfi, moved to p0, and vpsel picks q0 or q1.
487declare <2 x i64> @llvm.umax.v2i64(<2 x i64> %a, <2 x i64> %b) readnone
488
489define arm_aapcs_vfpcc <2 x i64> @umax2i64(<2 x i64> %a, <2 x i64> %b) {
490; CHECK-LABEL: umax2i64:
491; CHECK:       @ %bb.0:
492; CHECK-NEXT:    vmov r0, r1, d0
493; CHECK-NEXT:    vmov r2, r3, d2
494; CHECK-NEXT:    subs r0, r2, r0
495; CHECK-NEXT:    sbcs.w r0, r3, r1
496; CHECK-NEXT:    mov.w r1, #0
497; CHECK-NEXT:    csetm r0, lo
498; CHECK-NEXT:    vmov r3, r2, d3
499; CHECK-NEXT:    bfi r1, r0, #0, #8
500; CHECK-NEXT:    vmov r0, r12, d1
501; CHECK-NEXT:    subs r0, r3, r0
502; CHECK-NEXT:    sbcs.w r0, r2, r12
503; CHECK-NEXT:    csetm r0, lo
504; CHECK-NEXT:    bfi r1, r0, #8, #8
505; CHECK-NEXT:    vmsr p0, r1
506; CHECK-NEXT:    vpsel q0, q0, q1
507; CHECK-NEXT:    bx lr
508  %c = call <2 x i64> @llvm.umax.v2i64(<2 x i64> %a, <2 x i64> %b)
509  ret <2 x i64> %c
510}
511
; Unsigned max on <4 x i64> (llvm.umax.v4i64), with the result stored through
; %p. The expected code handles the two 128-bit halves one after the other.
; For q1/q3 and then q0/q2, each 64-bit lane is compared with subs/sbcs +
; csetm lo, the masks are packed with bfi and moved to p0, and vpsel picks
; the result. The halves are stored with vstrw.32 to [r0, #16] and [r0].
; r4 and lr are pushed on entry and restored by the final pop.
512declare <4 x i64> @llvm.umax.v4i64(<4 x i64> %a, <4 x i64> %b) readnone
513
514define arm_aapcs_vfpcc void @umax4i64(<4 x i64> %a, <4 x i64> %b, ptr %p) {
515; CHECK-LABEL: umax4i64:
516; CHECK:       @ %bb.0:
517; CHECK-NEXT:    .save {r4, lr}
518; CHECK-NEXT:    push {r4, lr}
519; CHECK-NEXT:    vmov r1, r12, d2
520; CHECK-NEXT:    vmov r3, r2, d6
521; CHECK-NEXT:    subs r1, r3, r1
522; CHECK-NEXT:    mov.w r3, #0
523; CHECK-NEXT:    sbcs.w r1, r2, r12
524; CHECK-NEXT:    vmov lr, r12, d3
525; CHECK-NEXT:    csetm r2, lo
526; CHECK-NEXT:    movs r1, #0
527; CHECK-NEXT:    bfi r3, r2, #0, #8
528; CHECK-NEXT:    vmov r2, r4, d7
529; CHECK-NEXT:    subs.w r2, r2, lr
530; CHECK-NEXT:    sbcs.w r2, r4, r12
531; CHECK-NEXT:    csetm r2, lo
532; CHECK-NEXT:    bfi r3, r2, #8, #8
533; CHECK-NEXT:    vmov r2, r12, d0
534; CHECK-NEXT:    vmsr p0, r3
535; CHECK-NEXT:    vmov r4, r3, d4
536; CHECK-NEXT:    vpsel q1, q1, q3
537; CHECK-NEXT:    vstrw.32 q1, [r0, #16]
538; CHECK-NEXT:    subs r2, r4, r2
539; CHECK-NEXT:    sbcs.w r2, r3, r12
540; CHECK-NEXT:    vmov r4, r3, d5
541; CHECK-NEXT:    csetm r2, lo
542; CHECK-NEXT:    bfi r1, r2, #0, #8
543; CHECK-NEXT:    vmov r2, r12, d1
544; CHECK-NEXT:    subs r2, r4, r2
545; CHECK-NEXT:    sbcs.w r2, r3, r12
546; CHECK-NEXT:    csetm r2, lo
547; CHECK-NEXT:    bfi r1, r2, #8, #8
548; CHECK-NEXT:    vmsr p0, r1
549; CHECK-NEXT:    vpsel q0, q0, q2
550; CHECK-NEXT:    vstrw.32 q0, [r0]
551; CHECK-NEXT:    pop {r4, pc}
552  %c = call <4 x i64> @llvm.umax.v4i64(<4 x i64> %a, <4 x i64> %b)
553  store <4 x i64> %c, ptr %p
554  ret void
555}
556
; Scalar signed min on i8 (llvm.smin.i8). The expected code sign-extends both
; operands with sxtb, then selects with cmp + csel lt.
557declare i8 @llvm.smin.i8(i8 %a, i8 %b) readnone
558
559define arm_aapcs_vfpcc i8 @smini8(i8 %a, i8 %b) {
560; CHECK-LABEL: smini8:
561; CHECK:       @ %bb.0:
562; CHECK-NEXT:    sxtb r1, r1
563; CHECK-NEXT:    sxtb r0, r0
564; CHECK-NEXT:    cmp r0, r1
565; CHECK-NEXT:    csel r0, r0, r1, lt
566; CHECK-NEXT:    bx lr
567  %c = call i8 @llvm.smin.i8(i8 %a, i8 %b)
568  ret i8 %c
569}
570
; Scalar signed min on i16 (llvm.smin.i16). Same shape as the i8 case, but the
; expected sign extension is sxth.
571declare i16 @llvm.smin.i16(i16 %a, i16 %b) readnone
572
573define arm_aapcs_vfpcc i16 @smini16(i16 %a, i16 %b) {
574; CHECK-LABEL: smini16:
575; CHECK:       @ %bb.0:
576; CHECK-NEXT:    sxth r1, r1
577; CHECK-NEXT:    sxth r0, r0
578; CHECK-NEXT:    cmp r0, r1
579; CHECK-NEXT:    csel r0, r0, r1, lt
580; CHECK-NEXT:    bx lr
581  %c = call i16 @llvm.smin.i16(i16 %a, i16 %b)
582  ret i16 %c
583}
584
; Scalar signed min on i32 (llvm.smin.i32). No extension is expected: just
; cmp followed by csel with the lt condition.
585declare i32 @llvm.smin.i32(i32 %a, i32 %b) readnone
586
587define arm_aapcs_vfpcc i32 @smini32(i32 %a, i32 %b) {
588; CHECK-LABEL: smini32:
589; CHECK:       @ %bb.0:
590; CHECK-NEXT:    cmp r0, r1
591; CHECK-NEXT:    csel r0, r0, r1, lt
592; CHECK-NEXT:    bx lr
593  %c = call i32 @llvm.smin.i32(i32 %a, i32 %b)
594  ret i32 %c
595}
596
; Scalar signed min on i64 (llvm.smin.i64). The expected code does a 64-bit
; compare as subs.w/sbcs.w of (r0:r1 - r2:r3) into scratch r12 (operand order
; reversed relative to the max tests), then two csel lt.
597declare i64 @llvm.smin.i64(i64 %a, i64 %b) readnone
598
599define arm_aapcs_vfpcc i64 @smini64(i64 %a, i64 %b) {
600; CHECK-LABEL: smini64:
601; CHECK:       @ %bb.0:
602; CHECK-NEXT:    subs.w r12, r0, r2
603; CHECK-NEXT:    sbcs.w r12, r1, r3
604; CHECK-NEXT:    csel r0, r0, r2, lt
605; CHECK-NEXT:    csel r1, r1, r3, lt
606; CHECK-NEXT:    bx lr
607  %c = call i64 @llvm.smin.i64(i64 %a, i64 %b)
608  ret i64 %c
609}
610
; Signed min on <8 x i8> (llvm.smin.v8i8). The expected code widens both
; operands with vmovlb.s8 and then uses a single vmin.s16.
611declare <8 x i8> @llvm.smin.v8i8(<8 x i8> %a, <8 x i8> %b) readnone
612
613define arm_aapcs_vfpcc <8 x i8> @smin8i8(<8 x i8> %a, <8 x i8> %b) {
614; CHECK-LABEL: smin8i8:
615; CHECK:       @ %bb.0:
616; CHECK-NEXT:    vmovlb.s8 q1, q1
617; CHECK-NEXT:    vmovlb.s8 q0, q0
618; CHECK-NEXT:    vmin.s16 q0, q0, q1
619; CHECK-NEXT:    bx lr
620  %c = call <8 x i8> @llvm.smin.v8i8(<8 x i8> %a, <8 x i8> %b)
621  ret <8 x i8> %c
622}
623
; Signed min on <16 x i8> (llvm.smin.v16i8). Expected to lower to one
; vmin.s8 on q0/q1.
624declare <16 x i8> @llvm.smin.v16i8(<16 x i8> %a, <16 x i8> %b) readnone
625
626define arm_aapcs_vfpcc <16 x i8> @smin16i8(<16 x i8> %a, <16 x i8> %b) {
627; CHECK-LABEL: smin16i8:
628; CHECK:       @ %bb.0:
629; CHECK-NEXT:    vmin.s8 q0, q0, q1
630; CHECK-NEXT:    bx lr
631  %c = call <16 x i8> @llvm.smin.v16i8(<16 x i8> %a, <16 x i8> %b)
632  ret <16 x i8> %c
633}
634
; Signed min on <32 x i8> (llvm.smin.v32i8), with the result stored through %p.
; The expected code uses two vmin.s8 and two vstrw.32 stores, to [r0, #16]
; and [r0].
635declare <32 x i8> @llvm.smin.v32i8(<32 x i8> %a, <32 x i8> %b) readnone
636
637define arm_aapcs_vfpcc void @smin32i8(<32 x i8> %a, <32 x i8> %b, ptr %p) {
638; CHECK-LABEL: smin32i8:
639; CHECK:       @ %bb.0:
640; CHECK-NEXT:    vmin.s8 q1, q1, q3
641; CHECK-NEXT:    vmin.s8 q0, q0, q2
642; CHECK-NEXT:    vstrw.32 q1, [r0, #16]
643; CHECK-NEXT:    vstrw.32 q0, [r0]
644; CHECK-NEXT:    bx lr
645  %c = call <32 x i8> @llvm.smin.v32i8(<32 x i8> %a, <32 x i8> %b)
646  store <32 x i8> %c, ptr %p
647  ret void
648}
649
; Signed min on <4 x i16> (llvm.smin.v4i16). The expected code widens both
; operands with vmovlb.s16 and then uses a single vmin.s32.
650declare <4 x i16> @llvm.smin.v4i16(<4 x i16> %a, <4 x i16> %b) readnone
651
652define arm_aapcs_vfpcc <4 x i16> @smin4i16(<4 x i16> %a, <4 x i16> %b) {
653; CHECK-LABEL: smin4i16:
654; CHECK:       @ %bb.0:
655; CHECK-NEXT:    vmovlb.s16 q1, q1
656; CHECK-NEXT:    vmovlb.s16 q0, q0
657; CHECK-NEXT:    vmin.s32 q0, q0, q1
658; CHECK-NEXT:    bx lr
659  %c = call <4 x i16> @llvm.smin.v4i16(<4 x i16> %a, <4 x i16> %b)
660  ret <4 x i16> %c
661}
662
; Signed min on <8 x i16> (llvm.smin.v8i16). Expected to lower to one
; vmin.s16 on q0/q1.
663declare <8 x i16> @llvm.smin.v8i16(<8 x i16> %a, <8 x i16> %b) readnone
664
665define arm_aapcs_vfpcc <8 x i16> @smin8i16(<8 x i16> %a, <8 x i16> %b) {
666; CHECK-LABEL: smin8i16:
667; CHECK:       @ %bb.0:
668; CHECK-NEXT:    vmin.s16 q0, q0, q1
669; CHECK-NEXT:    bx lr
670  %c = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %a, <8 x i16> %b)
671  ret <8 x i16> %c
672}
673
; Signed min on <16 x i16> (llvm.smin.v16i16), with the result stored through
; %p. The expected code uses two vmin.s16 and two vstrw.32 stores, to
; [r0, #16] and [r0].
674declare <16 x i16> @llvm.smin.v16i16(<16 x i16> %a, <16 x i16> %b) readnone
675
676define arm_aapcs_vfpcc void @smin16i16(<16 x i16> %a, <16 x i16> %b, ptr %p) {
677; CHECK-LABEL: smin16i16:
678; CHECK:       @ %bb.0:
679; CHECK-NEXT:    vmin.s16 q1, q1, q3
680; CHECK-NEXT:    vmin.s16 q0, q0, q2
681; CHECK-NEXT:    vstrw.32 q1, [r0, #16]
682; CHECK-NEXT:    vstrw.32 q0, [r0]
683; CHECK-NEXT:    bx lr
684  %c = call <16 x i16> @llvm.smin.v16i16(<16 x i16> %a, <16 x i16> %b)
685  store <16 x i16> %c, ptr %p
686  ret void
687}
688
; Signed min on <2 x i32> (llvm.smin.v2i32). The expected output contains no
; vmin instruction. Lanes s0/s2/s4/s6 are moved to GPRs and written back into
; q0/q1 together with their asr #31 words. Each lane pair is compared as
; (a - b) with subs/sbcs and turned into a mask with csetm lt. The masks are
; packed into r3 with bfi, moved to p0, and vpsel picks the result.
; r7 and lr are pushed on entry and restored by the final pop.
689declare <2 x i32> @llvm.smin.v2i32(<2 x i32> %a, <2 x i32> %b) readnone
690
691define arm_aapcs_vfpcc <2 x i32> @smin2i32(<2 x i32> %a, <2 x i32> %b) {
692; CHECK-LABEL: smin2i32:
693; CHECK:       @ %bb.0:
694; CHECK-NEXT:    .save {r7, lr}
695; CHECK-NEXT:    push {r7, lr}
696; CHECK-NEXT:    vmov r0, s6
697; CHECK-NEXT:    vmov r1, s4
698; CHECK-NEXT:    vmov q1[2], q1[0], r1, r0
699; CHECK-NEXT:    asrs r2, r0, #31
700; CHECK-NEXT:    asrs r3, r1, #31
701; CHECK-NEXT:    vmov q1[3], q1[1], r3, r2
702; CHECK-NEXT:    vmov r3, s0
703; CHECK-NEXT:    vmov r2, s2
704; CHECK-NEXT:    vmov q0[2], q0[0], r3, r2
705; CHECK-NEXT:    asr.w lr, r3, #31
706; CHECK-NEXT:    subs r3, r3, r1
707; CHECK-NEXT:    sbcs.w r1, lr, r1, asr #31
708; CHECK-NEXT:    mov.w r3, #0
709; CHECK-NEXT:    csetm r1, lt
710; CHECK-NEXT:    asr.w r12, r2, #31
711; CHECK-NEXT:    bfi r3, r1, #0, #8
712; CHECK-NEXT:    subs r1, r2, r0
713; CHECK-NEXT:    sbcs.w r0, r12, r0, asr #31
714; CHECK-NEXT:    vmov q0[3], q0[1], lr, r12
715; CHECK-NEXT:    csetm r0, lt
716; CHECK-NEXT:    bfi r3, r0, #8, #8
717; CHECK-NEXT:    vmsr p0, r3
718; CHECK-NEXT:    vpsel q0, q0, q1
719; CHECK-NEXT:    pop {r7, pc}
720  %c = call <2 x i32> @llvm.smin.v2i32(<2 x i32> %a, <2 x i32> %b)
721  ret <2 x i32> %c
722}
723
; Signed min on <4 x i32> (llvm.smin.v4i32). Expected to lower to one
; vmin.s32 on q0/q1.
724declare <4 x i32> @llvm.smin.v4i32(<4 x i32> %a, <4 x i32> %b) readnone
725
726define arm_aapcs_vfpcc <4 x i32> @smin4i32(<4 x i32> %a, <4 x i32> %b) {
727; CHECK-LABEL: smin4i32:
728; CHECK:       @ %bb.0:
729; CHECK-NEXT:    vmin.s32 q0, q0, q1
730; CHECK-NEXT:    bx lr
731  %c = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %a, <4 x i32> %b)
732  ret <4 x i32> %c
733}
734
; Signed min on <8 x i32> (llvm.smin.v8i32), with the result stored through %p.
; The expected code uses two vmin.s32 and two vstrw.32 stores, to [r0, #16]
; and [r0].
735declare <8 x i32> @llvm.smin.v8i32(<8 x i32> %a, <8 x i32> %b) readnone
736
737define arm_aapcs_vfpcc void @smin8i32(<8 x i32> %a, <8 x i32> %b, ptr %p) {
738; CHECK-LABEL: smin8i32:
739; CHECK:       @ %bb.0:
740; CHECK-NEXT:    vmin.s32 q1, q1, q3
741; CHECK-NEXT:    vmin.s32 q0, q0, q2
742; CHECK-NEXT:    vstrw.32 q1, [r0, #16]
743; CHECK-NEXT:    vstrw.32 q0, [r0]
744; CHECK-NEXT:    bx lr
745  %c = call <8 x i32>@llvm.smin.v8i32(<8 x i32> %a, <8 x i32> %b)
746  store <8 x i32> %c, ptr %p
747  ret void
748}
749
; Signed min on <1 x i64> (llvm.smin.v1i64). The expected code is the scalar
; i64 sequence (subs.w/sbcs.w of r0:r1 - r2:r3, then two csel lt), wrapped in
; an 8-byte stack adjustment (sub sp / add sp).
750declare <1 x i64> @llvm.smin.v1i64(<1 x i64> %a, <1 x i64> %b) readnone
751
752define arm_aapcs_vfpcc <1 x i64> @smin1i64(<1 x i64> %a, <1 x i64> %b) {
753; CHECK-LABEL: smin1i64:
754; CHECK:       @ %bb.0:
755; CHECK-NEXT:    .pad #8
756; CHECK-NEXT:    sub sp, #8
757; CHECK-NEXT:    subs.w r12, r0, r2
758; CHECK-NEXT:    sbcs.w r12, r1, r3
759; CHECK-NEXT:    csel r0, r0, r2, lt
760; CHECK-NEXT:    csel r1, r1, r3, lt
761; CHECK-NEXT:    add sp, #8
762; CHECK-NEXT:    bx lr
763  %c = call <1 x i64> @llvm.smin.v1i64(<1 x i64> %a, <1 x i64> %b)
764  ret <1 x i64> %c
765}
766
; Signed min on <2 x i64> (llvm.smin.v2i64). The expected output has no vmin.
; Each 64-bit lane is moved to GPR pairs (d2/d0, then d1/d3) and compared as
; (a - b) with subs/sbcs, giving a csetm lt mask. The masks are packed into
; r1 with bfi, moved to p0, and vpsel picks q0 or q1.
767declare <2 x i64> @llvm.smin.v2i64(<2 x i64> %a, <2 x i64> %b) readnone
768
769define arm_aapcs_vfpcc <2 x i64> @smin2i64(<2 x i64> %a, <2 x i64> %b) {
770; CHECK-LABEL: smin2i64:
771; CHECK:       @ %bb.0:
772; CHECK-NEXT:    vmov r0, r1, d2
773; CHECK-NEXT:    vmov r2, r3, d0
774; CHECK-NEXT:    subs r0, r2, r0
775; CHECK-NEXT:    sbcs.w r0, r3, r1
776; CHECK-NEXT:    mov.w r1, #0
777; CHECK-NEXT:    csetm r0, lt
778; CHECK-NEXT:    vmov r3, r2, d1
779; CHECK-NEXT:    bfi r1, r0, #0, #8
780; CHECK-NEXT:    vmov r0, r12, d3
781; CHECK-NEXT:    subs r0, r3, r0
782; CHECK-NEXT:    sbcs.w r0, r2, r12
783; CHECK-NEXT:    csetm r0, lt
784; CHECK-NEXT:    bfi r1, r0, #8, #8
785; CHECK-NEXT:    vmsr p0, r1
786; CHECK-NEXT:    vpsel q0, q0, q1
787; CHECK-NEXT:    bx lr
788  %c = call <2 x i64> @llvm.smin.v2i64(<2 x i64> %a, <2 x i64> %b)
789  ret <2 x i64> %c
790}
791
; Signed min on <4 x i64> (llvm.smin.v4i64), with the result stored through %p.
; The expected code handles the two 128-bit halves one after the other. For
; q1/q3 and then q0/q2, each 64-bit lane is compared as (a - b) with
; subs/sbcs + csetm lt, the masks are packed with bfi and moved to p0, and
; vpsel picks the result. The halves are stored with vstrw.32 to [r0, #16]
; and [r0]. r4 and lr are pushed on entry and restored by the final pop.
792declare <4 x i64> @llvm.smin.v4i64(<4 x i64> %a, <4 x i64> %b) readnone
793
794define arm_aapcs_vfpcc void @smin4i64(<4 x i64> %a, <4 x i64> %b, ptr %p) {
795; CHECK-LABEL: smin4i64:
796; CHECK:       @ %bb.0:
797; CHECK-NEXT:    .save {r4, lr}
798; CHECK-NEXT:    push {r4, lr}
799; CHECK-NEXT:    vmov r1, r12, d6
800; CHECK-NEXT:    vmov r3, r2, d2
801; CHECK-NEXT:    subs r1, r3, r1
802; CHECK-NEXT:    mov.w r3, #0
803; CHECK-NEXT:    sbcs.w r1, r2, r12
804; CHECK-NEXT:    vmov lr, r12, d7
805; CHECK-NEXT:    csetm r2, lt
806; CHECK-NEXT:    movs r1, #0
807; CHECK-NEXT:    bfi r3, r2, #0, #8
808; CHECK-NEXT:    vmov r2, r4, d3
809; CHECK-NEXT:    subs.w r2, r2, lr
810; CHECK-NEXT:    sbcs.w r2, r4, r12
811; CHECK-NEXT:    csetm r2, lt
812; CHECK-NEXT:    bfi r3, r2, #8, #8
813; CHECK-NEXT:    vmov r2, r12, d4
814; CHECK-NEXT:    vmsr p0, r3
815; CHECK-NEXT:    vmov r4, r3, d0
816; CHECK-NEXT:    vpsel q1, q1, q3
817; CHECK-NEXT:    vstrw.32 q1, [r0, #16]
818; CHECK-NEXT:    subs r2, r4, r2
819; CHECK-NEXT:    sbcs.w r2, r3, r12
820; CHECK-NEXT:    vmov r4, r3, d1
821; CHECK-NEXT:    csetm r2, lt
822; CHECK-NEXT:    bfi r1, r2, #0, #8
823; CHECK-NEXT:    vmov r2, r12, d5
824; CHECK-NEXT:    subs r2, r4, r2
825; CHECK-NEXT:    sbcs.w r2, r3, r12
826; CHECK-NEXT:    csetm r2, lt
827; CHECK-NEXT:    bfi r1, r2, #8, #8
828; CHECK-NEXT:    vmsr p0, r1
829; CHECK-NEXT:    vpsel q0, q0, q2
830; CHECK-NEXT:    vstrw.32 q0, [r0]
831; CHECK-NEXT:    pop {r4, pc}
832  %c = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %a, <4 x i64> %b)
833  store <4 x i64> %c, ptr %p
834  ret void
835}
836
; Scalar unsigned min on i8 (llvm.umin.i8). The expected code zero-extends
; both operands with uxtb, then selects with cmp + csel lo.
837declare i8 @llvm.umin.i8(i8 %a, i8 %b) readnone
838
839define arm_aapcs_vfpcc i8 @umini8(i8 %a, i8 %b) {
840; CHECK-LABEL: umini8:
841; CHECK:       @ %bb.0:
842; CHECK-NEXT:    uxtb r1, r1
843; CHECK-NEXT:    uxtb r0, r0
844; CHECK-NEXT:    cmp r0, r1
845; CHECK-NEXT:    csel r0, r0, r1, lo
846; CHECK-NEXT:    bx lr
847  %c = call i8 @llvm.umin.i8(i8 %a, i8 %b)
848  ret i8 %c
849}
850
; Scalar unsigned min on i16 (llvm.umin.i16). Same shape as the i8 case, but
; the expected zero extension is uxth.
851declare i16 @llvm.umin.i16(i16 %a, i16 %b) readnone
852
853define arm_aapcs_vfpcc i16 @umini16(i16 %a, i16 %b) {
854; CHECK-LABEL: umini16:
855; CHECK:       @ %bb.0:
856; CHECK-NEXT:    uxth r1, r1
857; CHECK-NEXT:    uxth r0, r0
858; CHECK-NEXT:    cmp r0, r1
859; CHECK-NEXT:    csel r0, r0, r1, lo
860; CHECK-NEXT:    bx lr
861  %c = call i16 @llvm.umin.i16(i16 %a, i16 %b)
862  ret i16 %c
863}
864
; Scalar unsigned min on i32 (llvm.umin.i32). No extension is expected: just
; cmp followed by csel with the lo condition.
865declare i32 @llvm.umin.i32(i32 %a, i32 %b) readnone
866
867define arm_aapcs_vfpcc i32 @umini32(i32 %a, i32 %b) {
868; CHECK-LABEL: umini32:
869; CHECK:       @ %bb.0:
870; CHECK-NEXT:    cmp r0, r1
871; CHECK-NEXT:    csel r0, r0, r1, lo
872; CHECK-NEXT:    bx lr
873  %c = call i32 @llvm.umin.i32(i32 %a, i32 %b)
874  ret i32 %c
875}
876
; Scalar unsigned min on i64 (llvm.umin.i64). The expected code does a 64-bit
; compare as subs.w/sbcs.w of (r0:r1 - r2:r3) into scratch r12, then selects
; the low and high words separately with csel lo.
877declare i64 @llvm.umin.i64(i64 %a, i64 %b) readnone
878
879define arm_aapcs_vfpcc i64 @umini64(i64 %a, i64 %b) {
880; CHECK-LABEL: umini64:
881; CHECK:       @ %bb.0:
882; CHECK-NEXT:    subs.w r12, r0, r2
883; CHECK-NEXT:    sbcs.w r12, r1, r3
884; CHECK-NEXT:    csel r0, r0, r2, lo
885; CHECK-NEXT:    csel r1, r1, r3, lo
886; CHECK-NEXT:    bx lr
887  %c = call i64 @llvm.umin.i64(i64 %a, i64 %b)
888  ret i64 %c
889}
890
; Unsigned min on <8 x i8> (llvm.umin.v8i8). The expected code widens both
; operands with vmovlb.u8 and then uses a single vmin.u16.
891declare <8 x i8> @llvm.umin.v8i8(<8 x i8> %a, <8 x i8> %b) readnone
892
893define arm_aapcs_vfpcc <8 x i8> @umin8i8(<8 x i8> %a, <8 x i8> %b) {
894; CHECK-LABEL: umin8i8:
895; CHECK:       @ %bb.0:
896; CHECK-NEXT:    vmovlb.u8 q1, q1
897; CHECK-NEXT:    vmovlb.u8 q0, q0
898; CHECK-NEXT:    vmin.u16 q0, q0, q1
899; CHECK-NEXT:    bx lr
900  %c = call <8 x i8> @llvm.umin.v8i8(<8 x i8> %a, <8 x i8> %b)
901  ret <8 x i8> %c
902}
903
; Unsigned min on <16 x i8> (llvm.umin.v16i8). Expected to lower to one
; vmin.u8 on q0/q1.
904declare <16 x i8> @llvm.umin.v16i8(<16 x i8> %a, <16 x i8> %b) readnone
905
906define arm_aapcs_vfpcc <16 x i8> @umin16i8(<16 x i8> %a, <16 x i8> %b) {
907; CHECK-LABEL: umin16i8:
908; CHECK:       @ %bb.0:
909; CHECK-NEXT:    vmin.u8 q0, q0, q1
910; CHECK-NEXT:    bx lr
911  %c = call <16 x i8> @llvm.umin.v16i8(<16 x i8> %a, <16 x i8> %b)
912  ret <16 x i8> %c
913}
914
; Unsigned min on <32 x i8> (llvm.umin.v32i8), with the result stored through
; %p. The expected code uses two vmin.u8 and two vstrw.32 stores, to
; [r0, #16] and [r0].
915declare <32 x i8> @llvm.umin.v32i8(<32 x i8> %a, <32 x i8> %b) readnone
916
917define arm_aapcs_vfpcc void @umin32i8(<32 x i8> %a, <32 x i8> %b, ptr %p) {
918; CHECK-LABEL: umin32i8:
919; CHECK:       @ %bb.0:
920; CHECK-NEXT:    vmin.u8 q1, q1, q3
921; CHECK-NEXT:    vmin.u8 q0, q0, q2
922; CHECK-NEXT:    vstrw.32 q1, [r0, #16]
923; CHECK-NEXT:    vstrw.32 q0, [r0]
924; CHECK-NEXT:    bx lr
925  %c = call <32 x i8> @llvm.umin.v32i8(<32 x i8> %a, <32 x i8> %b)
926  store <32 x i8> %c, ptr %p
927  ret void
928}
929
; Unsigned min on <4 x i16> (llvm.umin.v4i16). The expected code widens both
; operands with vmovlb.u16 and then uses a single vmin.u32.
930declare <4 x i16> @llvm.umin.v4i16(<4 x i16> %a, <4 x i16> %b) readnone
931
932define arm_aapcs_vfpcc <4 x i16> @umin4i16(<4 x i16> %a, <4 x i16> %b) {
933; CHECK-LABEL: umin4i16:
934; CHECK:       @ %bb.0:
935; CHECK-NEXT:    vmovlb.u16 q1, q1
936; CHECK-NEXT:    vmovlb.u16 q0, q0
937; CHECK-NEXT:    vmin.u32 q0, q0, q1
938; CHECK-NEXT:    bx lr
939  %c = call <4 x i16> @llvm.umin.v4i16(<4 x i16> %a, <4 x i16> %b)
940  ret <4 x i16> %c
941}
942
; Unsigned min on <8 x i16> (llvm.umin.v8i16). Expected to lower to one
; vmin.u16 on q0/q1.
943declare <8 x i16> @llvm.umin.v8i16(<8 x i16> %a, <8 x i16> %b) readnone
944
945define arm_aapcs_vfpcc <8 x i16> @umin8i16(<8 x i16> %a, <8 x i16> %b) {
946; CHECK-LABEL: umin8i16:
947; CHECK:       @ %bb.0:
948; CHECK-NEXT:    vmin.u16 q0, q0, q1
949; CHECK-NEXT:    bx lr
950  %c = call <8 x i16> @llvm.umin.v8i16(<8 x i16> %a, <8 x i16> %b)
951  ret <8 x i16> %c
952}
953
; umin on <16 x i16>. The result is stored through %p instead of being
; returned. The expected assembly uses two vmin.u16 instructions (q1 with q3,
; q0 with q2) and stores the two results to [r0, #16] and [r0].
954declare <16 x i16> @llvm.umin.v16i16(<16 x i16> %a, <16 x i16> %b) readnone
955
956define arm_aapcs_vfpcc void @umin16i16(<16 x i16> %a, <16 x i16> %b, ptr %p) {
957; CHECK-LABEL: umin16i16:
958; CHECK:       @ %bb.0:
959; CHECK-NEXT:    vmin.u16 q1, q1, q3
960; CHECK-NEXT:    vmin.u16 q0, q0, q2
961; CHECK-NEXT:    vstrw.32 q1, [r0, #16]
962; CHECK-NEXT:    vstrw.32 q0, [r0]
963; CHECK-NEXT:    bx lr
964  %c = call <16 x i16> @llvm.umin.v16i16(<16 x i16> %a, <16 x i16> %b)
965  store <16 x i16> %c, ptr %p
966  ret void
967}
968
; umin on <2 x i32>. The expected assembly contains no vector vmin. Instead:
;   - both operands are masked with 0xffffffff per 64-bit element
;     (vmov.i64 q2 followed by vand on q1 and q0);
;   - each element pair is moved to core registers and compared with
;     subs/sbcs, and csetm ... lo turns the unsigned "lower" result into a mask;
;   - bfi packs the two masks into r1, 8 bits per element (bit offsets 0 and 8);
;   - vmsr p0, r1 followed by vpsel q0, q0, q1 picks the result.
969declare <2 x i32> @llvm.umin.v2i32(<2 x i32> %a, <2 x i32> %b) readnone
970
971define arm_aapcs_vfpcc <2 x i32> @umin2i32(<2 x i32> %a, <2 x i32> %b) {
972; CHECK-LABEL: umin2i32:
973; CHECK:       @ %bb.0:
974; CHECK-NEXT:    vmov.i64 q2, #0xffffffff
975; CHECK-NEXT:    vand q1, q1, q2
976; CHECK-NEXT:    vand q0, q0, q2
977; CHECK-NEXT:    vmov r0, r1, d2
978; CHECK-NEXT:    vmov r2, r3, d0
979; CHECK-NEXT:    subs r0, r2, r0
980; CHECK-NEXT:    sbcs.w r0, r3, r1
981; CHECK-NEXT:    mov.w r1, #0
982; CHECK-NEXT:    csetm r0, lo
983; CHECK-NEXT:    vmov r3, r2, d1
984; CHECK-NEXT:    bfi r1, r0, #0, #8
985; CHECK-NEXT:    vmov r0, r12, d3
986; CHECK-NEXT:    subs r0, r3, r0
987; CHECK-NEXT:    sbcs.w r0, r2, r12
988; CHECK-NEXT:    csetm r0, lo
989; CHECK-NEXT:    bfi r1, r0, #8, #8
990; CHECK-NEXT:    vmsr p0, r1
991; CHECK-NEXT:    vpsel q0, q0, q1
992; CHECK-NEXT:    bx lr
993  %c = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %a, <2 x i32> %b)
994  ret <2 x i32> %c
995}
996
; umin on <4 x i32>. The expected assembly is a single vmin.u32 on q0/q1
; followed by the return.
997declare <4 x i32> @llvm.umin.v4i32(<4 x i32> %a, <4 x i32> %b) readnone
998
999define arm_aapcs_vfpcc <4 x i32> @umin4i32(<4 x i32> %a, <4 x i32> %b) {
1000; CHECK-LABEL: umin4i32:
1001; CHECK:       @ %bb.0:
1002; CHECK-NEXT:    vmin.u32 q0, q0, q1
1003; CHECK-NEXT:    bx lr
1004  %c = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %a, <4 x i32> %b)
1005  ret <4 x i32> %c
1006}
1007
; umin on <8 x i32>. The result is stored through %p instead of being
; returned. The expected assembly uses two vmin.u32 instructions (q1 with q3,
; q0 with q2) and stores the two results to [r0, #16] and [r0].
1008declare <8 x i32> @llvm.umin.v8i32(<8 x i32> %a, <8 x i32> %b) readnone
1009
1010define arm_aapcs_vfpcc void @umin8i32(<8 x i32> %a, <8 x i32> %b, ptr %p) {
1011; CHECK-LABEL: umin8i32:
1012; CHECK:       @ %bb.0:
1013; CHECK-NEXT:    vmin.u32 q1, q1, q3
1014; CHECK-NEXT:    vmin.u32 q0, q0, q2
1015; CHECK-NEXT:    vstrw.32 q1, [r0, #16]
1016; CHECK-NEXT:    vstrw.32 q0, [r0]
1017; CHECK-NEXT:    bx lr
1018  %c = call <8 x i32> @llvm.umin.v8i32(<8 x i32> %a, <8 x i32> %b)
1019  store <8 x i32> %c, ptr %p
1020  ret void
1021}
1022
; umin on <1 x i64>. The expected assembly is scalar: subs.w/sbcs.w compare
; r0:r1 with r2:r3, and two csel ... lo instructions pick each 32-bit half of
; the result.
; Review note: the expected sequence also reserves and releases 8 bytes of
; stack (.pad #8 / sub sp, #8 / add sp, #8) although no listed instruction
; accesses the stack; confirm whether that adjustment is intended.
1023declare <1 x i64> @llvm.umin.v1i64(<1 x i64> %a, <1 x i64> %b) readnone
1024
1025define arm_aapcs_vfpcc <1 x i64> @umin1i64(<1 x i64> %a, <1 x i64> %b) {
1026; CHECK-LABEL: umin1i64:
1027; CHECK:       @ %bb.0:
1028; CHECK-NEXT:    .pad #8
1029; CHECK-NEXT:    sub sp, #8
1030; CHECK-NEXT:    subs.w r12, r0, r2
1031; CHECK-NEXT:    sbcs.w r12, r1, r3
1032; CHECK-NEXT:    csel r0, r0, r2, lo
1033; CHECK-NEXT:    csel r1, r1, r3, lo
1034; CHECK-NEXT:    add sp, #8
1035; CHECK-NEXT:    bx lr
1036  %c = call <1 x i64> @llvm.umin.v1i64(<1 x i64> %a, <1 x i64> %b)
1037  ret <1 x i64> %c
1038}
1039
; umin on <2 x i64>. The expected assembly contains no vector vmin. Instead:
;   - each 64-bit element pair (d2 with d0, d3 with d1) is moved to core
;     registers and compared with subs/sbcs;
;   - csetm ... lo turns the unsigned "lower" result into a mask;
;   - bfi packs the two masks into r1, 8 bits per element (bit offsets 0 and 8);
;   - vmsr p0, r1 followed by vpsel q0, q0, q1 picks the result.
1040declare <2 x i64> @llvm.umin.v2i64(<2 x i64> %a, <2 x i64> %b) readnone
1041
1042define arm_aapcs_vfpcc <2 x i64> @umin2i64(<2 x i64> %a, <2 x i64> %b) {
1043; CHECK-LABEL: umin2i64:
1044; CHECK:       @ %bb.0:
1045; CHECK-NEXT:    vmov r0, r1, d2
1046; CHECK-NEXT:    vmov r2, r3, d0
1047; CHECK-NEXT:    subs r0, r2, r0
1048; CHECK-NEXT:    sbcs.w r0, r3, r1
1049; CHECK-NEXT:    mov.w r1, #0
1050; CHECK-NEXT:    csetm r0, lo
1051; CHECK-NEXT:    vmov r3, r2, d1
1052; CHECK-NEXT:    bfi r1, r0, #0, #8
1053; CHECK-NEXT:    vmov r0, r12, d3
1054; CHECK-NEXT:    subs r0, r3, r0
1055; CHECK-NEXT:    sbcs.w r0, r2, r12
1056; CHECK-NEXT:    csetm r0, lo
1057; CHECK-NEXT:    bfi r1, r0, #8, #8
1058; CHECK-NEXT:    vmsr p0, r1
1059; CHECK-NEXT:    vpsel q0, q0, q1
1060; CHECK-NEXT:    bx lr
1061  %c = call <2 x i64> @llvm.umin.v2i64(<2 x i64> %a, <2 x i64> %b)
1062  ret <2 x i64> %c
1063}
1064
; umin on <4 x i64>. The result is stored through %p instead of being
; returned. The expected assembly saves r4/lr (push {r4, lr} ... pop {r4, pc})
; and builds two separate predicate masks from subs/sbcs + csetm ... lo + bfi:
;   - r3 for the q1/q3 half (d2 with d6, d3 with d7), consumed by
;     vmsr p0, r3 / vpsel q1, q1, q3 and stored to [r0, #16];
;   - r1 for the q0/q2 half (d0 with d4, d1 with d5), consumed by
;     vmsr p0, r1 / vpsel q0, q0, q2 and stored to [r0].
; The instructions of the two halves are interleaved in the expected output.
1065declare <4 x i64> @llvm.umin.v4i64(<4 x i64> %a, <4 x i64> %b) readnone
1066
1067define arm_aapcs_vfpcc void @umin4i64(<4 x i64> %a, <4 x i64> %b, ptr %p) {
1068; CHECK-LABEL: umin4i64:
1069; CHECK:       @ %bb.0:
1070; CHECK-NEXT:    .save {r4, lr}
1071; CHECK-NEXT:    push {r4, lr}
1072; CHECK-NEXT:    vmov r1, r12, d6
1073; CHECK-NEXT:    vmov r3, r2, d2
1074; CHECK-NEXT:    subs r1, r3, r1
1075; CHECK-NEXT:    mov.w r3, #0
1076; CHECK-NEXT:    sbcs.w r1, r2, r12
1077; CHECK-NEXT:    vmov lr, r12, d7
1078; CHECK-NEXT:    csetm r2, lo
1079; CHECK-NEXT:    movs r1, #0
1080; CHECK-NEXT:    bfi r3, r2, #0, #8
1081; CHECK-NEXT:    vmov r2, r4, d3
1082; CHECK-NEXT:    subs.w r2, r2, lr
1083; CHECK-NEXT:    sbcs.w r2, r4, r12
1084; CHECK-NEXT:    csetm r2, lo
1085; CHECK-NEXT:    bfi r3, r2, #8, #8
1086; CHECK-NEXT:    vmov r2, r12, d4
1087; CHECK-NEXT:    vmsr p0, r3
1088; CHECK-NEXT:    vmov r4, r3, d0
1089; CHECK-NEXT:    vpsel q1, q1, q3
1090; CHECK-NEXT:    vstrw.32 q1, [r0, #16]
1091; CHECK-NEXT:    subs r2, r4, r2
1092; CHECK-NEXT:    sbcs.w r2, r3, r12
1093; CHECK-NEXT:    vmov r4, r3, d1
1094; CHECK-NEXT:    csetm r2, lo
1095; CHECK-NEXT:    bfi r1, r2, #0, #8
1096; CHECK-NEXT:    vmov r2, r12, d5
1097; CHECK-NEXT:    subs r2, r4, r2
1098; CHECK-NEXT:    sbcs.w r2, r3, r12
1099; CHECK-NEXT:    csetm r2, lo
1100; CHECK-NEXT:    bfi r1, r2, #8, #8
1101; CHECK-NEXT:    vmsr p0, r1
1102; CHECK-NEXT:    vpsel q0, q0, q2
1103; CHECK-NEXT:    vstrw.32 q0, [r0]
1104; CHECK-NEXT:    pop {r4, pc}
1105  %c = call <4 x i64> @llvm.umin.v4i64(<4 x i64> %a, <4 x i64> %b)
1106  store <4 x i64> %c, ptr %p
1107  ret void
1108}
1109