xref: /llvm-project/llvm/test/CodeGen/Thumb2/mve-saturating-arith.ll (revision 7b3bbd83c0c24087072ec5b22a76799ab31f87d5)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
3
; Signed saturating add of two <16 x i8> vectors through @llvm.sadd.sat.v16i8.
; The autogenerated assertions below expect the whole body to lower to a
; single `vqadd.s8 q0, q0, q1` followed by the `bx lr` return.
4define arm_aapcs_vfpcc <16 x i8> @sadd_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
5; CHECK-LABEL: sadd_int8_t:
6; CHECK:       @ %bb.0: @ %entry
7; CHECK-NEXT:    vqadd.s8 q0, q0, q1
8; CHECK-NEXT:    bx lr
9entry:
10  %0 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %src1, <16 x i8> %src2)
11  ret <16 x i8> %0
12}
13
; Signed saturating add of two <8 x i16> vectors through @llvm.sadd.sat.v8i16.
; The autogenerated assertions below expect the whole body to lower to a
; single `vqadd.s16 q0, q0, q1` followed by the `bx lr` return.
14define arm_aapcs_vfpcc <8 x i16> @sadd_int16_t(<8 x i16> %src1, <8 x i16> %src2) {
15; CHECK-LABEL: sadd_int16_t:
16; CHECK:       @ %bb.0: @ %entry
17; CHECK-NEXT:    vqadd.s16 q0, q0, q1
18; CHECK-NEXT:    bx lr
19entry:
20  %0 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %src1, <8 x i16> %src2)
21  ret <8 x i16> %0
22}
23
; Signed saturating add of two <4 x i32> vectors through @llvm.sadd.sat.v4i32.
; The autogenerated assertions below expect the whole body to lower to a
; single `vqadd.s32 q0, q0, q1` followed by the `bx lr` return.
24define arm_aapcs_vfpcc <4 x i32> @sadd_int32_t(<4 x i32> %src1, <4 x i32> %src2) {
25; CHECK-LABEL: sadd_int32_t:
26; CHECK:       @ %bb.0: @ %entry
27; CHECK-NEXT:    vqadd.s32 q0, q0, q1
28; CHECK-NEXT:    bx lr
29entry:
30  %0 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %src1, <4 x i32> %src2)
31  ret <4 x i32> %0
32}
33
; Signed saturating add of two <2 x i64> vectors through @llvm.sadd.sat.v2i64.
; Unlike the 8/16/32-bit variants in this file, the expected output contains
; no vqadd; the assertions pin a scalarised expansion instead
; (NOTE(review): presumably because there is no 64-bit-lane vqadd to select --
; confirm against the MVE instruction set).
; Shape of the expected sequence, as recorded below:
;   * each 64-bit lane is moved into a GPR pair (vmov rX, rY, dN) and added
;     with an adds/adc pair;
;   * a per-lane overflow flag is formed with subs/sbcs + `cset ..., lt`, and
;     flipped (`it mi` / `eormi ..., #1`) when the high word read from d2/d3
;     is negative;
;   * each flag is negated and inserted as an 8-bit field into r2 (bfi at bit
;     offsets 0 and 8), which is then written to p0 with vmsr;
;   * the value used for overflowing lanes is built from `asr #31` of each
;     sum's high word, XORed (veor) with the constant-pool entry .LCPI3_0
;     ({0, 0x80000000, 0, 0x80000000});
;   * vpsel picks between that value and the plain sum held in q0.
34define arm_aapcs_vfpcc <2 x i64> @sadd_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
35; CHECK-LABEL: sadd_int64_t:
36; CHECK:       @ %bb.0: @ %entry
37; CHECK-NEXT:    .save {r4, r5, r7, lr}
38; CHECK-NEXT:    push {r4, r5, r7, lr}
39; CHECK-NEXT:    vmov r0, r1, d2
40; CHECK-NEXT:    vmov r2, r3, d0
41; CHECK-NEXT:    adds.w r12, r2, r0
42; CHECK-NEXT:    vmov r0, r4, d1
43; CHECK-NEXT:    adc.w lr, r3, r1
44; CHECK-NEXT:    subs.w r2, r12, r2
45; CHECK-NEXT:    sbcs.w r2, lr, r3
46; CHECK-NEXT:    cset r2, lt
47; CHECK-NEXT:    cmp r1, #0
48; CHECK-NEXT:    it mi
49; CHECK-NEXT:    eormi r2, r2, #1
50; CHECK-NEXT:    rsbs r1, r2, #0
51; CHECK-NEXT:    movs r2, #0
52; CHECK-NEXT:    bfi r2, r1, #0, #8
53; CHECK-NEXT:    vmov r1, r3, d3
54; CHECK-NEXT:    adds r1, r1, r0
55; CHECK-NEXT:    adc.w r5, r4, r3
56; CHECK-NEXT:    subs r0, r1, r0
57; CHECK-NEXT:    sbcs.w r0, r5, r4
58; CHECK-NEXT:    vmov q0[2], q0[0], r12, r1
59; CHECK-NEXT:    cset r0, lt
60; CHECK-NEXT:    cmp r3, #0
61; CHECK-NEXT:    it mi
62; CHECK-NEXT:    eormi r0, r0, #1
63; CHECK-NEXT:    asr.w r1, lr, #31
64; CHECK-NEXT:    rsbs r0, r0, #0
65; CHECK-NEXT:    vmov q0[3], q0[1], lr, r5
66; CHECK-NEXT:    bfi r2, r0, #8, #8
67; CHECK-NEXT:    asrs r0, r5, #31
68; CHECK-NEXT:    vmov q1[2], q1[0], r1, r0
69; CHECK-NEXT:    vmsr p0, r2
70; CHECK-NEXT:    vmov q1[3], q1[1], r1, r0
71; CHECK-NEXT:    adr r0, .LCPI3_0
72; CHECK-NEXT:    vldrw.u32 q2, [r0]
73; CHECK-NEXT:    veor q1, q1, q2
74; CHECK-NEXT:    vpsel q0, q1, q0
75; CHECK-NEXT:    pop {r4, r5, r7, pc}
76; CHECK-NEXT:    .p2align 4
77; CHECK-NEXT:  @ %bb.1:
78; CHECK-NEXT:  .LCPI3_0:
79; CHECK-NEXT:    .long 0 @ 0x0
80; CHECK-NEXT:    .long 2147483648 @ 0x80000000
81; CHECK-NEXT:    .long 0 @ 0x0
82; CHECK-NEXT:    .long 2147483648 @ 0x80000000
83entry:
84  %0 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> %src1, <2 x i64> %src2)
85  ret <2 x i64> %0
86}
87
; Unsigned saturating add of two <16 x i8> vectors through
; @llvm.uadd.sat.v16i8. The autogenerated assertions below expect the whole
; body to lower to a single `vqadd.u8 q0, q0, q1` followed by `bx lr`.
88define arm_aapcs_vfpcc <16 x i8> @uadd_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
89; CHECK-LABEL: uadd_int8_t:
90; CHECK:       @ %bb.0: @ %entry
91; CHECK-NEXT:    vqadd.u8 q0, q0, q1
92; CHECK-NEXT:    bx lr
93entry:
94  %0 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %src1, <16 x i8> %src2)
95  ret <16 x i8> %0
96}
97
; Unsigned saturating add of two <8 x i16> vectors through
; @llvm.uadd.sat.v8i16. The autogenerated assertions below expect the whole
; body to lower to a single `vqadd.u16 q0, q0, q1` followed by `bx lr`.
98define arm_aapcs_vfpcc <8 x i16> @uadd_int16_t(<8 x i16> %src1, <8 x i16> %src2) {
99; CHECK-LABEL: uadd_int16_t:
100; CHECK:       @ %bb.0: @ %entry
101; CHECK-NEXT:    vqadd.u16 q0, q0, q1
102; CHECK-NEXT:    bx lr
103entry:
104  %0 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %src1, <8 x i16> %src2)
105  ret <8 x i16> %0
106}
107
; Unsigned saturating add of two <4 x i32> vectors through
; @llvm.uadd.sat.v4i32. The autogenerated assertions below expect the whole
; body to lower to a single `vqadd.u32 q0, q0, q1` followed by `bx lr`.
108define arm_aapcs_vfpcc <4 x i32> @uadd_int32_t(<4 x i32> %src1, <4 x i32> %src2) {
109; CHECK-LABEL: uadd_int32_t:
110; CHECK:       @ %bb.0: @ %entry
111; CHECK-NEXT:    vqadd.u32 q0, q0, q1
112; CHECK-NEXT:    bx lr
113entry:
114  %0 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %src1, <4 x i32> %src2)
115  ret <4 x i32> %0
116}
117
; Unsigned saturating add of two <2 x i64> vectors through
; @llvm.uadd.sat.v2i64. The expected output contains no vqadd; the assertions
; pin a scalarised expansion instead
; (NOTE(review): presumably because there is no 64-bit-lane vqadd to select --
; confirm against the MVE instruction set).
; Shape of the expected sequence, as recorded below:
;   * each 64-bit lane is moved into a GPR pair and added with adds/adc;
;   * the sum is compared against one of the original lane values with
;     subs/sbcs, and `csetm ..., lo` materialises a per-lane mask from the
;     unsigned-lower result;
;   * the sums are assembled in q1, the masks in q0, and `vorr q0, q1, q0`
;     merges them into the returned vector.
118define arm_aapcs_vfpcc <2 x i64> @uadd_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
119; CHECK-LABEL: uadd_int64_t:
120; CHECK:       @ %bb.0: @ %entry
121; CHECK-NEXT:    .save {r4, r5, r7, lr}
122; CHECK-NEXT:    push {r4, r5, r7, lr}
123; CHECK-NEXT:    vmov r0, r1, d3
124; CHECK-NEXT:    vmov r2, r3, d1
125; CHECK-NEXT:    adds r5, r2, r0
126; CHECK-NEXT:    adc.w lr, r3, r1
127; CHECK-NEXT:    subs r2, r5, r2
128; CHECK-NEXT:    sbcs.w r2, lr, r3
129; CHECK-NEXT:    vmov r3, r12, d2
130; CHECK-NEXT:    vmov r1, r4, d0
131; CHECK-NEXT:    csetm r2, lo
132; CHECK-NEXT:    adds r3, r3, r1
133; CHECK-NEXT:    adc.w r0, r4, r12
134; CHECK-NEXT:    subs r1, r3, r1
135; CHECK-NEXT:    sbcs.w r1, r0, r4
136; CHECK-NEXT:    vmov q1[2], q1[0], r3, r5
137; CHECK-NEXT:    csetm r1, lo
138; CHECK-NEXT:    vmov q1[3], q1[1], r0, lr
139; CHECK-NEXT:    vmov q0[2], q0[0], r1, r2
140; CHECK-NEXT:    vmov q0[3], q0[1], r1, r2
141; CHECK-NEXT:    vorr q0, q1, q0
142; CHECK-NEXT:    pop {r4, r5, r7, pc}
143entry:
144  %0 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> %src1, <2 x i64> %src2)
145  ret <2 x i64> %0
146}
147
148
; Signed saturating subtract of two <16 x i8> vectors through
; @llvm.ssub.sat.v16i8. The autogenerated assertions below expect the whole
; body to lower to a single `vqsub.s8 q0, q0, q1` followed by `bx lr`.
149define arm_aapcs_vfpcc <16 x i8> @ssub_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
150; CHECK-LABEL: ssub_int8_t:
151; CHECK:       @ %bb.0: @ %entry
152; CHECK-NEXT:    vqsub.s8 q0, q0, q1
153; CHECK-NEXT:    bx lr
154entry:
155  %0 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %src1, <16 x i8> %src2)
156  ret <16 x i8> %0
157}
158
; Signed saturating subtract of two <8 x i16> vectors through
; @llvm.ssub.sat.v8i16. The autogenerated assertions below expect the whole
; body to lower to a single `vqsub.s16 q0, q0, q1` followed by `bx lr`.
159define arm_aapcs_vfpcc <8 x i16> @ssub_int16_t(<8 x i16> %src1, <8 x i16> %src2) {
160; CHECK-LABEL: ssub_int16_t:
161; CHECK:       @ %bb.0: @ %entry
162; CHECK-NEXT:    vqsub.s16 q0, q0, q1
163; CHECK-NEXT:    bx lr
164entry:
165  %0 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %src1, <8 x i16> %src2)
166  ret <8 x i16> %0
167}
168
; Signed saturating subtract of two <4 x i32> vectors through
; @llvm.ssub.sat.v4i32. The autogenerated assertions below expect the whole
; body to lower to a single `vqsub.s32 q0, q0, q1` followed by `bx lr`.
169define arm_aapcs_vfpcc <4 x i32> @ssub_int32_t(<4 x i32> %src1, <4 x i32> %src2) {
170; CHECK-LABEL: ssub_int32_t:
171; CHECK:       @ %bb.0: @ %entry
172; CHECK-NEXT:    vqsub.s32 q0, q0, q1
173; CHECK-NEXT:    bx lr
174entry:
175  %0 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %src1, <4 x i32> %src2)
176  ret <4 x i32> %0
177}
178
; Signed saturating subtract of two <2 x i64> vectors through
; @llvm.ssub.sat.v2i64. The expected output contains no vqsub; the assertions
; pin a scalarised expansion instead
; (NOTE(review): presumably because there is no 64-bit-lane vqsub to select --
; confirm against the MVE instruction set).
; Shape of the expected sequence, as recorded below:
;   * each 64-bit lane is moved into a GPR pair and subtracted with subs/sbc;
;   * a per-lane flag is formed with subs/sbcs + `cset ..., lt`, then flipped
;     (`it lt` / `eorlt ..., #1`) depending on a 64-bit rsbs/sbcs comparison
;     of zero (r1) against the lane read from d2/d3;
;   * each flag is negated and inserted as an 8-bit field into r1 (bfi at bit
;     offsets 0 and 8), which is then written to p0 with vmsr;
;   * the value used for overflowing lanes is built from `asr #31` of each
;     difference's high word, XORed (veor) with the constant-pool entry
;     .LCPI11_0 ({0, 0x80000000, 0, 0x80000000});
;   * vpsel picks between that value and the plain difference held in q0.
179define arm_aapcs_vfpcc <2 x i64> @ssub_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
180; CHECK-LABEL: ssub_int64_t:
181; CHECK:       @ %bb.0: @ %entry
182; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
183; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
184; CHECK-NEXT:    vmov r2, r3, d2
185; CHECK-NEXT:    vmov r1, r0, d0
186; CHECK-NEXT:    vmov r4, r5, d1
187; CHECK-NEXT:    subs.w r12, r1, r2
188; CHECK-NEXT:    sbc.w lr, r0, r3
189; CHECK-NEXT:    subs.w r1, r12, r1
190; CHECK-NEXT:    sbcs.w r0, lr, r0
191; CHECK-NEXT:    mov.w r1, #0
192; CHECK-NEXT:    cset r0, lt
193; CHECK-NEXT:    rsbs r2, r2, #0
194; CHECK-NEXT:    sbcs.w r2, r1, r3
195; CHECK-NEXT:    it lt
196; CHECK-NEXT:    eorlt r0, r0, #1
197; CHECK-NEXT:    vmov r2, r3, d3
198; CHECK-NEXT:    rsbs r0, r0, #0
199; CHECK-NEXT:    subs r6, r4, r2
200; CHECK-NEXT:    sbc.w r7, r5, r3
201; CHECK-NEXT:    subs r4, r6, r4
202; CHECK-NEXT:    sbcs.w r4, r7, r5
203; CHECK-NEXT:    vmov q0[2], q0[0], r12, r6
204; CHECK-NEXT:    cset r4, lt
205; CHECK-NEXT:    rsbs r2, r2, #0
206; CHECK-NEXT:    sbcs.w r2, r1, r3
207; CHECK-NEXT:    bfi r1, r0, #0, #8
208; CHECK-NEXT:    it lt
209; CHECK-NEXT:    eorlt r4, r4, #1
210; CHECK-NEXT:    rsbs r0, r4, #0
211; CHECK-NEXT:    bfi r1, r0, #8, #8
212; CHECK-NEXT:    asrs r0, r7, #31
213; CHECK-NEXT:    vmsr p0, r1
214; CHECK-NEXT:    asr.w r1, lr, #31
215; CHECK-NEXT:    vmov q1[2], q1[0], r1, r0
216; CHECK-NEXT:    vmov q0[3], q0[1], lr, r7
217; CHECK-NEXT:    vmov q1[3], q1[1], r1, r0
218; CHECK-NEXT:    adr r0, .LCPI11_0
219; CHECK-NEXT:    vldrw.u32 q2, [r0]
220; CHECK-NEXT:    veor q1, q1, q2
221; CHECK-NEXT:    vpsel q0, q1, q0
222; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
223; CHECK-NEXT:    .p2align 4
224; CHECK-NEXT:  @ %bb.1:
225; CHECK-NEXT:  .LCPI11_0:
226; CHECK-NEXT:    .long 0 @ 0x0
227; CHECK-NEXT:    .long 2147483648 @ 0x80000000
228; CHECK-NEXT:    .long 0 @ 0x0
229; CHECK-NEXT:    .long 2147483648 @ 0x80000000
230entry:
231  %0 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> %src1, <2 x i64> %src2)
232  ret <2 x i64> %0
233}
234
; Unsigned saturating subtract of two <16 x i8> vectors through
; @llvm.usub.sat.v16i8. The autogenerated assertions below expect the whole
; body to lower to a single `vqsub.u8 q0, q0, q1` followed by `bx lr`.
235define arm_aapcs_vfpcc <16 x i8> @usub_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
236; CHECK-LABEL: usub_int8_t:
237; CHECK:       @ %bb.0: @ %entry
238; CHECK-NEXT:    vqsub.u8 q0, q0, q1
239; CHECK-NEXT:    bx lr
240entry:
241  %0 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %src1, <16 x i8> %src2)
242  ret <16 x i8> %0
243}
244
; Unsigned saturating subtract of two <8 x i16> vectors through
; @llvm.usub.sat.v8i16. The autogenerated assertions below expect the whole
; body to lower to a single `vqsub.u16 q0, q0, q1` followed by `bx lr`.
245define arm_aapcs_vfpcc <8 x i16> @usub_int16_t(<8 x i16> %src1, <8 x i16> %src2) {
246; CHECK-LABEL: usub_int16_t:
247; CHECK:       @ %bb.0: @ %entry
248; CHECK-NEXT:    vqsub.u16 q0, q0, q1
249; CHECK-NEXT:    bx lr
250entry:
251  %0 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %src1, <8 x i16> %src2)
252  ret <8 x i16> %0
253}
254
; Unsigned saturating subtract of two <4 x i32> vectors through
; @llvm.usub.sat.v4i32. The autogenerated assertions below expect the whole
; body to lower to a single `vqsub.u32 q0, q0, q1` followed by `bx lr`.
255define arm_aapcs_vfpcc <4 x i32> @usub_int32_t(<4 x i32> %src1, <4 x i32> %src2) {
256; CHECK-LABEL: usub_int32_t:
257; CHECK:       @ %bb.0: @ %entry
258; CHECK-NEXT:    vqsub.u32 q0, q0, q1
259; CHECK-NEXT:    bx lr
260entry:
261  %0 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %src1, <4 x i32> %src2)
262  ret <4 x i32> %0
263}
264
; Unsigned saturating subtract of two <2 x i64> vectors through
; @llvm.usub.sat.v2i64. The expected output contains no vqsub; the assertions
; pin a scalarised expansion instead
; (NOTE(review): presumably because there is no 64-bit-lane vqsub to select --
; confirm against the MVE instruction set).
; Shape of the expected sequence, as recorded below:
;   * each 64-bit lane is moved into a GPR pair and subtracted with subs/sbc;
;   * the original lane value is compared against the difference with
;     subs/sbcs, and `csetm ..., lo` materialises a per-lane mask from the
;     unsigned-lower result;
;   * the differences are assembled in q1, the masks in q0, and
;     `vbic q0, q1, q0` clears the masked bits of the differences to form the
;     returned vector.
265define arm_aapcs_vfpcc <2 x i64> @usub_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
266; CHECK-LABEL: usub_int64_t:
267; CHECK:       @ %bb.0: @ %entry
268; CHECK-NEXT:    .save {r4, r5, r7, lr}
269; CHECK-NEXT:    push {r4, r5, r7, lr}
270; CHECK-NEXT:    vmov r0, r1, d3
271; CHECK-NEXT:    vmov r2, r3, d1
272; CHECK-NEXT:    subs r5, r2, r0
273; CHECK-NEXT:    sbc.w lr, r3, r1
274; CHECK-NEXT:    subs r2, r2, r5
275; CHECK-NEXT:    sbcs.w r2, r3, lr
276; CHECK-NEXT:    vmov r3, r12, d2
277; CHECK-NEXT:    vmov r1, r4, d0
278; CHECK-NEXT:    csetm r2, lo
279; CHECK-NEXT:    subs r3, r1, r3
280; CHECK-NEXT:    sbc.w r0, r4, r12
281; CHECK-NEXT:    subs r1, r1, r3
282; CHECK-NEXT:    sbcs.w r1, r4, r0
283; CHECK-NEXT:    vmov q1[2], q1[0], r3, r5
284; CHECK-NEXT:    csetm r1, lo
285; CHECK-NEXT:    vmov q1[3], q1[1], r0, lr
286; CHECK-NEXT:    vmov q0[2], q0[0], r1, r2
287; CHECK-NEXT:    vmov q0[3], q0[1], r1, r2
288; CHECK-NEXT:    vbic q0, q1, q0
289; CHECK-NEXT:    pop {r4, r5, r7, pc}
290entry:
291  %0 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> %src1, <2 x i64> %src2)
292  ret <2 x i64> %0
293}
294
295
; Declarations of the saturating-arithmetic intrinsics called by the test
; functions in this file. There are four operations (sadd, uadd, ssub, usub),
; each declared for the four 128-bit vector types v16i8, v8i16, v4i32 and
; v2i64. Every intrinsic takes two vectors of one type and returns a vector
; of that same type.
296declare <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %src1, <16 x i8> %src2)
297declare <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %src1, <8 x i16> %src2)
298declare <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %src1, <4 x i32> %src2)
299declare <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> %src1, <2 x i64> %src2)
300declare <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %src1, <16 x i8> %src2)
301declare <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %src1, <8 x i16> %src2)
302declare <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %src1, <4 x i32> %src2)
303declare <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> %src1, <2 x i64> %src2)
304declare <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %src1, <16 x i8> %src2)
305declare <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %src1, <8 x i16> %src2)
306declare <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %src1, <4 x i32> %src2)
307declare <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> %src1, <2 x i64> %src2)
308declare <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %src1, <16 x i8> %src2)
309declare <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %src1, <8 x i16> %src2)
310declare <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %src1, <4 x i32> %src2)
311declare <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> %src1, <2 x i64> %src2)
312