; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVE
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVEFP

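; Basic vector add/sub/mul tests. Element sizes MVE supports directly
; (i8/i16/i32, plus f16/f32 when +mve.fp is enabled) should each select a
; single q-register instruction; i64 and f64 lanes have no MVE equivalent
; and are expanded below.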
define arm_aapcs_vfpcc <16 x i8> @add_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: add_int8_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vadd.i8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = add <16 x i8> %src1, %src2
  ret <16 x i8> %0
}

define arm_aapcs_vfpcc <8 x i16> @add_int16_t(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: add_int16_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vadd.i16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = add <8 x i16> %src1, %src2
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <4 x i32> @add_int32_t(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: add_int32_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vadd.i32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = add nsw <4 x i32> %src1, %src2
  ret <4 x i32> %0
}

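; MVE has no 64-bit element add, so the <2 x i64> case is moved out to core
; registers and added lane by lane with adds/adc carry chains.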
define arm_aapcs_vfpcc <2 x i64> @add_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: add_int64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    vmov lr, r12, d3
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    vmov r1, r0, d2
; CHECK-NEXT:    vmov r4, r5, d0
; CHECK-NEXT:    adds.w r2, r2, lr
; CHECK-NEXT:    adc.w r3, r3, r12
; CHECK-NEXT:    adds r1, r1, r4
; CHECK-NEXT:    adcs r0, r5
; CHECK-NEXT:    vmov q0[2], q0[0], r1, r2
; CHECK-NEXT:    vmov q0[3], q0[1], r0, r3
; CHECK-NEXT:    pop {r4, r5, r7, pc}
entry:
  %0 = add nsw <2 x i64> %src1, %src2
  ret <2 x i64> %0
}

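; Without +mve.fp there are no vector FP instructions: the CHECK-MVE run
; (integer-only MVE with scalar fullfp16) adds each lane with a scalar VFP
; vadd.f32, while CHECK-MVEFP uses a single vector vadd.f32.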
define arm_aapcs_vfpcc <4 x float> @add_float32_t(<4 x float> %src1, <4 x float> %src2) {
; CHECK-MVE-LABEL: add_float32_t:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vadd.f32 s3, s7, s3
; CHECK-MVE-NEXT:    vadd.f32 s2, s6, s2
; CHECK-MVE-NEXT:    vadd.f32 s1, s5, s1
; CHECK-MVE-NEXT:    vadd.f32 s0, s4, s0
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: add_float32_t:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vadd.f32 q0, q1, q0
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %0 = fadd nnan ninf nsz <4 x float> %src2, %src1
  ret <4 x float> %0
}

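; The scalar f16 fallback also has to unpack lanes: each s register holds two
; halves, so the top half is extracted with vmovx.f16, operated on, and
; written back with vins.f16.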
define arm_aapcs_vfpcc <8 x half> @add_float16_t(<8 x half> %src1, <8 x half> %src2) {
; CHECK-MVE-LABEL: add_float16_t:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vmovx.f16 s8, s0
; CHECK-MVE-NEXT:    vmovx.f16 s10, s4
; CHECK-MVE-NEXT:    vadd.f16 s0, s4, s0
; CHECK-MVE-NEXT:    vadd.f16 s8, s10, s8
; CHECK-MVE-NEXT:    vins.f16 s0, s8
; CHECK-MVE-NEXT:    vmovx.f16 s4, s1
; CHECK-MVE-NEXT:    vmovx.f16 s8, s5
; CHECK-MVE-NEXT:    vadd.f16 s1, s5, s1
; CHECK-MVE-NEXT:    vadd.f16 s4, s8, s4
; CHECK-MVE-NEXT:    vmovx.f16 s8, s6
; CHECK-MVE-NEXT:    vins.f16 s1, s4
; CHECK-MVE-NEXT:    vmovx.f16 s4, s2
; CHECK-MVE-NEXT:    vadd.f16 s2, s6, s2
; CHECK-MVE-NEXT:    vadd.f16 s4, s8, s4
; CHECK-MVE-NEXT:    vins.f16 s2, s4
; CHECK-MVE-NEXT:    vmovx.f16 s4, s3
; CHECK-MVE-NEXT:    vmovx.f16 s6, s7
; CHECK-MVE-NEXT:    vadd.f16 s3, s7, s3
; CHECK-MVE-NEXT:    vadd.f16 s4, s6, s4
; CHECK-MVE-NEXT:    vins.f16 s3, s4
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: add_float16_t:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vadd.f16 q0, q1, q0
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %0 = fadd nnan ninf nsz <8 x half> %src2, %src1
  ret <8 x half> %0
}

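; Neither RUN line enables double-precision hardware FP, so each f64 lane
; becomes a call to the __aeabi_dadd libcall.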
define arm_aapcs_vfpcc <2 x double> @add_float64_t(<2 x double> %src1, <2 x double> %src2) {
; CHECK-LABEL: add_float64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
; CHECK-NEXT:    vpush {d8, d9, d10, d11}
; CHECK-NEXT:    vmov q5, q1
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vmov r0, r1, d11
; CHECK-NEXT:    vmov r2, r3, d9
; CHECK-NEXT:    bl __aeabi_dadd
; CHECK-NEXT:    vmov lr, r12, d10
; CHECK-NEXT:    vmov r2, r3, d8
; CHECK-NEXT:    vmov d9, r0, r1
; CHECK-NEXT:    mov r0, lr
; CHECK-NEXT:    mov r1, r12
; CHECK-NEXT:    bl __aeabi_dadd
; CHECK-NEXT:    vmov d8, r0, r1
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9, d10, d11}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = fadd nnan ninf nsz <2 x double> %src2, %src1
  ret <2 x double> %0
}


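; The sub tests compute src2 - src1, so they also exercise the operand order
; of vsub (q0 = q1 - q0).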
define arm_aapcs_vfpcc <16 x i8> @sub_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: sub_int8_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vsub.i8 q0, q1, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = sub <16 x i8> %src2, %src1
  ret <16 x i8> %0
}

define arm_aapcs_vfpcc <8 x i16> @sub_int16_t(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: sub_int16_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vsub.i16 q0, q1, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = sub <8 x i16> %src2, %src1
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <4 x i32> @sub_int32_t(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: sub_int32_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vsub.i32 q0, q1, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = sub nsw <4 x i32> %src2, %src1
  ret <4 x i32> %0
}

define arm_aapcs_vfpcc <2 x i64> @sub_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: sub_int64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    vmov lr, r12, d1
; CHECK-NEXT:    vmov r2, r3, d3
; CHECK-NEXT:    vmov r1, r0, d0
; CHECK-NEXT:    vmov r4, r5, d2
; CHECK-NEXT:    subs.w r2, r2, lr
; CHECK-NEXT:    sbc.w r3, r3, r12
; CHECK-NEXT:    subs r1, r4, r1
; CHECK-NEXT:    sbc.w r0, r5, r0
; CHECK-NEXT:    vmov q0[2], q0[0], r1, r2
; CHECK-NEXT:    vmov q0[3], q0[1], r0, r3
; CHECK-NEXT:    pop {r4, r5, r7, pc}
entry:
  %0 = sub nsw <2 x i64> %src2, %src1
  ret <2 x i64> %0
}

define arm_aapcs_vfpcc <4 x float> @sub_float32_t(<4 x float> %src1, <4 x float> %src2) {
; CHECK-MVE-LABEL: sub_float32_t:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vsub.f32 s3, s7, s3
; CHECK-MVE-NEXT:    vsub.f32 s2, s6, s2
; CHECK-MVE-NEXT:    vsub.f32 s1, s5, s1
; CHECK-MVE-NEXT:    vsub.f32 s0, s4, s0
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: sub_float32_t:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vsub.f32 q0, q1, q0
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %0 = fsub nnan ninf nsz <4 x float> %src2, %src1
  ret <4 x float> %0
}

define arm_aapcs_vfpcc <8 x half> @sub_float16_t(<8 x half> %src1, <8 x half> %src2) {
; CHECK-MVE-LABEL: sub_float16_t:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vmovx.f16 s8, s0
; CHECK-MVE-NEXT:    vmovx.f16 s10, s4
; CHECK-MVE-NEXT:    vsub.f16 s0, s4, s0
; CHECK-MVE-NEXT:    vsub.f16 s8, s10, s8
; CHECK-MVE-NEXT:    vins.f16 s0, s8
; CHECK-MVE-NEXT:    vmovx.f16 s4, s1
; CHECK-MVE-NEXT:    vmovx.f16 s8, s5
; CHECK-MVE-NEXT:    vsub.f16 s1, s5, s1
; CHECK-MVE-NEXT:    vsub.f16 s4, s8, s4
; CHECK-MVE-NEXT:    vmovx.f16 s8, s6
; CHECK-MVE-NEXT:    vins.f16 s1, s4
; CHECK-MVE-NEXT:    vmovx.f16 s4, s2
; CHECK-MVE-NEXT:    vsub.f16 s2, s6, s2
; CHECK-MVE-NEXT:    vsub.f16 s4, s8, s4
; CHECK-MVE-NEXT:    vins.f16 s2, s4
; CHECK-MVE-NEXT:    vmovx.f16 s4, s3
; CHECK-MVE-NEXT:    vmovx.f16 s6, s7
; CHECK-MVE-NEXT:    vsub.f16 s3, s7, s3
; CHECK-MVE-NEXT:    vsub.f16 s4, s6, s4
; CHECK-MVE-NEXT:    vins.f16 s3, s4
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: sub_float16_t:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vsub.f16 q0, q1, q0
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %0 = fsub nnan ninf nsz <8 x half> %src2, %src1
  ret <8 x half> %0
}

define arm_aapcs_vfpcc <2 x double> @sub_float64_t(<2 x double> %src1, <2 x double> %src2) {
; CHECK-LABEL: sub_float64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
; CHECK-NEXT:    vpush {d8, d9, d10, d11}
; CHECK-NEXT:    vmov q5, q1
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vmov r0, r1, d11
; CHECK-NEXT:    vmov r2, r3, d9
; CHECK-NEXT:    bl __aeabi_dsub
; CHECK-NEXT:    vmov lr, r12, d10
; CHECK-NEXT:    vmov r2, r3, d8
; CHECK-NEXT:    vmov d9, r0, r1
; CHECK-NEXT:    mov r0, lr
; CHECK-NEXT:    mov r1, r12
; CHECK-NEXT:    bl __aeabi_dsub
; CHECK-NEXT:    vmov d8, r0, r1
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9, d10, d11}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = fsub nnan ninf nsz <2 x double> %src2, %src1
  ret <2 x double> %0
}


define arm_aapcs_vfpcc <16 x i8> @mul_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: mul_int8_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmul.i8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = mul <16 x i8> %src1, %src2
  ret <16 x i8> %0
}

define arm_aapcs_vfpcc <8 x i16> @mul_int16_t(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: mul_int16_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmul.i16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = mul <8 x i16> %src1, %src2
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <4 x i32> @mul_int32_t(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: mul_int32_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmul.i32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = mul nsw <4 x i32> %src1, %src2
  ret <4 x i32> %0
}

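; 64-bit multiplies are scalarized too: each lane is a umull for the product
; of the low words plus two mla instructions folding the cross products into
; the high half.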
define arm_aapcs_vfpcc <2 x i64> @mul_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: mul_int64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
; CHECK-NEXT:    vmov r0, r1, d2
; CHECK-NEXT:    vmov r2, lr, d0
; CHECK-NEXT:    vmov r4, r5, d3
; CHECK-NEXT:    umull r12, r3, r2, r0
; CHECK-NEXT:    mla r1, r2, r1, r3
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    mla r0, lr, r0, r1
; CHECK-NEXT:    umull r6, r7, r2, r4
; CHECK-NEXT:    mla r2, r2, r5, r7
; CHECK-NEXT:    vmov q0[2], q0[0], r12, r6
; CHECK-NEXT:    mla r2, r3, r4, r2
; CHECK-NEXT:    vmov q0[3], q0[1], r0, r2
; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
entry:
  %0 = mul nsw <2 x i64> %src1, %src2
  ret <2 x i64> %0
}

define arm_aapcs_vfpcc <8 x half> @mul_float16_t(<8 x half> %src1, <8 x half> %src2) {
; CHECK-MVE-LABEL: mul_float16_t:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vmovx.f16 s8, s0
; CHECK-MVE-NEXT:    vmovx.f16 s10, s4
; CHECK-MVE-NEXT:    vmul.f16 s0, s4, s0
; CHECK-MVE-NEXT:    vmul.f16 s8, s10, s8
; CHECK-MVE-NEXT:    vins.f16 s0, s8
; CHECK-MVE-NEXT:    vmovx.f16 s4, s1
; CHECK-MVE-NEXT:    vmovx.f16 s8, s5
; CHECK-MVE-NEXT:    vmul.f16 s1, s5, s1
; CHECK-MVE-NEXT:    vmul.f16 s4, s8, s4
; CHECK-MVE-NEXT:    vmovx.f16 s8, s6
; CHECK-MVE-NEXT:    vins.f16 s1, s4
; CHECK-MVE-NEXT:    vmovx.f16 s4, s2
; CHECK-MVE-NEXT:    vmul.f16 s2, s6, s2
; CHECK-MVE-NEXT:    vmul.f16 s4, s8, s4
; CHECK-MVE-NEXT:    vins.f16 s2, s4
; CHECK-MVE-NEXT:    vmovx.f16 s4, s3
; CHECK-MVE-NEXT:    vmovx.f16 s6, s7
; CHECK-MVE-NEXT:    vmul.f16 s3, s7, s3
; CHECK-MVE-NEXT:    vmul.f16 s4, s6, s4
; CHECK-MVE-NEXT:    vins.f16 s3, s4
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: mul_float16_t:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vmul.f16 q0, q1, q0
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %0 = fmul nnan ninf nsz <8 x half> %src2, %src1
  ret <8 x half> %0
}

define arm_aapcs_vfpcc <4 x float> @mul_float32_t(<4 x float> %src1, <4 x float> %src2) {
; CHECK-MVE-LABEL: mul_float32_t:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vmul.f32 s3, s7, s3
; CHECK-MVE-NEXT:    vmul.f32 s2, s6, s2
; CHECK-MVE-NEXT:    vmul.f32 s1, s5, s1
; CHECK-MVE-NEXT:    vmul.f32 s0, s4, s0
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: mul_float32_t:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vmul.f32 q0, q1, q0
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %0 = fmul nnan ninf nsz <4 x float> %src2, %src1
  ret <4 x float> %0
}

define arm_aapcs_vfpcc <2 x double> @mul_float64_t(<2 x double> %src1, <2 x double> %src2) {
; CHECK-LABEL: mul_float64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
; CHECK-NEXT:    vpush {d8, d9, d10, d11}
; CHECK-NEXT:    vmov q5, q1
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vmov r0, r1, d11
; CHECK-NEXT:    vmov r2, r3, d9
; CHECK-NEXT:    bl __aeabi_dmul
; CHECK-NEXT:    vmov lr, r12, d10
; CHECK-NEXT:    vmov r2, r3, d8
; CHECK-NEXT:    vmov d9, r0, r1
; CHECK-NEXT:    mov r0, lr
; CHECK-NEXT:    mov r1, r12
; CHECK-NEXT:    bl __aeabi_dmul
; CHECK-NEXT:    vmov d8, r0, r1
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9, d10, d11}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = fmul nnan ninf nsz <2 x double> %src2, %src1
  ret <2 x double> %0
}