xref: /llvm-project/llvm/test/CodeGen/Thumb2/mve-soft-float-abi.ll (revision 78ff736db2313642c3d8dd74beee3bc0b21c5c2a)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve -o - %s | FileCheck %s --check-prefixes=CHECK-LE,CHECK-MVE
3; RUN: llc -mtriple=thumbebv8.1m.main-none-eabi -mattr=+mve -o - %s | FileCheck %s --check-prefix=CHECK-BE
4; RUN: llc -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp -o - %s | FileCheck %s --check-prefixes=CHECK-LE,CHECK-FP
5
; Element-wise add of two <16 x i8> vectors.
; Per the expected output below, %lhs arrives in r0-r3 and %rhs is loaded from
; the stack. The little-endian runs add directly with vadd.i8 and move the
; result back to r0-r3. The big-endian run additionally reverses the
; register-passed operand and the result with vrev64.8.
6define <16 x i8> @vector_add_i8(<16 x i8> %lhs, <16 x i8> %rhs) {
7; CHECK-LE-LABEL: vector_add_i8:
8; CHECK-LE:       @ %bb.0: @ %entry
9; CHECK-LE-NEXT:    vmov d0, r0, r1
10; CHECK-LE-NEXT:    mov r0, sp
11; CHECK-LE-NEXT:    vldrw.u32 q1, [r0]
12; CHECK-LE-NEXT:    vmov d1, r2, r3
13; CHECK-LE-NEXT:    vadd.i8 q0, q0, q1
14; CHECK-LE-NEXT:    vmov r0, r1, d0
15; CHECK-LE-NEXT:    vmov r2, r3, d1
16; CHECK-LE-NEXT:    bx lr
17;
18; CHECK-BE-LABEL: vector_add_i8:
19; CHECK-BE:       @ %bb.0: @ %entry
20; CHECK-BE-NEXT:    vmov d0, r1, r0
21; CHECK-BE-NEXT:    mov r0, sp
22; CHECK-BE-NEXT:    vmov d1, r3, r2
23; CHECK-BE-NEXT:    vrev64.8 q1, q0
24; CHECK-BE-NEXT:    vldrb.u8 q0, [r0]
25; CHECK-BE-NEXT:    vadd.i8 q0, q1, q0
26; CHECK-BE-NEXT:    vrev64.8 q1, q0
27; CHECK-BE-NEXT:    vmov r1, r0, d2
28; CHECK-BE-NEXT:    vmov r3, r2, d3
29; CHECK-BE-NEXT:    bx lr
30entry:
31  %sum = add <16 x i8> %lhs, %rhs
32  ret <16 x i8> %sum
33}
34
; Element-wise add of two <8 x i16> vectors.
; Per the expected output below, %lhs arrives in r0-r3 and %rhs is loaded from
; the stack. The little-endian runs use a single vadd.i16. The big-endian run
; loads %rhs with vldrh.u16 and reverses the register-passed operand and the
; result with vrev64.16.
35define <8 x i16> @vector_add_i16(<8 x i16> %lhs, <8 x i16> %rhs) {
36; CHECK-LE-LABEL: vector_add_i16:
37; CHECK-LE:       @ %bb.0: @ %entry
38; CHECK-LE-NEXT:    vmov d0, r0, r1
39; CHECK-LE-NEXT:    mov r0, sp
40; CHECK-LE-NEXT:    vldrw.u32 q1, [r0]
41; CHECK-LE-NEXT:    vmov d1, r2, r3
42; CHECK-LE-NEXT:    vadd.i16 q0, q0, q1
43; CHECK-LE-NEXT:    vmov r0, r1, d0
44; CHECK-LE-NEXT:    vmov r2, r3, d1
45; CHECK-LE-NEXT:    bx lr
46;
47; CHECK-BE-LABEL: vector_add_i16:
48; CHECK-BE:       @ %bb.0: @ %entry
49; CHECK-BE-NEXT:    vmov d0, r1, r0
50; CHECK-BE-NEXT:    mov r0, sp
51; CHECK-BE-NEXT:    vmov d1, r3, r2
52; CHECK-BE-NEXT:    vrev64.16 q1, q0
53; CHECK-BE-NEXT:    vldrh.u16 q0, [r0]
54; CHECK-BE-NEXT:    vadd.i16 q0, q1, q0
55; CHECK-BE-NEXT:    vrev64.16 q1, q0
56; CHECK-BE-NEXT:    vmov r1, r0, d2
57; CHECK-BE-NEXT:    vmov r3, r2, d3
58; CHECK-BE-NEXT:    bx lr
59entry:
60  %sum = add <8 x i16> %lhs, %rhs
61  ret <8 x i16> %sum
62}
63
; Element-wise add of two <4 x i32> vectors.
; Per the expected output below, %lhs arrives in r0-r3 and %rhs is loaded from
; the stack with vldrw.u32 in every configuration. The little-endian runs use a
; single vadd.i32. The big-endian run reverses the register-passed operand and
; the result with vrev64.32.
64define <4 x i32> @vector_add_i32(<4 x i32> %lhs, <4 x i32> %rhs) {
65; CHECK-LE-LABEL: vector_add_i32:
66; CHECK-LE:       @ %bb.0: @ %entry
67; CHECK-LE-NEXT:    vmov d0, r0, r1
68; CHECK-LE-NEXT:    mov r0, sp
69; CHECK-LE-NEXT:    vldrw.u32 q1, [r0]
70; CHECK-LE-NEXT:    vmov d1, r2, r3
71; CHECK-LE-NEXT:    vadd.i32 q0, q0, q1
72; CHECK-LE-NEXT:    vmov r0, r1, d0
73; CHECK-LE-NEXT:    vmov r2, r3, d1
74; CHECK-LE-NEXT:    bx lr
75;
76; CHECK-BE-LABEL: vector_add_i32:
77; CHECK-BE:       @ %bb.0: @ %entry
78; CHECK-BE-NEXT:    vmov d0, r1, r0
79; CHECK-BE-NEXT:    mov r0, sp
80; CHECK-BE-NEXT:    vmov d1, r3, r2
81; CHECK-BE-NEXT:    vrev64.32 q1, q0
82; CHECK-BE-NEXT:    vldrw.u32 q0, [r0]
83; CHECK-BE-NEXT:    vadd.i32 q0, q1, q0
84; CHECK-BE-NEXT:    vrev64.32 q1, q0
85; CHECK-BE-NEXT:    vmov r1, r0, d2
86; CHECK-BE-NEXT:    vmov r3, r2, d3
87; CHECK-BE-NEXT:    bx lr
88entry:
89  %sum = add <4 x i32> %lhs, %rhs
90  ret <4 x i32> %sum
91}
92
; Element-wise add of two <2 x i64> vectors.
; The expected output contains no vector add in any configuration: each 64-bit
; lane is summed on the core registers with an add-with-carry pair (adds/adc or
; adds/adcs), using the halves of %rhs extracted from the stack-loaded q0.
; The +mve and +mve.fp little-endian outputs differ in which registers are
; saved and used, so they are checked under separate prefixes. The big-endian
; output swaps which register of each pair receives the carry-setting add.
93define <2 x i64> @vector_add_i64(<2 x i64> %lhs, <2 x i64> %rhs) {
94; CHECK-MVE-LABEL: vector_add_i64:
95; CHECK-MVE:       @ %bb.0: @ %entry
96; CHECK-MVE-NEXT:    .save {r7, lr}
97; CHECK-MVE-NEXT:    push {r7, lr}
98; CHECK-MVE-NEXT:    add.w r12, sp, #8
99; CHECK-MVE-NEXT:    vldrw.u32 q0, [r12]
100; CHECK-MVE-NEXT:    vmov r12, lr, d0
101; CHECK-MVE-NEXT:    adds.w r0, r0, r12
102; CHECK-MVE-NEXT:    adc.w r1, r1, lr
103; CHECK-MVE-NEXT:    vmov r12, lr, d1
104; CHECK-MVE-NEXT:    adds.w r2, r2, r12
105; CHECK-MVE-NEXT:    adc.w r3, r3, lr
106; CHECK-MVE-NEXT:    pop {r7, pc}
107;
108; CHECK-BE-LABEL: vector_add_i64:
109; CHECK-BE:       @ %bb.0: @ %entry
110; CHECK-BE-NEXT:    .save {r7, lr}
111; CHECK-BE-NEXT:    push {r7, lr}
112; CHECK-BE-NEXT:    add.w r12, sp, #8
113; CHECK-BE-NEXT:    vldrw.u32 q0, [r12]
114; CHECK-BE-NEXT:    vmov r12, lr, d0
115; CHECK-BE-NEXT:    adds.w r1, r1, lr
116; CHECK-BE-NEXT:    adc.w r0, r0, r12
117; CHECK-BE-NEXT:    vmov r12, lr, d1
118; CHECK-BE-NEXT:    adds.w r3, r3, lr
119; CHECK-BE-NEXT:    adc.w r2, r2, r12
120; CHECK-BE-NEXT:    pop {r7, pc}
121;
122; CHECK-FP-LABEL: vector_add_i64:
123; CHECK-FP:       @ %bb.0: @ %entry
124; CHECK-FP-NEXT:    .save {r4, r5, r7, lr}
125; CHECK-FP-NEXT:    push {r4, r5, r7, lr}
126; CHECK-FP-NEXT:    add.w r12, sp, #16
127; CHECK-FP-NEXT:    vldrw.u32 q0, [r12]
128; CHECK-FP-NEXT:    vmov r12, lr, d0
129; CHECK-FP-NEXT:    vmov r4, r5, d1
130; CHECK-FP-NEXT:    adds.w r0, r0, r12
131; CHECK-FP-NEXT:    adc.w r1, r1, lr
132; CHECK-FP-NEXT:    adds r2, r2, r4
133; CHECK-FP-NEXT:    adcs r3, r5
134; CHECK-FP-NEXT:    pop {r4, r5, r7, pc}
135entry:
136  %sum = add <2 x i64> %lhs, %rhs
137  ret <2 x i64> %sum
138}
139
; Element-wise fadd of two <8 x half> vectors.
; With +mve only (little- and big-endian) the expected output handles each of
; the eight lanes separately: both operands are widened with __aeabi_h2f, added
; with __aeabi_fadd, narrowed with __aeabi_f2h, and inserted into q5.
; With +mve.fp the whole operation is a single vadd.f16.
; The big-endian output also applies vrev64.16 to the register-passed operand
; and to the result.
140define <8 x half> @vector_add_f16(<8 x half> %lhs, <8 x half> %rhs) {
141; CHECK-MVE-LABEL: vector_add_f16:
142; CHECK-MVE:       @ %bb.0: @ %entry
143; CHECK-MVE-NEXT:    .save {r4, r5, r7, lr}
144; CHECK-MVE-NEXT:    push {r4, r5, r7, lr}
145; CHECK-MVE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
146; CHECK-MVE-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
147; CHECK-MVE-NEXT:    vmov d8, r0, r1
148; CHECK-MVE-NEXT:    add r0, sp, #64
149; CHECK-MVE-NEXT:    vldrw.u32 q6, [r0]
150; CHECK-MVE-NEXT:    vmov d9, r2, r3
151; CHECK-MVE-NEXT:    vmov.u16 r4, q4[0]
152; CHECK-MVE-NEXT:    vmov.u16 r0, q6[0]
153; CHECK-MVE-NEXT:    bl __aeabi_h2f
154; CHECK-MVE-NEXT:    mov r5, r0
155; CHECK-MVE-NEXT:    mov r0, r4
156; CHECK-MVE-NEXT:    bl __aeabi_h2f
157; CHECK-MVE-NEXT:    mov r1, r5
158; CHECK-MVE-NEXT:    bl __aeabi_fadd
159; CHECK-MVE-NEXT:    bl __aeabi_f2h
160; CHECK-MVE-NEXT:    vmov.16 q5[0], r0
161; CHECK-MVE-NEXT:    vmov.u16 r0, q6[1]
162; CHECK-MVE-NEXT:    vmov.u16 r4, q4[1]
163; CHECK-MVE-NEXT:    bl __aeabi_h2f
164; CHECK-MVE-NEXT:    mov r5, r0
165; CHECK-MVE-NEXT:    mov r0, r4
166; CHECK-MVE-NEXT:    bl __aeabi_h2f
167; CHECK-MVE-NEXT:    mov r1, r5
168; CHECK-MVE-NEXT:    bl __aeabi_fadd
169; CHECK-MVE-NEXT:    bl __aeabi_f2h
170; CHECK-MVE-NEXT:    vmov.16 q5[1], r0
171; CHECK-MVE-NEXT:    vmov.u16 r0, q6[2]
172; CHECK-MVE-NEXT:    vmov.u16 r4, q4[2]
173; CHECK-MVE-NEXT:    bl __aeabi_h2f
174; CHECK-MVE-NEXT:    mov r5, r0
175; CHECK-MVE-NEXT:    mov r0, r4
176; CHECK-MVE-NEXT:    bl __aeabi_h2f
177; CHECK-MVE-NEXT:    mov r1, r5
178; CHECK-MVE-NEXT:    bl __aeabi_fadd
179; CHECK-MVE-NEXT:    bl __aeabi_f2h
180; CHECK-MVE-NEXT:    vmov.16 q5[2], r0
181; CHECK-MVE-NEXT:    vmov.u16 r0, q6[3]
182; CHECK-MVE-NEXT:    vmov.u16 r4, q4[3]
183; CHECK-MVE-NEXT:    bl __aeabi_h2f
184; CHECK-MVE-NEXT:    mov r5, r0
185; CHECK-MVE-NEXT:    mov r0, r4
186; CHECK-MVE-NEXT:    bl __aeabi_h2f
187; CHECK-MVE-NEXT:    mov r1, r5
188; CHECK-MVE-NEXT:    bl __aeabi_fadd
189; CHECK-MVE-NEXT:    bl __aeabi_f2h
190; CHECK-MVE-NEXT:    vmov.16 q5[3], r0
191; CHECK-MVE-NEXT:    vmov.u16 r0, q6[4]
192; CHECK-MVE-NEXT:    vmov.u16 r4, q4[4]
193; CHECK-MVE-NEXT:    bl __aeabi_h2f
194; CHECK-MVE-NEXT:    mov r5, r0
195; CHECK-MVE-NEXT:    mov r0, r4
196; CHECK-MVE-NEXT:    bl __aeabi_h2f
197; CHECK-MVE-NEXT:    mov r1, r5
198; CHECK-MVE-NEXT:    bl __aeabi_fadd
199; CHECK-MVE-NEXT:    bl __aeabi_f2h
200; CHECK-MVE-NEXT:    vmov.16 q5[4], r0
201; CHECK-MVE-NEXT:    vmov.u16 r0, q6[5]
202; CHECK-MVE-NEXT:    vmov.u16 r4, q4[5]
203; CHECK-MVE-NEXT:    bl __aeabi_h2f
204; CHECK-MVE-NEXT:    mov r5, r0
205; CHECK-MVE-NEXT:    mov r0, r4
206; CHECK-MVE-NEXT:    bl __aeabi_h2f
207; CHECK-MVE-NEXT:    mov r1, r5
208; CHECK-MVE-NEXT:    bl __aeabi_fadd
209; CHECK-MVE-NEXT:    bl __aeabi_f2h
210; CHECK-MVE-NEXT:    vmov.16 q5[5], r0
211; CHECK-MVE-NEXT:    vmov.u16 r0, q6[6]
212; CHECK-MVE-NEXT:    vmov.u16 r4, q4[6]
213; CHECK-MVE-NEXT:    bl __aeabi_h2f
214; CHECK-MVE-NEXT:    mov r5, r0
215; CHECK-MVE-NEXT:    mov r0, r4
216; CHECK-MVE-NEXT:    bl __aeabi_h2f
217; CHECK-MVE-NEXT:    mov r1, r5
218; CHECK-MVE-NEXT:    bl __aeabi_fadd
219; CHECK-MVE-NEXT:    bl __aeabi_f2h
220; CHECK-MVE-NEXT:    vmov.16 q5[6], r0
221; CHECK-MVE-NEXT:    vmov.u16 r0, q6[7]
222; CHECK-MVE-NEXT:    vmov.u16 r4, q4[7]
223; CHECK-MVE-NEXT:    bl __aeabi_h2f
224; CHECK-MVE-NEXT:    mov r5, r0
225; CHECK-MVE-NEXT:    mov r0, r4
226; CHECK-MVE-NEXT:    bl __aeabi_h2f
227; CHECK-MVE-NEXT:    mov r1, r5
228; CHECK-MVE-NEXT:    bl __aeabi_fadd
229; CHECK-MVE-NEXT:    bl __aeabi_f2h
230; CHECK-MVE-NEXT:    vmov.16 q5[7], r0
231; CHECK-MVE-NEXT:    vmov r0, r1, d10
232; CHECK-MVE-NEXT:    vmov r2, r3, d11
233; CHECK-MVE-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
234; CHECK-MVE-NEXT:    pop {r4, r5, r7, pc}
235;
236; CHECK-BE-LABEL: vector_add_f16:
237; CHECK-BE:       @ %bb.0: @ %entry
238; CHECK-BE-NEXT:    .save {r4, r5, r7, lr}
239; CHECK-BE-NEXT:    push {r4, r5, r7, lr}
240; CHECK-BE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
241; CHECK-BE-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
242; CHECK-BE-NEXT:    vmov d0, r1, r0
243; CHECK-BE-NEXT:    add r0, sp, #64
244; CHECK-BE-NEXT:    vldrh.u16 q6, [r0]
245; CHECK-BE-NEXT:    vmov d1, r3, r2
246; CHECK-BE-NEXT:    vrev64.16 q4, q0
247; CHECK-BE-NEXT:    vmov.u16 r0, q6[0]
248; CHECK-BE-NEXT:    vmov.u16 r4, q4[0]
249; CHECK-BE-NEXT:    bl __aeabi_h2f
250; CHECK-BE-NEXT:    mov r5, r0
251; CHECK-BE-NEXT:    mov r0, r4
252; CHECK-BE-NEXT:    bl __aeabi_h2f
253; CHECK-BE-NEXT:    mov r1, r5
254; CHECK-BE-NEXT:    bl __aeabi_fadd
255; CHECK-BE-NEXT:    bl __aeabi_f2h
256; CHECK-BE-NEXT:    vmov.16 q5[0], r0
257; CHECK-BE-NEXT:    vmov.u16 r0, q6[1]
258; CHECK-BE-NEXT:    vmov.u16 r4, q4[1]
259; CHECK-BE-NEXT:    bl __aeabi_h2f
260; CHECK-BE-NEXT:    mov r5, r0
261; CHECK-BE-NEXT:    mov r0, r4
262; CHECK-BE-NEXT:    bl __aeabi_h2f
263; CHECK-BE-NEXT:    mov r1, r5
264; CHECK-BE-NEXT:    bl __aeabi_fadd
265; CHECK-BE-NEXT:    bl __aeabi_f2h
266; CHECK-BE-NEXT:    vmov.16 q5[1], r0
267; CHECK-BE-NEXT:    vmov.u16 r0, q6[2]
268; CHECK-BE-NEXT:    vmov.u16 r4, q4[2]
269; CHECK-BE-NEXT:    bl __aeabi_h2f
270; CHECK-BE-NEXT:    mov r5, r0
271; CHECK-BE-NEXT:    mov r0, r4
272; CHECK-BE-NEXT:    bl __aeabi_h2f
273; CHECK-BE-NEXT:    mov r1, r5
274; CHECK-BE-NEXT:    bl __aeabi_fadd
275; CHECK-BE-NEXT:    bl __aeabi_f2h
276; CHECK-BE-NEXT:    vmov.16 q5[2], r0
277; CHECK-BE-NEXT:    vmov.u16 r0, q6[3]
278; CHECK-BE-NEXT:    vmov.u16 r4, q4[3]
279; CHECK-BE-NEXT:    bl __aeabi_h2f
280; CHECK-BE-NEXT:    mov r5, r0
281; CHECK-BE-NEXT:    mov r0, r4
282; CHECK-BE-NEXT:    bl __aeabi_h2f
283; CHECK-BE-NEXT:    mov r1, r5
284; CHECK-BE-NEXT:    bl __aeabi_fadd
285; CHECK-BE-NEXT:    bl __aeabi_f2h
286; CHECK-BE-NEXT:    vmov.16 q5[3], r0
287; CHECK-BE-NEXT:    vmov.u16 r0, q6[4]
288; CHECK-BE-NEXT:    vmov.u16 r4, q4[4]
289; CHECK-BE-NEXT:    bl __aeabi_h2f
290; CHECK-BE-NEXT:    mov r5, r0
291; CHECK-BE-NEXT:    mov r0, r4
292; CHECK-BE-NEXT:    bl __aeabi_h2f
293; CHECK-BE-NEXT:    mov r1, r5
294; CHECK-BE-NEXT:    bl __aeabi_fadd
295; CHECK-BE-NEXT:    bl __aeabi_f2h
296; CHECK-BE-NEXT:    vmov.16 q5[4], r0
297; CHECK-BE-NEXT:    vmov.u16 r0, q6[5]
298; CHECK-BE-NEXT:    vmov.u16 r4, q4[5]
299; CHECK-BE-NEXT:    bl __aeabi_h2f
300; CHECK-BE-NEXT:    mov r5, r0
301; CHECK-BE-NEXT:    mov r0, r4
302; CHECK-BE-NEXT:    bl __aeabi_h2f
303; CHECK-BE-NEXT:    mov r1, r5
304; CHECK-BE-NEXT:    bl __aeabi_fadd
305; CHECK-BE-NEXT:    bl __aeabi_f2h
306; CHECK-BE-NEXT:    vmov.16 q5[5], r0
307; CHECK-BE-NEXT:    vmov.u16 r0, q6[6]
308; CHECK-BE-NEXT:    vmov.u16 r4, q4[6]
309; CHECK-BE-NEXT:    bl __aeabi_h2f
310; CHECK-BE-NEXT:    mov r5, r0
311; CHECK-BE-NEXT:    mov r0, r4
312; CHECK-BE-NEXT:    bl __aeabi_h2f
313; CHECK-BE-NEXT:    mov r1, r5
314; CHECK-BE-NEXT:    bl __aeabi_fadd
315; CHECK-BE-NEXT:    bl __aeabi_f2h
316; CHECK-BE-NEXT:    vmov.16 q5[6], r0
317; CHECK-BE-NEXT:    vmov.u16 r0, q6[7]
318; CHECK-BE-NEXT:    vmov.u16 r4, q4[7]
319; CHECK-BE-NEXT:    bl __aeabi_h2f
320; CHECK-BE-NEXT:    mov r5, r0
321; CHECK-BE-NEXT:    mov r0, r4
322; CHECK-BE-NEXT:    bl __aeabi_h2f
323; CHECK-BE-NEXT:    mov r1, r5
324; CHECK-BE-NEXT:    bl __aeabi_fadd
325; CHECK-BE-NEXT:    bl __aeabi_f2h
326; CHECK-BE-NEXT:    vmov.16 q5[7], r0
327; CHECK-BE-NEXT:    vrev64.16 q0, q5
328; CHECK-BE-NEXT:    vmov r1, r0, d0
329; CHECK-BE-NEXT:    vmov r3, r2, d1
330; CHECK-BE-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
331; CHECK-BE-NEXT:    pop {r4, r5, r7, pc}
332;
333; CHECK-FP-LABEL: vector_add_f16:
334; CHECK-FP:       @ %bb.0: @ %entry
335; CHECK-FP-NEXT:    vmov d0, r0, r1
336; CHECK-FP-NEXT:    mov r0, sp
337; CHECK-FP-NEXT:    vldrw.u32 q1, [r0]
338; CHECK-FP-NEXT:    vmov d1, r2, r3
339; CHECK-FP-NEXT:    vadd.f16 q0, q0, q1
340; CHECK-FP-NEXT:    vmov r0, r1, d0
341; CHECK-FP-NEXT:    vmov r2, r3, d1
342; CHECK-FP-NEXT:    bx lr
343entry:
344  %sum = fadd <8 x half> %lhs, %rhs
345  ret <8 x half> %sum
346}
347
; Element-wise fadd of two <4 x float> vectors.
; With +mve only (little- and big-endian) the expected output calls
; __aeabi_fadd four times, once per lane, writing the results into s16-s19
; before moving q4 back to r0-r3. With +mve.fp the whole operation is a single
; vadd.f32. The big-endian output also applies vrev64.32 to the register-passed
; operand and to the result.
348define <4 x float> @vector_add_f32(<4 x float> %lhs, <4 x float> %rhs) {
349; CHECK-MVE-LABEL: vector_add_f32:
350; CHECK-MVE:       @ %bb.0: @ %entry
351; CHECK-MVE-NEXT:    .save {r4, r5, r6, r7, lr}
352; CHECK-MVE-NEXT:    push {r4, r5, r6, r7, lr}
353; CHECK-MVE-NEXT:    .pad #4
354; CHECK-MVE-NEXT:    sub sp, #4
355; CHECK-MVE-NEXT:    .vsave {d8, d9}
356; CHECK-MVE-NEXT:    vpush {d8, d9}
357; CHECK-MVE-NEXT:    mov r4, r0
358; CHECK-MVE-NEXT:    add r0, sp, #40
359; CHECK-MVE-NEXT:    vldrw.u32 q4, [r0]
360; CHECK-MVE-NEXT:    mov r6, r1
361; CHECK-MVE-NEXT:    mov r0, r3
362; CHECK-MVE-NEXT:    mov r5, r2
363; CHECK-MVE-NEXT:    vmov r7, r1, d9
364; CHECK-MVE-NEXT:    bl __aeabi_fadd
365; CHECK-MVE-NEXT:    vmov s19, r0
366; CHECK-MVE-NEXT:    mov r0, r5
367; CHECK-MVE-NEXT:    mov r1, r7
368; CHECK-MVE-NEXT:    bl __aeabi_fadd
369; CHECK-MVE-NEXT:    vmov r5, r1, d8
370; CHECK-MVE-NEXT:    vmov s18, r0
371; CHECK-MVE-NEXT:    mov r0, r6
372; CHECK-MVE-NEXT:    bl __aeabi_fadd
373; CHECK-MVE-NEXT:    vmov s17, r0
374; CHECK-MVE-NEXT:    mov r0, r4
375; CHECK-MVE-NEXT:    mov r1, r5
376; CHECK-MVE-NEXT:    bl __aeabi_fadd
377; CHECK-MVE-NEXT:    vmov s16, r0
378; CHECK-MVE-NEXT:    vmov r2, r3, d9
379; CHECK-MVE-NEXT:    vmov r0, r1, d8
380; CHECK-MVE-NEXT:    vpop {d8, d9}
381; CHECK-MVE-NEXT:    add sp, #4
382; CHECK-MVE-NEXT:    pop {r4, r5, r6, r7, pc}
383;
384; CHECK-BE-LABEL: vector_add_f32:
385; CHECK-BE:       @ %bb.0: @ %entry
386; CHECK-BE-NEXT:    .save {r4, r5, r7, lr}
387; CHECK-BE-NEXT:    push {r4, r5, r7, lr}
388; CHECK-BE-NEXT:    .vsave {d8, d9, d10, d11}
389; CHECK-BE-NEXT:    vpush {d8, d9, d10, d11}
390; CHECK-BE-NEXT:    vmov d0, r1, r0
391; CHECK-BE-NEXT:    add r1, sp, #48
392; CHECK-BE-NEXT:    vldrw.u32 q5, [r1]
393; CHECK-BE-NEXT:    vmov d1, r3, r2
394; CHECK-BE-NEXT:    vrev64.32 q4, q0
395; CHECK-BE-NEXT:    vmov r4, r0, d9
396; CHECK-BE-NEXT:    vmov r5, r1, d11
397; CHECK-BE-NEXT:    bl __aeabi_fadd
398; CHECK-BE-NEXT:    vmov s19, r0
399; CHECK-BE-NEXT:    mov r0, r4
400; CHECK-BE-NEXT:    mov r1, r5
401; CHECK-BE-NEXT:    bl __aeabi_fadd
402; CHECK-BE-NEXT:    vmov s18, r0
403; CHECK-BE-NEXT:    vmov r4, r0, d8
404; CHECK-BE-NEXT:    vmov r5, r1, d10
405; CHECK-BE-NEXT:    bl __aeabi_fadd
406; CHECK-BE-NEXT:    vmov s17, r0
407; CHECK-BE-NEXT:    mov r0, r4
408; CHECK-BE-NEXT:    mov r1, r5
409; CHECK-BE-NEXT:    bl __aeabi_fadd
410; CHECK-BE-NEXT:    vmov s16, r0
411; CHECK-BE-NEXT:    vrev64.32 q0, q4
412; CHECK-BE-NEXT:    vmov r1, r0, d0
413; CHECK-BE-NEXT:    vmov r3, r2, d1
414; CHECK-BE-NEXT:    vpop {d8, d9, d10, d11}
415; CHECK-BE-NEXT:    pop {r4, r5, r7, pc}
416;
417; CHECK-FP-LABEL: vector_add_f32:
418; CHECK-FP:       @ %bb.0: @ %entry
419; CHECK-FP-NEXT:    vmov d0, r0, r1
420; CHECK-FP-NEXT:    mov r0, sp
421; CHECK-FP-NEXT:    vldrw.u32 q1, [r0]
422; CHECK-FP-NEXT:    vmov d1, r2, r3
423; CHECK-FP-NEXT:    vadd.f32 q0, q0, q1
424; CHECK-FP-NEXT:    vmov r0, r1, d0
425; CHECK-FP-NEXT:    vmov r2, r3, d1
426; CHECK-FP-NEXT:    bx lr
427entry:
428  %sum = fadd <4 x float> %lhs, %rhs
429  ret <4 x float> %sum
430}
431
; Element-wise fadd of two <2 x double> vectors.
; In all three configurations, including +mve.fp, the expected output contains
; no vector add: each lane is summed by its own call to __aeabi_dadd and the
; two results are returned in r0-r3. The big-endian output loads %rhs with
; vldrb.u8 and reverses it with vrev64.8 before extracting the lane halves.
432define <2 x double> @vector_add_f64(<2 x double> %lhs, <2 x double> %rhs) {
433; CHECK-MVE-LABEL: vector_add_f64:
434; CHECK-MVE:       @ %bb.0: @ %entry
435; CHECK-MVE-NEXT:    .save {r4, r5, r6, r7, lr}
436; CHECK-MVE-NEXT:    push {r4, r5, r6, r7, lr}
437; CHECK-MVE-NEXT:    .pad #4
438; CHECK-MVE-NEXT:    sub sp, #4
439; CHECK-MVE-NEXT:    .vsave {d8, d9}
440; CHECK-MVE-NEXT:    vpush {d8, d9}
441; CHECK-MVE-NEXT:    mov r5, r0
442; CHECK-MVE-NEXT:    add r0, sp, #40
443; CHECK-MVE-NEXT:    vldrw.u32 q4, [r0]
444; CHECK-MVE-NEXT:    mov r4, r2
445; CHECK-MVE-NEXT:    mov r6, r3
446; CHECK-MVE-NEXT:    mov r7, r1
447; CHECK-MVE-NEXT:    vmov r2, r3, d9
448; CHECK-MVE-NEXT:    mov r0, r4
449; CHECK-MVE-NEXT:    mov r1, r6
450; CHECK-MVE-NEXT:    bl __aeabi_dadd
451; CHECK-MVE-NEXT:    vmov r2, r3, d8
452; CHECK-MVE-NEXT:    mov r4, r0
453; CHECK-MVE-NEXT:    mov r6, r1
454; CHECK-MVE-NEXT:    mov r0, r5
455; CHECK-MVE-NEXT:    mov r1, r7
456; CHECK-MVE-NEXT:    bl __aeabi_dadd
457; CHECK-MVE-NEXT:    mov r2, r4
458; CHECK-MVE-NEXT:    mov r3, r6
459; CHECK-MVE-NEXT:    vpop {d8, d9}
460; CHECK-MVE-NEXT:    add sp, #4
461; CHECK-MVE-NEXT:    pop {r4, r5, r6, r7, pc}
462;
463; CHECK-BE-LABEL: vector_add_f64:
464; CHECK-BE:       @ %bb.0: @ %entry
465; CHECK-BE-NEXT:    .save {r4, r5, r6, r7, lr}
466; CHECK-BE-NEXT:    push {r4, r5, r6, r7, lr}
467; CHECK-BE-NEXT:    .pad #4
468; CHECK-BE-NEXT:    sub sp, #4
469; CHECK-BE-NEXT:    .vsave {d8, d9}
470; CHECK-BE-NEXT:    vpush {d8, d9}
471; CHECK-BE-NEXT:    mov r5, r0
472; CHECK-BE-NEXT:    add r0, sp, #40
473; CHECK-BE-NEXT:    vldrb.u8 q0, [r0]
474; CHECK-BE-NEXT:    mov r6, r2
475; CHECK-BE-NEXT:    mov r4, r3
476; CHECK-BE-NEXT:    mov r7, r1
477; CHECK-BE-NEXT:    vrev64.8 q4, q0
478; CHECK-BE-NEXT:    mov r0, r6
479; CHECK-BE-NEXT:    vmov r3, r2, d9
480; CHECK-BE-NEXT:    mov r1, r4
481; CHECK-BE-NEXT:    bl __aeabi_dadd
482; CHECK-BE-NEXT:    vmov r3, r2, d8
483; CHECK-BE-NEXT:    mov r4, r0
484; CHECK-BE-NEXT:    mov r6, r1
485; CHECK-BE-NEXT:    mov r0, r5
486; CHECK-BE-NEXT:    mov r1, r7
487; CHECK-BE-NEXT:    bl __aeabi_dadd
488; CHECK-BE-NEXT:    mov r2, r4
489; CHECK-BE-NEXT:    mov r3, r6
490; CHECK-BE-NEXT:    vpop {d8, d9}
491; CHECK-BE-NEXT:    add sp, #4
492; CHECK-BE-NEXT:    pop {r4, r5, r6, r7, pc}
493;
494; CHECK-FP-LABEL: vector_add_f64:
495; CHECK-FP:       @ %bb.0: @ %entry
496; CHECK-FP-NEXT:    .save {r4, r5, r6, r7, lr}
497; CHECK-FP-NEXT:    push {r4, r5, r6, r7, lr}
498; CHECK-FP-NEXT:    .pad #4
499; CHECK-FP-NEXT:    sub sp, #4
500; CHECK-FP-NEXT:    .vsave {d8, d9}
501; CHECK-FP-NEXT:    vpush {d8, d9}
502; CHECK-FP-NEXT:    mov r5, r2
503; CHECK-FP-NEXT:    add r2, sp, #40
504; CHECK-FP-NEXT:    vldrw.u32 q4, [r2]
505; CHECK-FP-NEXT:    mov r4, r3
506; CHECK-FP-NEXT:    vmov r2, r3, d8
507; CHECK-FP-NEXT:    bl __aeabi_dadd
508; CHECK-FP-NEXT:    vmov r2, r3, d9
509; CHECK-FP-NEXT:    mov r6, r0
510; CHECK-FP-NEXT:    mov r7, r1
511; CHECK-FP-NEXT:    mov r0, r5
512; CHECK-FP-NEXT:    mov r1, r4
513; CHECK-FP-NEXT:    bl __aeabi_dadd
514; CHECK-FP-NEXT:    mov r2, r0
515; CHECK-FP-NEXT:    mov r3, r1
516; CHECK-FP-NEXT:    mov r0, r6
517; CHECK-FP-NEXT:    mov r1, r7
518; CHECK-FP-NEXT:    vpop {d8, d9}
519; CHECK-FP-NEXT:    add sp, #4
520; CHECK-FP-NEXT:    pop {r4, r5, r6, r7, pc}
521entry:
522  %sum = fadd <2 x double> %lhs, %rhs
523  ret <2 x double> %sum
524}
525
; Builds a <4 x i32> with %x in lanes 0, 1 and 2 and %y in lane 3, starting
; from undef, and returns it.
; The expected output contains no vector instructions: the result is formed
; purely with core-register moves (r3 = r1, then r1 = r0 and r2 = r0), and the
; sequence is identical for the little- and big-endian runs.
; This function has no named entry block, so the expected block comment is
; just "%bb.0".
526define <4 x i32> @insertextract(i32 %x, i32 %y) {
527; CHECK-LE-LABEL: insertextract:
528; CHECK-LE:       @ %bb.0:
529; CHECK-LE-NEXT:    mov r3, r1
530; CHECK-LE-NEXT:    mov r1, r0
531; CHECK-LE-NEXT:    mov r2, r0
532; CHECK-LE-NEXT:    bx lr
533;
534; CHECK-BE-LABEL: insertextract:
535; CHECK-BE:       @ %bb.0:
536; CHECK-BE-NEXT:    mov r3, r1
537; CHECK-BE-NEXT:    mov r1, r0
538; CHECK-BE-NEXT:    mov r2, r0
539; CHECK-BE-NEXT:    bx lr
540  %1 = insertelement <4 x i32> undef, i32 %x, i32 0
541  %2 = insertelement <4 x i32> %1, i32 %x, i32 1
542  %3 = insertelement <4 x i32> %2, i32 %x, i32 2
543  %4 = insertelement <4 x i32> %3, i32 %y, i32 3
544  ret <4 x i32> %4
545}
546
; External callee used by @main. Both call sites below pass a leading i32
; followed by the <4 x i32> payload, so the declaration lists the same two
; parameters; a one-parameter declaration would disagree with the function type
; used by the calls.
547declare void @print_uint32x4_t(i32, <4 x i32> %val)
; Packs %x and %y into a <2 x i64>, reinterprets it as <4 x i32> (%c), and
; passes it to @print_uint32x4_t after a leading i32 0. A second call passes %i,
; which is %c with lane 0 replaced by the constant 1. Always returns 0.
; In the expected output the i32 0 is placed in r0, the first half of the
; vector goes in r2/r3 (r1 is not used for the vector), and the second half
; (d9) is stored to the stack before each call.
548define i32 @main(i64 %x, i64 %y) {
549; CHECK-LE-LABEL: main:
550; CHECK-LE:       @ %bb.0: @ %entry
551; CHECK-LE-NEXT:    .save {r4, lr}
552; CHECK-LE-NEXT:    push {r4, lr}
553; CHECK-LE-NEXT:    .vsave {d8, d9}
554; CHECK-LE-NEXT:    vpush {d8, d9}
555; CHECK-LE-NEXT:    .pad #8
556; CHECK-LE-NEXT:    sub sp, #8
557; CHECK-LE-NEXT:    vmov.32 q4[2], r2
558; CHECK-LE-NEXT:    mov r4, r1
559; CHECK-LE-NEXT:    mov r1, r0
560; CHECK-LE-NEXT:    vmov.32 q4[3], r3
561; CHECK-LE-NEXT:    movs r0, #0
562; CHECK-LE-NEXT:    mov r2, r1
563; CHECK-LE-NEXT:    mov r3, r4
564; CHECK-LE-NEXT:    vstr d9, [sp]
565; CHECK-LE-NEXT:    bl print_uint32x4_t
566; CHECK-LE-NEXT:    movs r0, #0
567; CHECK-LE-NEXT:    movs r2, #1
568; CHECK-LE-NEXT:    mov r3, r4
569; CHECK-LE-NEXT:    vstr d9, [sp]
570; CHECK-LE-NEXT:    bl print_uint32x4_t
571; CHECK-LE-NEXT:    movs r0, #0
572; CHECK-LE-NEXT:    add sp, #8
573; CHECK-LE-NEXT:    vpop {d8, d9}
574; CHECK-LE-NEXT:    pop {r4, pc}
575;
576; CHECK-BE-LABEL: main:
577; CHECK-BE:       @ %bb.0: @ %entry
578; CHECK-BE-NEXT:    .save {r4, lr}
579; CHECK-BE-NEXT:    push {r4, lr}
580; CHECK-BE-NEXT:    .vsave {d8, d9}
581; CHECK-BE-NEXT:    vpush {d8, d9}
582; CHECK-BE-NEXT:    .pad #8
583; CHECK-BE-NEXT:    sub sp, #8
584; CHECK-BE-NEXT:    vmov.32 q0[2], r2
585; CHECK-BE-NEXT:    mov r4, r1
586; CHECK-BE-NEXT:    mov r1, r0
587; CHECK-BE-NEXT:    vmov.32 q0[3], r3
588; CHECK-BE-NEXT:    vrev64.32 q4, q0
589; CHECK-BE-NEXT:    movs r0, #0
590; CHECK-BE-NEXT:    mov r2, r1
591; CHECK-BE-NEXT:    mov r3, r4
592; CHECK-BE-NEXT:    vstr d9, [sp]
593; CHECK-BE-NEXT:    bl print_uint32x4_t
594; CHECK-BE-NEXT:    movs r0, #0
595; CHECK-BE-NEXT:    movs r2, #1
596; CHECK-BE-NEXT:    mov r3, r4
597; CHECK-BE-NEXT:    vstr d9, [sp]
598; CHECK-BE-NEXT:    bl print_uint32x4_t
599; CHECK-BE-NEXT:    movs r0, #0
600; CHECK-BE-NEXT:    add sp, #8
601; CHECK-BE-NEXT:    vpop {d8, d9}
602; CHECK-BE-NEXT:    pop {r4, pc}
603entry:
604  %a = insertelement <2 x i64> poison, i64 %x, i64 0
605  %b = insertelement <2 x i64> %a, i64 %y, i64 1
606  %c = bitcast <2 x i64> %b to <4 x i32>
607  %i = insertelement <4 x i32> %c, i32 1, i64 0
608  tail call void @print_uint32x4_t(i32 0, <4 x i32> %c)
609  tail call void @print_uint32x4_t(i32 0, <4 x i32> %i)
610  ret i32 0
611}
612