xref: /llvm-project/llvm/test/CodeGen/Thumb2/mve-multivec-spill.ll (revision b5b663aac17415625340eb29c8010832bfc4c21c)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -O3 -mattr=+mve %s -o - | FileCheck %s
3; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -O3 -mattr=+mve -early-live-intervals -verify-machineinstrs %s -o - | FileCheck %s
4
; Externally defined callee taking no arguments and returning void.
; @spill_multivector calls it between its vector loads and its vector stores.
5declare void @external_function()
6
; spill_multivector: keeps five vld2q results live across a call.
;
; The body performs five llvm.arm.mve.vld2q loads starting at %p, each one
; 16 i32 elements (64 bytes) past the previous, calls @external_function, and
; only afterwards extracts the ten <4 x i32> halves and stores them to ten
; consecutive <4 x i32> slots starting at %p.
;
; What the CHECK lines below pin down:
;   * each vld2q call becomes a vld20.32 / vld21.32 pair on a two-Q-register
;     operand;
;   * the pairs loaded from %p, %p+64 and %p+128 are written to the stack with
;     vstmia ("32-byte Spill") at sp+64, sp+32 and sp, and read back with
;     vldmia ("32-byte Reload") after the bl; the 96-byte .pad is exactly those
;     three 32-byte slots;
;   * the pairs loaded from %p+192 and %p+256 stay in q6/q7 and q4/q5, i.e.
;     d8-d15, which the prologue vpush-es and the epilogue vpop-s.
;
; The CHECK lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than editing them
; by hand.
7define arm_aapcs_vfpcc void @spill_multivector(ptr %p) {
8; CHECK-LABEL: spill_multivector:
9; CHECK:       @ %bb.0: @ %entry
10; CHECK-NEXT:    .save {r4, r5, r7, lr}
11; CHECK-NEXT:    push {r4, r5, r7, lr}
12; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
13; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
14; CHECK-NEXT:    .pad #96
15; CHECK-NEXT:    sub sp, #96
16; CHECK-NEXT:    vld20.32 {q0, q1}, [r0]
17; CHECK-NEXT:    mov r5, r0
18; CHECK-NEXT:    add.w lr, sp, #64
19; CHECK-NEXT:    mov r4, r0
20; CHECK-NEXT:    vld21.32 {q0, q1}, [r5]!
21; CHECK-NEXT:    adds r0, #64
22; CHECK-NEXT:    vstmia lr, {d0, d1, d2, d3} @ 32-byte Spill
23; CHECK-NEXT:    vld20.32 {q0, q1}, [r0]
24; CHECK-NEXT:    add.w lr, sp, #32
25; CHECK-NEXT:    vld21.32 {q0, q1}, [r0]
26; CHECK-NEXT:    add.w r0, r4, #128
27; CHECK-NEXT:    vstmia lr, {d0, d1, d2, d3} @ 32-byte Spill
28; CHECK-NEXT:    vld20.32 {q0, q1}, [r0]
29; CHECK-NEXT:    vld21.32 {q0, q1}, [r0]
30; CHECK-NEXT:    add.w r0, r4, #192
31; CHECK-NEXT:    vld20.32 {q6, q7}, [r0]
32; CHECK-NEXT:    vstmia sp, {d0, d1, d2, d3} @ 32-byte Spill
33; CHECK-NEXT:    vld21.32 {q6, q7}, [r0]
34; CHECK-NEXT:    add.w r0, r4, #256
35; CHECK-NEXT:    vld20.32 {q4, q5}, [r0]
36; CHECK-NEXT:    vld21.32 {q4, q5}, [r0]
37; CHECK-NEXT:    bl external_function
38; CHECK-NEXT:    vldmia sp, {d2, d3, d4, d5} @ 32-byte Reload
39; CHECK-NEXT:    add.w lr, sp, #32
40; CHECK-NEXT:    vstrw.32 q2, [r4, #80]
41; CHECK-NEXT:    vstrw.32 q5, [r4, #144]
42; CHECK-NEXT:    vstrw.32 q4, [r4, #128]
43; CHECK-NEXT:    vstrw.32 q7, [r4, #112]
44; CHECK-NEXT:    vstrw.32 q1, [r4, #64]
45; CHECK-NEXT:    vldmia lr, {d2, d3, d4, d5} @ 32-byte Reload
46; CHECK-NEXT:    add.w lr, sp, #64
47; CHECK-NEXT:    vstrw.32 q2, [r4, #48]
48; CHECK-NEXT:    vstrw.32 q6, [r4, #96]
49; CHECK-NEXT:    vstrw.32 q1, [r5]
50; CHECK-NEXT:    vldmia lr, {d2, d3, d4, d5} @ 32-byte Reload
51; CHECK-NEXT:    vstrw.32 q2, [r4, #16]
52; CHECK-NEXT:    vstrw.32 q1, [r4]
53; CHECK-NEXT:    add sp, #96
54; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
55; CHECK-NEXT:    pop {r4, r5, r7, pc}
56entry:
  ; Five vld2q loads. Each getelementptr advances the previous address by
  ; 16 x i32 = 64 bytes, so the loads read from %p, %p+64, ..., %p+256.
57  %v01 = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vld2q.v4i32.v4i32.p0(ptr %p)
58  %ip23 = getelementptr i32, ptr %p, i32 16
59  %v23 = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vld2q.v4i32.v4i32.p0(ptr %ip23)
60  %ip45 = getelementptr i32, ptr %ip23, i32 16
61  %v45 = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vld2q.v4i32.v4i32.p0(ptr %ip45)
62  %ip67 = getelementptr i32, ptr %ip45, i32 16
63  %v67 = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vld2q.v4i32.v4i32.p0(ptr %ip67)
64  %ip89 = getelementptr i32, ptr %ip67, i32 16
65  %v89 = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vld2q.v4i32.v4i32.p0(ptr %ip89)
  ; Every loaded pair is first used after this call, so all five are live
  ; across it.
66  call void @external_function()
67
  ; Pair %v01 -> <4 x i32> slots 0 and 1 of %p (byte offsets 0 and 16).
68  %v0 = extractvalue { <4 x i32>, <4 x i32> } %v01, 0
69  %v1 = extractvalue { <4 x i32>, <4 x i32> } %v01, 1
70  store <4 x i32> %v0, ptr %p, align 4
71  %p1 = getelementptr <4 x i32>, ptr %p, i32 1
72  store <4 x i32> %v1, ptr %p1, align 4
73
  ; Pair %v23 -> slots 2 and 3 (byte offsets 32 and 48).
74  %v2 = extractvalue { <4 x i32>, <4 x i32> } %v23, 0
75  %v3 = extractvalue { <4 x i32>, <4 x i32> } %v23, 1
76  %p2 = getelementptr <4 x i32>, ptr %p, i32 2
77  store <4 x i32> %v2, ptr %p2, align 4
78  %p3 = getelementptr <4 x i32>, ptr %p, i32 3
79  store <4 x i32> %v3, ptr %p3, align 4
80
  ; Pair %v45 -> slots 4 and 5 (byte offsets 64 and 80).
81  %v4 = extractvalue { <4 x i32>, <4 x i32> } %v45, 0
82  %v5 = extractvalue { <4 x i32>, <4 x i32> } %v45, 1
83  %p4 = getelementptr <4 x i32>, ptr %p, i32 4
84  store <4 x i32> %v4, ptr %p4, align 4
85  %p5 = getelementptr <4 x i32>, ptr %p, i32 5
86  store <4 x i32> %v5, ptr %p5, align 4
87
  ; Pair %v67 -> slots 6 and 7 (byte offsets 96 and 112).
88  %v6 = extractvalue { <4 x i32>, <4 x i32> } %v67, 0
89  %v7 = extractvalue { <4 x i32>, <4 x i32> } %v67, 1
90  %p6 = getelementptr <4 x i32>, ptr %p, i32 6
91  store <4 x i32> %v6, ptr %p6, align 4
92  %p7 = getelementptr <4 x i32>, ptr %p, i32 7
93  store <4 x i32> %v7, ptr %p7, align 4
94
  ; Pair %v89 -> slots 8 and 9 (byte offsets 128 and 144).
95  %v8 = extractvalue { <4 x i32>, <4 x i32> } %v89, 0
96  %v9 = extractvalue { <4 x i32>, <4 x i32> } %v89, 1
97  %p8 = getelementptr <4 x i32>, ptr %p, i32 8
98  store <4 x i32> %v8, ptr %p8, align 4
99  %p9 = getelementptr <4 x i32>, ptr %p, i32 9
100  store <4 x i32> %v9, ptr %p9, align 4
101  ret void
102}
103
; MVE vld2q intrinsic: takes a pointer and returns a pair of <4 x i32> vectors.
; In the CHECK lines of this file each call to it appears as a
; vld20.32 / vld21.32 instruction pair.
104declare { <4 x i32>, <4 x i32> } @llvm.arm.mve.vld2q.v4i32.v4i32.p0(ptr)
105