; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK-LE
; RUN: llc -mtriple=thumbebv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK-BE

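; Check that <4 x i32> loads use the widest load the IR alignment allows.
; Little-endian: vldrw.u32 for align 4, vldrh.u16 for align 2, vldrb.u8 for
; align 1. Big-endian: under-aligned loads go through vldrb.u8 plus vrev32.8
; (and the usual vrev64.32 on the returned value) to keep the lanes in order.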
define arm_aapcs_vfpcc <4 x i32> @load_4xi32_a4(ptr %vp) {
; CHECK-LE-LABEL: load_4xi32_a4:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrw.u32 q0, [r0]
; CHECK-LE-NEXT:    vshr.u32 q0, q0, #1
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: load_4xi32_a4:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldrw.u32 q0, [r0]
; CHECK-BE-NEXT:    vshr.u32 q1, q0, #1
; CHECK-BE-NEXT:    vrev64.32 q0, q1
; CHECK-BE-NEXT:    bx lr
entry:
  %0 = load <4 x i32>, ptr %vp, align 4
  %1 = lshr <4 x i32> %0, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %1
}

define arm_aapcs_vfpcc <4 x i32> @load_4xi32_a2(ptr %vp) {
; CHECK-LE-LABEL: load_4xi32_a2:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrh.u16 q0, [r0]
; CHECK-LE-NEXT:    vshr.u32 q0, q0, #1
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: load_4xi32_a2:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldrb.u8 q0, [r0]
; CHECK-BE-NEXT:    vrev32.8 q0, q0
; CHECK-BE-NEXT:    vshr.u32 q1, q0, #1
; CHECK-BE-NEXT:    vrev64.32 q0, q1
; CHECK-BE-NEXT:    bx lr
entry:
  %0 = load <4 x i32>, ptr %vp, align 2
  %1 = lshr <4 x i32> %0, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %1
}

define arm_aapcs_vfpcc <4 x i32> @load_4xi32_a1(ptr %vp) {
; CHECK-LE-LABEL: load_4xi32_a1:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrb.u8 q0, [r0]
; CHECK-LE-NEXT:    vshr.u32 q0, q0, #1
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: load_4xi32_a1:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldrb.u8 q0, [r0]
; CHECK-BE-NEXT:    vrev32.8 q0, q0
; CHECK-BE-NEXT:    vshr.u32 q1, q0, #1
; CHECK-BE-NEXT:    vrev64.32 q0, q1
; CHECK-BE-NEXT:    bx lr
entry:
  %0 = load <4 x i32>, ptr %vp, align 1
  %1 = lshr <4 x i32> %0, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %1
}

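; Stores mirror the loads: vstrw.32 / vstrh.16 / vstrb.8 depending on the
; alignment on little-endian; big-endian under-aligned stores are byte stores
; preceded by a vrev32.8.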
define arm_aapcs_vfpcc void @store_4xi32_a4(ptr %vp, <4 x i32> %val) {
; CHECK-LE-LABEL: store_4xi32_a4:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vshr.u32 q0, q0, #1
; CHECK-LE-NEXT:    vstrw.32 q0, [r0]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: store_4xi32_a4:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vrev64.32 q1, q0
; CHECK-BE-NEXT:    vshr.u32 q0, q1, #1
; CHECK-BE-NEXT:    vstrw.32 q0, [r0]
; CHECK-BE-NEXT:    bx lr
entry:
  %0 = lshr <4 x i32> %val, <i32 1, i32 1, i32 1, i32 1>
  store <4 x i32> %0, ptr %vp, align 4
  ret void
}

define arm_aapcs_vfpcc void @store_4xi32_a2(ptr %vp, <4 x i32> %val) {
; CHECK-LE-LABEL: store_4xi32_a2:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vshr.u32 q0, q0, #1
; CHECK-LE-NEXT:    vstrh.16 q0, [r0]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: store_4xi32_a2:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vrev64.32 q1, q0
; CHECK-BE-NEXT:    vshr.u32 q0, q1, #1
; CHECK-BE-NEXT:    vrev32.8 q0, q0
; CHECK-BE-NEXT:    vstrb.8 q0, [r0]
; CHECK-BE-NEXT:    bx lr
entry:
  %0 = lshr <4 x i32> %val, <i32 1, i32 1, i32 1, i32 1>
  store <4 x i32> %0, ptr %vp, align 2
  ret void
}

define arm_aapcs_vfpcc void @store_4xi32_a1(ptr %vp, <4 x i32> %val) {
; CHECK-LE-LABEL: store_4xi32_a1:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vshr.u32 q0, q0, #1
; CHECK-LE-NEXT:    vstrb.8 q0, [r0]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: store_4xi32_a1:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vrev64.32 q1, q0
; CHECK-BE-NEXT:    vshr.u32 q0, q1, #1
; CHECK-BE-NEXT:    vrev32.8 q0, q0
; CHECK-BE-NEXT:    vstrb.8 q0, [r0]
; CHECK-BE-NEXT:    bx lr
entry:
  %0 = lshr <4 x i32> %val, <i32 1, i32 1, i32 1, i32 1>
  store <4 x i32> %0, ptr %vp, align 1
  ret void
}

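; A constant offset of +/-508 bytes (i32 index +/-127) can be folded into the
; vldrw.u32 immediate on little-endian. On big-endian the access is emitted as
; vldrb.u8, whose smaller immediate range means the offset is applied with a
; separate add.w/sub.w instead.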
define arm_aapcs_vfpcc <4 x i32> @load_4xi32_a4_offset_pos(ptr %ip) {
; CHECK-LE-LABEL: load_4xi32_a4_offset_pos:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrw.u32 q0, [r0, #508]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: load_4xi32_a4_offset_pos:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    add.w r0, r0, #508
; CHECK-BE-NEXT:    vldrb.u8 q1, [r0]
; CHECK-BE-NEXT:    vrev64.8 q0, q1
; CHECK-BE-NEXT:    bx lr
entry:
  %ipoffset = getelementptr inbounds i32, ptr %ip, i32 127
  %0 = load <4 x i32>, ptr %ipoffset, align 4
  ret <4 x i32> %0
}

define arm_aapcs_vfpcc <4 x i32> @load_4xi32_a4_offset_neg(ptr %ip) {
; CHECK-LE-LABEL: load_4xi32_a4_offset_neg:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrw.u32 q0, [r0, #-508]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: load_4xi32_a4_offset_neg:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    sub.w r0, r0, #508
; CHECK-BE-NEXT:    vldrb.u8 q1, [r0]
; CHECK-BE-NEXT:    vrev64.8 q0, q1
; CHECK-BE-NEXT:    bx lr
entry:
  %ipoffset = getelementptr inbounds i32, ptr %ip, i32 -127
  %0 = load <4 x i32>, ptr %ipoffset, align 4
  ret <4 x i32> %0
}

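; Load/store of a vector stack slot at offset #16 from sp, for i32, i16 and i8
; element types at their natural element alignment (4, 2 and 1 respectively).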
define arm_aapcs_vfpcc <4 x i32> @loadstore_4xi32_stack_off16() {
; CHECK-LE-LABEL: loadstore_4xi32_stack_off16:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    .pad #40
; CHECK-LE-NEXT:    sub sp, #40
; CHECK-LE-NEXT:    vmov.i32 q0, #0x1
; CHECK-LE-NEXT:    mov r0, sp
; CHECK-LE-NEXT:    vstrw.32 q0, [r0]
; CHECK-LE-NEXT:    movs r0, #3
; CHECK-LE-NEXT:    vstrw.32 q0, [sp, #16]
; CHECK-LE-NEXT:    str r0, [sp, #16]
; CHECK-LE-NEXT:    vldrw.u32 q0, [sp, #16]
; CHECK-LE-NEXT:    add sp, #40
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: loadstore_4xi32_stack_off16:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    .pad #40
; CHECK-BE-NEXT:    sub sp, #40
; CHECK-BE-NEXT:    vmov.i32 q0, #0x1
; CHECK-BE-NEXT:    mov r0, sp
; CHECK-BE-NEXT:    vstrw.32 q0, [r0]
; CHECK-BE-NEXT:    movs r0, #3
; CHECK-BE-NEXT:    vstrw.32 q0, [sp, #16]
; CHECK-BE-NEXT:    str r0, [sp, #16]
; CHECK-BE-NEXT:    vldrb.u8 q1, [sp, #16]
; CHECK-BE-NEXT:    vrev64.8 q0, q1
; CHECK-BE-NEXT:    add sp, #40
; CHECK-BE-NEXT:    bx lr
entry:
  %c = alloca [1 x [5 x [2 x i32]]], align 4
  store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, ptr %c, align 4
  %arrayidx5.2 = getelementptr inbounds [1 x [5 x [2 x i32]]], ptr %c, i32 0, i32 0, i32 2, i32 0
  store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, ptr %arrayidx5.2, align 4
  store i32 3, ptr %arrayidx5.2, align 4
  %0 = load <4 x i32>, ptr %arrayidx5.2, align 4
  ret <4 x i32> %0
}

define arm_aapcs_vfpcc <8 x i16> @loadstore_8xi16_stack_off16() {
; CHECK-LE-LABEL: loadstore_8xi16_stack_off16:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    .pad #40
; CHECK-LE-NEXT:    sub sp, #40
; CHECK-LE-NEXT:    vmov.i16 q0, #0x1
; CHECK-LE-NEXT:    mov r0, sp
; CHECK-LE-NEXT:    vstrh.16 q0, [r0]
; CHECK-LE-NEXT:    movs r0, #3
; CHECK-LE-NEXT:    vstrh.16 q0, [sp, #16]
; CHECK-LE-NEXT:    strh.w r0, [sp, #16]
; CHECK-LE-NEXT:    vldrh.u16 q0, [sp, #16]
; CHECK-LE-NEXT:    add sp, #40
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: loadstore_8xi16_stack_off16:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    .pad #40
; CHECK-BE-NEXT:    sub sp, #40
; CHECK-BE-NEXT:    vmov.i16 q0, #0x1
; CHECK-BE-NEXT:    mov r0, sp
; CHECK-BE-NEXT:    vstrh.16 q0, [r0]
; CHECK-BE-NEXT:    movs r0, #3
; CHECK-BE-NEXT:    vstrh.16 q0, [sp, #16]
; CHECK-BE-NEXT:    strh.w r0, [sp, #16]
; CHECK-BE-NEXT:    vldrb.u8 q1, [sp, #16]
; CHECK-BE-NEXT:    vrev64.8 q0, q1
; CHECK-BE-NEXT:    add sp, #40
; CHECK-BE-NEXT:    bx lr
entry:
  %c = alloca [1 x [10 x [2 x i16]]], align 2
  store <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, ptr %c, align 2
  %arrayidx5.2 = getelementptr inbounds [1 x [10 x [2 x i16]]], ptr %c, i32 0, i32 0, i32 4, i32 0
  store <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, ptr %arrayidx5.2, align 2
  store i16 3, ptr %arrayidx5.2, align 2
  %0 = load <8 x i16>, ptr %arrayidx5.2, align 2
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <16 x i8> @loadstore_16xi8_stack_off16() {
; CHECK-LE-LABEL: loadstore_16xi8_stack_off16:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    .pad #40
; CHECK-LE-NEXT:    sub sp, #40
; CHECK-LE-NEXT:    vmov.i8 q0, #0x1
; CHECK-LE-NEXT:    mov r0, sp
; CHECK-LE-NEXT:    vstrb.8 q0, [r0]
; CHECK-LE-NEXT:    movs r0, #3
; CHECK-LE-NEXT:    vstrb.8 q0, [sp, #16]
; CHECK-LE-NEXT:    strb.w r0, [sp, #16]
; CHECK-LE-NEXT:    vldrb.u8 q0, [sp, #16]
; CHECK-LE-NEXT:    add sp, #40
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: loadstore_16xi8_stack_off16:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    .pad #40
; CHECK-BE-NEXT:    sub sp, #40
; CHECK-BE-NEXT:    vmov.i8 q0, #0x1
; CHECK-BE-NEXT:    mov r0, sp
; CHECK-BE-NEXT:    vstrb.8 q0, [r0]
; CHECK-BE-NEXT:    movs r0, #3
; CHECK-BE-NEXT:    vstrb.8 q0, [sp, #16]
; CHECK-BE-NEXT:    strb.w r0, [sp, #16]
; CHECK-BE-NEXT:    vldrb.u8 q1, [sp, #16]
; CHECK-BE-NEXT:    vrev64.8 q0, q1
; CHECK-BE-NEXT:    add sp, #40
; CHECK-BE-NEXT:    bx lr
entry:
  %c = alloca [1 x [20 x [2 x i8]]], align 1
  store <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, ptr %c, align 1
  %arrayidx5.2 = getelementptr inbounds [1 x [20 x [2 x i8]]], ptr %c, i32 0, i32 0, i32 8, i32 0
  store <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, ptr %arrayidx5.2, align 1
  store i8 3, ptr %arrayidx5.2, align 1
  %0 = load <16 x i8>, ptr %arrayidx5.2, align 1
  ret <16 x i8> %0
}