xref: /llvm-project/llvm/test/CodeGen/AArch64/load.ll (revision 61510b51c33464a6bc15e4cf5b1ee07e2e0ec1c9)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2; RUN: llc -mtriple=aarch64-none-linux-gnu %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
4
5; ===== Legal Scalars =====
6
; Plain i8 load returned as-is. Both llc runs share one expected sequence:
; a single ldrb into w0 followed by ret.
7define i8 @load_i8(ptr %ptr) {
8; CHECK-LABEL: load_i8:
9; CHECK:       // %bb.0:
10; CHECK-NEXT:    ldrb w0, [x0]
11; CHECK-NEXT:    ret
12  %a = load i8, ptr %ptr
13  ret i8 %a
14}
15
; i8 load sign-extended to i16. The expected output for both runs is a single
; sign-extending byte load (ldrsb) into w0, with no separate extend.
16define i16 @load_i8_s16(ptr %ptr) {
17; CHECK-LABEL: load_i8_s16:
18; CHECK:       // %bb.0:
19; CHECK-NEXT:    ldrsb w0, [x0]
20; CHECK-NEXT:    ret
21  %a = load i8, ptr %ptr
22  %s = sext i8 %a to i16
23  ret i16 %s
24}
25
; i8 load zero-extended to i16. The expected output for both runs is a single
; ldrb into w0, with no separate extend instruction.
26define i16 @load_i8_u16(ptr %ptr) {
27; CHECK-LABEL: load_i8_u16:
28; CHECK:       // %bb.0:
29; CHECK-NEXT:    ldrb w0, [x0]
30; CHECK-NEXT:    ret
31  %a = load i8, ptr %ptr
32  %s = zext i8 %a to i16
33  ret i16 %s
34}
35
; i8 load sign-extended to i32. The expected output for both runs is a single
; ldrsb into w0.
36define i32 @load_i8_s32(ptr %ptr) {
37; CHECK-LABEL: load_i8_s32:
38; CHECK:       // %bb.0:
39; CHECK-NEXT:    ldrsb w0, [x0]
40; CHECK-NEXT:    ret
41  %a = load i8, ptr %ptr
42  %s = sext i8 %a to i32
43  ret i32 %s
44}
45
; i8 load zero-extended to i32. The expected output for both runs is a single
; ldrb into w0.
46define i32 @load_i8_u32(ptr %ptr) {
47; CHECK-LABEL: load_i8_u32:
48; CHECK:       // %bb.0:
49; CHECK-NEXT:    ldrb w0, [x0]
50; CHECK-NEXT:    ret
51  %a = load i8, ptr %ptr
52  %s = zext i8 %a to i32
53  ret i32 %s
54}
55
; i8 load sign-extended to i64. The expected output for both runs is a single
; ldrsb targeting the 64-bit register x0.
56define i64 @load_i8_s64(ptr %ptr) {
57; CHECK-LABEL: load_i8_s64:
58; CHECK:       // %bb.0:
59; CHECK-NEXT:    ldrsb x0, [x0]
60; CHECK-NEXT:    ret
61  %a = load i8, ptr %ptr
62  %s = sext i8 %a to i64
63  ret i64 %s
64}
65
; i8 load zero-extended to i64. The expected output for both runs is a single
; ldrb into the 32-bit view w0, with no extra instruction for the upper half.
66define i64 @load_i8_u64(ptr %ptr) {
67; CHECK-LABEL: load_i8_u64:
68; CHECK:       // %bb.0:
69; CHECK-NEXT:    ldrb w0, [x0]
70; CHECK-NEXT:    ret
71  %a = load i8, ptr %ptr
72  %s = zext i8 %a to i64
73  ret i64 %s
74}
75
; Plain i16 load returned as-is. Both runs expect a single ldrh into w0.
76define i16 @load_i16(ptr %ptr) {
77; CHECK-LABEL: load_i16:
78; CHECK:       // %bb.0:
79; CHECK-NEXT:    ldrh w0, [x0]
80; CHECK-NEXT:    ret
81  %a = load i16, ptr %ptr
82  ret i16 %a
83}
84
; i16 load sign-extended to i32. Both runs expect a single sign-extending
; halfword load (ldrsh) into w0.
85define i32 @load_i16_s32(ptr %ptr) {
86; CHECK-LABEL: load_i16_s32:
87; CHECK:       // %bb.0:
88; CHECK-NEXT:    ldrsh w0, [x0]
89; CHECK-NEXT:    ret
90  %a = load i16, ptr %ptr
91  %s = sext i16 %a to i32
92  ret i32 %s
93}
94
; i16 load zero-extended to i32. Both runs expect a single ldrh into w0.
95define i32 @load_i16_u32(ptr %ptr) {
96; CHECK-LABEL: load_i16_u32:
97; CHECK:       // %bb.0:
98; CHECK-NEXT:    ldrh w0, [x0]
99; CHECK-NEXT:    ret
100  %a = load i16, ptr %ptr
101  %s = zext i16 %a to i32
102  ret i32 %s
103}
104
; i16 load sign-extended to i64. Both runs expect a single ldrsh targeting the
; 64-bit register x0.
105define i64 @load_i16_s64(ptr %ptr) {
106; CHECK-LABEL: load_i16_s64:
107; CHECK:       // %bb.0:
108; CHECK-NEXT:    ldrsh x0, [x0]
109; CHECK-NEXT:    ret
110  %a = load i16, ptr %ptr
111  %s = sext i16 %a to i64
112  ret i64 %s
113}
114
; i16 load zero-extended to i64. Both runs expect a single ldrh into the 32-bit
; view w0, with no extra instruction for the upper half.
115define i64 @load_i16_u64(ptr %ptr) {
116; CHECK-LABEL: load_i16_u64:
117; CHECK:       // %bb.0:
118; CHECK-NEXT:    ldrh w0, [x0]
119; CHECK-NEXT:    ret
120  %a = load i16, ptr %ptr
121  %s = zext i16 %a to i64
122  ret i64 %s
123}
124
; Plain i32 load returned as-is. Both runs expect a single ldr into w0.
125define i32 @load_i32(ptr %ptr) {
126; CHECK-LABEL: load_i32:
127; CHECK:       // %bb.0:
128; CHECK-NEXT:    ldr w0, [x0]
129; CHECK-NEXT:    ret
130  %a = load i32, ptr %ptr
131  ret i32 %a
132}
133
; i32 load sign-extended to i64. Both runs expect a single sign-extending word
; load (ldrsw) into x0.
134define i64 @load_i32_s64(ptr %ptr) {
135; CHECK-LABEL: load_i32_s64:
136; CHECK:       // %bb.0:
137; CHECK-NEXT:    ldrsw x0, [x0]
138; CHECK-NEXT:    ret
139  %a = load i32, ptr %ptr
140  %s = sext i32 %a to i64
141  ret i64 %s
142}
143
; i32 load zero-extended to i64. Both runs expect a single ldr into the 32-bit
; view w0, with no extra instruction for the upper half.
144define i64 @load_i32_u64(ptr %ptr) {
145; CHECK-LABEL: load_i32_u64:
146; CHECK:       // %bb.0:
147; CHECK-NEXT:    ldr w0, [x0]
148; CHECK-NEXT:    ret
149  %a = load i32, ptr %ptr
150  %s = zext i32 %a to i64
151  ret i64 %s
152}
153
; Plain i64 load returned as-is. Both runs expect a single ldr into x0.
154define i64 @load_i64(ptr %ptr) {
155; CHECK-LABEL: load_i64:
156; CHECK:       // %bb.0:
157; CHECK-NEXT:    ldr x0, [x0]
158; CHECK-NEXT:    ret
159  %a = load i64, ptr %ptr
160  ret i64 %a
161}
162
163; ===== Legal Vector Types =====
164
; <8 x i8> vector load returned as-is. Both runs expect a single ldr into d0.
165define <8 x i8> @load_v8i8(ptr %ptr) {
166; CHECK-LABEL: load_v8i8:
167; CHECK:       // %bb.0:
168; CHECK-NEXT:    ldr d0, [x0]
169; CHECK-NEXT:    ret
170  %a = load <8 x i8>, ptr %ptr
171  ret <8 x i8> %a
172}
173
; <16 x i8> vector load returned as-is. Both runs expect a single ldr into q0.
174define <16 x i8> @load_v16i8(ptr %ptr) {
175; CHECK-LABEL: load_v16i8:
176; CHECK:       // %bb.0:
177; CHECK-NEXT:    ldr q0, [x0]
178; CHECK-NEXT:    ret
179  %a = load <16 x i8>, ptr %ptr
180  ret <16 x i8> %a
181}
182
; <4 x i16> vector load returned as-is. Both runs expect a single ldr into d0.
183define <4 x i16> @load_v4i16(ptr %ptr) {
184; CHECK-LABEL: load_v4i16:
185; CHECK:       // %bb.0:
186; CHECK-NEXT:    ldr d0, [x0]
187; CHECK-NEXT:    ret
188  %a = load <4 x i16>, ptr %ptr
189  ret <4 x i16> %a
190}
191
; <8 x i16> vector load returned as-is. Both runs expect a single ldr into q0.
192define <8 x i16> @load_v8i16(ptr %ptr) {
193; CHECK-LABEL: load_v8i16:
194; CHECK:       // %bb.0:
195; CHECK-NEXT:    ldr q0, [x0]
196; CHECK-NEXT:    ret
197  %a = load <8 x i16>, ptr %ptr
198  ret <8 x i16> %a
199}
200
; <2 x i32> vector load returned as-is. Both runs expect a single ldr into d0.
201define <2 x i32> @load_v2i32(ptr %ptr) {
202; CHECK-LABEL: load_v2i32:
203; CHECK:       // %bb.0:
204; CHECK-NEXT:    ldr d0, [x0]
205; CHECK-NEXT:    ret
206  %a = load <2 x i32>, ptr %ptr
207  ret <2 x i32> %a
208}
209
; <4 x i32> vector load returned as-is. Both runs expect a single ldr into q0.
210define <4 x i32> @load_v4i32(ptr %ptr) {
211; CHECK-LABEL: load_v4i32:
212; CHECK:       // %bb.0:
213; CHECK-NEXT:    ldr q0, [x0]
214; CHECK-NEXT:    ret
215  %a = load <4 x i32>, ptr %ptr
216  ret <4 x i32> %a
217}
218
; <2 x i64> vector load returned as-is. Both runs expect a single ldr into q0.
219define <2 x i64> @load_v2i64(ptr %ptr) {
220; CHECK-LABEL: load_v2i64:
221; CHECK:       // %bb.0:
222; CHECK-NEXT:    ldr q0, [x0]
223; CHECK-NEXT:    ret
224  %a = load <2 x i64>, ptr %ptr
225  ret <2 x i64> %a
226}
227
228; ===== Smaller/Larger Width Vectors with Legal Element Sizes =====
229
; <2 x i8> load. The two runs have separate expectations:
;   - default selector (SD prefix): two single-lane ld1 byte loads, into byte
;     lanes 0 and 4, with the second address formed by an add of #1;
;   - -global-isel (GI prefix): ld1 into byte lane 0, a scalar ldr b1 from
;     offset #1, then a lane move into v0.s[1].
; NOTE(review): the %b parameter is never referenced in the body.
230define <2 x i8> @load_v2i8(ptr %ptr, <2 x i8> %b) {
231; CHECK-SD-LABEL: load_v2i8:
232; CHECK-SD:       // %bb.0:
233; CHECK-SD-NEXT:    ld1 { v0.b }[0], [x0]
234; CHECK-SD-NEXT:    add x8, x0, #1
235; CHECK-SD-NEXT:    ld1 { v0.b }[4], [x8]
236; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
237; CHECK-SD-NEXT:    ret
238;
239; CHECK-GI-LABEL: load_v2i8:
240; CHECK-GI:       // %bb.0:
241; CHECK-GI-NEXT:    ld1 { v0.b }[0], [x0]
242; CHECK-GI-NEXT:    ldr b1, [x0, #1]
243; CHECK-GI-NEXT:    mov v0.s[1], v1.s[0]
244; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
245; CHECK-GI-NEXT:    ret
246  %a = load <2 x i8>, ptr %ptr
247  ret <2 x i8> %a
248}
249
; <4 x i8> load bitcast to i32 and returned as a scalar. Both runs expect the
; whole thing to collapse to a single 32-bit ldr into w0.
; NOTE(review): the %b parameter is never referenced in the body.
250define i32 @load_v4i8(ptr %ptr, <4 x i8> %b) {
251; CHECK-LABEL: load_v4i8:
252; CHECK:       // %bb.0:
253; CHECK-NEXT:    ldr w0, [x0]
254; CHECK-NEXT:    ret
255  %a = load <4 x i8>, ptr %ptr
256  %c = bitcast <4 x i8> %a to i32
257  ret i32 %c
258}
259
; <32 x i8> load (twice the width of a q register). Both runs expect a single
; paired load, ldp q0, q1.
260define <32 x i8> @load_v32i8(ptr %ptr) {
261; CHECK-LABEL: load_v32i8:
262; CHECK:       // %bb.0:
263; CHECK-NEXT:    ldp q0, q1, [x0]
264; CHECK-NEXT:    ret
265  %a = load <32 x i8>, ptr %ptr
266  ret <32 x i8> %a
267}
268
; <2 x i16> load. The two runs have separate expectations:
;   - default selector (SD prefix): two single-lane ld1 halfword loads, into
;     halfword lanes 0 and 2, with the second address formed by an add of #2;
;   - -global-isel (GI prefix): ld1 into halfword lane 0, a scalar ldr h1 from
;     offset #2, then a lane move into v0.s[1].
269define <2 x i16> @load_v2i16(ptr %ptr) {
270; CHECK-SD-LABEL: load_v2i16:
271; CHECK-SD:       // %bb.0:
272; CHECK-SD-NEXT:    ld1 { v0.h }[0], [x0]
273; CHECK-SD-NEXT:    add x8, x0, #2
274; CHECK-SD-NEXT:    ld1 { v0.h }[2], [x8]
275; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
276; CHECK-SD-NEXT:    ret
277;
278; CHECK-GI-LABEL: load_v2i16:
279; CHECK-GI:       // %bb.0:
280; CHECK-GI-NEXT:    ld1 { v0.h }[0], [x0]
281; CHECK-GI-NEXT:    ldr h1, [x0, #2]
282; CHECK-GI-NEXT:    mov v0.s[1], v1.s[0]
283; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
284; CHECK-GI-NEXT:    ret
285  %a = load <2 x i16>, ptr %ptr
286  ret <2 x i16> %a
287}
288
; <16 x i16> load. Both runs expect a single paired load, ldp q0, q1.
289define <16 x i16> @load_v16i16(ptr %ptr) {
290; CHECK-LABEL: load_v16i16:
291; CHECK:       // %bb.0:
292; CHECK-NEXT:    ldp q0, q1, [x0]
293; CHECK-NEXT:    ret
294  %a = load <16 x i16>, ptr %ptr
295  ret <16 x i16> %a
296}
297
; Single-element <1 x i32> vector load. Both runs expect a scalar FP/SIMD
; register load, ldr s0.
298define <1 x i32> @load_v1i32(ptr %ptr) {
299; CHECK-LABEL: load_v1i32:
300; CHECK:       // %bb.0:
301; CHECK-NEXT:    ldr s0, [x0]
302; CHECK-NEXT:    ret
303  %a = load <1 x i32>, ptr %ptr
304  ret <1 x i32> %a
305}
306
; <8 x i32> load. Both runs expect a single paired load, ldp q0, q1.
307define <8 x i32> @load_v8i32(ptr %ptr) {
308; CHECK-LABEL: load_v8i32:
309; CHECK:       // %bb.0:
310; CHECK-NEXT:    ldp q0, q1, [x0]
311; CHECK-NEXT:    ret
312  %a = load <8 x i32>, ptr %ptr
313  ret <8 x i32> %a
314}
315
; <4 x i64> load. Both runs expect a single paired load, ldp q0, q1.
316define <4 x i64> @load_v4i64(ptr %ptr) {
317; CHECK-LABEL: load_v4i64:
318; CHECK:       // %bb.0:
319; CHECK-NEXT:    ldp q0, q1, [x0]
320; CHECK-NEXT:    ret
321  %a = load <4 x i64>, ptr %ptr
322  ret <4 x i64> %a
323}
324
325; ===== Vectors with Non-Pow 2 Widths =====
326
; <3 x i8> load; the three elements come back in w0, w1 and w2.
;   - default selector (SD prefix): one ldr s0, then three umov lane extracts
;     from byte lanes 0..2;
;   - -global-isel (GI prefix): three separate ldrb loads at offsets 0, 1, 2,
;     with the first staged in w8 and moved to w0 last because x0 still holds
;     the pointer while the other loads are issued.
327define <3 x i8> @load_v3i8(ptr %ptr) {
328; CHECK-SD-LABEL: load_v3i8:
329; CHECK-SD:       // %bb.0:
330; CHECK-SD-NEXT:    ldr s0, [x0]
331; CHECK-SD-NEXT:    umov w0, v0.b[0]
332; CHECK-SD-NEXT:    umov w1, v0.b[1]
333; CHECK-SD-NEXT:    umov w2, v0.b[2]
334; CHECK-SD-NEXT:    ret
335;
336; CHECK-GI-LABEL: load_v3i8:
337; CHECK-GI:       // %bb.0:
338; CHECK-GI-NEXT:    ldrb w8, [x0]
339; CHECK-GI-NEXT:    ldrb w1, [x0, #1]
340; CHECK-GI-NEXT:    ldrb w2, [x0, #2]
341; CHECK-GI-NEXT:    mov w0, w8
342; CHECK-GI-NEXT:    ret
343  %a = load <3 x i8>, ptr %ptr
344  ret <3 x i8> %a
345}
346
; <7 x i8> load.
;   - default selector (SD prefix): a single ldr d0;
;   - -global-isel (GI prefix): seven scalar byte loads (offsets 0..6), each
;     inserted into its byte lane of v0 with a lane move.
; NOTE(review): the expected GlobalISel sequence includes a move of v0.b[0]
; onto itself; it is recorded here as current output, not as a desired form.
347define <7 x i8> @load_v7i8(ptr %ptr) {
348; CHECK-SD-LABEL: load_v7i8:
349; CHECK-SD:       // %bb.0:
350; CHECK-SD-NEXT:    ldr d0, [x0]
351; CHECK-SD-NEXT:    ret
352;
353; CHECK-GI-LABEL: load_v7i8:
354; CHECK-GI:       // %bb.0:
355; CHECK-GI-NEXT:    ldr b0, [x0]
356; CHECK-GI-NEXT:    ldr b1, [x0, #1]
357; CHECK-GI-NEXT:    mov v0.b[0], v0.b[0]
358; CHECK-GI-NEXT:    mov v0.b[1], v1.b[0]
359; CHECK-GI-NEXT:    ldr b1, [x0, #2]
360; CHECK-GI-NEXT:    mov v0.b[2], v1.b[0]
361; CHECK-GI-NEXT:    ldr b1, [x0, #3]
362; CHECK-GI-NEXT:    mov v0.b[3], v1.b[0]
363; CHECK-GI-NEXT:    ldr b1, [x0, #4]
364; CHECK-GI-NEXT:    mov v0.b[4], v1.b[0]
365; CHECK-GI-NEXT:    ldr b1, [x0, #5]
366; CHECK-GI-NEXT:    mov v0.b[5], v1.b[0]
367; CHECK-GI-NEXT:    ldr b1, [x0, #6]
368; CHECK-GI-NEXT:    mov v0.b[6], v1.b[0]
369; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
370; CHECK-GI-NEXT:    ret
371  %a = load <7 x i8>, ptr %ptr
372  ret <7 x i8> %a
373}
374
; <3 x i16> load.
;   - default selector (SD prefix): a single ldr d0;
;   - -global-isel (GI prefix): ldr h0 for element 0, then single-lane ld1
;     loads into halfword lanes 1 and 2 from addresses x0+2 and x0+4.
375define <3 x i16> @load_v3i16(ptr %ptr) {
376; CHECK-SD-LABEL: load_v3i16:
377; CHECK-SD:       // %bb.0:
378; CHECK-SD-NEXT:    ldr d0, [x0]
379; CHECK-SD-NEXT:    ret
380;
381; CHECK-GI-LABEL: load_v3i16:
382; CHECK-GI:       // %bb.0:
383; CHECK-GI-NEXT:    ldr h0, [x0]
384; CHECK-GI-NEXT:    add x8, x0, #2
385; CHECK-GI-NEXT:    ld1 { v0.h }[1], [x8]
386; CHECK-GI-NEXT:    add x8, x0, #4
387; CHECK-GI-NEXT:    ld1 { v0.h }[2], [x8]
388; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
389; CHECK-GI-NEXT:    ret
390  %a = load <3 x i16>, ptr %ptr
391  ret <3 x i16> %a
392}
393
; <7 x i16> load.
;   - default selector (SD prefix): a single ldr q0;
;   - -global-isel (GI prefix): ldr h0 for element 0, then six single-lane ld1
;     loads into halfword lanes 1..6, stepping the address by 2 bytes each time.
394define <7 x i16> @load_v7i16(ptr %ptr) {
395; CHECK-SD-LABEL: load_v7i16:
396; CHECK-SD:       // %bb.0:
397; CHECK-SD-NEXT:    ldr q0, [x0]
398; CHECK-SD-NEXT:    ret
399;
400; CHECK-GI-LABEL: load_v7i16:
401; CHECK-GI:       // %bb.0:
402; CHECK-GI-NEXT:    ldr h0, [x0]
403; CHECK-GI-NEXT:    add x8, x0, #2
404; CHECK-GI-NEXT:    ld1 { v0.h }[1], [x8]
405; CHECK-GI-NEXT:    add x8, x0, #4
406; CHECK-GI-NEXT:    ld1 { v0.h }[2], [x8]
407; CHECK-GI-NEXT:    add x8, x0, #6
408; CHECK-GI-NEXT:    ld1 { v0.h }[3], [x8]
409; CHECK-GI-NEXT:    add x8, x0, #8
410; CHECK-GI-NEXT:    ld1 { v0.h }[4], [x8]
411; CHECK-GI-NEXT:    add x8, x0, #10
412; CHECK-GI-NEXT:    ld1 { v0.h }[5], [x8]
413; CHECK-GI-NEXT:    add x8, x0, #12
414; CHECK-GI-NEXT:    ld1 { v0.h }[6], [x8]
415; CHECK-GI-NEXT:    ret
416  %a = load <7 x i16>, ptr %ptr
417  ret <7 x i16> %a
418}
419
; <3 x i32> load.
;   - default selector (SD prefix): a single ldr q0;
;   - -global-isel (GI prefix): ldr s0 for element 0, then single-lane ld1
;     loads into word lanes 1 and 2 from addresses x0+4 and x0+8.
420define <3 x i32> @load_v3i32(ptr %ptr) {
421; CHECK-SD-LABEL: load_v3i32:
422; CHECK-SD:       // %bb.0:
423; CHECK-SD-NEXT:    ldr q0, [x0]
424; CHECK-SD-NEXT:    ret
425;
426; CHECK-GI-LABEL: load_v3i32:
427; CHECK-GI:       // %bb.0:
428; CHECK-GI-NEXT:    ldr s0, [x0]
429; CHECK-GI-NEXT:    add x8, x0, #4
430; CHECK-GI-NEXT:    ld1 { v0.s }[1], [x8]
431; CHECK-GI-NEXT:    add x8, x0, #8
432; CHECK-GI-NEXT:    ld1 { v0.s }[2], [x8]
433; CHECK-GI-NEXT:    ret
434  %a = load <3 x i32>, ptr %ptr
435  ret <3 x i32> %a
436}
437
; <2 x i128> load; the result comes back in x0..x3.
;   - default selector (SD prefix): two ldp pairs of 64-bit GPRs, with the
;     first value staged in x8 and moved to x0 last because x0 still holds
;     the pointer for the second ldp;
;   - -global-isel (GI prefix): ldp q0, q1, then each 64-bit half is moved to
;     a GPR via mov d<n>, v<m>.d[1] and fmov.
; NOTE(review): this function follows the "Non-Pow 2 Widths" section banner
; although its element count is 2.
438define <2 x i128> @load_v2i128(ptr %p) {
439; CHECK-SD-LABEL: load_v2i128:
440; CHECK-SD:       // %bb.0:
441; CHECK-SD-NEXT:    ldp x8, x1, [x0]
442; CHECK-SD-NEXT:    ldp x2, x3, [x0, #16]
443; CHECK-SD-NEXT:    mov x0, x8
444; CHECK-SD-NEXT:    ret
445;
446; CHECK-GI-LABEL: load_v2i128:
447; CHECK-GI:       // %bb.0:
448; CHECK-GI-NEXT:    ldp q0, q1, [x0]
449; CHECK-GI-NEXT:    mov d2, v0.d[1]
450; CHECK-GI-NEXT:    mov d3, v1.d[1]
451; CHECK-GI-NEXT:    fmov x0, d0
452; CHECK-GI-NEXT:    fmov x2, d1
453; CHECK-GI-NEXT:    fmov x1, d2
454; CHECK-GI-NEXT:    fmov x3, d3
455; CHECK-GI-NEXT:    ret
456  %a = load <2 x i128>, ptr %p
457  ret <2 x i128> %a
458}
459
; <2 x fp128> load. Both runs expect a single paired load, ldp q0, q1.
; NOTE(review): this function follows the "Non-Pow 2 Widths" section banner
; although its element count is 2.
460define <2 x fp128> @load_v2f128(ptr %p) {
461; CHECK-LABEL: load_v2f128:
462; CHECK:       // %bb.0:
463; CHECK-NEXT:    ldp q0, q1, [x0]
464; CHECK-NEXT:    ret
465  %a = load <2 x fp128>, ptr %p
466  ret <2 x fp128> %a
467}
468
; Loads a full i32, sign-extends its low 8 bits in place (shl 24 / ashr 24),
; and also stores the unmodified i32 to %ptr2, so the loaded value has two
; uses. Both runs expect a full-width ldr into w8, an sxtb producing the
; return value, and a str of w8 - not a sign-extending byte load.
; NOTE(review): the name mentions "s16" but no i16 type appears in the body,
; and the function sits after the "Non-Pow 2 Widths" banner despite being a
; scalar test.
469define i32 @load_i8_s16_extrasuse(ptr %ptr, ptr %ptr2) {
470; CHECK-LABEL: load_i8_s16_extrasuse:
471; CHECK:       // %bb.0:
472; CHECK-NEXT:    ldr w8, [x0]
473; CHECK-NEXT:    sxtb w0, w8
474; CHECK-NEXT:    str w8, [x1]
475; CHECK-NEXT:    ret
476  %a = load i32, ptr %ptr
477  %s = shl i32 %a, 24
478  %b = ashr i32 %s, 24
479  store i32 %a, ptr %ptr2
480  ret i32 %b
481}
482