; xref: /llvm-project/llvm/test/CodeGen/AArch64/sve-fixed-length-ext-loads.ll (revision c95253b1bac865b6d90cce186b7d665de163d50c)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; Checks that fixed-length vector sign/zero-extending loads are lowered to SVE
; extending loads (ld1[s]{b,h,w}) when the minimum SVE vector length is known.
; RUN: llc -aarch64-sve-vector-bits-min=1024  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=2048  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_2048

target triple = "aarch64-unknown-linux-gnu"

; NEON-sized vectors keep using NEON: the zext lowers to ushll, not SVE.
define <4 x i32> @load_zext_v4i16i32(ptr %ap) vscale_range(2,0) #0 {
; CHECK-LABEL: load_zext_v4i16i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %ap
  %val = zext <4 x i16> %a to <4 x i32>
  ret <4 x i32> %val
}

; Don't try to use SVE for irregular types.
define <2 x i256> @load_zext_v2i64i256(ptr %ap) #0 {
; CHECK-LABEL: load_zext_v2i64i256:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    mov x1, xzr
; CHECK-NEXT:    mov x2, xzr
; CHECK-NEXT:    mov x3, xzr
; CHECK-NEXT:    mov x5, xzr
; CHECK-NEXT:    mov x6, xzr
; CHECK-NEXT:    mov x4, v0.d[1]
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    mov x7, xzr
; CHECK-NEXT:    ret
  %a = load <2 x i64>, ptr %ap
  %val = zext <2 x i64> %a to <2 x i256>
  ret <2 x i256> %val
}

; zext of a loaded <8 x i16> folds into a single SVE extending load (ld1h).
define void @load_zext_v8i16i32(ptr %ap, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: load_zext_v8i16i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %ap
  %val = zext <8 x i16> %a to <8 x i32>
  store <8 x i32> %val, ptr %b
  ret void
}

; As above but 16 elements, requiring at least a 512-bit vector (vscale_range(4,0)).
define void @load_zext_v16i16i32(ptr %ap, ptr %b) vscale_range(4,0) #0 {
; CHECK-LABEL: load_zext_v16i16i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl16
; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %a = load <16 x i16>, ptr %ap
  %val = zext <16 x i16> %a to <16 x i32>
  store <16 x i32> %val, ptr %b
  ret void
}

; As above but 32 elements, requiring at least a 1024-bit vector (vscale_range(8,0)).
define void @load_zext_v32i16i32(ptr %ap, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: load_zext_v32i16i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %a = load <32 x i16>, ptr %ap
  %val = zext <32 x i16> %a to <32 x i32>
  store <32 x i32> %val, ptr %b
  ret void
}

; 64 elements: at 1024-bit SVE this splits into two VL-32 load/store pairs;
; at 2048-bit SVE a single VL-64 pair suffices.
define void @load_zext_v64i16i32(ptr %ap, ptr %b) #0 {
; VBITS_GE_1024-LABEL: load_zext_v64i16i32:
; VBITS_GE_1024:       // %bb.0:
; VBITS_GE_1024-NEXT:    ptrue p0.s, vl32
; VBITS_GE_1024-NEXT:    mov x8, #32 // =0x20
; VBITS_GE_1024-NEXT:    ld1h { z0.s }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_1024-NEXT:    ld1h { z1.s }, p0/z, [x0]
; VBITS_GE_1024-NEXT:    st1w { z0.s }, p0, [x1, x8, lsl #2]
; VBITS_GE_1024-NEXT:    st1w { z1.s }, p0, [x1]
; VBITS_GE_1024-NEXT:    ret
;
; VBITS_GE_2048-LABEL: load_zext_v64i16i32:
; VBITS_GE_2048:       // %bb.0:
; VBITS_GE_2048-NEXT:    ptrue p0.s, vl64
; VBITS_GE_2048-NEXT:    ld1h { z0.s }, p0/z, [x0]
; VBITS_GE_2048-NEXT:    st1w { z0.s }, p0, [x1]
; VBITS_GE_2048-NEXT:    ret
  %a = load <64 x i16>, ptr %ap
  %val = zext <64 x i16> %a to <64 x i32>
  store <64 x i32> %val, ptr %b
  ret void
}

; NEON-sized vectors keep using NEON: the sext lowers to sshll, not SVE.
define <4 x i32> @load_sext_v4i16i32(ptr %ap) vscale_range(2,0) #0 {
; CHECK-LABEL: load_sext_v4i16i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %ap
  %val = sext <4 x i16> %a to <4 x i32>
  ret <4 x i32> %val
}

; sext of a loaded <8 x i16> folds into a single SVE sign-extending load (ld1sh).
define void @load_sext_v8i16i32(ptr %ap, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: load_sext_v8i16i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1sh { z0.s }, p0/z, [x0]
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %ap
  %val = sext <8 x i16> %a to <8 x i32>
  store <8 x i32> %val, ptr %b
  ret void
}

; As above but 16 elements, requiring at least a 512-bit vector (vscale_range(4,0)).
define void @load_sext_v16i16i32(ptr %ap, ptr %b) vscale_range(4,0) #0 {
; CHECK-LABEL: load_sext_v16i16i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl16
; CHECK-NEXT:    ld1sh { z0.s }, p0/z, [x0]
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %a = load <16 x i16>, ptr %ap
  %val = sext <16 x i16> %a to <16 x i32>
  store <16 x i32> %val, ptr %b
  ret void
}

; As above but 32 elements, requiring at least a 1024-bit vector (vscale_range(8,0)).
define void @load_sext_v32i16i32(ptr %ap, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: load_sext_v32i16i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1sh { z0.s }, p0/z, [x0]
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %a = load <32 x i16>, ptr %ap
  %val = sext <32 x i16> %a to <32 x i32>
  store <32 x i32> %val, ptr %b
  ret void
}

; 64 elements: at 1024-bit SVE this splits into two VL-32 load/store pairs;
; at 2048-bit SVE a single VL-64 pair suffices.
define void @load_sext_v64i16i32(ptr %ap, ptr %b) #0 {
; VBITS_GE_1024-LABEL: load_sext_v64i16i32:
; VBITS_GE_1024:       // %bb.0:
; VBITS_GE_1024-NEXT:    ptrue p0.s, vl32
; VBITS_GE_1024-NEXT:    mov x8, #32 // =0x20
; VBITS_GE_1024-NEXT:    ld1sh { z0.s }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_1024-NEXT:    ld1sh { z1.s }, p0/z, [x0]
; VBITS_GE_1024-NEXT:    st1w { z0.s }, p0, [x1, x8, lsl #2]
; VBITS_GE_1024-NEXT:    st1w { z1.s }, p0, [x1]
; VBITS_GE_1024-NEXT:    ret
;
; VBITS_GE_2048-LABEL: load_sext_v64i16i32:
; VBITS_GE_2048:       // %bb.0:
; VBITS_GE_2048-NEXT:    ptrue p0.s, vl64
; VBITS_GE_2048-NEXT:    ld1sh { z0.s }, p0/z, [x0]
; VBITS_GE_2048-NEXT:    st1w { z0.s }, p0, [x1]
; VBITS_GE_2048-NEXT:    ret
  %a = load <64 x i16>, ptr %ap
  %val = sext <64 x i16> %a to <64 x i32>
  store <64 x i32> %val, ptr %b
  ret void
}

; i8 -> i64 zero-extend: lowered to ld1b into .d elements (split at 1024-bit SVE).
define void @load_zext_v32i8i64(ptr %ap, ptr %b) #0 {
; VBITS_GE_1024-LABEL: load_zext_v32i8i64:
; VBITS_GE_1024:       // %bb.0:
; VBITS_GE_1024-NEXT:    ptrue p0.d, vl16
; VBITS_GE_1024-NEXT:    mov w8, #16 // =0x10
; VBITS_GE_1024-NEXT:    ld1b { z0.d }, p0/z, [x0, x8]
; VBITS_GE_1024-NEXT:    ld1b { z1.d }, p0/z, [x0]
; VBITS_GE_1024-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_1024-NEXT:    st1d { z0.d }, p0, [x1, x8, lsl #3]
; VBITS_GE_1024-NEXT:    st1d { z1.d }, p0, [x1]
; VBITS_GE_1024-NEXT:    ret
;
; VBITS_GE_2048-LABEL: load_zext_v32i8i64:
; VBITS_GE_2048:       // %bb.0:
; VBITS_GE_2048-NEXT:    ptrue p0.d, vl32
; VBITS_GE_2048-NEXT:    ld1b { z0.d }, p0/z, [x0]
; VBITS_GE_2048-NEXT:    st1d { z0.d }, p0, [x1]
; VBITS_GE_2048-NEXT:    ret
  %a = load <32 x i8>, ptr %ap
  %val = zext <32 x i8> %a to <32 x i64>
  store <32 x i64> %val, ptr %b
  ret void
}

; i8 -> i64 sign-extend: lowered to ld1sb into .d elements (split at 1024-bit SVE).
define void @load_sext_v32i8i64(ptr %ap, ptr %b) #0 {
; VBITS_GE_1024-LABEL: load_sext_v32i8i64:
; VBITS_GE_1024:       // %bb.0:
; VBITS_GE_1024-NEXT:    ptrue p0.d, vl16
; VBITS_GE_1024-NEXT:    mov w8, #16 // =0x10
; VBITS_GE_1024-NEXT:    ld1sb { z0.d }, p0/z, [x0, x8]
; VBITS_GE_1024-NEXT:    ld1sb { z1.d }, p0/z, [x0]
; VBITS_GE_1024-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_1024-NEXT:    st1d { z0.d }, p0, [x1, x8, lsl #3]
; VBITS_GE_1024-NEXT:    st1d { z1.d }, p0, [x1]
; VBITS_GE_1024-NEXT:    ret
;
; VBITS_GE_2048-LABEL: load_sext_v32i8i64:
; VBITS_GE_2048:       // %bb.0:
; VBITS_GE_2048-NEXT:    ptrue p0.d, vl32
; VBITS_GE_2048-NEXT:    ld1sb { z0.d }, p0/z, [x0]
; VBITS_GE_2048-NEXT:    st1d { z0.d }, p0, [x1]
; VBITS_GE_2048-NEXT:    ret
  %a = load <32 x i8>, ptr %ap
  %val = sext <32 x i8> %a to <32 x i64>
  store <32 x i64> %val, ptr %b
  ret void
}

; i16 -> i64 zero-extend: lowered to ld1h into .d elements (split at 1024-bit SVE).
define void @load_zext_v32i16i64(ptr %ap, ptr %b) #0 {
; VBITS_GE_1024-LABEL: load_zext_v32i16i64:
; VBITS_GE_1024:       // %bb.0:
; VBITS_GE_1024-NEXT:    ptrue p0.d, vl16
; VBITS_GE_1024-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_1024-NEXT:    ld1h { z0.d }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_1024-NEXT:    ld1h { z1.d }, p0/z, [x0]
; VBITS_GE_1024-NEXT:    st1d { z0.d }, p0, [x1, x8, lsl #3]
; VBITS_GE_1024-NEXT:    st1d { z1.d }, p0, [x1]
; VBITS_GE_1024-NEXT:    ret
;
; VBITS_GE_2048-LABEL: load_zext_v32i16i64:
; VBITS_GE_2048:       // %bb.0:
; VBITS_GE_2048-NEXT:    ptrue p0.d, vl32
; VBITS_GE_2048-NEXT:    ld1h { z0.d }, p0/z, [x0]
; VBITS_GE_2048-NEXT:    st1d { z0.d }, p0, [x1]
; VBITS_GE_2048-NEXT:    ret
  %a = load <32 x i16>, ptr %ap
  %val = zext <32 x i16> %a to <32 x i64>
  store <32 x i64> %val, ptr %b
  ret void
}

; i16 -> i64 sign-extend: lowered to ld1sh into .d elements (split at 1024-bit SVE).
define void @load_sext_v32i16i64(ptr %ap, ptr %b) #0 {
; VBITS_GE_1024-LABEL: load_sext_v32i16i64:
; VBITS_GE_1024:       // %bb.0:
; VBITS_GE_1024-NEXT:    ptrue p0.d, vl16
; VBITS_GE_1024-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_1024-NEXT:    ld1sh { z0.d }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_1024-NEXT:    ld1sh { z1.d }, p0/z, [x0]
; VBITS_GE_1024-NEXT:    st1d { z0.d }, p0, [x1, x8, lsl #3]
; VBITS_GE_1024-NEXT:    st1d { z1.d }, p0, [x1]
; VBITS_GE_1024-NEXT:    ret
;
; VBITS_GE_2048-LABEL: load_sext_v32i16i64:
; VBITS_GE_2048:       // %bb.0:
; VBITS_GE_2048-NEXT:    ptrue p0.d, vl32
; VBITS_GE_2048-NEXT:    ld1sh { z0.d }, p0/z, [x0]
; VBITS_GE_2048-NEXT:    st1d { z0.d }, p0, [x1]
; VBITS_GE_2048-NEXT:    ret
  %a = load <32 x i16>, ptr %ap
  %val = sext <32 x i16> %a to <32 x i64>
  store <32 x i64> %val, ptr %b
  ret void
}

; i32 -> i64 zero-extend: lowered to ld1w into .d elements (split at 1024-bit SVE).
define void @load_zext_v32i32i64(ptr %ap, ptr %b) #0 {
; VBITS_GE_1024-LABEL: load_zext_v32i32i64:
; VBITS_GE_1024:       // %bb.0:
; VBITS_GE_1024-NEXT:    ptrue p0.d, vl16
; VBITS_GE_1024-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_1024-NEXT:    ld1w { z0.d }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_1024-NEXT:    ld1w { z1.d }, p0/z, [x0]
; VBITS_GE_1024-NEXT:    st1d { z0.d }, p0, [x1, x8, lsl #3]
; VBITS_GE_1024-NEXT:    st1d { z1.d }, p0, [x1]
; VBITS_GE_1024-NEXT:    ret
;
; VBITS_GE_2048-LABEL: load_zext_v32i32i64:
; VBITS_GE_2048:       // %bb.0:
; VBITS_GE_2048-NEXT:    ptrue p0.d, vl32
; VBITS_GE_2048-NEXT:    ld1w { z0.d }, p0/z, [x0]
; VBITS_GE_2048-NEXT:    st1d { z0.d }, p0, [x1]
; VBITS_GE_2048-NEXT:    ret
  %a = load <32 x i32>, ptr %ap
  %val = zext <32 x i32> %a to <32 x i64>
  store <32 x i64> %val, ptr %b
  ret void
}

; i32 -> i64 sign-extend: lowered to ld1sw into .d elements (split at 1024-bit SVE).
define void @load_sext_v32i32i64(ptr %ap, ptr %b) #0 {
; VBITS_GE_1024-LABEL: load_sext_v32i32i64:
; VBITS_GE_1024:       // %bb.0:
; VBITS_GE_1024-NEXT:    ptrue p0.d, vl16
; VBITS_GE_1024-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_1024-NEXT:    ld1sw { z0.d }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_1024-NEXT:    ld1sw { z1.d }, p0/z, [x0]
; VBITS_GE_1024-NEXT:    st1d { z0.d }, p0, [x1, x8, lsl #3]
; VBITS_GE_1024-NEXT:    st1d { z1.d }, p0, [x1]
; VBITS_GE_1024-NEXT:    ret
;
; VBITS_GE_2048-LABEL: load_sext_v32i32i64:
; VBITS_GE_2048:       // %bb.0:
; VBITS_GE_2048-NEXT:    ptrue p0.d, vl32
; VBITS_GE_2048-NEXT:    ld1sw { z0.d }, p0/z, [x0]
; VBITS_GE_2048-NEXT:    st1d { z0.d }, p0, [x1]
; VBITS_GE_2048-NEXT:    ret
  %a = load <32 x i32>, ptr %ap
  %val = sext <32 x i32> %a to <32 x i64>
  store <32 x i64> %val, ptr %b
  ret void
}

; All test functions require the SVE target feature.
attributes #0 = { "target-features"="+sve" }
