; xref: /llvm-project/llvm/test/CodeGen/AArch64/sve-fixed-length-loads-stores.ll (revision c95253b1bac865b6d90cce186b7d665de163d50c)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -aarch64-sve-vector-bits-min=128  < %s | not grep ptrue
; RUN: llc -aarch64-sve-vector-bits-min=256  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
; RUN: llc -aarch64-sve-vector-bits-min=384  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
; RUN: llc -aarch64-sve-vector-bits-min=512  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=640  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=768  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=896  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=1024 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1152 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1280 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1408 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1536 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1664 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1792 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1920 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_2048

target triple = "aarch64-unknown-linux-gnu"

; Don't use SVE for 64-bit vectors: a plain NEON d-register ldr/str suffices.
define void @load_v2f32(ptr %a, ptr %b) #0 {
; CHECK-LABEL: load_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    str d0, [x1]
; CHECK-NEXT:    ret
  %load = load <2 x float>, ptr %a
  store <2 x float> %load, ptr %b
  ret void
}

; Don't use SVE for 128-bit vectors: a plain NEON q-register ldr/str suffices.
define void @load_v4f32(ptr %a, ptr %b) #0 {
; CHECK-LABEL: load_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    str q0, [x1]
; CHECK-NEXT:    ret
  %load = load <4 x float>, ptr %a
  store <4 x float> %load, ptr %b
  ret void
}

; 256-bit vector: expect a single predicated SVE load/store (ptrue vl8 = 8 x s lanes).
define void @load_v8f32(ptr %a, ptr %b) #0 {
; CHECK-LABEL: load_v8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %load = load <8 x float>, ptr %a
  store <8 x float> %load, ptr %b
  ret void
}

; 512-bit vector: two ld1w/st1w pairs when only 256-bit SVE registers are
; guaranteed; a single pair once the register covers all 16 lanes.
define void @load_v16f32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: load_v16f32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: load_v16f32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
;
; VBITS_GE_1024-LABEL: load_v16f32:
; VBITS_GE_1024:       // %bb.0:
; VBITS_GE_1024-NEXT:    ptrue p0.s, vl16
; VBITS_GE_1024-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_1024-NEXT:    st1w { z0.s }, p0, [x1]
; VBITS_GE_1024-NEXT:    ret
;
; VBITS_GE_2048-LABEL: load_v16f32:
; VBITS_GE_2048:       // %bb.0:
; VBITS_GE_2048-NEXT:    ptrue p0.s, vl16
; VBITS_GE_2048-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_2048-NEXT:    st1w { z0.s }, p0, [x1]
; VBITS_GE_2048-NEXT:    ret
  %load = load <16 x float>, ptr %a
  store <16 x float> %load, ptr %b
  ret void
}

; 1024-bit vector: four chunks at 256-bit SVE, two at 512-bit, one register
; once VBITS >= 1024.
define void @load_v32f32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: load_v32f32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_256-NEXT:    mov x9, #24 // =0x18
; VBITS_GE_256-NEXT:    mov x10, #8 // =0x8
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0, x9, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z2.s }, p0/z, [x0, x10, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z3.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x1, x9, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z2.s }, p0, [x1, x10, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z3.s }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: load_v32f32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_512-NEXT:    ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x1, x8, lsl #2]
; VBITS_GE_512-NEXT:    st1w { z1.s }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
;
; VBITS_GE_1024-LABEL: load_v32f32:
; VBITS_GE_1024:       // %bb.0:
; VBITS_GE_1024-NEXT:    ptrue p0.s, vl32
; VBITS_GE_1024-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_1024-NEXT:    st1w { z0.s }, p0, [x1]
; VBITS_GE_1024-NEXT:    ret
;
; VBITS_GE_2048-LABEL: load_v32f32:
; VBITS_GE_2048:       // %bb.0:
; VBITS_GE_2048-NEXT:    ptrue p0.s, vl32
; VBITS_GE_2048-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_2048-NEXT:    st1w { z0.s }, p0, [x1]
; VBITS_GE_2048-NEXT:    ret
  %load = load <32 x float>, ptr %a
  store <32 x float> %load, ptr %b
  ret void
}

; 2048-bit vector: eight chunks at 256-bit SVE, four at 512-bit, two at
; 1024-bit, a single register once VBITS >= 2048.
define void @load_v64f32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: load_v64f32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    mov x9, #24 // =0x18
; VBITS_GE_256-NEXT:    mov x10, #16 // =0x10
; VBITS_GE_256-NEXT:    mov x11, #48 // =0x30
; VBITS_GE_256-NEXT:    mov x12, #40 // =0x28
; VBITS_GE_256-NEXT:    mov x13, #56 // =0x38
; VBITS_GE_256-NEXT:    mov x14, #32 // =0x20
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x11, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z2.s }, p0/z, [x0, x13, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z3.s }, p0/z, [x0, x9, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z4.s }, p0/z, [x0, x10, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z5.s }, p0/z, [x0, x14, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z6.s }, p0/z, [x0, x12, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z7.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x1, x11, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z2.s }, p0, [x1, x13, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z5.s }, p0, [x1, x14, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z6.s }, p0, [x1, x12, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z4.s }, p0, [x1, x10, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z3.s }, p0, [x1, x9, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z7.s }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: load_v64f32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    mov x8, #32 // =0x20
; VBITS_GE_512-NEXT:    mov x9, #48 // =0x30
; VBITS_GE_512-NEXT:    mov x10, #16 // =0x10
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_512-NEXT:    ld1w { z1.s }, p0/z, [x0, x9, lsl #2]
; VBITS_GE_512-NEXT:    ld1w { z2.s }, p0/z, [x0, x10, lsl #2]
; VBITS_GE_512-NEXT:    ld1w { z3.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x1, x8, lsl #2]
; VBITS_GE_512-NEXT:    st1w { z1.s }, p0, [x1, x9, lsl #2]
; VBITS_GE_512-NEXT:    st1w { z2.s }, p0, [x1, x10, lsl #2]
; VBITS_GE_512-NEXT:    st1w { z3.s }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
;
; VBITS_GE_1024-LABEL: load_v64f32:
; VBITS_GE_1024:       // %bb.0:
; VBITS_GE_1024-NEXT:    ptrue p0.s, vl32
; VBITS_GE_1024-NEXT:    mov x8, #32 // =0x20
; VBITS_GE_1024-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_1024-NEXT:    ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_1024-NEXT:    st1w { z0.s }, p0, [x1, x8, lsl #2]
; VBITS_GE_1024-NEXT:    st1w { z1.s }, p0, [x1]
; VBITS_GE_1024-NEXT:    ret
;
; VBITS_GE_2048-LABEL: load_v64f32:
; VBITS_GE_2048:       // %bb.0:
; VBITS_GE_2048-NEXT:    ptrue p0.s, vl64
; VBITS_GE_2048-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_2048-NEXT:    st1w { z0.s }, p0, [x1]
; VBITS_GE_2048-NEXT:    ret
  %load = load <64 x float>, ptr %a
  store <64 x float> %load, ptr %b
  ret void
}

attributes #0 = { "target-features"="+sve" }