; xref: /llvm-project/llvm/test/CodeGen/AArch64/sve-intrinsics-st1.ll (revision 62baf21daa377c4ec1a641b26931063c1117d262)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -O0 -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
; RUN: llc -O0 -mtriple=aarch64-linux-gnu -mattr=+sme -force-streaming < %s | FileCheck %s

;
; ST1B
;

; Full-width byte store: the nxv16i8 data already matches the .b element size,
; so the intrinsic lowers to a single st1b with no truncation.
define void @st1b_i8(<vscale x 16 x i8> %data, <vscale x 16 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st1b_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st1.nxv16i8(<vscale x 16 x i8> %data,
                                          <vscale x 16 x i1> %pred,
                                          ptr %addr)
  ret void
}

; Truncating store: nxv8i16 data is narrowed to i8 in IR, then stored as bytes
; using the .h (halfword-lane) predicate layout.
define void @st1b_h(<vscale x 8 x i16> %data, <vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st1b_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    st1b { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %trunc = trunc <vscale x 8 x i16> %data to <vscale x 8 x i8>
  call void @llvm.aarch64.sve.st1.nxv8i8(<vscale x 8 x i8> %trunc,
                                         <vscale x 8 x i1> %pred,
                                         ptr %addr)
  ret void
}

; Truncating store: nxv4i32 data is narrowed to i8 in IR, then stored as bytes
; using the .s (word-lane) predicate layout.
define void @st1b_s(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st1b_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    st1b { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %trunc = trunc <vscale x 4 x i32> %data to <vscale x 4 x i8>
  call void @llvm.aarch64.sve.st1.nxv4i8(<vscale x 4 x i8> %trunc,
                                         <vscale x 4 x i1> %pred,
                                         ptr %addr)
  ret void
}

; Truncating store: nxv2i64 data is narrowed to i8 in IR, then stored as bytes
; using the .d (doubleword-lane) predicate layout.
define void @st1b_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st1b_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    st1b { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i8>
  call void @llvm.aarch64.sve.st1.nxv2i8(<vscale x 2 x i8> %trunc,
                                         <vscale x 2 x i1> %pred,
                                         ptr %addr)
  ret void
}

;
; ST1H
;

; Full-width halfword store: nxv8i16 data matches the .h element size directly.
define void @st1h_i16(<vscale x 8 x i16> %data, <vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st1h_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st1.nxv8i16(<vscale x 8 x i16> %data,
                                          <vscale x 8 x i1> %pred,
                                          ptr %addr)
  ret void
}

; Floating-point variant: half elements use the same st1h encoding as i16.
define void @st1h_f16(<vscale x 8 x half> %data, <vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st1h_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st1.nxv8f16(<vscale x 8 x half> %data,
                                          <vscale x 8 x i1> %pred,
                                          ptr %addr)
  ret void
}

; bfloat variant; carries attribute set #0 because this test additionally
; requires the +bf16 target feature (see the attributes line at end of file).
define void @st1h_bf16(<vscale x 8 x bfloat> %data, <vscale x 8 x i1> %pred, ptr %addr) #0 {
; CHECK-LABEL: st1h_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st1.nxv8bf16(<vscale x 8 x bfloat> %data,
                                           <vscale x 8 x i1> %pred,
                                           ptr %addr)
  ret void
}

; Truncating store: nxv4i32 data is narrowed to i16 in IR, then stored as
; halfwords using the .s (word-lane) predicate layout.
define void @st1h_s(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st1h_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    st1h { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %trunc = trunc <vscale x 4 x i32> %data to <vscale x 4 x i16>
  call void @llvm.aarch64.sve.st1.nxv4i16(<vscale x 4 x i16> %trunc,
                                         <vscale x 4 x i1> %pred,
                                         ptr %addr)
  ret void
}

; Truncating store: nxv2i64 data is narrowed to i16 in IR, then stored as
; halfwords using the .d (doubleword-lane) predicate layout.
define void @st1h_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st1h_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    st1h { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16>
  call void @llvm.aarch64.sve.st1.nxv2i16(<vscale x 2 x i16> %trunc,
                                         <vscale x 2 x i1> %pred,
                                         ptr %addr)
  ret void
}

;
; ST1W
;

; Full-width word store: nxv4i32 data matches the .s element size directly.
define void @st1w_i32(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st1w_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st1.nxv4i32(<vscale x 4 x i32> %data,
                                          <vscale x 4 x i1> %pred,
                                          ptr %addr)
  ret void
}

; Floating-point variant: float elements use the same st1w encoding as i32.
define void @st1w_f32(<vscale x 4 x float> %data, <vscale x 4 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st1w_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st1.nxv4f32(<vscale x 4 x float> %data,
                                          <vscale x 4 x i1> %pred,
                                          ptr %addr)
  ret void
}

; Truncating store: nxv2i64 data is narrowed to i32 in IR, then stored as
; words using the .d (doubleword-lane) predicate layout.
define void @st1w_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st1w_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    st1w { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32>
  call void @llvm.aarch64.sve.st1.nxv2i32(<vscale x 2 x i32> %trunc,
                                         <vscale x 2 x i1> %pred,
                                         ptr %addr)
  ret void
}

;
; ST1D
;

; Full-width doubleword store: nxv2i64 data matches the .d element size directly.
define void @st1d_i64(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st1d_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st1.nxv2i64(<vscale x 2 x i64> %data,
                                          <vscale x 2 x i1> %pred,
                                          ptr %addr)
  ret void
}

; Floating-point variant: double elements use the same st1d encoding as i64.
define void @st1d_f64(<vscale x 2 x double> %data, <vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st1d_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st1.nxv2f64(<vscale x 2 x double> %data,
                                          <vscale x 2 x i1> %pred,
                                          ptr %addr)
  ret void
}

declare void @llvm.aarch64.sve.st1.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, ptr)

declare void @llvm.aarch64.sve.st1.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i1>, ptr)
declare void @llvm.aarch64.sve.st1.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, ptr)
declare void @llvm.aarch64.sve.st1.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, ptr)
declare void @llvm.aarch64.sve.st1.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, ptr)

declare void @llvm.aarch64.sve.st1.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i1>, ptr)
declare void @llvm.aarch64.sve.st1.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i1>, ptr)
declare void @llvm.aarch64.sve.st1.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, ptr)
declare void @llvm.aarch64.sve.st1.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, ptr)

declare void @llvm.aarch64.sve.st1.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i1>, ptr)
declare void @llvm.aarch64.sve.st1.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i1>, ptr)
declare void @llvm.aarch64.sve.st1.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, ptr)
declare void @llvm.aarch64.sve.st1.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, ptr)
declare void @llvm.aarch64.sve.st1.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, ptr)

; +bf16 is required for the bfloat version.
attributes #0 = { "target-features"="+bf16" }