xref: /llvm-project/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-multivec-stores.ll (revision f6947e479e14e7904aa0b2539a95f5dfdc8f9295)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1,+bf16 < %s | FileCheck %s
3
4;
5; ST2Q
6;
; Test: st2q of two <vscale x 16 x i8> vectors to %addr indexed by %offset,
; where the getelementptr steps in i128 (16-byte) units. The expected output is
; a single st2q using the [x0, x1, lsl #4] address form, followed by ret.
7define void @st2q_ss_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, ptr %addr, i64 %offset) {
8; CHECK-LABEL: st2q_ss_i8:
9; CHECK:       // %bb.0:
10; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
11; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
12; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
13; CHECK-NEXT:    ret
14  %1 = getelementptr i128, ptr %addr, i64 %offset
15  call void @llvm.aarch64.sve.st2q.nxv16i8(<vscale x 16 x i8>%v0, <vscale x 16 x i8> %v1 ,
16                                           <vscale x 16 x i1> %pred,
17                                           ptr %1)
18  ret void
19}
20
; Test: st2q of two <vscale x 8 x i16> vectors to %addr indexed by %offset,
; where the getelementptr steps in i128 (16-byte) units. The expected output is
; a single st2q using the [x0, x1, lsl #4] address form, followed by ret.
21define void @st2q_ss_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
22; CHECK-LABEL: st2q_ss_i16:
23; CHECK:       // %bb.0:
24; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
25; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
26; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
27; CHECK-NEXT:    ret
28  %1 = getelementptr i128, ptr %addr, i64 %offset
29  call void @llvm.aarch64.sve.st2q.nxv8i16(<vscale x 8 x i16> %v0,
30                                          <vscale x 8 x i16> %v1,
31                                          <vscale x 8 x i1> %pred,
32                                          ptr %1)
33  ret void
34}
35
; Test: st2q of two <vscale x 4 x i32> vectors to %addr indexed by %offset,
; where the getelementptr steps in i128 (16-byte) units. The expected output is
; a single st2q using the [x0, x1, lsl #4] address form, followed by ret.
36define void @st2q_ss_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
37; CHECK-LABEL: st2q_ss_i32:
38; CHECK:       // %bb.0:
39; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
40; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
41; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
42; CHECK-NEXT:    ret
43  %1 = getelementptr i128, ptr %addr, i64 %offset
44  call void @llvm.aarch64.sve.st2q.nxv4i32(<vscale x 4 x i32> %v0,
45                                          <vscale x 4 x i32> %v1,
46                                          <vscale x 4 x i1> %pred,
47                                          ptr %1)
48  ret void
49}
50
; Test: st2q of two <vscale x 2 x i64> vectors to %addr indexed by %offset,
; where the getelementptr steps in i128 (16-byte) units. The expected output is
; a single st2q using the [x0, x1, lsl #4] address form, followed by ret.
51define void @st2q_ss_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
52; CHECK-LABEL: st2q_ss_i64:
53; CHECK:       // %bb.0:
54; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
55; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
56; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
57; CHECK-NEXT:    ret
58  %1 = getelementptr i128, ptr %addr, i64 %offset
59  call void @llvm.aarch64.sve.st2q.nxv2i64(<vscale x 2 x i64> %v0,
60                                          <vscale x 2 x i64> %v1,
61                                          <vscale x 2 x i1> %pred,
62                                          ptr %1)
63  ret void
64}
65
; Test: st2q of two <vscale x 8 x half> vectors to %addr indexed by %offset,
; where the getelementptr steps in i128 (16-byte) units. The expected output is
; a single st2q using the [x0, x1, lsl #4] address form, followed by ret.
66define void @st2q_ss_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
67; CHECK-LABEL: st2q_ss_f16:
68; CHECK:       // %bb.0:
69; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
70; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
71; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
72; CHECK-NEXT:    ret
73  %1 = getelementptr i128, ptr %addr, i64 %offset
74  call void @llvm.aarch64.sve.st2q.nxv8f16(<vscale x 8 x half> %v0,
75                                          <vscale x 8 x half> %v1,
76                                          <vscale x 8 x i1> %pred,
77                                          ptr %1)
78  ret void
79}
80
; Test: st2q of two <vscale x 4 x float> vectors to %addr indexed by %offset,
; where the getelementptr steps in i128 (16-byte) units. The expected output is
; a single st2q using the [x0, x1, lsl #4] address form, followed by ret.
81define void @st2q_ss_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
82; CHECK-LABEL: st2q_ss_f32:
83; CHECK:       // %bb.0:
84; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
85; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
86; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
87; CHECK-NEXT:    ret
88  %1 = getelementptr i128, ptr %addr, i64 %offset
89  call void @llvm.aarch64.sve.st2q.nxv4f32(<vscale x 4 x float> %v0,
90                                          <vscale x 4 x float> %v1,
91                                          <vscale x 4 x i1> %pred,
92                                          ptr %1)
93  ret void
94}
95
; Test: st2q of two <vscale x 2 x double> vectors to %addr indexed by %offset,
; where the getelementptr steps in i128 (16-byte) units. The expected output is
; a single st2q using the [x0, x1, lsl #4] address form, followed by ret.
96define void @st2q_ss_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
97; CHECK-LABEL: st2q_ss_f64:
98; CHECK:       // %bb.0:
99; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
100; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
101; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
102; CHECK-NEXT:    ret
103  %1 = getelementptr i128, ptr %addr, i64 %offset
104  call void @llvm.aarch64.sve.st2q.nxv2f64(<vscale x 2 x double> %v0,
105                                          <vscale x 2 x double> %v1,
106                                          <vscale x 2 x i1> %pred,
107                                          ptr %1)
108  ret void
109}
110
; Test: st2q of two <vscale x 8 x bfloat> vectors to %addr indexed by %offset,
; where the getelementptr steps in i128 (16-byte) units. The expected output is
; a single st2q using the [x0, x1, lsl #4] address form, followed by ret.
111define void @st2q_ss_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
112; CHECK-LABEL: st2q_ss_bf16:
113; CHECK:       // %bb.0:
114; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
115; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
116; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
117; CHECK-NEXT:    ret
118  %1 = getelementptr i128, ptr %addr, i64 %offset
119  call void @llvm.aarch64.sve.st2q.nxv8bf16(<vscale x 8 x bfloat> %v0,
120                                          <vscale x 8 x bfloat> %v1,
121                                          <vscale x 8 x i1> %pred,
122                                          ptr %1)
123  ret void
124}
125
126
; Test: st2q of two <vscale x 16 x i8> vectors to %addr offset by -16 whole
; <vscale x 16 x i8> vectors. The expected output is a single st2q using the
; [x0, #-16, mul vl] address form, followed by ret.
; NOTE(review): -16 looks like the lowest immediate st2q can encode - confirm
; against the Arm architecture reference.
127define void @st2q_si_i8_off16(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, ptr %addr) {
128; CHECK-LABEL: st2q_si_i8_off16:
129; CHECK:       // %bb.0:
130; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
131; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
132; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, #-16, mul vl]
133; CHECK-NEXT:    ret
134  %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 -16
135  call void @llvm.aarch64.sve.st2q.nxv16i8(<vscale x 16 x i8> %v0,
136                                           <vscale x 16 x i8> %v1,
137                                           <vscale x 16 x i1> %pred,
138                                           ptr %base)
139  ret void
140}
141
; Test: st2q of two <vscale x 16 x i8> vectors to %addr offset by 14 whole
; <vscale x 16 x i8> vectors. The expected output is a single st2q using the
; [x0, #14, mul vl] address form, followed by ret.
; NOTE(review): 14 looks like the highest immediate st2q can encode - confirm
; against the Arm architecture reference.
142define void @st2q_si_i8_off14(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, ptr %addr) {
143; CHECK-LABEL: st2q_si_i8_off14:
144; CHECK:       // %bb.0:
145; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
146; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
147; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
148; CHECK-NEXT:    ret
149  %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 14
150  call void @llvm.aarch64.sve.st2q.nxv16i8(<vscale x 16 x i8> %v0,
151                                           <vscale x 16 x i8> %v1,
152                                           <vscale x 16 x i1> %pred,
153                                           ptr %base)
154  ret void
155}
156
; Test: st2q of two <vscale x 8 x i16> vectors to %base offset by 14 whole
; <vscale x 8 x i16> vectors. The expected output is a single st2q using the
; [x0, #14, mul vl] address form, followed by ret.
157define void @st2q_si_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i1> %pred, ptr %base) {
158; CHECK-LABEL: st2q_si_i16:
159; CHECK:       // %bb.0:
160; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
161; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
162; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
163; CHECK-NEXT:    ret
164  %gep = getelementptr <vscale x 8 x i16>, ptr %base, i64 14
165  call void @llvm.aarch64.sve.st2q.nxv8i16(<vscale x 8 x i16> %v0,
166                                          <vscale x 8 x i16> %v1,
167                                          <vscale x 8 x i1> %pred,
168                                          ptr %gep)
169  ret void
170}
171
; Test: st2q of two <vscale x 4 x i32> vectors to %base offset by 14 whole
; <vscale x 4 x i32> vectors. The expected output is a single st2q using the
; [x0, #14, mul vl] address form, followed by ret.
172define void @st2q_si_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i1> %pred, ptr %base) {
173; CHECK-LABEL: st2q_si_i32:
174; CHECK:       // %bb.0:
175; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
176; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
177; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
178; CHECK-NEXT:    ret
179  %gep = getelementptr <vscale x 4 x i32>, ptr %base, i64 14
180  call void @llvm.aarch64.sve.st2q.nxv4i32(<vscale x 4 x i32> %v0,
181                                          <vscale x 4 x i32> %v1,
182                                          <vscale x 4 x i1> %pred,
183                                          ptr %gep)
184  ret void
185}
186
; Test: st2q of two <vscale x 2 x i64> vectors to %base offset by 14 whole
; <vscale x 2 x i64> vectors. The expected output is a single st2q using the
; [x0, #14, mul vl] address form, followed by ret.
187define void @st2q_si_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i1> %pred, ptr %base) {
188; CHECK-LABEL: st2q_si_i64:
189; CHECK:       // %bb.0:
190; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
191; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
192; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
193; CHECK-NEXT:    ret
194  %gep = getelementptr <vscale x 2 x i64>, ptr %base, i64 14
195  call void @llvm.aarch64.sve.st2q.nxv2i64(<vscale x 2 x i64> %v0,
196                                          <vscale x 2 x i64> %v1,
197                                          <vscale x 2 x i1> %pred,
198                                          ptr %gep)
199  ret void
200}
201
; Test: st2q of two <vscale x 8 x half> vectors to %base offset by 14 whole
; <vscale x 8 x half> vectors. The expected output is a single st2q using the
; [x0, #14, mul vl] address form, followed by ret.
202define void @st2q_si_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x i1> %pred, ptr %base) {
203; CHECK-LABEL: st2q_si_f16:
204; CHECK:       // %bb.0:
205; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
206; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
207; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
208; CHECK-NEXT:    ret
209  %gep = getelementptr <vscale x 8 x half>, ptr %base, i64 14
210  call void @llvm.aarch64.sve.st2q.nxv8f16(<vscale x 8 x half> %v0,
211                                          <vscale x 8 x half> %v1,
212                                          <vscale x 8 x i1> %pred,
213                                          ptr %gep)
214  ret void
215}
216
; Test: st2q of two <vscale x 4 x float> vectors to %base offset by 14 whole
; <vscale x 4 x float> vectors. The expected output is a single st2q using the
; [x0, #14, mul vl] address form, followed by ret.
217define void @st2q_si_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x i1> %pred, ptr %base) {
218; CHECK-LABEL: st2q_si_f32:
219; CHECK:       // %bb.0:
220; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
221; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
222; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
223; CHECK-NEXT:    ret
224  %gep = getelementptr <vscale x 4 x float>, ptr %base, i64 14
225  call void @llvm.aarch64.sve.st2q.nxv4f32(<vscale x 4 x float> %v0,
226                                          <vscale x 4 x float> %v1,
227                                          <vscale x 4 x i1> %pred,
228                                          ptr %gep)
229  ret void
230}
231
; Test: st2q of two <vscale x 2 x double> vectors to %base offset by 14 whole
; <vscale x 2 x double> vectors. The expected output is a single st2q using the
; [x0, #14, mul vl] address form, followed by ret.
232define void @st2q_si_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x i1> %pred, ptr %base) {
233; CHECK-LABEL: st2q_si_f64:
234; CHECK:       // %bb.0:
235; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
236; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
237; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
238; CHECK-NEXT:    ret
239  %gep= getelementptr <vscale x 2 x double>, ptr %base, i64 14
240  call void @llvm.aarch64.sve.st2q.nxv2f64(<vscale x 2 x double> %v0,
241                                          <vscale x 2 x double> %v1,
242                                          <vscale x 2 x i1> %pred,
243                                          ptr %gep)
244  ret void
245}
246
; Test: st2q of two <vscale x 8 x bfloat> vectors to %base offset by 14 whole
; <vscale x 8 x bfloat> vectors. The expected output is a single st2q using the
; [x0, #14, mul vl] address form, followed by ret.
247define void @st2q_si_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x i1> %pred, ptr %base) {
248; CHECK-LABEL: st2q_si_bf16:
249; CHECK:       // %bb.0:
250; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
251; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
252; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
253; CHECK-NEXT:    ret
254  %gep = getelementptr <vscale x 8 x bfloat>, ptr %base, i64 14
255  call void @llvm.aarch64.sve.st2q.nxv8bf16(<vscale x 8 x bfloat> %v0,
256                                          <vscale x 8 x bfloat> %v1,
257                                          <vscale x 8 x i1> %pred,
258                                          ptr %gep)
259  ret void
260}
261
262
263;
264; ST3Q
265;
; Test: st3q of three <vscale x 16 x i8> vectors to %addr indexed by %offset,
; where the getelementptr steps in i128 (16-byte) units. The expected output is
; a single st3q using the [x0, x1, lsl #4] address form, followed by ret.
266define void @st3q_ss_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %addr, i64 %offset) {
267; CHECK-LABEL: st3q_ss_i8:
268; CHECK:       // %bb.0:
269; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
270; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
271; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
272; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
273; CHECK-NEXT:    ret
274  %1 = getelementptr i128, ptr %addr, i64 %offset
275  call void @llvm.aarch64.sve.st3q.nxv16i8(<vscale x 16 x i8>%v0,
276                                           <vscale x 16 x i8> %v1,
277                                           <vscale x 16 x i8> %v2,
278                                           <vscale x 16 x i1> %pred,
279                                           ptr %1)
280  ret void
281}
282
; Test: st3q of three <vscale x 8 x i16> vectors to %addr indexed by %offset,
; where the getelementptr steps in i128 (16-byte) units. The expected output is
; a single st3q using the [x0, x1, lsl #4] address form, followed by ret.
283define void @st3q_ss_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2,  <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
284; CHECK-LABEL: st3q_ss_i16:
285; CHECK:       // %bb.0:
286; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
287; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
288; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
289; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
290; CHECK-NEXT:    ret
291  %1 = getelementptr i128, ptr %addr, i64 %offset
292  call void @llvm.aarch64.sve.st3q.nxv8i16(<vscale x 8 x i16> %v0,
293                                           <vscale x 8 x i16> %v1,
294                                           <vscale x 8 x i16> %v2,
295                                           <vscale x 8 x i1> %pred,
296                                           ptr %1)
297  ret void
298}
299
; Test: st3q of three <vscale x 4 x i32> vectors to %addr indexed by %offset,
; where the getelementptr steps in i128 (16-byte) units. The expected output is
; a single st3q using the [x0, x1, lsl #4] address form, followed by ret.
300define void @st3q_ss_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
301; CHECK-LABEL: st3q_ss_i32:
302; CHECK:       // %bb.0:
303; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
304; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
305; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
306; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
307; CHECK-NEXT:    ret
308  %1 = getelementptr i128, ptr %addr, i64 %offset
309  call void @llvm.aarch64.sve.st3q.nxv4i32(<vscale x 4 x i32> %v0,
310                                           <vscale x 4 x i32> %v1,
311                                           <vscale x 4 x i32> %v2,
312                                           <vscale x 4 x i1> %pred,
313                                           ptr %1)
314  ret void
315}
316
; Test: st3q of three <vscale x 2 x i64> vectors to %addr indexed by %offset,
; where the getelementptr steps in i128 (16-byte) units. The expected output is
; a single st3q using the [x0, x1, lsl #4] address form, followed by ret.
317define void @st3q_ss_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
318; CHECK-LABEL: st3q_ss_i64:
319; CHECK:       // %bb.0:
320; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
321; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
322; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
323; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
324; CHECK-NEXT:    ret
325  %1 = getelementptr i128, ptr %addr, i64 %offset
326  call void @llvm.aarch64.sve.st3q.nxv2i64(<vscale x 2 x i64> %v0,
327                                           <vscale x 2 x i64> %v1,
328                                           <vscale x 2 x i64> %v2,
329                                           <vscale x 2 x i1> %pred,
330                                           ptr %1)
331  ret void
332}
333
; Test: st3q of three <vscale x 8 x half> vectors to %addr indexed by %offset,
; where the getelementptr steps in i128 (16-byte) units. The expected output is
; a single st3q using the [x0, x1, lsl #4] address form, followed by ret.
334define void @st3q_ss_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
335; CHECK-LABEL: st3q_ss_f16:
336; CHECK:       // %bb.0:
337; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
338; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
339; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
340; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
341; CHECK-NEXT:    ret
342  %1 = getelementptr i128, ptr %addr, i64 %offset
343  call void @llvm.aarch64.sve.st3q.nxv8f16(<vscale x 8 x half> %v0,
344                                           <vscale x 8 x half> %v1,
345                                           <vscale x 8 x half> %v2,
346                                           <vscale x 8 x i1> %pred,
347                                           ptr %1)
348  ret void
349}
350
; Test: st3q of three <vscale x 4 x float> vectors to %addr indexed by %offset,
; where the getelementptr steps in i128 (16-byte) units. The expected output is
; a single st3q using the [x0, x1, lsl #4] address form, followed by ret.
351define void @st3q_ss_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
352; CHECK-LABEL: st3q_ss_f32:
353; CHECK:       // %bb.0:
354; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
355; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
356; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
357; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
358; CHECK-NEXT:    ret
359  %1 = getelementptr i128, ptr %addr, i64 %offset
360  call void @llvm.aarch64.sve.st3q.nxv4f32(<vscale x 4 x float> %v0,
361                                           <vscale x 4 x float> %v1,
362                                           <vscale x 4 x float> %v2,
363                                           <vscale x 4 x i1> %pred,
364                                           ptr %1)
365  ret void
366}
367
; Test: st3q of three <vscale x 2 x double> vectors to %addr indexed by %offset,
; where the getelementptr steps in i128 (16-byte) units. The expected output is
; a single st3q using the [x0, x1, lsl #4] address form, followed by ret.
368define void @st3q_ss_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
369; CHECK-LABEL: st3q_ss_f64:
370; CHECK:       // %bb.0:
371; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
372; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
373; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
374; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
375; CHECK-NEXT:    ret
376  %1 = getelementptr i128, ptr %addr, i64 %offset
377  call void @llvm.aarch64.sve.st3q.nxv2f64(<vscale x 2 x double> %v0,
378                                           <vscale x 2 x double> %v1,
379                                           <vscale x 2 x double> %v2,
380                                           <vscale x 2 x i1> %pred,
381                                           ptr %1)
382  ret void
383}
384
; Test: st3q of three <vscale x 8 x bfloat> vectors to %addr indexed by %offset,
; where the getelementptr steps in i128 (16-byte) units. The expected output is
; a single st3q using the [x0, x1, lsl #4] address form, followed by ret.
385define void @st3q_ss_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x bfloat> %v2, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
386; CHECK-LABEL: st3q_ss_bf16:
387; CHECK:       // %bb.0:
388; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
389; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
390; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
391; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
392; CHECK-NEXT:    ret
393  %1 = getelementptr i128, ptr %addr, i64 %offset
394  call void @llvm.aarch64.sve.st3q.nxv8bf16(<vscale x 8 x bfloat> %v0,
395                                            <vscale x 8 x bfloat> %v1,
396                                            <vscale x 8 x bfloat> %v2,
397                                            <vscale x 8 x i1> %pred,
398                                            ptr %1)
399  ret void
400}
401
; Test: st3q of three <vscale x 16 x i8> vectors to %addr offset by -24 whole
; <vscale x 16 x i8> vectors. The expected output is a single st3q using the
; [x0, #-24, mul vl] address form, followed by ret.
; NOTE(review): -24 looks like the lowest immediate st3q can encode - confirm
; against the Arm architecture reference.
402define void @st3q_si_i8_off24(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %addr) {
403; CHECK-LABEL: st3q_si_i8_off24:
404; CHECK:       // %bb.0:
405; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
406; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
407; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
408; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, #-24, mul vl]
409; CHECK-NEXT:    ret
410  %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 -24
411  call void @llvm.aarch64.sve.st3q.nxv16i8(<vscale x 16 x i8> %v0,
412                                           <vscale x 16 x i8> %v1,
413                                           <vscale x 16 x i8> %v2,
414                                           <vscale x 16 x i1> %pred,
415                                           ptr %base)
416  ret void
417}
418
; Test: st3q of three <vscale x 16 x i8> vectors to %addr offset by 21 whole
; <vscale x 16 x i8> vectors. The expected output is a single st3q using the
; [x0, #21, mul vl] address form, followed by ret.
; NOTE(review): 21 looks like the highest immediate st3q can encode - confirm
; against the Arm architecture reference.
419define void @st3q_si_i8_off21(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %addr) {
420; CHECK-LABEL: st3q_si_i8_off21:
421; CHECK:       // %bb.0:
422; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
423; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
424; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
425; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
426; CHECK-NEXT:    ret
427  %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 21
428  call void @llvm.aarch64.sve.st3q.nxv16i8(<vscale x 16 x i8> %v0,
429                                           <vscale x 16 x i8> %v1,
430                                           <vscale x 16 x i8> %v2,
431                                           <vscale x 16 x i1> %pred,
432                                           ptr %base)
433  ret void
434}
435
; Test: st3q of three <vscale x 8 x i16> vectors to %addr offset by 21 whole
; <vscale x 8 x i16> vectors. The expected output is a single st3q using the
; [x0, #21, mul vl] address form, followed by ret.
436define void @st3q_si_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2,  <vscale x 8 x i1> %pred, ptr %addr) {
437; CHECK-LABEL: st3q_si_i16:
438; CHECK:       // %bb.0:
439; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
440; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
441; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
442; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
443; CHECK-NEXT:    ret
444  %base = getelementptr <vscale x 8 x i16>, ptr %addr, i64 21
445  call void @llvm.aarch64.sve.st3q.nxv8i16(<vscale x 8 x i16> %v0,
446                                           <vscale x 8 x i16> %v1,
447                                           <vscale x 8 x i16> %v2,
448                                           <vscale x 8 x i1> %pred,
449                                           ptr %base)
450  ret void
451}
452
; Test: st3q of three <vscale x 4 x i32> vectors to %addr offset by 21 whole
; <vscale x 4 x i32> vectors. The expected output is a single st3q using the
; [x0, #21, mul vl] address form, followed by ret.
453define void @st3q_si_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i1> %pred, ptr %addr) {
454; CHECK-LABEL: st3q_si_i32:
455; CHECK:       // %bb.0:
456; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
457; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
458; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
459; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
460; CHECK-NEXT:    ret
461  %base = getelementptr <vscale x 4 x i32>, ptr %addr, i64 21
462  call void @llvm.aarch64.sve.st3q.nxv4i32(<vscale x 4 x i32> %v0,
463                                           <vscale x 4 x i32> %v1,
464                                           <vscale x 4 x i32> %v2,
465                                           <vscale x 4 x i1> %pred,
466                                           ptr %base)
467  ret void
468}
469
; Test: st3q of three <vscale x 2 x i64> vectors to %addr offset by 21 whole
; <vscale x 2 x i64> vectors. The expected output is a single st3q using the
; [x0, #21, mul vl] address form, followed by ret.
470define void @st3q_si_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1,<vscale x 2 x i64> %v2, <vscale x 2 x i1> %pred, ptr %addr) {
471; CHECK-LABEL: st3q_si_i64:
472; CHECK:       // %bb.0:
473; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
474; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
475; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
476; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
477; CHECK-NEXT:    ret
478  %base = getelementptr <vscale x 2 x i64>, ptr %addr, i64 21
479  call void @llvm.aarch64.sve.st3q.nxv2i64(<vscale x 2 x i64> %v0,
480                                           <vscale x 2 x i64> %v1,
481                                           <vscale x 2 x i64> %v2,
482                                           <vscale x 2 x i1> %pred,
483                                           ptr %base)
484  ret void
485}
486
; Test: st3q of three <vscale x 8 x half> vectors to %addr offset by 21 whole
; <vscale x 8 x half> vectors. The expected output is a single st3q using the
; [x0, #21, mul vl] address form, followed by ret.
487define void @st3q_si_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x i1> %pred, ptr %addr) {
488; CHECK-LABEL: st3q_si_f16:
489; CHECK:       // %bb.0:
490; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
491; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
492; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
493; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
494; CHECK-NEXT:    ret
495  %base = getelementptr <vscale x 8 x half>, ptr %addr, i64 21
496  call void @llvm.aarch64.sve.st3q.nxv8f16(<vscale x 8 x half> %v0,
497                                           <vscale x 8 x half> %v1,
498                                           <vscale x 8 x half> %v2,
499                                           <vscale x 8 x i1> %pred,
500                                           ptr %base)
501  ret void
502}
503
; Test: st3q of three <vscale x 4 x float> vectors to %addr offset by 21 whole
; <vscale x 4 x float> vectors. The expected output is a single st3q using the
; [x0, #21, mul vl] address form, followed by ret.
504define void @st3q_si_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x i1> %pred, ptr %addr) {
505; CHECK-LABEL: st3q_si_f32:
506; CHECK:       // %bb.0:
507; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
508; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
509; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
510; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
511; CHECK-NEXT:    ret
512  %base = getelementptr <vscale x 4 x float>, ptr %addr, i64 21
513  call void @llvm.aarch64.sve.st3q.nxv4f32(<vscale x 4 x float> %v0,
514                                           <vscale x 4 x float> %v1,
515                                           <vscale x 4 x float> %v2,
516                                           <vscale x 4 x i1> %pred,
517                                           ptr %base)
518  ret void
519}
520
; Test: st3q of three <vscale x 2 x double> vectors to %addr offset by 21 whole
; <vscale x 2 x double> vectors. The expected output is a single st3q using the
; [x0, #21, mul vl] address form, followed by ret.
521define void @st3q_si_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x i1> %pred, ptr %addr) {
522; CHECK-LABEL: st3q_si_f64:
523; CHECK:       // %bb.0:
524; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
525; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
526; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
527; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
528; CHECK-NEXT:    ret
529  %base = getelementptr <vscale x 2 x double>, ptr %addr, i64 21
530  call void @llvm.aarch64.sve.st3q.nxv2f64(<vscale x 2 x double> %v0,
531                                           <vscale x 2 x double> %v1,
532                                           <vscale x 2 x double> %v2,
533                                           <vscale x 2 x i1> %pred,
534                                           ptr %base)
535  ret void
536}
537
; Test: st3q of three <vscale x 8 x bfloat> vectors to %addr offset by 21 whole
; <vscale x 8 x bfloat> vectors. The expected output is a single st3q using the
; [x0, #21, mul vl] address form, followed by ret.
538define void @st3q_si_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x bfloat> %v2, <vscale x 8 x i1> %pred, ptr %addr) {
539; CHECK-LABEL: st3q_si_bf16:
540; CHECK:       // %bb.0:
541; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
542; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
543; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
544; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
545; CHECK-NEXT:    ret
546  %base = getelementptr <vscale x 8 x bfloat>, ptr %addr, i64 21
547  call void @llvm.aarch64.sve.st3q.nxv8bf16(<vscale x 8 x bfloat> %v0,
548                                            <vscale x 8 x bfloat> %v1,
549                                            <vscale x 8 x bfloat> %v2,
550                                            <vscale x 8 x i1> %pred,
551                                            ptr %base)
552  ret void
553}
554
555;
556; ST4Q
557;
; Test: st4q of four <vscale x 16 x i8> vectors to %addr indexed by %offset,
; where the getelementptr steps in i128 (16-byte) units. The expected output is
; a single st4q using the [x0, x1, lsl #4] address form, followed by ret.
558define void @st4q_ss_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2,<vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, ptr %addr, i64 %offset) {
559; CHECK-LABEL: st4q_ss_i8:
560; CHECK:       // %bb.0:
561; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
562; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
563; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
564; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
565; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
566; CHECK-NEXT:    ret
567  %1 = getelementptr i128, ptr %addr, i64 %offset
568  call void @llvm.aarch64.sve.st4q.nxv16i8(<vscale x 16 x i8>%v0,
569                                           <vscale x 16 x i8> %v1,
570                                           <vscale x 16 x i8> %v2,
571                                           <vscale x 16 x i8> %v3,
572                                           <vscale x 16 x i1> %pred,
573                                           ptr %1)
574  ret void
575}
576
; Test: st4q of four <vscale x 8 x i16> vectors to %addr indexed by %offset,
; where the getelementptr steps in i128 (16-byte) units. The expected output is
; a single st4q using the [x0, x1, lsl #4] address form, followed by ret.
577define void @st4q_ss_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i16> %v3, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
578; CHECK-LABEL: st4q_ss_i16:
579; CHECK:       // %bb.0:
580; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
581; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
582; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
583; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
584; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
585; CHECK-NEXT:    ret
586  %1 = getelementptr i128, ptr %addr, i64 %offset
587  call void @llvm.aarch64.sve.st4q.nxv8i16(<vscale x 8 x i16> %v0,
588                                           <vscale x 8 x i16> %v1,
589                                           <vscale x 8 x i16> %v2,
590                                           <vscale x 8 x i16> %v3,
591                                           <vscale x 8 x i1> %pred,
592                                           ptr %1)
593  ret void
594}
595
; Test: st4q of four <vscale x 4 x i32> vectors to %addr indexed by %offset,
; where the getelementptr steps in i128 (16-byte) units. The expected output is
; a single st4q using the [x0, x1, lsl #4] address form, followed by ret.
596define void @st4q_ss_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i32> %v3, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
597; CHECK-LABEL: st4q_ss_i32:
598; CHECK:       // %bb.0:
599; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
600; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
601; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
602; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
603; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
604; CHECK-NEXT:    ret
605  %1 = getelementptr i128, ptr %addr, i64 %offset
606  call void @llvm.aarch64.sve.st4q.nxv4i32(<vscale x 4 x i32> %v0,
607                                           <vscale x 4 x i32> %v1,
608                                           <vscale x 4 x i32> %v2,
609                                           <vscale x 4 x i32> %v3,
610                                           <vscale x 4 x i1> %pred,
611                                           ptr %1)
612  ret void
613}
614
; st4q with scalar+scalar addressing, <vscale x 2 x i64> data.
; The four vector arguments are stored under %pred to %addr indexed by %offset;
; the expected output folds the index into the store as `[x0, x1, lsl #4]`.
615define void @st4q_ss_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i64> %v3, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
616; CHECK-LABEL: st4q_ss_i64:
617; CHECK:       // %bb.0:
618; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
619; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
620; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
621; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
622; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
623; CHECK-NEXT:    ret
  ; i128 is 16 bytes wide, so %offset is scaled by 16 (the `lsl #4` expected above).
624  %1 = getelementptr i128, ptr %addr, i64 %offset
625  call void @llvm.aarch64.sve.st4q.nxv2i64(<vscale x 2 x i64> %v0,
626                                           <vscale x 2 x i64> %v1,
627                                           <vscale x 2 x i64> %v2,
628                                           <vscale x 2 x i64> %v3,
629                                           <vscale x 2 x i1> %pred,
630                                           ptr %1)
631  ret void
632}
633
; st4q with scalar+scalar addressing, <vscale x 8 x half> data.
; The four vector arguments are stored under %pred to %addr indexed by %offset;
; the expected output folds the index into the store as `[x0, x1, lsl #4]`.
634define void @st4q_ss_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x half> %v3, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
635; CHECK-LABEL: st4q_ss_f16:
636; CHECK:       // %bb.0:
637; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
638; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
639; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
640; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
641; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
642; CHECK-NEXT:    ret
  ; i128 is 16 bytes wide, so %offset is scaled by 16 (the `lsl #4` expected above).
643  %1 = getelementptr i128, ptr %addr, i64 %offset
644  call void @llvm.aarch64.sve.st4q.nxv8f16(<vscale x 8 x half> %v0,
645                                           <vscale x 8 x half> %v1,
646                                           <vscale x 8 x half> %v2,
647                                           <vscale x 8 x half> %v3,
648                                           <vscale x 8 x i1> %pred,
649                                           ptr %1)
650  ret void
651}
652
; st4q with scalar+scalar addressing, <vscale x 4 x float> data.
; The four vector arguments are stored under %pred to %addr indexed by %offset;
; the expected output folds the index into the store as `[x0, x1, lsl #4]`.
653define void @st4q_ss_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x float> %v3, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
654; CHECK-LABEL: st4q_ss_f32:
655; CHECK:       // %bb.0:
656; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
657; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
658; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
659; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
660; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
661; CHECK-NEXT:    ret
  ; i128 is 16 bytes wide, so %offset is scaled by 16 (the `lsl #4` expected above).
662  %1 = getelementptr i128, ptr %addr, i64 %offset
663  call void @llvm.aarch64.sve.st4q.nxv4f32(<vscale x 4 x float> %v0,
664                                           <vscale x 4 x float> %v1,
665                                           <vscale x 4 x float> %v2,
666                                           <vscale x 4 x float> %v3,
667                                           <vscale x 4 x i1> %pred,
668                                           ptr %1)
669  ret void
670}
671
; st4q with scalar+scalar addressing, <vscale x 2 x double> data.
; The four vector arguments are stored under %pred to %addr indexed by %offset;
; the expected output folds the index into the store as `[x0, x1, lsl #4]`.
672define void @st4q_ss_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x double> %v3, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
673; CHECK-LABEL: st4q_ss_f64:
674; CHECK:       // %bb.0:
675; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
676; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
677; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
678; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
679; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
680; CHECK-NEXT:    ret
  ; i128 is 16 bytes wide, so %offset is scaled by 16 (the `lsl #4` expected above).
681  %1 = getelementptr i128, ptr %addr, i64 %offset
682  call void @llvm.aarch64.sve.st4q.nxv2f64(<vscale x 2 x double> %v0,
683                                           <vscale x 2 x double> %v1,
684                                           <vscale x 2 x double> %v2,
685                                           <vscale x 2 x double> %v3,
686                                           <vscale x 2 x i1> %pred,
687                                           ptr %1)
688  ret void
689}
690
; st4q with scalar+scalar addressing, <vscale x 8 x bfloat> data.
; The four vector arguments are stored under %pred to %addr indexed by %offset;
; the expected output folds the index into the store as `[x0, x1, lsl #4]`.
691define void @st4q_ss_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x bfloat> %v2, <vscale x 8 x bfloat> %v3, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
692; CHECK-LABEL: st4q_ss_bf16:
693; CHECK:       // %bb.0:
694; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
695; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
696; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
697; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
698; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
699; CHECK-NEXT:    ret
  ; i128 is 16 bytes wide, so %offset is scaled by 16 (the `lsl #4` expected above).
700  %1 = getelementptr i128, ptr %addr, i64 %offset
701  call void @llvm.aarch64.sve.st4q.nxv8bf16(<vscale x 8 x bfloat> %v0,
702                                            <vscale x 8 x bfloat> %v1,
703                                            <vscale x 8 x bfloat> %v2,
704                                            <vscale x 8 x bfloat> %v3,
705                                            <vscale x 8 x i1> %pred,
706                                            ptr %1)
707  ret void
708}
709
; st4q with scalar+immediate addressing, <vscale x 16 x i8> data, offset -32.
; The address is %addr moved back by 32 whole scalable vectors; the expected
; output encodes that directly as `[x0, #-32, mul vl]`.
; NOTE(review): -32 is presumably the most negative immediate st4q can encode;
; confirm against the ST4Q encoding in the Arm ARM.
710define void @st4q_si_i8_off32(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2,<vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, ptr %addr) {
711; CHECK-LABEL: st4q_si_i8_off32:
712; CHECK:       // %bb.0:
713; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
714; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
715; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
716; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
717; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, #-32, mul vl]
718; CHECK-NEXT:    ret
  ; The GEP element type is a full scalable vector, so the index counts vectors.
719  %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 -32
720  call void @llvm.aarch64.sve.st4q.nxv16i8(<vscale x 16 x i8> %v0,
721                                           <vscale x 16 x i8> %v1,
722                                           <vscale x 16 x i8> %v2,
723                                           <vscale x 16 x i8> %v3,
724                                           <vscale x 16 x i1> %pred,
725                                           ptr %base)
726  ret void
727}
728
; st4q with scalar+immediate addressing, <vscale x 16 x i8> data, offset +28.
; The address is %addr advanced by 28 whole scalable vectors; the expected
; output encodes that directly as `[x0, #28, mul vl]`.
; NOTE(review): 28 is presumably the largest immediate st4q can encode;
; confirm against the ST4Q encoding in the Arm ARM.
729define void @st4q_si_i8_off28(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2,<vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, ptr %addr) {
730; CHECK-LABEL: st4q_si_i8_off28:
731; CHECK:       // %bb.0:
732; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
733; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
734; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
735; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
736; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
737; CHECK-NEXT:    ret
  ; The GEP element type is a full scalable vector, so the index counts vectors.
738  %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 28
739  call void @llvm.aarch64.sve.st4q.nxv16i8(<vscale x 16 x i8> %v0,
740                                           <vscale x 16 x i8> %v1,
741                                           <vscale x 16 x i8> %v2,
742                                           <vscale x 16 x i8> %v3,
743                                           <vscale x 16 x i1> %pred,
744                                           ptr %base)
745  ret void
746}
747
; st4q with scalar+immediate addressing, <vscale x 8 x i16> data.
; The address is %addr advanced by 28 whole scalable vectors; the expected
; output encodes that directly as `[x0, #28, mul vl]`.
748define void @st4q_si_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i16> %v3,  <vscale x 8 x i1> %pred, ptr %addr) {
749; CHECK-LABEL: st4q_si_i16:
750; CHECK:       // %bb.0:
751; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
752; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
753; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
754; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
755; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
756; CHECK-NEXT:    ret
  ; The GEP element type is a full scalable vector, so the index counts vectors.
757  %base = getelementptr <vscale x 8 x i16>, ptr %addr, i64 28
758  call void @llvm.aarch64.sve.st4q.nxv8i16(<vscale x 8 x i16> %v0,
759                                           <vscale x 8 x i16> %v1,
760                                           <vscale x 8 x i16> %v2,
761                                           <vscale x 8 x i16> %v3,
762                                           <vscale x 8 x i1> %pred,
763                                           ptr %base)
764  ret void
765}
766
; st4q with scalar+immediate addressing, <vscale x 4 x i32> data.
; The address is %addr advanced by 28 whole scalable vectors; the expected
; output encodes that directly as `[x0, #28, mul vl]`.
767define void @st4q_si_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i32> %v3, <vscale x 4 x i1> %pred, ptr %addr) {
768; CHECK-LABEL: st4q_si_i32:
769; CHECK:       // %bb.0:
770; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
771; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
772; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
773; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
774; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
775; CHECK-NEXT:    ret
  ; The GEP element type is a full scalable vector, so the index counts vectors.
776  %base = getelementptr <vscale x 4 x i32>, ptr %addr, i64 28
777  call void @llvm.aarch64.sve.st4q.nxv4i32(<vscale x 4 x i32> %v0,
778                                           <vscale x 4 x i32> %v1,
779                                           <vscale x 4 x i32> %v2,
780                                           <vscale x 4 x i32> %v3,
781                                           <vscale x 4 x i1> %pred,
782                                           ptr %base)
783  ret void
784}
785
; st4q with scalar+immediate addressing, <vscale x 2 x i64> data.
; The address is %addr advanced by 28 whole scalable vectors; the expected
; output encodes that directly as `[x0, #28, mul vl]`.
786define void @st4q_si_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i64> %v3, <vscale x 2 x i1> %pred, ptr %addr) {
787; CHECK-LABEL: st4q_si_i64:
788; CHECK:       // %bb.0:
789; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
790; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
791; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
792; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
793; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
794; CHECK-NEXT:    ret
  ; The GEP element type is a full scalable vector, so the index counts vectors.
795  %base = getelementptr <vscale x 2 x i64>, ptr %addr, i64 28
796  call void @llvm.aarch64.sve.st4q.nxv2i64(<vscale x 2 x i64> %v0,
797                                           <vscale x 2 x i64> %v1,
798                                           <vscale x 2 x i64> %v2,
799                                           <vscale x 2 x i64> %v3,
800                                           <vscale x 2 x i1> %pred,
801                                           ptr %base)
802  ret void
803}
804
; st4q with scalar+immediate addressing, <vscale x 8 x half> data.
; The address is %addr advanced by 28 whole scalable vectors; the expected
; output encodes that directly as `[x0, #28, mul vl]`.
805define void @st4q_si_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x half> %v3, <vscale x 8 x i1> %pred, ptr %addr) {
806; CHECK-LABEL: st4q_si_f16:
807; CHECK:       // %bb.0:
808; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
809; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
810; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
811; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
812; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
813; CHECK-NEXT:    ret
  ; The GEP element type is a full scalable vector, so the index counts vectors.
814  %base = getelementptr <vscale x 8 x half>, ptr %addr, i64 28
815  call void @llvm.aarch64.sve.st4q.nxv8f16(<vscale x 8 x half> %v0,
816                                           <vscale x 8 x half> %v1,
817                                           <vscale x 8 x half> %v2,
818                                           <vscale x 8 x half> %v3,
819                                           <vscale x 8 x i1> %pred,
820                                           ptr %base)
821  ret void
822}
823
; st4q with scalar+immediate addressing, <vscale x 4 x float> data.
; The address is %addr advanced by 28 whole scalable vectors; the expected
; output encodes that directly as `[x0, #28, mul vl]`.
824define void @st4q_si_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2,<vscale x 4 x float> %v3,  <vscale x 4 x i1> %pred, ptr %addr) {
825; CHECK-LABEL: st4q_si_f32:
826; CHECK:       // %bb.0:
827; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
828; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
829; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
830; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
831; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
832; CHECK-NEXT:    ret
  ; The GEP element type is a full scalable vector, so the index counts vectors.
833  %base = getelementptr <vscale x 4 x float>, ptr %addr, i64 28
834  call void @llvm.aarch64.sve.st4q.nxv4f32(<vscale x 4 x float> %v0,
835                                           <vscale x 4 x float> %v1,
836                                           <vscale x 4 x float> %v2,
837                                           <vscale x 4 x float> %v3,
838                                           <vscale x 4 x i1> %pred,
839                                           ptr %base)
840  ret void
841}
842
; st4q with scalar+immediate addressing, <vscale x 2 x double> data.
; The address is %addr advanced by 28 whole scalable vectors; the expected
; output encodes that directly as `[x0, #28, mul vl]`.
843define void @st4q_si_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x double> %v3, <vscale x 2 x i1> %pred, ptr %addr) {
844; CHECK-LABEL: st4q_si_f64:
845; CHECK:       // %bb.0:
846; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
847; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
848; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
849; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
850; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
851; CHECK-NEXT:    ret
  ; The GEP element type is a full scalable vector, so the index counts vectors.
852  %base = getelementptr <vscale x 2 x double>, ptr %addr, i64 28
853  call void @llvm.aarch64.sve.st4q.nxv2f64(<vscale x 2 x double> %v0,
854                                           <vscale x 2 x double> %v1,
855                                           <vscale x 2 x double> %v2,
856                                           <vscale x 2 x double> %v3,
857                                           <vscale x 2 x i1> %pred,
858                                           ptr %base)
859  ret void
860}
861
; st4q with scalar+immediate addressing, <vscale x 8 x bfloat> data.
; The address is %addr advanced by 28 whole scalable vectors; the expected
; output encodes that directly as `[x0, #28, mul vl]`.
862define void @st4q_si_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x bfloat> %v2, <vscale x 8 x bfloat> %v3, <vscale x 8 x i1> %pred, ptr %addr) {
863; CHECK-LABEL: st4q_si_bf16:
864; CHECK:       // %bb.0:
865; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
866; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
867; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
868; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
869; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
870; CHECK-NEXT:    ret
  ; The GEP element type is a full scalable vector, so the index counts vectors.
871  %base = getelementptr <vscale x 8 x bfloat>, ptr %addr, i64 28
872  call void @llvm.aarch64.sve.st4q.nxv8bf16(<vscale x 8 x bfloat> %v0,
873                                            <vscale x 8 x bfloat> %v1,
874                                            <vscale x 8 x bfloat> %v2,
875                                            <vscale x 8 x bfloat> %v3,
876                                            <vscale x 8 x i1> %pred,
877                                            ptr %base)
878  ret void
879}
880
881
; Declarations of the store intrinsics exercised by this file. Each takes N data
; vectors of one element type (N = 2, 3 or 4), a predicate vector with the same
; lane count as the data, and the destination pointer.

; st2q: two data vectors (integer element types, then floating-point).
882declare void @llvm.aarch64.sve.st2q.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, ptr)
883declare void @llvm.aarch64.sve.st2q.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, ptr)
884declare void @llvm.aarch64.sve.st2q.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, ptr)
885declare void @llvm.aarch64.sve.st2q.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, ptr)
886
887declare void @llvm.aarch64.sve.st2q.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, ptr)
888declare void @llvm.aarch64.sve.st2q.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, ptr)
889declare void @llvm.aarch64.sve.st2q.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, ptr)
890declare void @llvm.aarch64.sve.st2q.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x i1>, ptr)
891
; st3q: three data vectors (integer element types, then floating-point).
892declare void @llvm.aarch64.sve.st3q.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>,<vscale x 16 x i8>, <vscale x 16 x i1>, ptr)
893declare void @llvm.aarch64.sve.st3q.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, ptr)
894declare void @llvm.aarch64.sve.st3q.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, ptr)
895declare void @llvm.aarch64.sve.st3q.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, ptr)
896
897declare void @llvm.aarch64.sve.st3q.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, ptr)
898declare void @llvm.aarch64.sve.st3q.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, ptr)
899declare void @llvm.aarch64.sve.st3q.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, ptr)
900declare void @llvm.aarch64.sve.st3q.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x i1>, ptr)
901
; st4q: four data vectors (integer element types, then floating-point).
902declare void @llvm.aarch64.sve.st4q.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>,<vscale x 16 x i1>, ptr)
903declare void @llvm.aarch64.sve.st4q.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, ptr)
904declare void @llvm.aarch64.sve.st4q.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>,<vscale x 4 x i1>, ptr)
905declare void @llvm.aarch64.sve.st4q.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, ptr)
906
907declare void @llvm.aarch64.sve.st4q.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, ptr)
908declare void @llvm.aarch64.sve.st4q.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, ptr)
909declare void @llvm.aarch64.sve.st4q.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, ptr)
910declare void @llvm.aarch64.sve.st4q.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x i1>, ptr)
911