; xref: /llvm-project/llvm/test/CodeGen/AArch64/sme2-intrinsics-insert-mova.ll (revision 62baf21daa377c4ec1a641b26931063c1117d262)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -force-streaming -verify-machineinstrs < %s | FileCheck %s
3
4;
5; Move Multi-Vector To Tile (Write) x 2
6;
7
8; Horizontal
9
; Each test issues the intrinsic twice: once with a plain %slice index and once
; with (%slice + imm), checking that the add folds into the MOVA slice
; immediate (e.g. "w12, 14:15") rather than emitting a separate add.

; .b: only one byte tile exists (za0); 0:1 and 14:15 are the minimum and
; maximum consecutive slice pairs of a 16-row byte tile.
10define void @za_write_vg2_horiz_b(i32 %slice, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2) {
11; CHECK-LABEL: za_write_vg2_horiz_b:
12; CHECK:       // %bb.0:
13; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
14; CHECK-NEXT:    mov w12, w0
15; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
16; CHECK-NEXT:    mov za0h.b[w12, 0:1], { z0.b, z1.b }
17; CHECK-NEXT:    mov za0h.b[w12, 14:15], { z0.b, z1.b }
18; CHECK-NEXT:    ret
19  call void @llvm.aarch64.sme.write.hor.vg2.nxv16i8(i32 0, i32 %slice, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2)
20  %slice.14 = add i32 %slice, 14
21  call void @llvm.aarch64.sme.write.hor.vg2.nxv16i8(i32 0, i32 %slice.14, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2)
22  ret void
23}
24
; .h: second write targets tile 1 and folds offset 6 into the 6:7 slice pair
; (the maximum for an 8-row halfword tile).
25define void @za_write_vg2_horiz_h(i32 %slice, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2) {
26; CHECK-LABEL: za_write_vg2_horiz_h:
27; CHECK:       // %bb.0:
28; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
29; CHECK-NEXT:    mov w12, w0
30; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
31; CHECK-NEXT:    mov za0h.h[w12, 0:1], { z0.h, z1.h }
32; CHECK-NEXT:    mov za1h.h[w12, 6:7], { z0.h, z1.h }
33; CHECK-NEXT:    ret
34  call void @llvm.aarch64.sme.write.hor.vg2.nxv8i16(i32 0, i32 %slice, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2)
35  %slice.6 = add i32 %slice, 6
36  call void @llvm.aarch64.sme.write.hor.vg2.nxv8i16(i32 1, i32 %slice.6, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2)
37  ret void
38}
39
; f16: identical lowering to the i16 case — the element type does not change
; the emitted MOVA, only the intrinsic's vector type.
40define void @za_write_vg2_horiz_f16(i32 %slice, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2) {
41; CHECK-LABEL: za_write_vg2_horiz_f16:
42; CHECK:       // %bb.0:
43; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
44; CHECK-NEXT:    mov w12, w0
45; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
46; CHECK-NEXT:    mov za0h.h[w12, 0:1], { z0.h, z1.h }
47; CHECK-NEXT:    mov za1h.h[w12, 6:7], { z0.h, z1.h }
48; CHECK-NEXT:    ret
49  call void @llvm.aarch64.sme.write.hor.vg2.nxv8f16(i32 0, i32 %slice, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2)
50  %slice.6 = add i32 %slice, 6
51  call void @llvm.aarch64.sme.write.hor.vg2.nxv8f16(i32 1, i32 %slice.6, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2)
52  ret void
53}
54
; bf16: same halfword-tile lowering as i16/f16.
55define void @za_write_vg2_horiz_bf16(i32 %slice, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2) {
56; CHECK-LABEL: za_write_vg2_horiz_bf16:
57; CHECK:       // %bb.0:
58; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
59; CHECK-NEXT:    mov w12, w0
60; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
61; CHECK-NEXT:    mov za0h.h[w12, 0:1], { z0.h, z1.h }
62; CHECK-NEXT:    mov za1h.h[w12, 6:7], { z0.h, z1.h }
63; CHECK-NEXT:    ret
64  call void @llvm.aarch64.sme.write.hor.vg2.nxv8bf16(i32 0, i32 %slice, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2)
65  %slice.6 = add i32 %slice, 6
66  call void @llvm.aarch64.sme.write.hor.vg2.nxv8bf16(i32 1, i32 %slice.6, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2)
67  ret void
68}
69
; .s: second write targets tile 3 (highest word tile) with slice pair 2:3
; (the maximum for a 4-row word tile).
70define void @za_write_vg2_horiz_s(i32 %slice, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2) {
71; CHECK-LABEL: za_write_vg2_horiz_s:
72; CHECK:       // %bb.0:
73; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
74; CHECK-NEXT:    mov w12, w0
75; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
76; CHECK-NEXT:    mov za0h.s[w12, 0:1], { z0.s, z1.s }
77; CHECK-NEXT:    mov za3h.s[w12, 2:3], { z0.s, z1.s }
78; CHECK-NEXT:    ret
79  call void @llvm.aarch64.sme.write.hor.vg2.nxv4i32(i32 0, i32 %slice, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2)
80  %slice.2 = add i32 %slice, 2
81  call void @llvm.aarch64.sme.write.hor.vg2.nxv4i32(i32 3, i32 %slice.2, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2)
82  ret void
83}
84
; f32: identical lowering to the i32 case.
85define void @za_write_vg2_horiz_f32(i32 %slice, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) {
86; CHECK-LABEL: za_write_vg2_horiz_f32:
87; CHECK:       // %bb.0:
88; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
89; CHECK-NEXT:    mov w12, w0
90; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
91; CHECK-NEXT:    mov za0h.s[w12, 0:1], { z0.s, z1.s }
92; CHECK-NEXT:    mov za3h.s[w12, 2:3], { z0.s, z1.s }
93; CHECK-NEXT:    ret
94  call void @llvm.aarch64.sme.write.hor.vg2.nxv4f32(i32 0, i32 %slice, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2)
95  %slice.2 = add i32 %slice, 2
96  call void @llvm.aarch64.sme.write.hor.vg2.nxv4f32(i32 3, i32 %slice.2, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2)
97  ret void
98}
99
; .d: a 2-row doubleword tile holds exactly one 0:1 pair, so only the
; zero-offset case is representable (no add-fold variant tested).
100define void @za_write_vg2_horiz_d(i32 %slice, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2) {
101; CHECK-LABEL: za_write_vg2_horiz_d:
102; CHECK:       // %bb.0:
103; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
104; CHECK-NEXT:    mov w12, w0
105; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
106; CHECK-NEXT:    mov za0h.d[w12, 0:1], { z0.d, z1.d }
107; CHECK-NEXT:    ret
108  call void @llvm.aarch64.sme.write.hor.vg2.nxv2i64(i32 0, i32 %slice, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2)
109  ret void
110}
111
; f64: identical lowering to the i64 case.
112define void @za_write_vg2_horiz_f64(i32 %slice, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2) {
113; CHECK-LABEL: za_write_vg2_horiz_f64:
114; CHECK:       // %bb.0:
115; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
116; CHECK-NEXT:    mov w12, w0
117; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
118; CHECK-NEXT:    mov za0h.d[w12, 0:1], { z0.d, z1.d }
119; CHECK-NEXT:    ret
120  call void @llvm.aarch64.sme.write.hor.vg2.nxv2f64(i32 0, i32 %slice, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2)
121  ret void
122}
123
124; Vertical
125
; Vertical (column-slice) variants of the vg2 tests above: same tile, offset
; and add-folding coverage, but via the .ver intrinsic, which must select the
; "v" form of the tile slice (zaNv) instead of "h".
126define void @za_write_vg2_vert_b(i32 %slice, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2) {
127; CHECK-LABEL: za_write_vg2_vert_b:
128; CHECK:       // %bb.0:
129; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
130; CHECK-NEXT:    mov w12, w0
131; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
132; CHECK-NEXT:    mov za0v.b[w12, 0:1], { z0.b, z1.b }
133; CHECK-NEXT:    mov za0v.b[w12, 14:15], { z0.b, z1.b }
134; CHECK-NEXT:    ret
135  call void @llvm.aarch64.sme.write.ver.vg2.nxv16i8(i32 0, i32 %slice, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2)
136  %slice.14 = add i32 %slice, 14
137  call void @llvm.aarch64.sme.write.ver.vg2.nxv16i8(i32 0, i32 %slice.14, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2)
138  ret void
139}
140
; .h: tile 1, folded slice pair 6:7.
141define void @za_write_vg2_vert_h(i32 %slice, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2) {
142; CHECK-LABEL: za_write_vg2_vert_h:
143; CHECK:       // %bb.0:
144; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
145; CHECK-NEXT:    mov w12, w0
146; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
147; CHECK-NEXT:    mov za0v.h[w12, 0:1], { z0.h, z1.h }
148; CHECK-NEXT:    mov za1v.h[w12, 6:7], { z0.h, z1.h }
149; CHECK-NEXT:    ret
150  call void @llvm.aarch64.sme.write.ver.vg2.nxv8i16(i32 0, i32 %slice, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2)
151  %slice.6 = add i32 %slice, 6
152  call void @llvm.aarch64.sme.write.ver.vg2.nxv8i16(i32 1, i32 %slice.6, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2)
153  ret void
154}
155
; f16: same lowering as the i16 case.
156define void @za_write_vg2_vert_f16(i32 %slice, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2) {
157; CHECK-LABEL: za_write_vg2_vert_f16:
158; CHECK:       // %bb.0:
159; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
160; CHECK-NEXT:    mov w12, w0
161; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
162; CHECK-NEXT:    mov za0v.h[w12, 0:1], { z0.h, z1.h }
163; CHECK-NEXT:    mov za1v.h[w12, 6:7], { z0.h, z1.h }
164; CHECK-NEXT:    ret
165  call void @llvm.aarch64.sme.write.ver.vg2.nxv8f16(i32 0, i32 %slice, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2)
166  %slice.6 = add i32 %slice, 6
167  call void @llvm.aarch64.sme.write.ver.vg2.nxv8f16(i32 1, i32 %slice.6, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2)
168  ret void
169}
170
; bf16: same lowering as the i16 case.
171define void @za_write_vg2_vert_bf16(i32 %slice, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2) {
172; CHECK-LABEL: za_write_vg2_vert_bf16:
173; CHECK:       // %bb.0:
174; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
175; CHECK-NEXT:    mov w12, w0
176; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
177; CHECK-NEXT:    mov za0v.h[w12, 0:1], { z0.h, z1.h }
178; CHECK-NEXT:    mov za1v.h[w12, 6:7], { z0.h, z1.h }
179; CHECK-NEXT:    ret
180  call void @llvm.aarch64.sme.write.ver.vg2.nxv8bf16(i32 0, i32 %slice, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2)
181  %slice.6 = add i32 %slice, 6
182  call void @llvm.aarch64.sme.write.ver.vg2.nxv8bf16(i32 1, i32 %slice.6, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2)
183  ret void
184}
185
; .s: tile 3, folded slice pair 2:3.
186define void @za_write_vg2_vert_s(i32 %slice, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2) {
187; CHECK-LABEL: za_write_vg2_vert_s:
188; CHECK:       // %bb.0:
189; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
190; CHECK-NEXT:    mov w12, w0
191; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
192; CHECK-NEXT:    mov za0v.s[w12, 0:1], { z0.s, z1.s }
193; CHECK-NEXT:    mov za3v.s[w12, 2:3], { z0.s, z1.s }
194; CHECK-NEXT:    ret
195  call void @llvm.aarch64.sme.write.ver.vg2.nxv4i32(i32 0, i32 %slice, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2)
196  %slice.2 = add i32 %slice, 2
197  call void @llvm.aarch64.sme.write.ver.vg2.nxv4i32(i32 3, i32 %slice.2, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2)
198  ret void
199}
200
; f32: same lowering as the i32 case.
201define void @za_write_vg2_vert_f32(i32 %slice, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) {
202; CHECK-LABEL: za_write_vg2_vert_f32:
203; CHECK:       // %bb.0:
204; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
205; CHECK-NEXT:    mov w12, w0
206; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
207; CHECK-NEXT:    mov za0v.s[w12, 0:1], { z0.s, z1.s }
208; CHECK-NEXT:    mov za3v.s[w12, 2:3], { z0.s, z1.s }
209; CHECK-NEXT:    ret
210  call void @llvm.aarch64.sme.write.ver.vg2.nxv4f32(i32 0, i32 %slice, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2)
211  %slice.2 = add i32 %slice, 2
212  call void @llvm.aarch64.sme.write.ver.vg2.nxv4f32(i32 3, i32 %slice.2, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2)
213  ret void
214}
215
; .d: only the zero-offset 0:1 pair fits a 2-row doubleword tile.
216define void @za_write_vg2_vert_d(i32 %slice, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2) {
217; CHECK-LABEL: za_write_vg2_vert_d:
218; CHECK:       // %bb.0:
219; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
220; CHECK-NEXT:    mov w12, w0
221; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
222; CHECK-NEXT:    mov za0v.d[w12, 0:1], { z0.d, z1.d }
223; CHECK-NEXT:    ret
224  call void @llvm.aarch64.sme.write.ver.vg2.nxv2i64(i32 0, i32 %slice, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2)
225  ret void
226}
227
; f64: same lowering as the i64 case.
228define void @za_write_vg2_vert_f64(i32 %slice, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2) {
229; CHECK-LABEL: za_write_vg2_vert_f64:
230; CHECK:       // %bb.0:
231; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
232; CHECK-NEXT:    mov w12, w0
233; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
234; CHECK-NEXT:    mov za0v.d[w12, 0:1], { z0.d, z1.d }
235; CHECK-NEXT:    ret
236  call void @llvm.aarch64.sme.write.ver.vg2.nxv2f64(i32 0, i32 %slice, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2)
237  ret void
238}
239
240;
241; Move Multi-Vector To Tile (Write) x 4
242;
243
244;  Horizontal
245
; x4 variants: four consecutive Z registers ({ z0 - z3 }) written to four
; consecutive tile slices per MOVA. Offsets must now be multiples-of-4-sized
; ranges (0:3, 4:7, 12:15); types whose tiles have <= 4 rows (.s, .d) only
; admit the zero-offset form.

; .b: offsets 0:3 and 12:15 (the highest quad for a 16-row byte tile).
246define void @za_write_vg4_horiz_b(i32 %slice, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4) {
247; CHECK-LABEL: za_write_vg4_horiz_b:
248; CHECK:       // %bb.0:
249; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
250; CHECK-NEXT:    mov w12, w0
251; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
252; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
253; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
254; CHECK-NEXT:    mov za0h.b[w12, 0:3], { z0.b - z3.b }
255; CHECK-NEXT:    mov za0h.b[w12, 12:15], { z0.b - z3.b }
256; CHECK-NEXT:    ret
257  call void @llvm.aarch64.sme.write.hor.vg4.nxv16i8(i32 0, i32 %slice, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4)
258  %slice.12 = add i32 %slice, 12
259  call void @llvm.aarch64.sme.write.hor.vg4.nxv16i8(i32 0, i32 %slice.12, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4)
260  ret void
261}
262
; .h: tile 1, folded quad 4:7 (the only non-zero quad for an 8-row tile).
263define void @za_write_vg4_horiz_h(i32 %slice, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zn4) {
264; CHECK-LABEL: za_write_vg4_horiz_h:
265; CHECK:       // %bb.0:
266; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
267; CHECK-NEXT:    mov w12, w0
268; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
269; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
270; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
271; CHECK-NEXT:    mov za0h.h[w12, 0:3], { z0.h - z3.h }
272; CHECK-NEXT:    mov za1h.h[w12, 4:7], { z0.h - z3.h }
273; CHECK-NEXT:    ret
274  call void @llvm.aarch64.sme.write.hor.vg4.nxv8i16(i32 0, i32 %slice, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zn4)
275  %slice.4 = add i32 %slice, 4
276  call void @llvm.aarch64.sme.write.hor.vg4.nxv8i16(i32 1, i32 %slice.4, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zn4)
277  ret void
278}
279
; f16: same lowering as the i16 case.
280define void @za_write_vg4_horiz_f16(i32 %slice, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3, <vscale x 8 x half> %zn4) {
281; CHECK-LABEL: za_write_vg4_horiz_f16:
282; CHECK:       // %bb.0:
283; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
284; CHECK-NEXT:    mov w12, w0
285; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
286; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
287; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
288; CHECK-NEXT:    mov za0h.h[w12, 0:3], { z0.h - z3.h }
289; CHECK-NEXT:    mov za1h.h[w12, 4:7], { z0.h - z3.h }
290; CHECK-NEXT:    ret
291  call void @llvm.aarch64.sme.write.hor.vg4.nxv8f16(i32 0, i32 %slice, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3, <vscale x 8 x half> %zn4)
292  %slice.4 = add i32 %slice, 4
293  call void @llvm.aarch64.sme.write.hor.vg4.nxv8f16(i32 1, i32 %slice.4, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3, <vscale x 8 x half> %zn4)
294  ret void
295}
296
; bf16: same lowering as the i16 case.
297define void @za_write_vg4_horiz_bf16(i32 %slice, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3, <vscale x 8 x bfloat> %zn4) {
298; CHECK-LABEL: za_write_vg4_horiz_bf16:
299; CHECK:       // %bb.0:
300; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
301; CHECK-NEXT:    mov w12, w0
302; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
303; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
304; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
305; CHECK-NEXT:    mov za0h.h[w12, 0:3], { z0.h - z3.h }
306; CHECK-NEXT:    mov za1h.h[w12, 4:7], { z0.h - z3.h }
307; CHECK-NEXT:    ret
308  call void @llvm.aarch64.sme.write.hor.vg4.nxv8bf16(i32 0, i32 %slice, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3, <vscale x 8 x bfloat> %zn4)
309  %slice.4 = add i32 %slice, 4
310  call void @llvm.aarch64.sme.write.hor.vg4.nxv8bf16(i32 1, i32 %slice.4, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3, <vscale x 8 x bfloat> %zn4)
311  ret void
312}
313
; .s: a 4-row word tile admits only the 0:3 quad.
314define void @za_write_vg4_horiz_s(i32 %slice, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4) {
315; CHECK-LABEL: za_write_vg4_horiz_s:
316; CHECK:       // %bb.0:
317; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
318; CHECK-NEXT:    mov w12, w0
319; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
320; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
321; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
322; CHECK-NEXT:    mov za0h.s[w12, 0:3], { z0.s - z3.s }
323; CHECK-NEXT:    ret
324  call void @llvm.aarch64.sme.write.hor.vg4.nxv4i32(i32 0, i32 %slice, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4)
325  ret void
326}
327
; f32: same lowering as the i32 case.
328define void @za_write_vg4_horiz_f32(i32 %slice, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4) {
329; CHECK-LABEL: za_write_vg4_horiz_f32:
330; CHECK:       // %bb.0:
331; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
332; CHECK-NEXT:    mov w12, w0
333; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
334; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
335; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
336; CHECK-NEXT:    mov za0h.s[w12, 0:3], { z0.s - z3.s }
337; CHECK-NEXT:    ret
338  call void @llvm.aarch64.sme.write.hor.vg4.nxv4f32(i32 0, i32 %slice, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4)
339  ret void
340}
341
; .d: the 0:3 quad spans beyond the tile's 2 rows per vector pair; only the
; base (zero-offset) form is emitted.
342define void @za_write_vg4_horiz_d(i32 %slice, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zn4) {
343; CHECK-LABEL: za_write_vg4_horiz_d:
344; CHECK:       // %bb.0:
345; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
346; CHECK-NEXT:    mov w12, w0
347; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
348; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
349; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
350; CHECK-NEXT:    mov za0h.d[w12, 0:3], { z0.d - z3.d }
351; CHECK-NEXT:    ret
352  call void @llvm.aarch64.sme.write.hor.vg4.nxv2i64(i32 0, i32 %slice, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zn4)
353  ret void
354}
355
; f64: same lowering as the i64 case.
356define void @za_write_vg4_horiz_f64(i32 %slice, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3, <vscale x 2 x double> %zn4) {
357; CHECK-LABEL: za_write_vg4_horiz_f64:
358; CHECK:       // %bb.0:
359; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
360; CHECK-NEXT:    mov w12, w0
361; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
362; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
363; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
364; CHECK-NEXT:    mov za0h.d[w12, 0:3], { z0.d - z3.d }
365; CHECK-NEXT:    ret
366  call void @llvm.aarch64.sme.write.hor.vg4.nxv2f64(i32 0, i32 %slice, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3, <vscale x 2 x double> %zn4)
367  ret void
368}
369
370; Vertical
371
; Vertical (column-slice) variants of the vg4 tests above: identical tile,
; offset and add-folding coverage via the .ver intrinsic, selecting the "v"
; tile-slice form (zaNv).
372define void @za_write_vg4_vert_b(i32 %slice, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4) {
373; CHECK-LABEL: za_write_vg4_vert_b:
374; CHECK:       // %bb.0:
375; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
376; CHECK-NEXT:    mov w12, w0
377; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
378; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
379; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
380; CHECK-NEXT:    mov za0v.b[w12, 0:3], { z0.b - z3.b }
381; CHECK-NEXT:    mov za0v.b[w12, 12:15], { z0.b - z3.b }
382; CHECK-NEXT:    ret
383  call void @llvm.aarch64.sme.write.ver.vg4.nxv16i8(i32 0, i32 %slice, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4)
384  %slice.12 = add i32 %slice, 12
385  call void @llvm.aarch64.sme.write.ver.vg4.nxv16i8(i32 0, i32 %slice.12, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4)
386  ret void
387}
388
; .h: tile 1, folded quad 4:7.
389define void @za_write_vg4_vert_h(i32 %slice, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zn4) {
390; CHECK-LABEL: za_write_vg4_vert_h:
391; CHECK:       // %bb.0:
392; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
393; CHECK-NEXT:    mov w12, w0
394; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
395; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
396; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
397; CHECK-NEXT:    mov za0v.h[w12, 0:3], { z0.h - z3.h }
398; CHECK-NEXT:    mov za1v.h[w12, 4:7], { z0.h - z3.h }
399; CHECK-NEXT:    ret
400  call void @llvm.aarch64.sme.write.ver.vg4.nxv8i16(i32 0, i32 %slice, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zn4)
401  %slice.4 = add i32 %slice, 4
402  call void @llvm.aarch64.sme.write.ver.vg4.nxv8i16(i32 1, i32 %slice.4, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zn4)
403  ret void
404}
405
; f16: same lowering as the i16 case.
406define void @za_write_vg4_vert_f16(i32 %slice, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3, <vscale x 8 x half> %zn4) {
407; CHECK-LABEL: za_write_vg4_vert_f16:
408; CHECK:       // %bb.0:
409; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
410; CHECK-NEXT:    mov w12, w0
411; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
412; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
413; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
414; CHECK-NEXT:    mov za0v.h[w12, 0:3], { z0.h - z3.h }
415; CHECK-NEXT:    mov za1v.h[w12, 4:7], { z0.h - z3.h }
416; CHECK-NEXT:    ret
417  call void @llvm.aarch64.sme.write.ver.vg4.nxv8f16(i32 0, i32 %slice, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3, <vscale x 8 x half> %zn4)
418  %slice.4 = add i32 %slice, 4
419  call void @llvm.aarch64.sme.write.ver.vg4.nxv8f16(i32 1, i32 %slice.4, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3, <vscale x 8 x half> %zn4)
420  ret void
421}
422
; bf16: same lowering as the i16 case.
423define void @za_write_vg4_vert_bf16(i32 %slice, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3, <vscale x 8 x bfloat> %zn4) {
424; CHECK-LABEL: za_write_vg4_vert_bf16:
425; CHECK:       // %bb.0:
426; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
427; CHECK-NEXT:    mov w12, w0
428; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
429; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
430; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
431; CHECK-NEXT:    mov za0v.h[w12, 0:3], { z0.h - z3.h }
432; CHECK-NEXT:    mov za1v.h[w12, 4:7], { z0.h - z3.h }
433; CHECK-NEXT:    ret
434  call void @llvm.aarch64.sme.write.ver.vg4.nxv8bf16(i32 0, i32 %slice, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3, <vscale x 8 x bfloat> %zn4)
435  %slice.4 = add i32 %slice, 4
436  call void @llvm.aarch64.sme.write.ver.vg4.nxv8bf16(i32 1, i32 %slice.4, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3, <vscale x 8 x bfloat> %zn4)
437  ret void
438}
439
; .s: only the 0:3 quad fits a 4-row word tile.
440define void @za_write_vg4_vert_s(i32 %slice, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4) {
441; CHECK-LABEL: za_write_vg4_vert_s:
442; CHECK:       // %bb.0:
443; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
444; CHECK-NEXT:    mov w12, w0
445; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
446; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
447; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
448; CHECK-NEXT:    mov za0v.s[w12, 0:3], { z0.s - z3.s }
449; CHECK-NEXT:    ret
450  call void @llvm.aarch64.sme.write.ver.vg4.nxv4i32(i32 0, i32 %slice, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4)
451  ret void
452}
453
; f32: same lowering as the i32 case.
454define void @za_write_vg4_vert_f32(i32 %slice, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4) {
455; CHECK-LABEL: za_write_vg4_vert_f32:
456; CHECK:       // %bb.0:
457; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
458; CHECK-NEXT:    mov w12, w0
459; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
460; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
461; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
462; CHECK-NEXT:    mov za0v.s[w12, 0:3], { z0.s - z3.s }
463; CHECK-NEXT:    ret
464  call void @llvm.aarch64.sme.write.ver.vg4.nxv4f32(i32 0, i32 %slice, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4)
465  ret void
466}
467
; .d: base form only (see horizontal .d note above in the original file).
468define void @za_write_vg4_vert_d(i32 %slice, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zn4) {
469; CHECK-LABEL: za_write_vg4_vert_d:
470; CHECK:       // %bb.0:
471; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
472; CHECK-NEXT:    mov w12, w0
473; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
474; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
475; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
476; CHECK-NEXT:    mov za0v.d[w12, 0:3], { z0.d - z3.d }
477; CHECK-NEXT:    ret
478  call void @llvm.aarch64.sme.write.ver.vg4.nxv2i64(i32 0, i32 %slice, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zn4)
479  ret void
480}
481
; f64: same lowering as the i64 case.
482define void @za_write_vg4_vert_f64(i32 %slice, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3, <vscale x 2 x double> %zn4) {
483; CHECK-LABEL: za_write_vg4_vert_f64:
484; CHECK:       // %bb.0:
485; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
486; CHECK-NEXT:    mov w12, w0
487; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
488; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
489; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
490; CHECK-NEXT:    mov za0v.d[w12, 0:3], { z0.d - z3.d }
491; CHECK-NEXT:    ret
492  call void @llvm.aarch64.sme.write.ver.vg4.nxv2f64(i32 0, i32 %slice, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3, <vscale x 2 x double> %zn4)
493  ret void
494}
495
496;
497; Move Multi-Vector To ZA (Write) x2
498;
499
500define void @za_write_vg1x2_b(i32 %slice, <vscale x 16 x i8> %za1, <vscale x 16 x i8> %za2) {
501; CHECK-LABEL: za_write_vg1x2_b:
502; CHECK:       // %bb.0:
503; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
504; CHECK-NEXT:    mov w8, w0
505; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
506; CHECK-NEXT:    mov za.d[w8, 0, vgx2], { z0.d, z1.d }
507; CHECK-NEXT:    mov za.d[w8, 7, vgx2], { z0.d, z1.d }
508; CHECK-NEXT:    ret
509  call void @llvm.aarch64.sme.write.vg1x2.nxv16i8(i32 %slice, <vscale x 16 x i8> %za1, <vscale x 16 x i8> %za2)
510  %slice.7 = add i32 %slice, 7
511  call void @llvm.aarch64.sme.write.vg1x2.nxv16i8(i32 %slice.7, <vscale x 16 x i8> %za1, <vscale x 16 x i8> %za2)
512  ret void
513}
514
515define void @za_write_vg1x2_h(i32 %slice, <vscale x 8 x i16> %za1, <vscale x 8 x i16> %za2) {
516; CHECK-LABEL: za_write_vg1x2_h:
517; CHECK:       // %bb.0:
518; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
519; CHECK-NEXT:    mov w8, w0
520; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
521; CHECK-NEXT:    mov za.d[w8, 0, vgx2], { z0.d, z1.d }
522; CHECK-NEXT:    mov za.d[w8, 7, vgx2], { z0.d, z1.d }
523; CHECK-NEXT:    ret
524  call void @llvm.aarch64.sme.write.vg1x2.nxv8i16(i32 %slice, <vscale x 8 x i16> %za1, <vscale x 8 x i16> %za2)
525  %slice.7 = add i32 %slice, 7
526  call void @llvm.aarch64.sme.write.vg1x2.nxv8i16(i32 %slice.7, <vscale x 8 x i16> %za1, <vscale x 8 x i16> %za2)
527  ret void
528}
529
530define void @za_write_vg1x2_f16(i32 %slice, <vscale x 8 x half> %za1, <vscale x 8 x half> %za2) {
531; CHECK-LABEL: za_write_vg1x2_f16:
532; CHECK:       // %bb.0:
533; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
534; CHECK-NEXT:    mov w8, w0
535; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
536; CHECK-NEXT:    mov za.d[w8, 0, vgx2], { z0.d, z1.d }
537; CHECK-NEXT:    mov za.d[w8, 7, vgx2], { z0.d, z1.d }
538; CHECK-NEXT:    ret
539  call void @llvm.aarch64.sme.write.vg1x2.nxv8f16(i32 %slice, <vscale x 8 x half> %za1, <vscale x 8 x half> %za2)
540  %slice.7 = add i32 %slice, 7
541  call void @llvm.aarch64.sme.write.vg1x2.nxv8f16(i32 %slice.7, <vscale x 8 x half> %za1, <vscale x 8 x half> %za2)
542  ret void
543}
544
545define void @za_write_vg1x2_bf16(i32 %slice, <vscale x 8 x bfloat> %za1, <vscale x 8 x bfloat> %za2) {
546; CHECK-LABEL: za_write_vg1x2_bf16:
547; CHECK:       // %bb.0:
548; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
549; CHECK-NEXT:    mov w8, w0
550; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
551; CHECK-NEXT:    mov za.d[w8, 0, vgx2], { z0.d, z1.d }
552; CHECK-NEXT:    mov za.d[w8, 7, vgx2], { z0.d, z1.d }
553; CHECK-NEXT:    ret
554  call void @llvm.aarch64.sme.write.vg1x2.nxv8bf16(i32 %slice, <vscale x 8 x bfloat> %za1, <vscale x 8 x bfloat> %za2)
555  %slice.7 = add i32 %slice, 7
556  call void @llvm.aarch64.sme.write.vg1x2.nxv8bf16(i32 %slice.7, <vscale x 8 x bfloat> %za1, <vscale x 8 x bfloat> %za2)
557  ret void
558}
559
; Two nxv4i32 vectors written to ZA as a vgx2 group at slice offsets 0 and 7.
define void @za_write_vg1x2_s(i32 %slice, <vscale x 4 x i32> %za1, <vscale x 4 x i32> %za2) {
; CHECK-LABEL: za_write_vg1x2_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov za.d[w8, 0, vgx2], { z0.d, z1.d }
; CHECK-NEXT:    mov za.d[w8, 7, vgx2], { z0.d, z1.d }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.vg1x2.nxv4i32(i32 %slice, <vscale x 4 x i32> %za1, <vscale x 4 x i32> %za2)
  %slice.7 = add i32 %slice, 7
  call void @llvm.aarch64.sme.write.vg1x2.nxv4i32(i32 %slice.7, <vscale x 4 x i32> %za1, <vscale x 4 x i32> %za2)
  ret void
}
574
; Two nxv4f32 vectors written to ZA as a vgx2 group at slice offsets 0 and 7.
define void @za_write_vg1x2_f32(i32 %slice, <vscale x 4 x float> %za1, <vscale x 4 x float> %za2) {
; CHECK-LABEL: za_write_vg1x2_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov za.d[w8, 0, vgx2], { z0.d, z1.d }
; CHECK-NEXT:    mov za.d[w8, 7, vgx2], { z0.d, z1.d }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.vg1x2.nxv4f32(i32 %slice, <vscale x 4 x float> %za1, <vscale x 4 x float> %za2)
  %slice.7 = add i32 %slice, 7
  call void @llvm.aarch64.sme.write.vg1x2.nxv4f32(i32 %slice.7, <vscale x 4 x float> %za1, <vscale x 4 x float> %za2)
  ret void
}
589
; Two nxv2i64 vectors written to ZA as a vgx2 group at slice offsets 0 and 7.
define void @za_write_vg1x2_d(i32 %slice, <vscale x 2 x i64> %za1, <vscale x 2 x i64> %za2) {
; CHECK-LABEL: za_write_vg1x2_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov za.d[w8, 0, vgx2], { z0.d, z1.d }
; CHECK-NEXT:    mov za.d[w8, 7, vgx2], { z0.d, z1.d }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.vg1x2.nxv2i64(i32 %slice, <vscale x 2 x i64> %za1, <vscale x 2 x i64> %za2)
  %slice.7 = add i32 %slice, 7
  call void @llvm.aarch64.sme.write.vg1x2.nxv2i64(i32 %slice.7, <vscale x 2 x i64> %za1, <vscale x 2 x i64> %za2)
  ret void
}
604
; Two nxv2f64 vectors written to ZA as a vgx2 group at slice offsets 0 and 7.
define void @za_write_vg1x2_f64(i32 %slice, <vscale x 2 x double> %za1, <vscale x 2 x double> %za2) {
; CHECK-LABEL: za_write_vg1x2_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov za.d[w8, 0, vgx2], { z0.d, z1.d }
; CHECK-NEXT:    mov za.d[w8, 7, vgx2], { z0.d, z1.d }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.vg1x2.nxv2f64(i32 %slice, <vscale x 2 x double> %za1, <vscale x 2 x double> %za2)
  %slice.7 = add i32 %slice, 7
  call void @llvm.aarch64.sme.write.vg1x2.nxv2f64(i32 %slice.7, <vscale x 2 x double> %za1, <vscale x 2 x double> %za2)
  ret void
}
619
620;
621; Move Multi-Vector To ZA (Write) x4
622;
623
; Four nxv16i8 vectors written to ZA as a vgx4 group; the quad must land in the
; consecutive tuple z0-z3, hence the four kill markers, at slice offsets 0 and 7.
define void @za_write_vg1x4_b(i32 %slice, <vscale x 16 x i8> %za1, <vscale x 16 x i8> %za2, <vscale x 16 x i8> %za3, <vscale x 16 x i8> %za4) {
; CHECK-LABEL: za_write_vg1x4_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov za.d[w8, 0, vgx4], { z0.d - z3.d }
; CHECK-NEXT:    mov za.d[w8, 7, vgx4], { z0.d - z3.d }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.vg1x4.nxv16i8(i32 %slice, <vscale x 16 x i8> %za1, <vscale x 16 x i8> %za2, <vscale x 16 x i8> %za3, <vscale x 16 x i8> %za4)
  %slice.7 = add i32 %slice, 7
  call void @llvm.aarch64.sme.write.vg1x4.nxv16i8(i32 %slice.7, <vscale x 16 x i8> %za1, <vscale x 16 x i8> %za2, <vscale x 16 x i8> %za3, <vscale x 16 x i8> %za4)
  ret void
}
640
; Four nxv8i16 vectors written to ZA as a vgx4 group at slice offsets 0 and 7.
define void @za_write_vg1x4_h(i32 %slice, <vscale x 8 x i16> %za1, <vscale x 8 x i16> %za2, <vscale x 8 x i16> %za3, <vscale x 8 x i16> %za4) {
; CHECK-LABEL: za_write_vg1x4_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov za.d[w8, 0, vgx4], { z0.d - z3.d }
; CHECK-NEXT:    mov za.d[w8, 7, vgx4], { z0.d - z3.d }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.vg1x4.nxv8i16(i32 %slice, <vscale x 8 x i16> %za1, <vscale x 8 x i16> %za2, <vscale x 8 x i16> %za3, <vscale x 8 x i16> %za4)
  %slice.7 = add i32 %slice, 7
  call void @llvm.aarch64.sme.write.vg1x4.nxv8i16(i32 %slice.7, <vscale x 8 x i16> %za1, <vscale x 8 x i16> %za2, <vscale x 8 x i16> %za3, <vscale x 8 x i16> %za4)
  ret void
}
657
; Four nxv8f16 vectors written to ZA as a vgx4 group at slice offsets 0 and 7.
define void @za_write_vg1x4_f16(i32 %slice, <vscale x 8 x half> %za1, <vscale x 8 x half> %za2, <vscale x 8 x half> %za3, <vscale x 8 x half> %za4) {
; CHECK-LABEL: za_write_vg1x4_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov za.d[w8, 0, vgx4], { z0.d - z3.d }
; CHECK-NEXT:    mov za.d[w8, 7, vgx4], { z0.d - z3.d }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.vg1x4.nxv8f16(i32 %slice, <vscale x 8 x half> %za1, <vscale x 8 x half> %za2, <vscale x 8 x half> %za3, <vscale x 8 x half> %za4)
  %slice.7 = add i32 %slice, 7
  call void @llvm.aarch64.sme.write.vg1x4.nxv8f16(i32 %slice.7, <vscale x 8 x half> %za1, <vscale x 8 x half> %za2, <vscale x 8 x half> %za3, <vscale x 8 x half> %za4)
  ret void
}
674
; Four nxv8bf16 vectors written to ZA as a vgx4 group at slice offsets 0 and 7.
define void @za_write_vg1x4_bf16(i32 %slice, <vscale x 8 x bfloat> %za1, <vscale x 8 x bfloat> %za2, <vscale x 8 x bfloat> %za3, <vscale x 8 x bfloat> %za4) {
; CHECK-LABEL: za_write_vg1x4_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov za.d[w8, 0, vgx4], { z0.d - z3.d }
; CHECK-NEXT:    mov za.d[w8, 7, vgx4], { z0.d - z3.d }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.vg1x4.nxv8bf16(i32 %slice, <vscale x 8 x bfloat> %za1, <vscale x 8 x bfloat> %za2, <vscale x 8 x bfloat> %za3, <vscale x 8 x bfloat> %za4)
  %slice.7 = add i32 %slice, 7
  call void @llvm.aarch64.sme.write.vg1x4.nxv8bf16(i32 %slice.7, <vscale x 8 x bfloat> %za1, <vscale x 8 x bfloat> %za2, <vscale x 8 x bfloat> %za3, <vscale x 8 x bfloat> %za4)
  ret void
}
691
; Four nxv4i32 vectors written to ZA as a vgx4 group at slice offsets 0 and 7.
define void @za_write_vg1x4_s(i32 %slice, <vscale x 4 x i32> %za1, <vscale x 4 x i32> %za2, <vscale x 4 x i32> %za3, <vscale x 4 x i32> %za4) {
; CHECK-LABEL: za_write_vg1x4_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov za.d[w8, 0, vgx4], { z0.d - z3.d }
; CHECK-NEXT:    mov za.d[w8, 7, vgx4], { z0.d - z3.d }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.vg1x4.nxv4i32(i32 %slice, <vscale x 4 x i32> %za1, <vscale x 4 x i32> %za2, <vscale x 4 x i32> %za3, <vscale x 4 x i32> %za4)
  %slice.7 = add i32 %slice, 7
  call void @llvm.aarch64.sme.write.vg1x4.nxv4i32(i32 %slice.7, <vscale x 4 x i32> %za1, <vscale x 4 x i32> %za2, <vscale x 4 x i32> %za3, <vscale x 4 x i32> %za4)
  ret void
}
708
; Four nxv4f32 vectors written to ZA as a vgx4 group at slice offsets 0 and 7.
define void @za_write_vg1x4_f32(i32 %slice, <vscale x 4 x float> %za1, <vscale x 4 x float> %za2, <vscale x 4 x float> %za3, <vscale x 4 x float> %za4) {
; CHECK-LABEL: za_write_vg1x4_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov za.d[w8, 0, vgx4], { z0.d - z3.d }
; CHECK-NEXT:    mov za.d[w8, 7, vgx4], { z0.d - z3.d }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.vg1x4.nxv4f32(i32 %slice, <vscale x 4 x float> %za1, <vscale x 4 x float> %za2, <vscale x 4 x float> %za3, <vscale x 4 x float> %za4)
  %slice.7 = add i32 %slice, 7
  call void @llvm.aarch64.sme.write.vg1x4.nxv4f32(i32 %slice.7, <vscale x 4 x float> %za1, <vscale x 4 x float> %za2, <vscale x 4 x float> %za3, <vscale x 4 x float> %za4)
  ret void
}
725
; Four nxv2i64 vectors written to ZA as a vgx4 group at slice offsets 0 and 7.
define void @za_write_vg1x4_d(i32 %slice, <vscale x 2 x i64> %za1, <vscale x 2 x i64> %za2, <vscale x 2 x i64> %za3, <vscale x 2 x i64> %za4) {
; CHECK-LABEL: za_write_vg1x4_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov za.d[w8, 0, vgx4], { z0.d - z3.d }
; CHECK-NEXT:    mov za.d[w8, 7, vgx4], { z0.d - z3.d }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.vg1x4.nxv2i64(i32 %slice, <vscale x 2 x i64> %za1, <vscale x 2 x i64> %za2, <vscale x 2 x i64> %za3, <vscale x 2 x i64> %za4)
  %slice.7 = add i32 %slice, 7
  call void @llvm.aarch64.sme.write.vg1x4.nxv2i64(i32 %slice.7, <vscale x 2 x i64> %za1, <vscale x 2 x i64> %za2, <vscale x 2 x i64> %za3, <vscale x 2 x i64> %za4)
  ret void
}
742
; Four nxv2f64 vectors written to ZA as a vgx4 group at slice offsets 0 and 7.
define void @za_write_vg1x4_f64(i32 %slice, <vscale x 2 x double> %za1, <vscale x 2 x double> %za2, <vscale x 2 x double> %za3, <vscale x 2 x double> %za4) {
; CHECK-LABEL: za_write_vg1x4_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov za.d[w8, 0, vgx4], { z0.d - z3.d }
; CHECK-NEXT:    mov za.d[w8, 7, vgx4], { z0.d - z3.d }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.vg1x4.nxv2f64(i32 %slice, <vscale x 2 x double> %za1, <vscale x 2 x double> %za2, <vscale x 2 x double> %za3, <vscale x 2 x double> %za4)
  %slice.7 = add i32 %slice, 7
  call void @llvm.aarch64.sme.write.vg1x4.nxv2f64(i32 %slice.7, <vscale x 2 x double> %za1, <vscale x 2 x double> %za2, <vscale x 2 x double> %za3, <vscale x 2 x double> %za4)
  ret void
}
759
; Tile writes, horizontal, two-vector groups (tile, slice, zn1, zn2).
declare void @llvm.aarch64.sme.write.hor.vg2.nxv16i8(i32, i32, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare void @llvm.aarch64.sme.write.hor.vg2.nxv8i16(i32, i32, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare void @llvm.aarch64.sme.write.hor.vg2.nxv8f16(i32, i32, <vscale x 8 x half>, <vscale x 8 x half>)
declare void @llvm.aarch64.sme.write.hor.vg2.nxv8bf16(i32, i32, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare void @llvm.aarch64.sme.write.hor.vg2.nxv4i32(i32, i32, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare void @llvm.aarch64.sme.write.hor.vg2.nxv4f32(i32, i32, <vscale x 4 x float>, <vscale x 4 x float>)
declare void @llvm.aarch64.sme.write.hor.vg2.nxv2i64(i32, i32, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare void @llvm.aarch64.sme.write.hor.vg2.nxv2f64(i32, i32, <vscale x 2 x double>, <vscale x 2 x double>)

; Tile writes, vertical, two-vector groups (tile, slice, zn1, zn2).
declare void @llvm.aarch64.sme.write.ver.vg2.nxv16i8(i32, i32, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare void @llvm.aarch64.sme.write.ver.vg2.nxv8i16(i32, i32, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare void @llvm.aarch64.sme.write.ver.vg2.nxv8f16(i32, i32, <vscale x 8 x half>, <vscale x 8 x half>)
declare void @llvm.aarch64.sme.write.ver.vg2.nxv8bf16(i32, i32, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare void @llvm.aarch64.sme.write.ver.vg2.nxv4i32(i32, i32, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare void @llvm.aarch64.sme.write.ver.vg2.nxv4f32(i32, i32, <vscale x 4 x float>, <vscale x 4 x float>)
declare void @llvm.aarch64.sme.write.ver.vg2.nxv2i64(i32, i32, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare void @llvm.aarch64.sme.write.ver.vg2.nxv2f64(i32, i32, <vscale x 2 x double>, <vscale x 2 x double>)

; Tile writes, horizontal, four-vector groups (tile, slice, zn1..zn4).
declare void @llvm.aarch64.sme.write.hor.vg4.nxv16i8(i32, i32, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare void @llvm.aarch64.sme.write.hor.vg4.nxv8i16(i32, i32, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare void @llvm.aarch64.sme.write.hor.vg4.nxv8f16(i32, i32, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
declare void @llvm.aarch64.sme.write.hor.vg4.nxv8bf16(i32, i32, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare void @llvm.aarch64.sme.write.hor.vg4.nxv4i32(i32, i32, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare void @llvm.aarch64.sme.write.hor.vg4.nxv4f32(i32, i32, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare void @llvm.aarch64.sme.write.hor.vg4.nxv2i64(i32, i32, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare void @llvm.aarch64.sme.write.hor.vg4.nxv2f64(i32, i32, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)

; Tile writes, vertical, four-vector groups (tile, slice, zn1..zn4).
declare void @llvm.aarch64.sme.write.ver.vg4.nxv16i8(i32, i32, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare void @llvm.aarch64.sme.write.ver.vg4.nxv8i16(i32, i32, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare void @llvm.aarch64.sme.write.ver.vg4.nxv8f16(i32, i32, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
declare void @llvm.aarch64.sme.write.ver.vg4.nxv8bf16(i32, i32, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare void @llvm.aarch64.sme.write.ver.vg4.nxv4i32(i32, i32, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare void @llvm.aarch64.sme.write.ver.vg4.nxv4f32(i32, i32, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare void @llvm.aarch64.sme.write.ver.vg4.nxv2i64(i32, i32, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare void @llvm.aarch64.sme.write.ver.vg4.nxv2f64(i32, i32, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)

; ZA array writes, vgx2 groups (slice, zn1, zn2) — no tile operand.
declare void @llvm.aarch64.sme.write.vg1x2.nxv16i8(i32, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare void @llvm.aarch64.sme.write.vg1x2.nxv8i16(i32, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare void @llvm.aarch64.sme.write.vg1x2.nxv4i32(i32, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare void @llvm.aarch64.sme.write.vg1x2.nxv2i64(i32, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare void @llvm.aarch64.sme.write.vg1x2.nxv8f16(i32, <vscale x 8 x half>, <vscale x 8 x half>)
declare void @llvm.aarch64.sme.write.vg1x2.nxv8bf16(i32, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare void @llvm.aarch64.sme.write.vg1x2.nxv4f32(i32, <vscale x 4 x float>, <vscale x 4 x float>)
declare void @llvm.aarch64.sme.write.vg1x2.nxv2f64(i32, <vscale x 2 x double>, <vscale x 2 x double>)

; ZA array writes, vgx4 groups (slice, zn1..zn4) — no tile operand.
declare void @llvm.aarch64.sme.write.vg1x4.nxv16i8(i32, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare void @llvm.aarch64.sme.write.vg1x4.nxv8i16(i32, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare void @llvm.aarch64.sme.write.vg1x4.nxv4i32(i32, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare void @llvm.aarch64.sme.write.vg1x4.nxv2i64(i32, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare void @llvm.aarch64.sme.write.vg1x4.nxv8f16(i32, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
declare void @llvm.aarch64.sme.write.vg1x4.nxv8bf16(i32, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare void @llvm.aarch64.sme.write.vg1x4.nxv4f32(i32, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare void @llvm.aarch64.sme.write.vg1x4.nxv2f64(i32, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
813