// xref: /llvm-project/mlir/test/Target/LLVMIR/arm-sme.mlir (revision 95ef8e386823717efeb2b7b1d02bfbb28473cccc)
// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s

// CHECK-LABEL: @arm_sme_zero
llvm.func @arm_sme_zero() {
  // Zero the ZA tiles selected by the mask; a mask of 0 selects no tiles but
  // must still lower to the intrinsic call.
  // CHECK: call void @llvm.aarch64.sme.zero(i32 0)
  "arm_sme.intr.zero"() <{tile_mask = 0 : i32}> : () -> ()
  llvm.return
}

// -----

// Floating-point outer product with accumulation (FMOPA), non-widening and
// widening (f16/bf16 -> f32) forms.
// CHECK-LABEL: @arm_sme_fmopa
llvm.func @arm_sme_fmopa(%nxv2f64 : vector<[2]xf64>,
                         %nxv4f32 : vector<[4]xf32>,
                         %nxv8f16 : vector<[8]xf16>,
                         %nxv8bf16: vector<[8]xbf16>,
                         %nxv2i1  : vector<[2]xi1>,
                         %nxv4i1  : vector<[4]xi1>,
                         %nxv8i1  : vector<[8]xi1>) {
  // CHECK: call void @llvm.aarch64.sme.mopa.nxv2f64
  "arm_sme.intr.mopa"(%nxv2i1, %nxv2i1, %nxv2f64, %nxv2f64) <{tile_id = 0 : i32}> :
    (vector<[2]xi1>, vector<[2]xi1>, vector<[2]xf64>, vector<[2]xf64>) -> ()
  // CHECK: call void @llvm.aarch64.sme.mopa.nxv4f32
  "arm_sme.intr.mopa"(%nxv4i1, %nxv4i1, %nxv4f32, %nxv4f32) <{tile_id = 0 : i32}> :
    (vector<[4]xi1>, vector<[4]xi1>, vector<[4]xf32>, vector<[4]xf32>) -> ()
  // CHECK: call void @llvm.aarch64.sme.mopa.wide.nxv8f16
  "arm_sme.intr.mopa.wide"(%nxv8i1, %nxv8i1, %nxv8f16, %nxv8f16) <{tile_id = 0 : i32}> :
    (vector<[8]xi1>, vector<[8]xi1>, vector<[8]xf16>, vector<[8]xf16>) -> ()
  // CHECK: call void @llvm.aarch64.sme.mopa.wide.nxv8bf16
  "arm_sme.intr.mopa.wide"(%nxv8i1, %nxv8i1, %nxv8bf16, %nxv8bf16) <{tile_id = 0 : i32}> :
    (vector<[8]xi1>, vector<[8]xi1>, vector<[8]xbf16>, vector<[8]xbf16>) -> ()
  llvm.return
}

// -----

// Integer widening outer products with accumulation: signed, unsigned, and
// mixed-signedness (sumopa/usmopa) variants, plus the 16-bit-to-32-bit
// za32 variants.
// CHECK-LABEL: @arm_sme_imopa
llvm.func @arm_sme_imopa(%nxv8i16 : vector<[8]xi16>,
                         %nxv16i8 : vector<[16]xi8>,
                         %nxv8i1  : vector<[8]xi1>,
                         %nxv16i1 : vector<[16]xi1>) {
  // CHECK: call void @llvm.aarch64.sme.smopa.wide.nxv8i16
  "arm_sme.intr.smopa.wide"(%nxv8i1, %nxv8i1, %nxv8i16, %nxv8i16) <{tile_id = 0 : i32}> :
    (vector<[8]xi1>, vector<[8]xi1>, vector<[8]xi16>, vector<[8]xi16>) -> ()
  // CHECK: call void @llvm.aarch64.sme.umopa.wide.nxv8i16
  "arm_sme.intr.umopa.wide"(%nxv8i1, %nxv8i1, %nxv8i16, %nxv8i16) <{tile_id = 0 : i32}> :
    (vector<[8]xi1>, vector<[8]xi1>, vector<[8]xi16>, vector<[8]xi16>) -> ()
  // CHECK: call void @llvm.aarch64.sme.sumopa.wide.nxv8i16
  "arm_sme.intr.sumopa.wide"(%nxv8i1, %nxv8i1, %nxv8i16, %nxv8i16) <{tile_id = 0 : i32}> :
    (vector<[8]xi1>, vector<[8]xi1>, vector<[8]xi16>, vector<[8]xi16>) -> ()
  // CHECK: call void @llvm.aarch64.sme.usmopa.wide.nxv8i16
  "arm_sme.intr.usmopa.wide"(%nxv8i1, %nxv8i1, %nxv8i16, %nxv8i16) <{tile_id = 0 : i32}> :
    (vector<[8]xi1>, vector<[8]xi1>, vector<[8]xi16>, vector<[8]xi16>) -> ()
  // CHECK: call void @llvm.aarch64.sme.smopa.wide.nxv16i8
  "arm_sme.intr.smopa.wide"(%nxv16i1, %nxv16i1, %nxv16i8, %nxv16i8) <{tile_id = 0 : i32}> :
    (vector<[16]xi1>, vector<[16]xi1>, vector<[16]xi8>, vector<[16]xi8>) -> ()
  // CHECK: call void @llvm.aarch64.sme.umopa.wide.nxv16i8
  "arm_sme.intr.umopa.wide"(%nxv16i1, %nxv16i1, %nxv16i8, %nxv16i8) <{tile_id = 0 : i32}> :
    (vector<[16]xi1>, vector<[16]xi1>, vector<[16]xi8>, vector<[16]xi8>) -> ()
  // CHECK: call void @llvm.aarch64.sme.sumopa.wide.nxv16i8
  "arm_sme.intr.sumopa.wide"(%nxv16i1, %nxv16i1, %nxv16i8, %nxv16i8) <{tile_id = 0 : i32}> :
    (vector<[16]xi1>, vector<[16]xi1>, vector<[16]xi8>, vector<[16]xi8>) -> ()
  // CHECK: call void @llvm.aarch64.sme.usmopa.wide.nxv16i8
  "arm_sme.intr.usmopa.wide"(%nxv16i1, %nxv16i1, %nxv16i8, %nxv16i8) <{tile_id = 0 : i32}> :
    (vector<[16]xi1>, vector<[16]xi1>, vector<[16]xi8>, vector<[16]xi8>) -> ()
  // CHECK: call void @llvm.aarch64.sme.smopa.za32.nxv8i16
  "arm_sme.intr.smopa.za32"(%nxv8i1, %nxv8i1, %nxv8i16, %nxv8i16) <{tile_id = 0 : i32}> :
    (vector<[8]xi1>, vector<[8]xi1>, vector<[8]xi16>, vector<[8]xi16>) -> ()
  // CHECK: call void @llvm.aarch64.sme.umopa.za32.nxv8i16
  "arm_sme.intr.umopa.za32"(%nxv8i1, %nxv8i1, %nxv8i16, %nxv8i16) <{tile_id = 0 : i32}> :
    (vector<[8]xi1>, vector<[8]xi1>, vector<[8]xi16>, vector<[8]xi16>) -> ()
  llvm.return
}

// -----

// Floating-point outer product with subtraction (FMOPS), non-widening and
// widening (f16/bf16 -> f32) forms.
// CHECK-LABEL: @arm_sme_fmops
llvm.func @arm_sme_fmops(%nxv2f64 : vector<[2]xf64>,
                         %nxv4f32 : vector<[4]xf32>,
                         %nxv8f16 : vector<[8]xf16>,
                         %nxv8bf16: vector<[8]xbf16>,
                         %nxv2i1  : vector<[2]xi1>,
                         %nxv4i1  : vector<[4]xi1>,
                         %nxv8i1  : vector<[8]xi1>) {
  // CHECK: call void @llvm.aarch64.sme.mops.nxv2f64
  "arm_sme.intr.mops"(%nxv2i1, %nxv2i1, %nxv2f64, %nxv2f64) <{tile_id = 0 : i32}> :
    (vector<[2]xi1>, vector<[2]xi1>, vector<[2]xf64>, vector<[2]xf64>) -> ()
  // CHECK: call void @llvm.aarch64.sme.mops.nxv4f32
  "arm_sme.intr.mops"(%nxv4i1, %nxv4i1, %nxv4f32, %nxv4f32) <{tile_id = 0 : i32}> :
    (vector<[4]xi1>, vector<[4]xi1>, vector<[4]xf32>, vector<[4]xf32>) -> ()
  // CHECK: call void @llvm.aarch64.sme.mops.wide.nxv8f16
  "arm_sme.intr.mops.wide"(%nxv8i1, %nxv8i1, %nxv8f16, %nxv8f16) <{tile_id = 0 : i32}> :
    (vector<[8]xi1>, vector<[8]xi1>, vector<[8]xf16>, vector<[8]xf16>) -> ()
  // CHECK: call void @llvm.aarch64.sme.mops.wide.nxv8bf16
  "arm_sme.intr.mops.wide"(%nxv8i1, %nxv8i1, %nxv8bf16, %nxv8bf16) <{tile_id = 0 : i32}> :
    (vector<[8]xi1>, vector<[8]xi1>, vector<[8]xbf16>, vector<[8]xbf16>) -> ()
  llvm.return
}

// -----

// Integer widening outer products with subtraction: signed, unsigned, and
// mixed-signedness (sumops/usmops) variants, plus the 16-bit-to-32-bit
// za32 variants.
// CHECK-LABEL: @arm_sme_imops
llvm.func @arm_sme_imops(%nxv8i16 : vector<[8]xi16>,
                         %nxv16i8 : vector<[16]xi8>,
                         %nxv8i1  : vector<[8]xi1>,
                         %nxv16i1 : vector<[16]xi1>) {
  // CHECK: call void @llvm.aarch64.sme.smops.wide.nxv8i16
  "arm_sme.intr.smops.wide"(%nxv8i1, %nxv8i1, %nxv8i16, %nxv8i16) <{tile_id = 0 : i32}> :
    (vector<[8]xi1>, vector<[8]xi1>, vector<[8]xi16>, vector<[8]xi16>) -> ()
  // CHECK: call void @llvm.aarch64.sme.umops.wide.nxv8i16
  "arm_sme.intr.umops.wide"(%nxv8i1, %nxv8i1, %nxv8i16, %nxv8i16) <{tile_id = 0 : i32}> :
    (vector<[8]xi1>, vector<[8]xi1>, vector<[8]xi16>, vector<[8]xi16>) -> ()
  // CHECK: call void @llvm.aarch64.sme.sumops.wide.nxv8i16
  "arm_sme.intr.sumops.wide"(%nxv8i1, %nxv8i1, %nxv8i16, %nxv8i16) <{tile_id = 0 : i32}> :
    (vector<[8]xi1>, vector<[8]xi1>, vector<[8]xi16>, vector<[8]xi16>) -> ()
  // CHECK: call void @llvm.aarch64.sme.usmops.wide.nxv8i16
  "arm_sme.intr.usmops.wide"(%nxv8i1, %nxv8i1, %nxv8i16, %nxv8i16) <{tile_id = 0 : i32}> :
    (vector<[8]xi1>, vector<[8]xi1>, vector<[8]xi16>, vector<[8]xi16>) -> ()
  // CHECK: call void @llvm.aarch64.sme.smops.wide.nxv16i8
  "arm_sme.intr.smops.wide"(%nxv16i1, %nxv16i1, %nxv16i8, %nxv16i8) <{tile_id = 0 : i32}> :
    (vector<[16]xi1>, vector<[16]xi1>, vector<[16]xi8>, vector<[16]xi8>) -> ()
  // CHECK: call void @llvm.aarch64.sme.umops.wide.nxv16i8
  "arm_sme.intr.umops.wide"(%nxv16i1, %nxv16i1, %nxv16i8, %nxv16i8) <{tile_id = 0 : i32}> :
    (vector<[16]xi1>, vector<[16]xi1>, vector<[16]xi8>, vector<[16]xi8>) -> ()
  // CHECK: call void @llvm.aarch64.sme.sumops.wide.nxv16i8
  "arm_sme.intr.sumops.wide"(%nxv16i1, %nxv16i1, %nxv16i8, %nxv16i8) <{tile_id = 0 : i32}> :
    (vector<[16]xi1>, vector<[16]xi1>, vector<[16]xi8>, vector<[16]xi8>) -> ()
  // CHECK: call void @llvm.aarch64.sme.usmops.wide.nxv16i8
  "arm_sme.intr.usmops.wide"(%nxv16i1, %nxv16i1, %nxv16i8, %nxv16i8) <{tile_id = 0 : i32}> :
    (vector<[16]xi1>, vector<[16]xi1>, vector<[16]xi8>, vector<[16]xi8>) -> ()
  // CHECK: call void @llvm.aarch64.sme.smops.za32.nxv8i16
  "arm_sme.intr.smops.za32"(%nxv8i1, %nxv8i1, %nxv8i16, %nxv8i16) <{tile_id = 0 : i32}> :
    (vector<[8]xi1>, vector<[8]xi1>, vector<[8]xi16>, vector<[8]xi16>) -> ()
  // CHECK: call void @llvm.aarch64.sme.umops.za32.nxv8i16
  "arm_sme.intr.umops.za32"(%nxv8i1, %nxv8i1, %nxv8i16, %nxv8i16) <{tile_id = 0 : i32}> :
    (vector<[8]xi1>, vector<[8]xi1>, vector<[8]xi16>, vector<[8]xi16>) -> ()
  llvm.return
}

// -----

// Predicated tile-slice loads (ld1b/h/w/d/q), horizontal and vertical forms,
// for every element width.
// CHECK-LABEL: @arm_sme_load
llvm.func @arm_sme_load(%nxv1i1  : vector<[1]xi1>,
                        %nxv2i1  : vector<[2]xi1>,
                        %nxv4i1  : vector<[4]xi1>,
                        %nxv8i1  : vector<[8]xi1>,
                        %nxv16i1 : vector<[16]xi1>,
                        %ptr    : !llvm.ptr) {
  %c0 = llvm.mlir.constant(0 : index) : i32
  // CHECK: call void @llvm.aarch64.sme.ld1q.horiz
  "arm_sme.intr.ld1q.horiz"(%nxv1i1, %ptr, %c0) <{tile_id = 0 : i32}> :
              (vector<[1]xi1>, !llvm.ptr, i32) -> ()
  // CHECK: call void @llvm.aarch64.sme.ld1d.horiz
  "arm_sme.intr.ld1d.horiz"(%nxv2i1, %ptr, %c0) <{tile_id = 0 : i32}> :
              (vector<[2]xi1>, !llvm.ptr, i32) -> ()
  // CHECK: call void @llvm.aarch64.sme.ld1w.horiz
  "arm_sme.intr.ld1w.horiz"(%nxv4i1, %ptr, %c0) <{tile_id = 0 : i32}> :
              (vector<[4]xi1>, !llvm.ptr, i32) -> ()
  // CHECK: call void @llvm.aarch64.sme.ld1h.horiz
  "arm_sme.intr.ld1h.horiz"(%nxv8i1, %ptr, %c0) <{tile_id = 0 : i32}> :
              (vector<[8]xi1>, !llvm.ptr, i32) -> ()
  // CHECK: call void @llvm.aarch64.sme.ld1b.horiz
  "arm_sme.intr.ld1b.horiz"(%nxv16i1, %ptr, %c0) <{tile_id = 0 : i32}> :
              (vector<[16]xi1>, !llvm.ptr, i32) -> ()
  // CHECK: call void @llvm.aarch64.sme.ld1q.vert
  "arm_sme.intr.ld1q.vert"(%nxv1i1, %ptr, %c0) <{tile_id = 0 : i32}> :
              (vector<[1]xi1>, !llvm.ptr, i32) -> ()
  // CHECK: call void @llvm.aarch64.sme.ld1d.vert
  "arm_sme.intr.ld1d.vert"(%nxv2i1, %ptr, %c0) <{tile_id = 0 : i32}> :
              (vector<[2]xi1>, !llvm.ptr, i32) -> ()
  // CHECK: call void @llvm.aarch64.sme.ld1w.vert
  "arm_sme.intr.ld1w.vert"(%nxv4i1, %ptr, %c0) <{tile_id = 0 : i32}> :
              (vector<[4]xi1>, !llvm.ptr, i32) -> ()
  // CHECK: call void @llvm.aarch64.sme.ld1h.vert
  "arm_sme.intr.ld1h.vert"(%nxv8i1, %ptr, %c0) <{tile_id = 0 : i32}> :
              (vector<[8]xi1>, !llvm.ptr, i32) -> ()
  // CHECK: call void @llvm.aarch64.sme.ld1b.vert
  "arm_sme.intr.ld1b.vert"(%nxv16i1, %ptr, %c0) <{tile_id = 0 : i32}> :
              (vector<[16]xi1>, !llvm.ptr, i32) -> ()
  llvm.return
}

// -----

// Predicated tile-slice stores (st1b/h/w/d/q), horizontal and vertical forms,
// plus the whole-tile spill (str).
// CHECK-LABEL: @arm_sme_store
llvm.func @arm_sme_store(%nxv1i1  : vector<[1]xi1>,
                         %nxv2i1  : vector<[2]xi1>,
                         %nxv4i1  : vector<[4]xi1>,
                         %nxv8i1  : vector<[8]xi1>,
                         %nxv16i1 : vector<[16]xi1>,
                         %ptr    : !llvm.ptr) {
  %c0 = llvm.mlir.constant(0 : index) : i32
  // CHECK: call void @llvm.aarch64.sme.st1q.horiz
  "arm_sme.intr.st1q.horiz"(%nxv1i1, %ptr, %c0) <{tile_id = 0 : i32}> :
              (vector<[1]xi1>, !llvm.ptr, i32) -> ()
  // CHECK: call void @llvm.aarch64.sme.st1d.horiz
  "arm_sme.intr.st1d.horiz"(%nxv2i1, %ptr, %c0) <{tile_id = 0 : i32}> :
              (vector<[2]xi1>, !llvm.ptr, i32) -> ()
  // CHECK: call void @llvm.aarch64.sme.st1w.horiz
  "arm_sme.intr.st1w.horiz"(%nxv4i1, %ptr, %c0) <{tile_id = 0 : i32}> :
              (vector<[4]xi1>, !llvm.ptr, i32) -> ()
  // CHECK: call void @llvm.aarch64.sme.st1h.horiz
  "arm_sme.intr.st1h.horiz"(%nxv8i1, %ptr, %c0) <{tile_id = 0 : i32}> :
              (vector<[8]xi1>, !llvm.ptr, i32) -> ()
  // CHECK: call void @llvm.aarch64.sme.st1b.horiz
  "arm_sme.intr.st1b.horiz"(%nxv16i1, %ptr, %c0) <{tile_id = 0 : i32}> :
              (vector<[16]xi1>, !llvm.ptr, i32) -> ()
  // CHECK: call void @llvm.aarch64.sme.st1q.vert
  "arm_sme.intr.st1q.vert"(%nxv1i1, %ptr, %c0) <{tile_id = 0 : i32}> :
              (vector<[1]xi1>, !llvm.ptr, i32) -> ()
  // CHECK: call void @llvm.aarch64.sme.st1d.vert
  "arm_sme.intr.st1d.vert"(%nxv2i1, %ptr, %c0) <{tile_id = 0 : i32}> :
              (vector<[2]xi1>, !llvm.ptr, i32) -> ()
  // CHECK: call void @llvm.aarch64.sme.st1w.vert
  "arm_sme.intr.st1w.vert"(%nxv4i1, %ptr, %c0) <{tile_id = 0 : i32}> :
              (vector<[4]xi1>, !llvm.ptr, i32) -> ()
  // CHECK: call void @llvm.aarch64.sme.st1h.vert
  "arm_sme.intr.st1h.vert"(%nxv8i1, %ptr, %c0) <{tile_id = 0 : i32}> :
              (vector<[8]xi1>, !llvm.ptr, i32) -> ()
  // CHECK: call void @llvm.aarch64.sme.st1b.vert
  "arm_sme.intr.st1b.vert"(%nxv16i1, %ptr, %c0) <{tile_id = 0 : i32}> :
              (vector<[16]xi1>, !llvm.ptr, i32) -> ()
  // CHECK: call void @llvm.aarch64.sme.str
  "arm_sme.intr.str"(%c0, %ptr, %c0) : (i32, !llvm.ptr, i32) -> ()
  llvm.return
}

// -----

// Insert a vector into a horizontal tile slice for every element type.
// CHECK-LABEL: @arm_sme_vector_to_tile_horiz
llvm.func @arm_sme_vector_to_tile_horiz(%tileslice : i32,
                                        %nxv16i1 : vector<[16]xi1>,
                                        %nxv8i1 : vector<[8]xi1>,
                                        %nxv4i1 : vector<[4]xi1>,
                                        %nxv2i1 : vector<[2]xi1>,
                                        %nxv1i1 : vector<[1]xi1>,
                                        %nxv16i8 : vector<[16]xi8>,
                                        %nxv8i16 : vector<[8]xi16>,
                                        %nxv4i32 : vector<[4]xi32>,
                                        %nxv2i64 : vector<[2]xi64>,
                                        %nxv1i128 : vector<[1]xi128>,
                                        %nxv8f16 : vector<[8]xf16>,
                                        %nxv8bf16 : vector<[8]xbf16>,
                                        %nxv4f32 : vector<[4]xf32>,
                                        %nxv2f64 : vector<[2]xf64>) {
  // CHECK: call void @llvm.aarch64.sme.write.horiz.nxv16i8
  "arm_sme.intr.write.horiz"(%tileslice, %nxv16i1, %nxv16i8) <{tile_id = 0 : i32}> :
      (i32, vector<[16]xi1>, vector<[16]xi8>) -> ()
  // CHECK: call void @llvm.aarch64.sme.write.horiz.nxv8i16
  "arm_sme.intr.write.horiz"(%tileslice, %nxv8i1, %nxv8i16) <{tile_id = 0 : i32}> :
      (i32, vector<[8]xi1>, vector<[8]xi16>) -> ()
  // CHECK: call void @llvm.aarch64.sme.write.horiz.nxv4i32
  "arm_sme.intr.write.horiz"(%tileslice, %nxv4i1, %nxv4i32) <{tile_id = 0 : i32}> :
      (i32, vector<[4]xi1>, vector<[4]xi32>) -> ()
  // CHECK: call void @llvm.aarch64.sme.write.horiz.nxv2i64
  "arm_sme.intr.write.horiz"(%tileslice, %nxv2i1, %nxv2i64) <{tile_id = 0 : i32}> :
      (i32, vector<[2]xi1>, vector<[2]xi64>) -> ()
  // CHECK: call void @llvm.aarch64.sme.write.horiz.nxv1i128
  "arm_sme.intr.write.horiz"(%tileslice, %nxv1i1, %nxv1i128) <{tile_id = 0 : i32}> :
      (i32, vector<[1]xi1>, vector<[1]xi128>) -> ()
  // CHECK: call void @llvm.aarch64.sme.write.horiz.nxv8f16
  "arm_sme.intr.write.horiz"(%tileslice, %nxv8i1, %nxv8f16) <{tile_id = 0 : i32}> :
      (i32, vector<[8]xi1>, vector<[8]xf16>) -> ()
  // CHECK: call void @llvm.aarch64.sme.write.horiz.nxv8bf16
  "arm_sme.intr.write.horiz"(%tileslice, %nxv8i1, %nxv8bf16) <{tile_id = 0 : i32}> :
      (i32, vector<[8]xi1>, vector<[8]xbf16>) -> ()
  // CHECK: call void @llvm.aarch64.sme.write.horiz.nxv4f32
  "arm_sme.intr.write.horiz"(%tileslice, %nxv4i1, %nxv4f32) <{tile_id = 0 : i32}> :
      (i32, vector<[4]xi1>, vector<[4]xf32>) -> ()
  // CHECK: call void @llvm.aarch64.sme.write.horiz.nxv2f64
  "arm_sme.intr.write.horiz"(%tileslice, %nxv2i1, %nxv2f64) <{tile_id = 0 : i32}> :
      (i32, vector<[2]xi1>, vector<[2]xf64>) -> ()
  llvm.return
}

// -----

// Insert a vector into a vertical tile slice for every element type.
// CHECK-LABEL: @arm_sme_vector_to_tile_vert
llvm.func @arm_sme_vector_to_tile_vert(%tileslice : i32,
                                       %nxv16i1 : vector<[16]xi1>,
                                       %nxv8i1 : vector<[8]xi1>,
                                       %nxv4i1 : vector<[4]xi1>,
                                       %nxv2i1 : vector<[2]xi1>,
                                       %nxv1i1 : vector<[1]xi1>,
                                       %nxv16i8 : vector<[16]xi8>,
                                       %nxv8i16 : vector<[8]xi16>,
                                       %nxv4i32 : vector<[4]xi32>,
                                       %nxv2i64 : vector<[2]xi64>,
                                       %nxv1i128 : vector<[1]xi128>,
                                       %nxv8f16 : vector<[8]xf16>,
                                       %nxv8bf16 : vector<[8]xbf16>,
                                       %nxv4f32 : vector<[4]xf32>,
                                       %nxv2f64 : vector<[2]xf64>) {
  // CHECK: call void @llvm.aarch64.sme.write.vert.nxv16i8
  "arm_sme.intr.write.vert"(%tileslice, %nxv16i1, %nxv16i8) <{tile_id = 0 : i32}> :
      (i32, vector<[16]xi1>, vector<[16]xi8>) -> ()
  // CHECK: call void @llvm.aarch64.sme.write.vert.nxv8i16
  "arm_sme.intr.write.vert"(%tileslice, %nxv8i1, %nxv8i16) <{tile_id = 0 : i32}> :
      (i32, vector<[8]xi1>, vector<[8]xi16>) -> ()
  // CHECK: call void @llvm.aarch64.sme.write.vert.nxv4i32
  "arm_sme.intr.write.vert"(%tileslice, %nxv4i1, %nxv4i32) <{tile_id = 0 : i32}> :
      (i32, vector<[4]xi1>, vector<[4]xi32>) -> ()
  // CHECK: call void @llvm.aarch64.sme.write.vert.nxv2i64
  "arm_sme.intr.write.vert"(%tileslice, %nxv2i1, %nxv2i64) <{tile_id = 0 : i32}> :
      (i32, vector<[2]xi1>, vector<[2]xi64>) -> ()
  // CHECK: call void @llvm.aarch64.sme.write.vert.nxv1i128
  "arm_sme.intr.write.vert"(%tileslice, %nxv1i1, %nxv1i128) <{tile_id = 0 : i32}> :
      (i32, vector<[1]xi1>, vector<[1]xi128>) -> ()
  // CHECK: call void @llvm.aarch64.sme.write.vert.nxv8f16
  "arm_sme.intr.write.vert"(%tileslice, %nxv8i1, %nxv8f16) <{tile_id = 0 : i32}> :
      (i32, vector<[8]xi1>, vector<[8]xf16>) -> ()
  // CHECK: call void @llvm.aarch64.sme.write.vert.nxv8bf16
  "arm_sme.intr.write.vert"(%tileslice, %nxv8i1, %nxv8bf16) <{tile_id = 0 : i32}> :
      (i32, vector<[8]xi1>, vector<[8]xbf16>) -> ()
  // CHECK: call void @llvm.aarch64.sme.write.vert.nxv4f32
  "arm_sme.intr.write.vert"(%tileslice, %nxv4i1, %nxv4f32) <{tile_id = 0 : i32}> :
      (i32, vector<[4]xi1>, vector<[4]xf32>) -> ()
  // CHECK: call void @llvm.aarch64.sme.write.vert.nxv2f64
  "arm_sme.intr.write.vert"(%tileslice, %nxv2i1, %nxv2f64) <{tile_id = 0 : i32}> :
      (i32, vector<[2]xi1>, vector<[2]xf64>) -> ()
  llvm.return
}

// -----

// Extract a horizontal tile slice into a vector for every element type.
// NOTE(review): CHECK-LABEL was missing here; added so the CHECKs below
// cannot accidentally match calls emitted for a previous function.
// CHECK-LABEL: @arm_sme_tile_slice_to_vector_horiz
llvm.func @arm_sme_tile_slice_to_vector_horiz(%tileslice : i32,
                                              %nxv16i1   : vector<[16]xi1>,
                                              %nxv8i1    : vector<[8]xi1>,
                                              %nxv4i1    : vector<[4]xi1>,
                                              %nxv2i1    : vector<[2]xi1>,
                                              %nxv1i1    : vector<[1]xi1>,
                                              %nxv16i8   : vector<[16]xi8>,
                                              %nxv8i16   : vector<[8]xi16>,
                                              %nxv4i32   : vector<[4]xi32>,
                                              %nxv2i64   : vector<[2]xi64>,
                                              %nxv1i128  : vector<[1]xi128>,
                                              %nxv8f16   : vector<[8]xf16>,
                                              %nxv8bf16  : vector<[8]xbf16>,
                                              %nxv4f32   : vector<[4]xf32>,
                                              %nxv2f64   : vector<[2]xf64>) {
  // CHECK: call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8
  %res0 = "arm_sme.intr.read.horiz"(%nxv16i8, %nxv16i1, %tileslice) <{tile_id = 0 : i32}>
    : (vector<[16]xi8>, vector<[16]xi1>, i32) -> vector<[16]xi8>
  // CHECK: call <vscale x 8 x i16> @llvm.aarch64.sme.read.horiz.nxv8i16
  %res1 = "arm_sme.intr.read.horiz"(%nxv8i16, %nxv8i1, %tileslice) <{tile_id = 0 : i32}>
    : (vector<[8]xi16>, vector<[8]xi1>, i32) -> vector<[8]xi16>
  // CHECK: call <vscale x 4 x i32> @llvm.aarch64.sme.read.horiz.nxv4i32
  %res2 = "arm_sme.intr.read.horiz"(%nxv4i32, %nxv4i1, %tileslice) <{tile_id = 0 : i32}>
    : (vector<[4]xi32>, vector<[4]xi1>, i32) -> vector<[4]xi32>
  // CHECK: call <vscale x 2 x i64> @llvm.aarch64.sme.read.horiz.nxv2i64
  %res3 = "arm_sme.intr.read.horiz"(%nxv2i64, %nxv2i1, %tileslice) <{tile_id = 0 : i32}>
    : (vector<[2]xi64>, vector<[2]xi1>, i32) -> vector<[2]xi64>
  // CHECK: call <vscale x 1 x i128> @llvm.aarch64.sme.read.horiz.nxv1i128
  %res4 = "arm_sme.intr.read.horiz"(%nxv1i128, %nxv1i1, %tileslice) <{tile_id = 0 : i32}>
    : (vector<[1]xi128>, vector<[1]xi1>, i32) -> vector<[1]xi128>
  // CHECK: call <vscale x 8 x half> @llvm.aarch64.sme.read.horiz.nxv8f16
  %res5 = "arm_sme.intr.read.horiz"(%nxv8f16, %nxv8i1, %tileslice) <{tile_id = 0 : i32}>
    : (vector<[8]xf16>, vector<[8]xi1>, i32) -> vector<[8]xf16>
  // CHECK: call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.horiz.nxv8bf16
  %res6 = "arm_sme.intr.read.horiz"(%nxv8bf16, %nxv8i1, %tileslice) <{tile_id = 0 : i32}>
    : (vector<[8]xbf16>, vector<[8]xi1>, i32) -> vector<[8]xbf16>
  // CHECK: call <vscale x 4 x float> @llvm.aarch64.sme.read.horiz.nxv4f32
  %res7 = "arm_sme.intr.read.horiz"(%nxv4f32, %nxv4i1, %tileslice) <{tile_id = 0 : i32}>
    : (vector<[4]xf32>, vector<[4]xi1>, i32) -> vector<[4]xf32>
  // CHECK: call <vscale x 2 x double> @llvm.aarch64.sme.read.horiz.nxv2f64
  %res8 = "arm_sme.intr.read.horiz"(%nxv2f64, %nxv2i1, %tileslice) <{tile_id = 0 : i32}>
    : (vector<[2]xf64>, vector<[2]xi1>, i32) -> vector<[2]xf64>
  llvm.return
}

// -----

// Extract a vertical tile slice into a vector for every element type.
// NOTE(review): CHECK-LABEL was missing here; added so the CHECKs below
// cannot accidentally match calls emitted for a previous function.
// CHECK-LABEL: @arm_sme_tile_slice_to_vector_vert
llvm.func @arm_sme_tile_slice_to_vector_vert(%tileslice : i32,
                                             %nxv16i1  : vector<[16]xi1>,
                                             %nxv8i1   : vector<[8]xi1>,
                                             %nxv4i1   : vector<[4]xi1>,
                                             %nxv2i1   : vector<[2]xi1>,
                                             %nxv1i1   : vector<[1]xi1>,
                                             %nxv16i8  : vector<[16]xi8>,
                                             %nxv8i16  : vector<[8]xi16>,
                                             %nxv4i32  : vector<[4]xi32>,
                                             %nxv2i64  : vector<[2]xi64>,
                                             %nxv1i128 : vector<[1]xi128>,
                                             %nxv8f16  : vector<[8]xf16>,
                                             %nxv8bf16 : vector<[8]xbf16>,
                                             %nxv4f32  : vector<[4]xf32>,
                                             %nxv2f64  : vector<[2]xf64>) {
  // CHECK: call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8
  %res0 = "arm_sme.intr.read.vert"(%nxv16i8, %nxv16i1, %tileslice) <{tile_id = 0 : i32}>
    : (vector<[16]xi8>, vector<[16]xi1>, i32) -> vector<[16]xi8>
  // CHECK: call <vscale x 8 x i16> @llvm.aarch64.sme.read.vert.nxv8i16
  %res1 = "arm_sme.intr.read.vert"(%nxv8i16, %nxv8i1, %tileslice) <{tile_id = 0 : i32}>
    : (vector<[8]xi16>, vector<[8]xi1>, i32) -> vector<[8]xi16>
  // CHECK: call <vscale x 4 x i32> @llvm.aarch64.sme.read.vert.nxv4i32
  %res2 = "arm_sme.intr.read.vert"(%nxv4i32, %nxv4i1, %tileslice) <{tile_id = 0 : i32}>
    : (vector<[4]xi32>, vector<[4]xi1>, i32) -> vector<[4]xi32>
  // CHECK: call <vscale x 2 x i64> @llvm.aarch64.sme.read.vert.nxv2i64
  %res3 = "arm_sme.intr.read.vert"(%nxv2i64, %nxv2i1, %tileslice) <{tile_id = 0 : i32}>
    : (vector<[2]xi64>, vector<[2]xi1>, i32) -> vector<[2]xi64>
  // CHECK: call <vscale x 1 x i128> @llvm.aarch64.sme.read.vert.nxv1i128
  %res4 = "arm_sme.intr.read.vert"(%nxv1i128, %nxv1i1, %tileslice) <{tile_id = 0 : i32}>
    : (vector<[1]xi128>, vector<[1]xi1>, i32) -> vector<[1]xi128>
  // CHECK: call <vscale x 8 x half> @llvm.aarch64.sme.read.vert.nxv8f16
  %res5 = "arm_sme.intr.read.vert"(%nxv8f16, %nxv8i1, %tileslice) <{tile_id = 0 : i32}>
    : (vector<[8]xf16>, vector<[8]xi1>, i32) -> vector<[8]xf16>
  // CHECK: call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.vert.nxv8bf16
  %res6 = "arm_sme.intr.read.vert"(%nxv8bf16, %nxv8i1, %tileslice) <{tile_id = 0 : i32}>
    : (vector<[8]xbf16>, vector<[8]xi1>, i32) -> vector<[8]xbf16>
  // CHECK: call <vscale x 4 x float> @llvm.aarch64.sme.read.vert.nxv4f32
  %res7 = "arm_sme.intr.read.vert"(%nxv4f32, %nxv4i1, %tileslice) <{tile_id = 0 : i32}>
    : (vector<[4]xf32>, vector<[4]xi1>, i32) -> vector<[4]xf32>
  // CHECK: call <vscale x 2 x double> @llvm.aarch64.sme.read.vert.nxv2f64
  %res8 = "arm_sme.intr.read.vert"(%nxv2f64, %nxv2i1, %tileslice) <{tile_id = 0 : i32}>
    : (vector<[2]xf64>, vector<[2]xi1>, i32) -> vector<[2]xf64>
  llvm.return
}

// -----

// Streaming vector length queries (cntsb/h/w/d).
// NOTE(review): CHECK-LABEL was missing here; added for consistency with the
// other split-input sections in this file.
// CHECK-LABEL: @arm_sme_streaming_vl
llvm.func @arm_sme_streaming_vl() {
  // CHECK: call i64 @llvm.aarch64.sme.cntsb()
  %svl_b = "arm_sme.intr.cntsb"() : () -> i64
  // CHECK: call i64 @llvm.aarch64.sme.cntsh()
  %svl_h = "arm_sme.intr.cntsh"() : () -> i64
  // CHECK: call i64 @llvm.aarch64.sme.cntsw()
  %svl_w = "arm_sme.intr.cntsw"() : () -> i64
  // CHECK: call i64 @llvm.aarch64.sme.cntsd()
  %svl_d = "arm_sme.intr.cntsd"() : () -> i64
  llvm.return
}