xref: /llvm-project/mlir/test/Dialect/ArmSME/roundtrip.mlir (revision c42512436b23ab50e7637f239abe8371407104a1)
1// RUN: mlir-opt -split-input-file -verify-diagnostics %s | mlir-opt | FileCheck %s
2
3//===----------------------------------------------------------------------===//
4// arm_sme.get_tile
5//===----------------------------------------------------------------------===//
6
7
/// Round-trips `arm_sme.get_tile` for `i8` elements.
func.func @arm_sme_get_tile_i8() {
  // CHECK: arm_sme.get_tile : vector<[16]x[16]xi8>
  %tile = arm_sme.get_tile : vector<[16]x[16]xi8>
  return
}
13
14// -----
15
/// Round-trips `arm_sme.get_tile` for `i16` elements.
func.func @arm_sme_get_tile_i16() {
  // CHECK: arm_sme.get_tile : vector<[8]x[8]xi16>
  %tile = arm_sme.get_tile : vector<[8]x[8]xi16>
  return
}
21
22// -----
23
/// Round-trips `arm_sme.get_tile` for `i32` elements.
func.func @arm_sme_get_tile_i32() {
  // CHECK: arm_sme.get_tile : vector<[4]x[4]xi32>
  %tile = arm_sme.get_tile : vector<[4]x[4]xi32>
  return
}
29
30// -----
31
/// Round-trips `arm_sme.get_tile` for `i64` elements.
func.func @arm_sme_get_tile_i64() {
  // CHECK: arm_sme.get_tile : vector<[2]x[2]xi64>
  %tile = arm_sme.get_tile : vector<[2]x[2]xi64>
  return
}
37
38// -----
39
/// Round-trips `arm_sme.get_tile` for `i128` elements.
func.func @arm_sme_get_tile_i128() {
  // CHECK: arm_sme.get_tile : vector<[1]x[1]xi128>
  %tile = arm_sme.get_tile : vector<[1]x[1]xi128>
  return
}
45
46// -----
47
/// Round-trips `arm_sme.get_tile` for `f16` elements.
func.func @arm_sme_get_tile_f16() {
  // CHECK: arm_sme.get_tile : vector<[8]x[8]xf16>
  %tile = arm_sme.get_tile : vector<[8]x[8]xf16>
  return
}
53
54// -----
55
/// Round-trips `arm_sme.get_tile` for `bf16` elements.
func.func @arm_sme_get_tile_bf16() {
  // CHECK: arm_sme.get_tile : vector<[8]x[8]xbf16>
  %tile = arm_sme.get_tile : vector<[8]x[8]xbf16>
  return
}
61
62// -----
63
/// Round-trips `arm_sme.get_tile` for `f32` elements.
func.func @arm_sme_get_tile_f32() {
  // CHECK: arm_sme.get_tile : vector<[4]x[4]xf32>
  %tile = arm_sme.get_tile : vector<[4]x[4]xf32>
  return
}
69
70// -----
71
/// Round-trips `arm_sme.get_tile` for `f64` elements.
func.func @arm_sme_get_tile_f64() {
  // CHECK: arm_sme.get_tile : vector<[2]x[2]xf64>
  %tile = arm_sme.get_tile : vector<[2]x[2]xf64>
  return
}
77
78//===----------------------------------------------------------------------===//
79// arm_sme.zero
80//===----------------------------------------------------------------------===//
81
82// -----
83
/// Round-trips `arm_sme.zero` for `i8` elements.
func.func @arm_sme_zero_i8() {
  // CHECK: arm_sme.zero : vector<[16]x[16]xi8>
  %zeroed = arm_sme.zero : vector<[16]x[16]xi8>
  return
}
89
90// -----
91
/// Round-trips `arm_sme.zero` for `i16` elements.
func.func @arm_sme_zero_i16() {
  // CHECK: arm_sme.zero : vector<[8]x[8]xi16>
  %zeroed = arm_sme.zero : vector<[8]x[8]xi16>
  return
}
97
98// -----
99
/// Round-trips `arm_sme.zero` for `i32` elements.
func.func @arm_sme_zero_i32() {
  // CHECK: arm_sme.zero : vector<[4]x[4]xi32>
  %zeroed = arm_sme.zero : vector<[4]x[4]xi32>
  return
}
105
106// -----
107
/// Round-trips `arm_sme.zero` for `i64` elements.
func.func @arm_sme_zero_i64() {
  // CHECK: arm_sme.zero : vector<[2]x[2]xi64>
  %zeroed = arm_sme.zero : vector<[2]x[2]xi64>
  return
}
113
114// -----
115
/// Round-trips `arm_sme.zero` for `i128` elements.
func.func @arm_sme_zero_i128() {
  // CHECK: arm_sme.zero : vector<[1]x[1]xi128>
  %zeroed = arm_sme.zero : vector<[1]x[1]xi128>
  return
}
121
122// -----
123
/// Round-trips `arm_sme.zero` for `f16` elements.
func.func @arm_sme_zero_f16() {
  // CHECK: arm_sme.zero : vector<[8]x[8]xf16>
  %zeroed = arm_sme.zero : vector<[8]x[8]xf16>
  return
}
129
130// -----
131
/// Round-trips `arm_sme.zero` for `bf16` elements.
func.func @arm_sme_zero_bf16() {
  // CHECK: arm_sme.zero : vector<[8]x[8]xbf16>
  %zeroed = arm_sme.zero : vector<[8]x[8]xbf16>
  return
}
137
138// -----
139
/// Round-trips `arm_sme.zero` for `f32` elements.
func.func @arm_sme_zero_f32() {
  // CHECK: arm_sme.zero : vector<[4]x[4]xf32>
  %zeroed = arm_sme.zero : vector<[4]x[4]xf32>
  return
}
145
146// -----
147
/// Round-trips `arm_sme.zero` for `f64` elements.
func.func @arm_sme_zero_f64() {
  // CHECK: arm_sme.zero : vector<[2]x[2]xf64>
  %zeroed = arm_sme.zero : vector<[2]x[2]xf64>
  return
}
153
154//===----------------------------------------------------------------------===//
155// arm_sme.tile_load
156//===----------------------------------------------------------------------===//
157
158// -----
159
/// Round-trips `arm_sme.tile_load` without an explicit layout for `i8` elements.
func.func @arm_sme_tile_load_hor_i8(%src : memref<?x?xi8>) {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.tile_load %{{.*}}[{{.*}}] : memref<?x?xi8>, vector<[16]x[16]xi8>
  %loaded = arm_sme.tile_load %src[%idx, %idx] : memref<?x?xi8>, vector<[16]x[16]xi8>
  return
}
166
167// -----
168
/// Round-trips `arm_sme.tile_load` without an explicit layout for `i16` elements.
func.func @arm_sme_tile_load_hor_i16(%src : memref<?x?xi16>) {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.tile_load %{{.*}}[{{.*}}] : memref<?x?xi16>, vector<[8]x[8]xi16>
  %loaded = arm_sme.tile_load %src[%idx, %idx] : memref<?x?xi16>, vector<[8]x[8]xi16>
  return
}
175
176// -----
177
/// Round-trips `arm_sme.tile_load` without an explicit layout for `i32` elements.
func.func @arm_sme_tile_load_hor_i32(%src : memref<?x?xi32>) {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.tile_load %{{.*}}[{{.*}}] : memref<?x?xi32>, vector<[4]x[4]xi32>
  %loaded = arm_sme.tile_load %src[%idx, %idx] : memref<?x?xi32>, vector<[4]x[4]xi32>
  return
}
184
185// -----
186
/// Round-trips `arm_sme.tile_load` without an explicit layout for `i64` elements.
func.func @arm_sme_tile_load_hor_i64(%src : memref<?x?xi64>) {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.tile_load %{{.*}}[{{.*}}] : memref<?x?xi64>, vector<[2]x[2]xi64>
  %loaded = arm_sme.tile_load %src[%idx, %idx] : memref<?x?xi64>, vector<[2]x[2]xi64>
  return
}
193
194// -----
195
/// Round-trips `arm_sme.tile_load` without an explicit layout for `i128` elements.
func.func @arm_sme_tile_load_hor_i128(%src : memref<?x?xi128>) {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.tile_load %{{.*}}[{{.*}}] : memref<?x?xi128>, vector<[1]x[1]xi128>
  %loaded = arm_sme.tile_load %src[%idx, %idx] : memref<?x?xi128>, vector<[1]x[1]xi128>
  return
}
202
203// -----
204
/// Round-trips `arm_sme.tile_load` without an explicit layout for `f16` elements.
func.func @arm_sme_tile_load_hor_f16(%src : memref<?x?xf16>) {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.tile_load %{{.*}}[{{.*}}] : memref<?x?xf16>, vector<[8]x[8]xf16>
  %loaded = arm_sme.tile_load %src[%idx, %idx] : memref<?x?xf16>, vector<[8]x[8]xf16>
  return
}
211
212// -----
213
/// Round-trips `arm_sme.tile_load` without an explicit layout for `bf16` elements.
func.func @arm_sme_tile_load_hor_bf16(%src : memref<?x?xbf16>) {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.tile_load %{{.*}}[{{.*}}] : memref<?x?xbf16>, vector<[8]x[8]xbf16>
  %loaded = arm_sme.tile_load %src[%idx, %idx] : memref<?x?xbf16>, vector<[8]x[8]xbf16>
  return
}
220
221// -----
222
/// Round-trips `arm_sme.tile_load` without an explicit layout for `f32` elements.
func.func @arm_sme_tile_load_hor_f32(%src : memref<?x?xf32>) {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.tile_load %{{.*}}[{{.*}}] : memref<?x?xf32>, vector<[4]x[4]xf32>
  %loaded = arm_sme.tile_load %src[%idx, %idx] : memref<?x?xf32>, vector<[4]x[4]xf32>
  return
}
229
230// -----
231
/// Round-trips `arm_sme.tile_load` without an explicit layout for `f64` elements.
func.func @arm_sme_tile_load_hor_f64(%src : memref<?x?xf64>) {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.tile_load %{{.*}}[{{.*}}] : memref<?x?xf64>, vector<[2]x[2]xf64>
  %loaded = arm_sme.tile_load %src[%idx, %idx] : memref<?x?xf64>, vector<[2]x[2]xf64>
  return
}
238
239// -----
240
/// Round-trips `arm_sme.tile_load` with `layout<vertical>` for `i8` elements.
func.func @arm_sme_tile_load_ver_i8(%src : memref<?x?xi8>) {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.tile_load {{.*}} layout<vertical> : memref<?x?xi8>, vector<[16]x[16]xi8>
  %loaded = arm_sme.tile_load %src[%idx, %idx] layout<vertical> : memref<?x?xi8>, vector<[16]x[16]xi8>
  return
}
247
248// -----
249
/// Round-trips `arm_sme.tile_load` with `layout<vertical>` for `i16` elements.
func.func @arm_sme_tile_load_ver_i16(%src : memref<?x?xi16>) {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.tile_load {{.*}} layout<vertical> : memref<?x?xi16>, vector<[8]x[8]xi16>
  %loaded = arm_sme.tile_load %src[%idx, %idx] layout<vertical> : memref<?x?xi16>, vector<[8]x[8]xi16>
  return
}
256
257// -----
258
/// Round-trips `arm_sme.tile_load` with `layout<vertical>` for `i32` elements.
func.func @arm_sme_tile_load_ver_i32(%src : memref<?x?xi32>) {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.tile_load {{.*}} layout<vertical> : memref<?x?xi32>, vector<[4]x[4]xi32>
  %loaded = arm_sme.tile_load %src[%idx, %idx] layout<vertical> : memref<?x?xi32>, vector<[4]x[4]xi32>
  return
}
265
266// -----
267
/// Round-trips `arm_sme.tile_load` with `layout<vertical>` for `i64` elements.
func.func @arm_sme_tile_load_ver_i64(%src : memref<?x?xi64>) {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.tile_load {{.*}} layout<vertical> : memref<?x?xi64>, vector<[2]x[2]xi64>
  %loaded = arm_sme.tile_load %src[%idx, %idx] layout<vertical> : memref<?x?xi64>, vector<[2]x[2]xi64>
  return
}
274
275// -----
276
/// Round-trips `arm_sme.tile_load` with `layout<vertical>` for `i128` elements.
func.func @arm_sme_tile_load_ver_i128(%src : memref<?x?xi128>) {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.tile_load {{.*}} layout<vertical> : memref<?x?xi128>, vector<[1]x[1]xi128>
  %loaded = arm_sme.tile_load %src[%idx, %idx] layout<vertical> : memref<?x?xi128>, vector<[1]x[1]xi128>
  return
}
283
284// -----
285
/// Round-trips `arm_sme.tile_load` with `layout<vertical>` for `f16` elements.
func.func @arm_sme_tile_load_ver_f16(%src : memref<?x?xf16>) {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.tile_load {{.*}} layout<vertical> : memref<?x?xf16>, vector<[8]x[8]xf16>
  %loaded = arm_sme.tile_load %src[%idx, %idx] layout<vertical> : memref<?x?xf16>, vector<[8]x[8]xf16>
  return
}
292
293// -----
294
/// Round-trips `arm_sme.tile_load` with `layout<vertical>` for `bf16` elements.
func.func @arm_sme_tile_load_ver_bf16(%src : memref<?x?xbf16>) {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.tile_load {{.*}} layout<vertical> : memref<?x?xbf16>, vector<[8]x[8]xbf16>
  %loaded = arm_sme.tile_load %src[%idx, %idx] layout<vertical> : memref<?x?xbf16>, vector<[8]x[8]xbf16>
  return
}
301
302// -----
303
/// Round-trips `arm_sme.tile_load` with `layout<vertical>` for `f32` elements.
func.func @arm_sme_tile_load_ver_f32(%src : memref<?x?xf32>) {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.tile_load {{.*}} layout<vertical> : memref<?x?xf32>, vector<[4]x[4]xf32>
  %loaded = arm_sme.tile_load %src[%idx, %idx] layout<vertical> : memref<?x?xf32>, vector<[4]x[4]xf32>
  return
}
310
311// -----
312
/// Round-trips `arm_sme.tile_load` with `layout<vertical>` for `f64` elements.
func.func @arm_sme_tile_load_ver_f64(%src : memref<?x?xf64>) {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.tile_load {{.*}} layout<vertical> : memref<?x?xf64>, vector<[2]x[2]xf64>
  %loaded = arm_sme.tile_load %src[%idx, %idx] layout<vertical> : memref<?x?xf64>, vector<[2]x[2]xf64>
  return
}
319
320// -----
321
/// `arm_sme.tile_load` also accepts the optional padding and mask operands;
/// both must still be present after the round-trip.
func.func @arm_sme_tile_load_hor_pad_f64(%src : memref<?x?xf64>, %pad : f64, %mask : vector<[2]x[2]xi1>) {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.tile_load %{{.*}}[{{.*}}], {{.*}}, {{.*}} : memref<?x?xf64>, vector<[2]x[2]xf64>
  %loaded = arm_sme.tile_load %src[%idx, %idx], %pad, %mask : memref<?x?xf64>, vector<[2]x[2]xf64>
  return
}
329
330// -----
331
/// An explicit `layout<horizontal>` must still parse; as horizontal is the
/// default layout, the expected output omits it.
func.func @arm_sme_tile_load_explicit_hor(%src : memref<?x?xi8>) {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.tile_load %{{.*}}[{{.*}}] : memref<?x?xi8>, vector<[16]x[16]xi8>
  %loaded = arm_sme.tile_load %src[%idx, %idx] layout<horizontal> : memref<?x?xi8>, vector<[16]x[16]xi8>
  return
}
339
340//===----------------------------------------------------------------------===//
341// arm_sme.tile_store
342//===----------------------------------------------------------------------===//
343
344// -----
345
/// Round-trips `arm_sme.tile_store` without an explicit layout for `i8` elements.
func.func @arm_sme_tile_store_hor_i8(%tile : vector<[16]x[16]xi8>, %dest : memref<?x?xi8>) {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.tile_store %{{.*}}[{{.*}}] : memref<?x?xi8>, vector<[16]x[16]xi8>
  arm_sme.tile_store %tile, %dest[%idx, %idx] : memref<?x?xi8>, vector<[16]x[16]xi8>
  return
}
352
353// -----
354
/// Round-trips `arm_sme.tile_store` without an explicit layout for `i16` elements.
func.func @arm_sme_tile_store_hor_i16(%tile : vector<[8]x[8]xi16>, %dest : memref<?x?xi16>) {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.tile_store %{{.*}}[{{.*}}] : memref<?x?xi16>, vector<[8]x[8]xi16>
  arm_sme.tile_store %tile, %dest[%idx, %idx] : memref<?x?xi16>, vector<[8]x[8]xi16>
  return
}
361
362// -----
363
/// Round-trips `arm_sme.tile_store` without an explicit layout for `i32` elements.
func.func @arm_sme_tile_store_hor_i32(%tile : vector<[4]x[4]xi32>, %dest : memref<?x?xi32>) {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.tile_store %{{.*}}[{{.*}}] : memref<?x?xi32>, vector<[4]x[4]xi32>
  arm_sme.tile_store %tile, %dest[%idx, %idx] : memref<?x?xi32>, vector<[4]x[4]xi32>
  return
}
370
371// -----
372
/// Round-trips `arm_sme.tile_store` without an explicit layout for `i64` elements.
func.func @arm_sme_tile_store_hor_i64(%tile : vector<[2]x[2]xi64>, %dest : memref<?x?xi64>) {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.tile_store %{{.*}}[{{.*}}] : memref<?x?xi64>, vector<[2]x[2]xi64>
  arm_sme.tile_store %tile, %dest[%idx, %idx] : memref<?x?xi64>, vector<[2]x[2]xi64>
  return
}
379
380// -----
381
/// Round-trips `arm_sme.tile_store` without an explicit layout for `i128` elements.
func.func @arm_sme_tile_store_hor_i128(%tile : vector<[1]x[1]xi128>, %dest : memref<?x?xi128>) {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.tile_store %{{.*}}[{{.*}}] : memref<?x?xi128>, vector<[1]x[1]xi128>
  arm_sme.tile_store %tile, %dest[%idx, %idx] : memref<?x?xi128>, vector<[1]x[1]xi128>
  return
}
388
389// -----
390
/// Round-trips `arm_sme.tile_store` without an explicit layout for `f16` elements.
func.func @arm_sme_tile_store_hor_f16(%tile : vector<[8]x[8]xf16>, %dest : memref<?x?xf16>) {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.tile_store %{{.*}}[{{.*}}] : memref<?x?xf16>, vector<[8]x[8]xf16>
  arm_sme.tile_store %tile, %dest[%idx, %idx] : memref<?x?xf16>, vector<[8]x[8]xf16>
  return
}
397
398// -----
399
/// Round-trips `arm_sme.tile_store` without an explicit layout for `bf16` elements.
func.func @arm_sme_tile_store_hor_bf16(%tile : vector<[8]x[8]xbf16>, %dest : memref<?x?xbf16>) {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.tile_store %{{.*}}[{{.*}}] : memref<?x?xbf16>, vector<[8]x[8]xbf16>
  arm_sme.tile_store %tile, %dest[%idx, %idx] : memref<?x?xbf16>, vector<[8]x[8]xbf16>
  return
}
406
407// -----
408
/// Round-trips `arm_sme.tile_store` without an explicit layout for `f32` elements.
func.func @arm_sme_tile_store_hor_f32(%tile : vector<[4]x[4]xf32>, %dest : memref<?x?xf32>) {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.tile_store %{{.*}}[{{.*}}] : memref<?x?xf32>, vector<[4]x[4]xf32>
  arm_sme.tile_store %tile, %dest[%idx, %idx] : memref<?x?xf32>, vector<[4]x[4]xf32>
  return
}
415
416// -----
417
/// Round-trips `arm_sme.tile_store` without an explicit layout for `f64` elements.
func.func @arm_sme_tile_store_hor_f64(%tile : vector<[2]x[2]xf64>, %dest : memref<?x?xf64>) {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.tile_store %{{.*}}[{{.*}}] : memref<?x?xf64>, vector<[2]x[2]xf64>
  arm_sme.tile_store %tile, %dest[%idx, %idx] : memref<?x?xf64>, vector<[2]x[2]xf64>
  return
}
424
425// -----
426
/// Round-trips `arm_sme.tile_store` with `layout<vertical>` for `i8` elements.
func.func @arm_sme_tile_store_ver_i8(%tile : vector<[16]x[16]xi8>, %dest : memref<?x?xi8>) {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.tile_store {{.*}} layout<vertical> : memref<?x?xi8>, vector<[16]x[16]xi8>
  arm_sme.tile_store %tile, %dest[%idx, %idx] layout<vertical> : memref<?x?xi8>, vector<[16]x[16]xi8>
  return
}
433
434// -----
435
/// Round-trips `arm_sme.tile_store` with `layout<vertical>` for `i16` elements.
func.func @arm_sme_tile_store_ver_i16(%tile : vector<[8]x[8]xi16>, %dest : memref<?x?xi16>) {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.tile_store {{.*}} layout<vertical> : memref<?x?xi16>, vector<[8]x[8]xi16>
  arm_sme.tile_store %tile, %dest[%idx, %idx] layout<vertical> : memref<?x?xi16>, vector<[8]x[8]xi16>
  return
}
442
443// -----
444
/// Round-trips `arm_sme.tile_store` with `layout<vertical>` for `i32` elements.
func.func @arm_sme_tile_store_ver_i32(%tile : vector<[4]x[4]xi32>, %dest : memref<?x?xi32>) {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.tile_store {{.*}} layout<vertical> : memref<?x?xi32>, vector<[4]x[4]xi32>
  arm_sme.tile_store %tile, %dest[%idx, %idx] layout<vertical> : memref<?x?xi32>, vector<[4]x[4]xi32>
  return
}
451
452// -----
453
/// Round-trips `arm_sme.tile_store` with `layout<vertical>` for `i64` elements.
func.func @arm_sme_tile_store_ver_i64(%tile : vector<[2]x[2]xi64>, %dest : memref<?x?xi64>) {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.tile_store {{.*}} layout<vertical> : memref<?x?xi64>, vector<[2]x[2]xi64>
  arm_sme.tile_store %tile, %dest[%idx, %idx] layout<vertical> : memref<?x?xi64>, vector<[2]x[2]xi64>
  return
}
460
461// -----
462
/// Round-trips `arm_sme.tile_store` with `layout<vertical>` for `i128` elements.
func.func @arm_sme_tile_store_ver_i128(%tile : vector<[1]x[1]xi128>, %dest : memref<?x?xi128>) {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.tile_store {{.*}} layout<vertical> : memref<?x?xi128>, vector<[1]x[1]xi128>
  arm_sme.tile_store %tile, %dest[%idx, %idx] layout<vertical> : memref<?x?xi128>, vector<[1]x[1]xi128>
  return
}
469
470// -----
471
/// Round-trips `arm_sme.tile_store` with `layout<vertical>` for `f16` elements.
func.func @arm_sme_tile_store_ver_f16(%tile : vector<[8]x[8]xf16>, %dest : memref<?x?xf16>) {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.tile_store {{.*}} layout<vertical> : memref<?x?xf16>, vector<[8]x[8]xf16>
  arm_sme.tile_store %tile, %dest[%idx, %idx] layout<vertical> : memref<?x?xf16>, vector<[8]x[8]xf16>
  return
}
478
479// -----
480
/// Round-trips `arm_sme.tile_store` with `layout<vertical>` for `bf16` elements.
func.func @arm_sme_tile_store_ver_bf16(%tile : vector<[8]x[8]xbf16>, %dest : memref<?x?xbf16>) {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.tile_store {{.*}} layout<vertical> : memref<?x?xbf16>, vector<[8]x[8]xbf16>
  arm_sme.tile_store %tile, %dest[%idx, %idx] layout<vertical> : memref<?x?xbf16>, vector<[8]x[8]xbf16>
  return
}
487
488// -----
489
/// Round-trips `arm_sme.tile_store` with `layout<vertical>` for `f32` elements.
func.func @arm_sme_tile_store_ver_f32(%tile : vector<[4]x[4]xf32>, %dest : memref<?x?xf32>) {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.tile_store {{.*}} layout<vertical> : memref<?x?xf32>, vector<[4]x[4]xf32>
  arm_sme.tile_store %tile, %dest[%idx, %idx] layout<vertical> : memref<?x?xf32>, vector<[4]x[4]xf32>
  return
}
496
497// -----
498
/// Round-trips `arm_sme.tile_store` with `layout<vertical>` for `f64` elements.
func.func @arm_sme_tile_store_ver_f64(%tile : vector<[2]x[2]xf64>, %dest : memref<?x?xf64>) {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.tile_store {{.*}} layout<vertical> : memref<?x?xf64>, vector<[2]x[2]xf64>
  arm_sme.tile_store %tile, %dest[%idx, %idx] layout<vertical> : memref<?x?xf64>, vector<[2]x[2]xf64>
  return
}
505
506// -----
507
/// Round-trips a masked `arm_sme.tile_store` with `layout<vertical>`.
/// The expected-output pattern spells out the operand that follows the
/// indices, so a printer that silently drops the mask no longer passes.
func.func @arm_sme_tile_store_with_mask_ver_f32(%tile : vector<[4]x[4]xf32>, %dest : memref<?x?xf32>, %mask : vector<[4]x[4]xi1>) {
  // CHECK: arm_sme.tile_store %{{.*}}[{{.*}}], %{{.*}} layout<vertical> : memref<?x?xf32>, vector<[4]x[4]xf32>
  %c0 = arith.constant 0 : index
  arm_sme.tile_store %tile, %dest[%c0, %c0], %mask layout<vertical> : memref<?x?xf32>, vector<[4]x[4]xf32>
  return
}
514
515// -----
516
/// Layout is optional and horizontal is the default, verify it's still parsed.
/// The symbol is named `explicit_hor` (matching the equivalent tile_load
/// test) rather than reusing `@arm_sme_tile_store_ver_i8`, which belongs to
/// the vertical-layout test and mislabels this horizontal one.
func.func @arm_sme_tile_store_explicit_hor(%tile : vector<[16]x[16]xi8>, %dest : memref<?x?xi8>) {
  // CHECK: arm_sme.tile_store %{{.*}}[{{.*}}] : memref<?x?xi8>, vector<[16]x[16]xi8>
  %c0 = arith.constant 0 : index
  arm_sme.tile_store %tile, %dest[%c0, %c0] layout<horizontal> : memref<?x?xi8>, vector<[16]x[16]xi8>
  return
}
524
525//===----------------------------------------------------------------------===//
526// arm_sme.load_tile_slice
527//===----------------------------------------------------------------------===//
528
529// -----
530
/// Round-trips `arm_sme.load_tile_slice` without an explicit layout for `i8` elements.
func.func @arm_sme_load_tile_slice_hor_i8(%src : memref<?x?xi8>, %mask : vector<[16]xi1>, %tile : vector<[16]x[16]xi8>, %tile_slice_index : index) {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.load_tile_slice %{{.*}}[{{.*}}], %{{.*}}, %{{.*}} : memref<?x?xi8>, vector<[16]xi1>, vector<[16]x[16]xi8>
  %updated = arm_sme.load_tile_slice %src[%idx], %mask, %tile, %tile_slice_index : memref<?x?xi8>, vector<[16]xi1>, vector<[16]x[16]xi8>
  return
}
537
538// -----
539
/// Round-trips `arm_sme.load_tile_slice` without an explicit layout for `i16` elements.
func.func @arm_sme_load_tile_slice_hor_i16(%src : memref<?x?xi16>, %mask : vector<[8]xi1>, %tile : vector<[8]x[8]xi16>, %tile_slice_index : index) {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.load_tile_slice %{{.*}}[{{.*}}], %{{.*}}, %{{.*}} : memref<?x?xi16>, vector<[8]xi1>, vector<[8]x[8]xi16>
  %updated = arm_sme.load_tile_slice %src[%idx], %mask, %tile, %tile_slice_index : memref<?x?xi16>, vector<[8]xi1>, vector<[8]x[8]xi16>
  return
}
546
547// -----
548
/// Round-trips `arm_sme.load_tile_slice` without an explicit layout for `i32` elements.
func.func @arm_sme_load_tile_slice_hor_i32(%src : memref<?x?xi32>, %mask : vector<[4]xi1>, %tile : vector<[4]x[4]xi32>, %tile_slice_index : index) {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.load_tile_slice %{{.*}}[{{.*}}], %{{.*}}, %{{.*}} : memref<?x?xi32>, vector<[4]xi1>, vector<[4]x[4]xi32>
  %updated = arm_sme.load_tile_slice %src[%idx], %mask, %tile, %tile_slice_index : memref<?x?xi32>, vector<[4]xi1>, vector<[4]x[4]xi32>
  return
}
555
556// -----
557
/// Round-trips `arm_sme.load_tile_slice` without an explicit layout for `i64` elements.
func.func @arm_sme_load_tile_slice_hor_i64(%src : memref<?x?xi64>, %mask : vector<[2]xi1>, %tile : vector<[2]x[2]xi64>, %tile_slice_index : index) {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.load_tile_slice %{{.*}}[{{.*}}], %{{.*}}, %{{.*}} : memref<?x?xi64>, vector<[2]xi1>, vector<[2]x[2]xi64>
  %updated = arm_sme.load_tile_slice %src[%idx], %mask, %tile, %tile_slice_index : memref<?x?xi64>, vector<[2]xi1>, vector<[2]x[2]xi64>
  return
}
564
565// -----
566
/// Round-trips `arm_sme.load_tile_slice` without an explicit layout for `i128` elements.
func.func @arm_sme_load_tile_slice_hor_i128(%src : memref<?x?xi128>, %mask : vector<[1]xi1>, %tile : vector<[1]x[1]xi128>, %tile_slice_index : index) {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.load_tile_slice %{{.*}}[{{.*}}], %{{.*}}, %{{.*}} : memref<?x?xi128>, vector<[1]xi1>, vector<[1]x[1]xi128>
  %updated = arm_sme.load_tile_slice %src[%idx], %mask, %tile, %tile_slice_index : memref<?x?xi128>, vector<[1]xi1>, vector<[1]x[1]xi128>
  return
}
573
574// -----
575
/// Round-trips `arm_sme.load_tile_slice` without an explicit layout for `f16` elements.
func.func @arm_sme_load_tile_slice_hor_f16(%src : memref<?x?xf16>, %mask : vector<[8]xi1>, %tile : vector<[8]x[8]xf16>, %tile_slice_index : index) {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.load_tile_slice %{{.*}}[{{.*}}], %{{.*}}, %{{.*}} : memref<?x?xf16>, vector<[8]xi1>, vector<[8]x[8]xf16>
  %updated = arm_sme.load_tile_slice %src[%idx], %mask, %tile, %tile_slice_index : memref<?x?xf16>, vector<[8]xi1>, vector<[8]x[8]xf16>
  return
}
582
583// -----
584
/// Round-trips `arm_sme.load_tile_slice` without an explicit layout for `bf16` elements.
func.func @arm_sme_load_tile_slice_hor_bf16(%src : memref<?x?xbf16>, %mask : vector<[8]xi1>, %tile : vector<[8]x[8]xbf16>, %tile_slice_index : index) {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.load_tile_slice %{{.*}}[{{.*}}], %{{.*}}, %{{.*}} : memref<?x?xbf16>, vector<[8]xi1>, vector<[8]x[8]xbf16>
  %updated = arm_sme.load_tile_slice %src[%idx], %mask, %tile, %tile_slice_index : memref<?x?xbf16>, vector<[8]xi1>, vector<[8]x[8]xbf16>
  return
}
591
592// -----
593
/// Round-trips `arm_sme.load_tile_slice` without an explicit layout for `f32` elements.
func.func @arm_sme_load_tile_slice_hor_f32(%src : memref<?x?xf32>, %mask : vector<[4]xi1>, %tile : vector<[4]x[4]xf32>, %tile_slice_index : index) {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.load_tile_slice %{{.*}}[{{.*}}], %{{.*}}, %{{.*}} : memref<?x?xf32>, vector<[4]xi1>, vector<[4]x[4]xf32>
  %updated = arm_sme.load_tile_slice %src[%idx], %mask, %tile, %tile_slice_index : memref<?x?xf32>, vector<[4]xi1>, vector<[4]x[4]xf32>
  return
}
600
601// -----
602
/// Round-trips `arm_sme.load_tile_slice` without an explicit layout for `f64` elements.
func.func @arm_sme_load_tile_slice_hor_f64(%src : memref<?x?xf64>, %mask : vector<[2]xi1>, %tile : vector<[2]x[2]xf64>, %tile_slice_index : index) {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.load_tile_slice %{{.*}}[{{.*}}], %{{.*}}, %{{.*}} : memref<?x?xf64>, vector<[2]xi1>, vector<[2]x[2]xf64>
  %updated = arm_sme.load_tile_slice %src[%idx], %mask, %tile, %tile_slice_index : memref<?x?xf64>, vector<[2]xi1>, vector<[2]x[2]xf64>
  return
}
609
610// -----
611
/// Round-trips `arm_sme.load_tile_slice` with `layout<vertical>` for `i8` elements.
func.func @arm_sme_load_tile_slice_ver_i8(%src : memref<?x?xi8>, %mask : vector<[16]xi1>, %tile : vector<[16]x[16]xi8>, %tile_slice_index : index) {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.load_tile_slice {{.*}} layout<vertical> : memref<?x?xi8>, vector<[16]xi1>, vector<[16]x[16]xi8>
  %updated = arm_sme.load_tile_slice %src[%idx], %mask, %tile, %tile_slice_index layout<vertical> : memref<?x?xi8>, vector<[16]xi1>, vector<[16]x[16]xi8>
  return
}
618
619// -----
620
/// Round-trips `arm_sme.load_tile_slice` with `layout<vertical>` for `i16` elements.
func.func @arm_sme_load_tile_slice_ver_i16(%src : memref<?x?xi16>, %mask : vector<[8]xi1>, %tile : vector<[8]x[8]xi16>, %tile_slice_index : index) {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.load_tile_slice {{.*}} layout<vertical> : memref<?x?xi16>, vector<[8]xi1>, vector<[8]x[8]xi16>
  %updated = arm_sme.load_tile_slice %src[%idx], %mask, %tile, %tile_slice_index layout<vertical> : memref<?x?xi16>, vector<[8]xi1>, vector<[8]x[8]xi16>
  return
}
627
628// -----
629
/// Round-trips `arm_sme.load_tile_slice` with `layout<vertical>` for `i32` elements.
func.func @arm_sme_load_tile_slice_ver_i32(%src : memref<?x?xi32>, %mask : vector<[4]xi1>, %tile : vector<[4]x[4]xi32>, %tile_slice_index : index) {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.load_tile_slice {{.*}} layout<vertical> : memref<?x?xi32>, vector<[4]xi1>, vector<[4]x[4]xi32>
  %updated = arm_sme.load_tile_slice %src[%idx], %mask, %tile, %tile_slice_index layout<vertical> : memref<?x?xi32>, vector<[4]xi1>, vector<[4]x[4]xi32>
  return
}
636
637// -----
638
/// Round-trips `arm_sme.load_tile_slice` with `layout<vertical>` for `i64` elements.
func.func @arm_sme_load_tile_slice_ver_i64(%src : memref<?x?xi64>, %mask : vector<[2]xi1>, %tile : vector<[2]x[2]xi64>, %tile_slice_index : index) {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.load_tile_slice {{.*}} layout<vertical> : memref<?x?xi64>, vector<[2]xi1>, vector<[2]x[2]xi64>
  %updated = arm_sme.load_tile_slice %src[%idx], %mask, %tile, %tile_slice_index layout<vertical> : memref<?x?xi64>, vector<[2]xi1>, vector<[2]x[2]xi64>
  return
}
645
646// -----
647
/// Round-trips `arm_sme.load_tile_slice` with `layout<vertical>` for `i128` elements.
func.func @arm_sme_load_tile_slice_ver_i128(%src : memref<?x?xi128>, %mask : vector<[1]xi1>, %tile : vector<[1]x[1]xi128>, %tile_slice_index : index) {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.load_tile_slice {{.*}} layout<vertical> : memref<?x?xi128>, vector<[1]xi1>, vector<[1]x[1]xi128>
  %updated = arm_sme.load_tile_slice %src[%idx], %mask, %tile, %tile_slice_index layout<vertical> : memref<?x?xi128>, vector<[1]xi1>, vector<[1]x[1]xi128>
  return
}
654
655// -----
656
/// Round-trips `arm_sme.load_tile_slice` with `layout<vertical>` for `f16` elements.
func.func @arm_sme_load_tile_slice_ver_f16(%src : memref<?x?xf16>, %mask : vector<[8]xi1>, %tile : vector<[8]x[8]xf16>, %tile_slice_index : index) {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.load_tile_slice {{.*}} layout<vertical> : memref<?x?xf16>, vector<[8]xi1>, vector<[8]x[8]xf16>
  %updated = arm_sme.load_tile_slice %src[%idx], %mask, %tile, %tile_slice_index layout<vertical> : memref<?x?xf16>, vector<[8]xi1>, vector<[8]x[8]xf16>
  return
}
663
664// -----
665
/// Round-trips `arm_sme.load_tile_slice` with `layout<vertical>` for `bf16` elements.
func.func @arm_sme_load_tile_slice_ver_bf16(%src : memref<?x?xbf16>, %mask : vector<[8]xi1>, %tile : vector<[8]x[8]xbf16>, %tile_slice_index : index) {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.load_tile_slice {{.*}} layout<vertical> : memref<?x?xbf16>, vector<[8]xi1>, vector<[8]x[8]xbf16>
  %updated = arm_sme.load_tile_slice %src[%idx], %mask, %tile, %tile_slice_index layout<vertical> : memref<?x?xbf16>, vector<[8]xi1>, vector<[8]x[8]xbf16>
  return
}
672
673// -----
674
/// Round-trips `arm_sme.load_tile_slice` with `layout<vertical>` for `f32` elements.
func.func @arm_sme_load_tile_slice_ver_f32(%src : memref<?x?xf32>, %mask : vector<[4]xi1>, %tile : vector<[4]x[4]xf32>, %tile_slice_index : index) {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.load_tile_slice {{.*}} layout<vertical> : memref<?x?xf32>, vector<[4]xi1>, vector<[4]x[4]xf32>
  %updated = arm_sme.load_tile_slice %src[%idx], %mask, %tile, %tile_slice_index layout<vertical> : memref<?x?xf32>, vector<[4]xi1>, vector<[4]x[4]xf32>
  return
}
681
682// -----
683
/// Round-trips `arm_sme.load_tile_slice` with `layout<vertical>` for `f64` elements.
func.func @arm_sme_load_tile_slice_ver_f64(%src : memref<?x?xf64>, %mask : vector<[2]xi1>, %tile : vector<[2]x[2]xf64>, %tile_slice_index : index) {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.load_tile_slice {{.*}} layout<vertical> : memref<?x?xf64>, vector<[2]xi1>, vector<[2]x[2]xf64>
  %updated = arm_sme.load_tile_slice %src[%idx], %mask, %tile, %tile_slice_index layout<vertical> : memref<?x?xf64>, vector<[2]xi1>, vector<[2]x[2]xf64>
  return
}
690
691// -----
692
/// Layout is optional and horizontal is the default, verify it's still parsed.
/// The symbol is named `explicit_hor` (matching the equivalent tile_load
/// test) so it no longer reuses `@arm_sme_load_tile_slice_hor_i8`, the name
/// of the implicit-layout test for the same element type.
func.func @arm_sme_load_tile_slice_explicit_hor(%src : memref<?x?xi8>, %mask : vector<[16]xi1>, %tile : vector<[16]x[16]xi8>, %tile_slice_index : index) {
  // CHECK: arm_sme.load_tile_slice %{{.*}}[{{.*}}], %{{.*}}, %{{.*}} : memref<?x?xi8>, vector<[16]xi1>, vector<[16]x[16]xi8>
  %c0 = arith.constant 0 : index
  %tile_update = arm_sme.load_tile_slice %src[%c0], %mask, %tile, %tile_slice_index layout<horizontal> : memref<?x?xi8>, vector<[16]xi1>, vector<[16]x[16]xi8>
  return
}
700
701//===----------------------------------------------------------------------===//
702// arm_sme.store_tile_slice
703//===----------------------------------------------------------------------===//
704
705// -----
706
/// Round-trips `arm_sme.store_tile_slice` without an explicit layout for `i8` elements.
func.func @arm_sme_store_tile_slice_hor_i8(%tile : vector<[16]x[16]xi8>, %tile_slice_index : index, %mask : vector<[16]xi1>, %dest : memref<?x?xi8>) -> () {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.store_tile_slice {{.*}}, {{.*}}, %{{.*}}[{{.*}}] : memref<?x?xi8>, vector<[16]xi1>, vector<[16]x[16]xi8>
  arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%idx] : memref<?x?xi8>, vector<[16]xi1>, vector<[16]x[16]xi8>
  return
}
713
714// -----
715
/// Round-trips `arm_sme.store_tile_slice` without an explicit layout for `i16` elements.
func.func @arm_sme_store_tile_slice_hor_i16(%tile : vector<[8]x[8]xi16>, %tile_slice_index : index, %mask : vector<[8]xi1>, %dest : memref<?x?xi16>) -> () {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.store_tile_slice {{.*}}, {{.*}}, %{{.*}}[{{.*}}] : memref<?x?xi16>, vector<[8]xi1>, vector<[8]x[8]xi16>
  arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%idx] : memref<?x?xi16>, vector<[8]xi1>, vector<[8]x[8]xi16>
  return
}
722
723// -----
724
/// Round-trips `arm_sme.store_tile_slice` without an explicit layout for `i32` elements.
func.func @arm_sme_store_tile_slice_hor_i32(%tile : vector<[4]x[4]xi32>, %tile_slice_index : index, %mask : vector<[4]xi1>, %dest : memref<?x?xi32>) -> () {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.store_tile_slice {{.*}}, {{.*}}, %{{.*}}[{{.*}}] : memref<?x?xi32>, vector<[4]xi1>, vector<[4]x[4]xi32>
  arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%idx] : memref<?x?xi32>, vector<[4]xi1>, vector<[4]x[4]xi32>
  return
}
731
732// -----
733
/// Round-trips `arm_sme.store_tile_slice` without an explicit layout for `i64` elements.
func.func @arm_sme_store_tile_slice_hor_i64(%tile : vector<[2]x[2]xi64>, %tile_slice_index : index, %mask : vector<[2]xi1>, %dest : memref<?x?xi64>) -> () {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.store_tile_slice {{.*}}, {{.*}}, %{{.*}}[{{.*}}] : memref<?x?xi64>, vector<[2]xi1>, vector<[2]x[2]xi64>
  arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%idx] : memref<?x?xi64>, vector<[2]xi1>, vector<[2]x[2]xi64>
  return
}
740
741// -----
742
/// Round-trips `arm_sme.store_tile_slice` without an explicit layout for `i128` elements.
func.func @arm_sme_store_tile_slice_hor_i128(%tile : vector<[1]x[1]xi128>, %tile_slice_index : index, %mask : vector<[1]xi1>, %dest : memref<?x?xi128>) -> () {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.store_tile_slice {{.*}}, {{.*}}, %{{.*}}[{{.*}}] : memref<?x?xi128>, vector<[1]xi1>, vector<[1]x[1]xi128>
  arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%idx] : memref<?x?xi128>, vector<[1]xi1>, vector<[1]x[1]xi128>
  return
}
749
750// -----
751
/// Round-trips `arm_sme.store_tile_slice` without an explicit layout for `f16` elements.
func.func @arm_sme_store_tile_slice_hor_f16(%tile : vector<[8]x[8]xf16>, %tile_slice_index : index, %mask : vector<[8]xi1>, %dest : memref<?x?xf16>) -> () {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.store_tile_slice {{.*}}, {{.*}}, %{{.*}}[{{.*}}] : memref<?x?xf16>, vector<[8]xi1>, vector<[8]x[8]xf16>
  arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%idx] : memref<?x?xf16>, vector<[8]xi1>, vector<[8]x[8]xf16>
  return
}
758
759// -----
760
/// Round-trips `arm_sme.store_tile_slice` without an explicit layout for `bf16` elements.
func.func @arm_sme_store_tile_slice_hor_bf16(%tile : vector<[8]x[8]xbf16>, %tile_slice_index : index, %mask : vector<[8]xi1>, %dest : memref<?x?xbf16>) -> () {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.store_tile_slice {{.*}}, {{.*}}, %{{.*}}[{{.*}}] : memref<?x?xbf16>, vector<[8]xi1>, vector<[8]x[8]xbf16>
  arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%idx] : memref<?x?xbf16>, vector<[8]xi1>, vector<[8]x[8]xbf16>
  return
}
767
768// -----
769
/// Round-trips `arm_sme.store_tile_slice` without an explicit layout for `f32` elements.
func.func @arm_sme_store_tile_slice_hor_f32(%tile : vector<[4]x[4]xf32>, %tile_slice_index : index, %mask : vector<[4]xi1>, %dest : memref<?x?xf32>) -> () {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.store_tile_slice {{.*}}, {{.*}}, %{{.*}}[{{.*}}] : memref<?x?xf32>, vector<[4]xi1>, vector<[4]x[4]xf32>
  arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%idx] : memref<?x?xf32>, vector<[4]xi1>, vector<[4]x[4]xf32>
  return
}
776
777// -----
778
/// Round-trips `arm_sme.store_tile_slice` without an explicit layout for `f64` elements.
func.func @arm_sme_store_tile_slice_hor_f64(%tile : vector<[2]x[2]xf64>, %tile_slice_index : index, %mask : vector<[2]xi1>, %dest : memref<?x?xf64>) -> () {
  %idx = arith.constant 0 : index
  // CHECK: arm_sme.store_tile_slice {{.*}}, {{.*}}, %{{.*}}[{{.*}}] : memref<?x?xf64>, vector<[2]xi1>, vector<[2]x[2]xf64>
  arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%idx] : memref<?x?xf64>, vector<[2]xi1>, vector<[2]x[2]xf64>
  return
}
785
786// -----
787
/// Round-trip of arm_sme.store_tile_slice with layout<vertical>, i8 tile.
func.func @arm_sme_store_tile_slice_ver_i8(%tile : vector<[16]x[16]xi8>, %tile_slice_index : index, %mask : vector<[16]xi1>, %dest : memref<?x?xi8>) -> () {
  %zero = arith.constant 0 : index
  // CHECK: arm_sme.store_tile_slice {{.*}} layout<vertical> : memref<?x?xi8>, vector<[16]xi1>, vector<[16]x[16]xi8>
  arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%zero] layout<vertical> : memref<?x?xi8>, vector<[16]xi1>, vector<[16]x[16]xi8>
  return
}
794
795// -----
796
/// Round-trip of arm_sme.store_tile_slice with layout<vertical>, i16 tile.
func.func @arm_sme_store_tile_slice_ver_i16(%tile : vector<[8]x[8]xi16>, %tile_slice_index : index, %mask : vector<[8]xi1>, %dest : memref<?x?xi16>) -> () {
  %zero = arith.constant 0 : index
  // CHECK: arm_sme.store_tile_slice {{.*}} layout<vertical> : memref<?x?xi16>, vector<[8]xi1>, vector<[8]x[8]xi16>
  arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%zero] layout<vertical> : memref<?x?xi16>, vector<[8]xi1>, vector<[8]x[8]xi16>
  return
}
803
804// -----
805
/// Round-trip of arm_sme.store_tile_slice with layout<vertical>, i32 tile.
func.func @arm_sme_store_tile_slice_ver_i32(%tile : vector<[4]x[4]xi32>, %tile_slice_index : index, %mask : vector<[4]xi1>, %dest : memref<?x?xi32>) -> () {
  %zero = arith.constant 0 : index
  // CHECK: arm_sme.store_tile_slice {{.*}} layout<vertical> : memref<?x?xi32>, vector<[4]xi1>, vector<[4]x[4]xi32>
  arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%zero] layout<vertical> : memref<?x?xi32>, vector<[4]xi1>, vector<[4]x[4]xi32>
  return
}
812
813// -----
814
/// Round-trip of arm_sme.store_tile_slice with layout<vertical>, i64 tile.
func.func @arm_sme_store_tile_slice_ver_i64(%tile : vector<[2]x[2]xi64>, %tile_slice_index : index, %mask : vector<[2]xi1>, %dest : memref<?x?xi64>) -> () {
  %zero = arith.constant 0 : index
  // CHECK: arm_sme.store_tile_slice {{.*}} layout<vertical> : memref<?x?xi64>, vector<[2]xi1>, vector<[2]x[2]xi64>
  arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%zero] layout<vertical> : memref<?x?xi64>, vector<[2]xi1>, vector<[2]x[2]xi64>
  return
}
821
822// -----
823
/// Round-trip of arm_sme.store_tile_slice with layout<vertical>, i128 tile.
func.func @arm_sme_store_tile_slice_ver_i128(%tile : vector<[1]x[1]xi128>, %tile_slice_index : index, %mask : vector<[1]xi1>, %dest : memref<?x?xi128>) -> () {
  %zero = arith.constant 0 : index
  // CHECK: arm_sme.store_tile_slice {{.*}} layout<vertical> : memref<?x?xi128>, vector<[1]xi1>, vector<[1]x[1]xi128>
  arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%zero] layout<vertical> : memref<?x?xi128>, vector<[1]xi1>, vector<[1]x[1]xi128>
  return
}
830
831// -----
832
/// Round-trip of arm_sme.store_tile_slice with layout<vertical>, f16 tile.
func.func @arm_sme_store_tile_slice_ver_f16(%tile : vector<[8]x[8]xf16>, %tile_slice_index : index, %mask : vector<[8]xi1>, %dest : memref<?x?xf16>) -> () {
  %zero = arith.constant 0 : index
  // CHECK: arm_sme.store_tile_slice {{.*}} layout<vertical> : memref<?x?xf16>, vector<[8]xi1>, vector<[8]x[8]xf16>
  arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%zero] layout<vertical> : memref<?x?xf16>, vector<[8]xi1>, vector<[8]x[8]xf16>
  return
}
839
840// -----
841
/// Round-trip of arm_sme.store_tile_slice with layout<vertical>, bf16 tile.
func.func @arm_sme_store_tile_slice_ver_bf16(%tile : vector<[8]x[8]xbf16>, %tile_slice_index : index, %mask : vector<[8]xi1>, %dest : memref<?x?xbf16>) -> () {
  %zero = arith.constant 0 : index
  // CHECK: arm_sme.store_tile_slice {{.*}} layout<vertical> : memref<?x?xbf16>, vector<[8]xi1>, vector<[8]x[8]xbf16>
  arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%zero] layout<vertical> : memref<?x?xbf16>, vector<[8]xi1>, vector<[8]x[8]xbf16>
  return
}
848
849// -----
850
/// Round-trip of arm_sme.store_tile_slice with layout<vertical>, f32 tile.
func.func @arm_sme_store_tile_slice_ver_f32(%tile : vector<[4]x[4]xf32>, %tile_slice_index : index, %mask : vector<[4]xi1>, %dest : memref<?x?xf32>) -> () {
  %zero = arith.constant 0 : index
  // CHECK: arm_sme.store_tile_slice {{.*}} layout<vertical> : memref<?x?xf32>, vector<[4]xi1>, vector<[4]x[4]xf32>
  arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%zero] layout<vertical> : memref<?x?xf32>, vector<[4]xi1>, vector<[4]x[4]xf32>
  return
}
857
858// -----
859
/// Round-trip of arm_sme.store_tile_slice with layout<vertical>, f64 tile.
func.func @arm_sme_store_tile_slice_ver_f64(%tile : vector<[2]x[2]xf64>, %tile_slice_index : index, %mask : vector<[2]xi1>, %dest : memref<?x?xf64>) -> () {
  %zero = arith.constant 0 : index
  // CHECK: arm_sme.store_tile_slice {{.*}} layout<vertical> : memref<?x?xf64>, vector<[2]xi1>, vector<[2]x[2]xf64>
  arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%zero] layout<vertical> : memref<?x?xf64>, vector<[2]xi1>, vector<[2]x[2]xf64>
  return
}
866
867// -----
868
/// The layout attribute is optional and defaults to horizontal. Spelling it
/// out must still parse; the expected output carries no layout attribute.
func.func @arm_sme_store_tile_slice_hor_i8(%tile : vector<[16]x[16]xi8>, %tile_slice_index : index, %mask : vector<[16]xi1>, %dest : memref<?x?xi8>) -> () {
  %zero = arith.constant 0 : index
  // CHECK: arm_sme.store_tile_slice {{.*}}, {{.*}}, %{{.*}}[{{.*}}] : memref<?x?xi8>, vector<[16]xi1>, vector<[16]x[16]xi8>
  arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%zero] layout<horizontal> : memref<?x?xi8>, vector<[16]xi1>, vector<[16]x[16]xi8>
  return
}
876
877//===----------------------------------------------------------------------===//
878// arm_sme.insert_tile_slice
879//===----------------------------------------------------------------------===//
880
881// -----
882
/// Round-trip of arm_sme.insert_tile_slice with no layout attribute:
/// vector<[16]xi8> slice into an i8 tile. The op result is left unnamed.
func.func @arm_sme_insert_tile_slice_i8(%vector : vector<[16]xi8>, %tile : vector<[16]x[16]xi8>, %tile_slice_index : index) -> () {
  // CHECK: arm_sme.insert_tile_slice {{.*}} : vector<[16]xi8> into vector<[16]x[16]xi8>
  arm_sme.insert_tile_slice %vector, %tile[%tile_slice_index] : vector<[16]xi8> into vector<[16]x[16]xi8>
  return
}
889
890// -----
891
/// Round-trip of arm_sme.insert_tile_slice with no layout attribute:
/// vector<[8]xi16> slice into an i16 tile. The op result is left unnamed.
func.func @arm_sme_insert_tile_slice_i16(%vector : vector<[8]xi16>, %tile : vector<[8]x[8]xi16>, %tile_slice_index : index) -> () {
  // CHECK: arm_sme.insert_tile_slice {{.*}} : vector<[8]xi16> into vector<[8]x[8]xi16>
  arm_sme.insert_tile_slice %vector, %tile[%tile_slice_index] : vector<[8]xi16> into vector<[8]x[8]xi16>
  return
}
898
899// -----
900
/// Round-trip of arm_sme.insert_tile_slice with no layout attribute:
/// vector<[4]xi32> slice into an i32 tile. The op result is left unnamed.
func.func @arm_sme_insert_tile_slice_i32(%vector : vector<[4]xi32>, %tile : vector<[4]x[4]xi32>, %tile_slice_index : index) -> () {
  // CHECK: arm_sme.insert_tile_slice {{.*}} : vector<[4]xi32> into vector<[4]x[4]xi32>
  arm_sme.insert_tile_slice %vector, %tile[%tile_slice_index] : vector<[4]xi32> into vector<[4]x[4]xi32>
  return
}
907
908// -----
909
/// Round-trip of arm_sme.insert_tile_slice with no layout attribute:
/// vector<[2]xi64> slice into an i64 tile. The op result is left unnamed.
func.func @arm_sme_insert_tile_slice_i64(%vector : vector<[2]xi64>, %tile : vector<[2]x[2]xi64>, %tile_slice_index : index) -> () {
  // CHECK: arm_sme.insert_tile_slice {{.*}} : vector<[2]xi64> into vector<[2]x[2]xi64>
  arm_sme.insert_tile_slice %vector, %tile[%tile_slice_index] : vector<[2]xi64> into vector<[2]x[2]xi64>
  return
}
916
917// -----
918
/// Round-trip of arm_sme.insert_tile_slice with no layout attribute:
/// vector<[1]xi128> slice into an i128 tile. The op result is left unnamed.
func.func @arm_sme_insert_tile_slice_i128(%vector : vector<[1]xi128>, %tile : vector<[1]x[1]xi128>, %tile_slice_index : index) -> () {
  // CHECK: arm_sme.insert_tile_slice {{.*}} : vector<[1]xi128> into vector<[1]x[1]xi128>
  arm_sme.insert_tile_slice %vector, %tile[%tile_slice_index] : vector<[1]xi128> into vector<[1]x[1]xi128>
  return
}
925
926// -----
927
/// Round-trip of arm_sme.insert_tile_slice with no layout attribute:
/// vector<[8]xf16> slice into an f16 tile. The op result is left unnamed.
func.func @arm_sme_insert_tile_slice_f16(%vector : vector<[8]xf16>, %tile : vector<[8]x[8]xf16>, %tile_slice_index : index) -> () {
  // CHECK: arm_sme.insert_tile_slice {{.*}} : vector<[8]xf16> into vector<[8]x[8]xf16>
  arm_sme.insert_tile_slice %vector, %tile[%tile_slice_index] : vector<[8]xf16> into vector<[8]x[8]xf16>
  return
}
934
935// -----
936
/// Round-trip of arm_sme.insert_tile_slice with no layout attribute:
/// vector<[8]xbf16> slice into a bf16 tile. The op result is left unnamed.
func.func @arm_sme_insert_tile_slice_bf16(%vector : vector<[8]xbf16>, %tile : vector<[8]x[8]xbf16>, %tile_slice_index : index) -> () {
  // CHECK: arm_sme.insert_tile_slice {{.*}} : vector<[8]xbf16> into vector<[8]x[8]xbf16>
  arm_sme.insert_tile_slice %vector, %tile[%tile_slice_index] : vector<[8]xbf16> into vector<[8]x[8]xbf16>
  return
}
943
944// -----
945
/// Round-trip of arm_sme.insert_tile_slice with no layout attribute:
/// vector<[4]xf32> slice into an f32 tile. The op result is left unnamed.
func.func @arm_sme_insert_tile_slice_f32(%vector : vector<[4]xf32>, %tile : vector<[4]x[4]xf32>, %tile_slice_index : index) -> () {
  // CHECK: arm_sme.insert_tile_slice {{.*}} : vector<[4]xf32> into vector<[4]x[4]xf32>
  arm_sme.insert_tile_slice %vector, %tile[%tile_slice_index] : vector<[4]xf32> into vector<[4]x[4]xf32>
  return
}
952
953// -----
954
/// Round-trip of arm_sme.insert_tile_slice with no layout attribute:
/// vector<[2]xf64> slice into an f64 tile. The op result is left unnamed.
func.func @arm_sme_insert_tile_slice_f64(%vector : vector<[2]xf64>, %tile : vector<[2]x[2]xf64>, %tile_slice_index : index) -> () {
  // CHECK: arm_sme.insert_tile_slice {{.*}} : vector<[2]xf64> into vector<[2]x[2]xf64>
  arm_sme.insert_tile_slice %vector, %tile[%tile_slice_index] : vector<[2]xf64> into vector<[2]x[2]xf64>
  return
}
961
962// -----
963
/// Round-trip of arm_sme.insert_tile_slice with layout<vertical>:
/// vector<[16]xi8> slice into an i8 tile. The op result is left unnamed.
func.func @arm_sme_insert_tile_slice_ver_i8(%vector : vector<[16]xi8>, %tile : vector<[16]x[16]xi8>, %tile_slice_index : index) -> () {
  // CHECK: arm_sme.insert_tile_slice {{.*}} layout<vertical> : vector<[16]xi8> into vector<[16]x[16]xi8>
  arm_sme.insert_tile_slice %vector, %tile[%tile_slice_index] layout<vertical> : vector<[16]xi8> into vector<[16]x[16]xi8>
  return
}
970
971//===----------------------------------------------------------------------===//
972// arm_sme.extract_tile_slice
973//===----------------------------------------------------------------------===//
974
975// -----
976
/// Round-trip of arm_sme.extract_tile_slice with no layout attribute, i8 tile.
func.func @arm_sme_extract_tile_slice_i8(%tile : vector<[16]x[16]xi8>, %tile_slice_index : index) -> vector<[16]xi8> {
  // CHECK: arm_sme.extract_tile_slice {{.*}} : vector<[16]xi8> from vector<[16]x[16]xi8>
  %extracted = arm_sme.extract_tile_slice %tile[%tile_slice_index] : vector<[16]xi8> from vector<[16]x[16]xi8>
  return %extracted : vector<[16]xi8>
}
982
983// -----
984
/// Round-trip of arm_sme.extract_tile_slice with no layout attribute, i16 tile.
func.func @arm_sme_extract_tile_slice_i16(%tile : vector<[8]x[8]xi16>, %tile_slice_index : index) -> vector<[8]xi16> {
  // CHECK: arm_sme.extract_tile_slice {{.*}} : vector<[8]xi16> from vector<[8]x[8]xi16>
  %extracted = arm_sme.extract_tile_slice %tile[%tile_slice_index] : vector<[8]xi16> from vector<[8]x[8]xi16>
  return %extracted : vector<[8]xi16>
}
990
991// -----
992
/// Round-trip of arm_sme.extract_tile_slice with no layout attribute, i32 tile.
func.func @arm_sme_extract_tile_slice_i32(%tile : vector<[4]x[4]xi32>, %tile_slice_index : index) -> vector<[4]xi32> {
  // CHECK: arm_sme.extract_tile_slice {{.*}} : vector<[4]xi32> from vector<[4]x[4]xi32>
  %extracted = arm_sme.extract_tile_slice %tile[%tile_slice_index] : vector<[4]xi32> from vector<[4]x[4]xi32>
  return %extracted : vector<[4]xi32>
}
998
999// -----
1000
/// Round-trip of arm_sme.extract_tile_slice with no layout attribute, i64 tile.
func.func @arm_sme_extract_tile_slice_i64(%tile : vector<[2]x[2]xi64>, %tile_slice_index : index) -> vector<[2]xi64> {
  // CHECK: arm_sme.extract_tile_slice {{.*}} : vector<[2]xi64> from vector<[2]x[2]xi64>
  %extracted = arm_sme.extract_tile_slice %tile[%tile_slice_index] : vector<[2]xi64> from vector<[2]x[2]xi64>
  return %extracted : vector<[2]xi64>
}
1006
1007// -----
1008
/// Round-trip of arm_sme.extract_tile_slice with no layout attribute, i128 tile.
func.func @arm_sme_extract_tile_slice_i128(%tile : vector<[1]x[1]xi128>, %tile_slice_index : index) -> vector<[1]xi128> {
  // CHECK: arm_sme.extract_tile_slice {{.*}} : vector<[1]xi128> from vector<[1]x[1]xi128>
  %extracted = arm_sme.extract_tile_slice %tile[%tile_slice_index] : vector<[1]xi128> from vector<[1]x[1]xi128>
  return %extracted : vector<[1]xi128>
}
1014
1015// -----
1016
/// Round-trip of arm_sme.extract_tile_slice with no layout attribute, f16 tile.
func.func @arm_sme_extract_tile_slice_f16(%tile : vector<[8]x[8]xf16>, %tile_slice_index : index) -> vector<[8]xf16> {
  // CHECK: arm_sme.extract_tile_slice {{.*}} : vector<[8]xf16> from vector<[8]x[8]xf16>
  %extracted = arm_sme.extract_tile_slice %tile[%tile_slice_index] : vector<[8]xf16> from vector<[8]x[8]xf16>
  return %extracted : vector<[8]xf16>
}
1022
1023// -----
1024
/// Round-trip of arm_sme.extract_tile_slice with no layout attribute, bf16 tile.
func.func @arm_sme_extract_tile_slice_bf16(%tile : vector<[8]x[8]xbf16>, %tile_slice_index : index) -> vector<[8]xbf16> {
  // CHECK: arm_sme.extract_tile_slice {{.*}} : vector<[8]xbf16> from vector<[8]x[8]xbf16>
  %extracted = arm_sme.extract_tile_slice %tile[%tile_slice_index] : vector<[8]xbf16> from vector<[8]x[8]xbf16>
  return %extracted : vector<[8]xbf16>
}
1030
1031// -----
1032
/// Round-trip of arm_sme.extract_tile_slice with no layout attribute, f32 tile.
func.func @arm_sme_extract_tile_slice_f32(%tile : vector<[4]x[4]xf32>, %tile_slice_index : index) -> vector<[4]xf32> {
  // CHECK: arm_sme.extract_tile_slice {{.*}} : vector<[4]xf32> from vector<[4]x[4]xf32>
  %extracted = arm_sme.extract_tile_slice %tile[%tile_slice_index] : vector<[4]xf32> from vector<[4]x[4]xf32>
  return %extracted : vector<[4]xf32>
}
1038
1039// -----
1040
/// Round-trip of arm_sme.extract_tile_slice with no layout attribute, f64 tile.
func.func @arm_sme_extract_tile_slice_f64(%tile : vector<[2]x[2]xf64>, %tile_slice_index : index) -> vector<[2]xf64> {
  // CHECK: arm_sme.extract_tile_slice {{.*}} : vector<[2]xf64> from vector<[2]x[2]xf64>
  %extracted = arm_sme.extract_tile_slice %tile[%tile_slice_index] : vector<[2]xf64> from vector<[2]x[2]xf64>
  return %extracted : vector<[2]xf64>
}
1046
1047// -----
1048
/// Round-trip of arm_sme.extract_tile_slice with layout<vertical>, f64 tile.
func.func @arm_sme_extract_tile_slice_ver_f64(%tile : vector<[2]x[2]xf64>, %tile_slice_index : index) -> vector<[2]xf64> {
  // CHECK: arm_sme.extract_tile_slice {{.*}} layout<vertical> : vector<[2]xf64> from vector<[2]x[2]xf64>
  %extracted = arm_sme.extract_tile_slice %tile[%tile_slice_index] layout<vertical> : vector<[2]xf64> from vector<[2]x[2]xf64>
  return %extracted : vector<[2]xf64>
}
1054
1055//===----------------------------------------------------------------------===//
1056// arm_sme.outerproduct
1057//===----------------------------------------------------------------------===//
1058
1059// -----
1060
/// Round-trip of arm_sme.outerproduct with only the two vector operands (i16).
func.func @arm_sme_outerproduct(%vecA: vector<[8]xi16>, %vecB: vector<[8]xi16>) -> vector<[8]x[8]xi16> {
  // CHECK: arm_sme.outerproduct {{.*}}, {{.*}} : vector<[8]xi16>, vector<[8]xi16>
  %product = arm_sme.outerproduct %vecA, %vecB : vector<[8]xi16>, vector<[8]xi16>
  return %product : vector<[8]x[8]xi16>
}
1066
1067// -----
1068
/// Round-trip of arm_sme.outerproduct with the optional masks(...) clause (f32).
func.func @arm_sme_outerproduct_with_masking(%vecA: vector<[4]xf32>, %vecB: vector<[4]xf32>, %maskA: vector<[4]xi1>, %maskB: vector<[4]xi1>) -> vector<[4]x[4]xf32> {
  // CHECK: arm_sme.outerproduct {{.*}}, {{.*}} masks({{.*}}, {{.*}}) : vector<[4]xf32>, vector<[4]xf32>
  %product = arm_sme.outerproduct %vecA, %vecB masks(%maskA, %maskB) : vector<[4]xf32>, vector<[4]xf32>
  return %product : vector<[4]x[4]xf32>
}
1074
1075// -----
1076
/// Round-trip of arm_sme.outerproduct with the optional acc(...) clause (i64).
func.func @arm_sme_outerproduct_with_acc(%vecA: vector<[2]xi64>, %vecB: vector<[2]xi64>, %acc: vector<[2]x[2]xi64>) -> vector<[2]x[2]xi64> {
  // CHECK: arm_sme.outerproduct {{.*}}, {{.*}} acc({{.*}}) : vector<[2]xi64>, vector<[2]xi64>
  %product = arm_sme.outerproduct %vecA, %vecB acc(%acc) : vector<[2]xi64>, vector<[2]xi64>
  return %product : vector<[2]x[2]xi64>
}
1082
1083// -----
1084
/// Round-trip of arm_sme.outerproduct with an explicit kind<sub> attribute (f64).
func.func @arm_sme_outerproduct_with_kind(%vecA: vector<[2]xf64>, %vecB: vector<[2]xf64>) -> vector<[2]x[2]xf64> {
  // CHECK: arm_sme.outerproduct {{.*}}, {{.*}} kind<sub> : vector<[2]xf64>, vector<[2]xf64>
  %product = arm_sme.outerproduct %vecA, %vecB kind<sub> : vector<[2]xf64>, vector<[2]xf64>
  return %product : vector<[2]x[2]xf64>
}
1090
1091// -----
1092
/// Round-trip of arm_sme.outerproduct with kind<sub>, acc(...) and masks(...)
/// all present at once (i8).
func.func @arm_sme_outerproduct_with_everything(%vecA: vector<[16]xi8>, %vecB: vector<[16]xi8>, %acc: vector<[16]x[16]xi8>, %maskA: vector<[16]xi1>, %maskB: vector<[16]xi1>) -> vector<[16]x[16]xi8> {
  // CHECK: arm_sme.outerproduct {{.*}}, {{.*}} kind<sub> acc({{.*}}) masks({{.*}}, {{.*}}) : vector<[16]xi8>, vector<[16]xi8>
  %product = arm_sme.outerproduct %vecA, %vecB kind<sub> acc(%acc) masks(%maskA, %maskB) : vector<[16]xi8>, vector<[16]xi8>
  return %product : vector<[16]x[16]xi8>
}
1098
1099//===----------------------------------------------------------------------===//
1100// arm_sme.streaming_vl
1101//===----------------------------------------------------------------------===//
1102
1103// -----
1104
/// Round-trip of arm_sme.streaming_vl with the <byte> type-size attribute.
func.func @arm_sme_streaming_vl_bytes() -> index {
  // CHECK: arm_sme.streaming_vl <byte>
  %vl = arm_sme.streaming_vl <byte>
  return %vl : index
}
1110
1111// -----
1112
/// Round-trip of arm_sme.streaming_vl with the <half> type-size attribute.
func.func @arm_sme_streaming_vl_half_words() -> index {
  // CHECK: arm_sme.streaming_vl <half>
  %vl = arm_sme.streaming_vl <half>
  return %vl : index
}
1118
1119// -----
1120
/// Round-trip of arm_sme.streaming_vl with the <word> type-size attribute.
func.func @arm_sme_streaming_vl_words() -> index {
  // CHECK: arm_sme.streaming_vl <word>
  %vl = arm_sme.streaming_vl <word>
  return %vl : index
}
1126
1127// -----
1128
/// Round-trip of arm_sme.streaming_vl with the <double> type-size attribute.
func.func @arm_sme_streaming_vl_double_words() -> index {
  // CHECK: arm_sme.streaming_vl <double>
  %vl = arm_sme.streaming_vl <double>
  return %vl : index
}
1134
1135//===----------------------------------------------------------------------===//
1136// arm_sme.fmopa_2way
1137//===----------------------------------------------------------------------===//
1138
1139// -----
1140
/// Round-trip of arm_sme.fmopa_2way: two vector<[8]xf16> operands into an f32 tile.
func.func @arm_sme_fmopa_2way_f16f16_to_f32(%vecA: vector<[8]xf16>, %vecB: vector<[8]xf16>) -> vector<[4]x[4]xf32> {
  // CHECK: arm_sme.fmopa_2way {{.*}}, {{.*}} : vector<[8]xf16>, vector<[8]xf16> into vector<[4]x[4]xf32>
  %out = arm_sme.fmopa_2way %vecA, %vecB : vector<[8]xf16>, vector<[8]xf16> into vector<[4]x[4]xf32>
  return %out : vector<[4]x[4]xf32>
}
1146
1147// -----
1148
/// Round-trip of arm_sme.fmopa_2way: two vector<[8]xbf16> operands into an f32 tile.
func.func @arm_sme_fmopa_2way_bf16bf16_to_f32(%vecA: vector<[8]xbf16>, %vecB: vector<[8]xbf16>) -> vector<[4]x[4]xf32> {
  // CHECK: arm_sme.fmopa_2way {{.*}}, {{.*}} : vector<[8]xbf16>, vector<[8]xbf16> into vector<[4]x[4]xf32>
  %out = arm_sme.fmopa_2way %vecA, %vecB : vector<[8]xbf16>, vector<[8]xbf16> into vector<[4]x[4]xf32>
  return %out : vector<[4]x[4]xf32>
}
1154
1155// -----
1156
/// Round-trip of arm_sme.fmopa_2way with the optional masks(...) clause.
func.func @arm_sme_fmopa_2way_with_masking(%vecA: vector<[8]xf16>, %vecB: vector<[8]xf16>, %maskA: vector<[8]xi1>, %maskB: vector<[8]xi1>) -> vector<[4]x[4]xf32> {
  // CHECK: arm_sme.fmopa_2way {{.*}}, {{.*}} masks({{.*}}, {{.*}}) : vector<[8]xf16>, vector<[8]xf16> into vector<[4]x[4]xf32>
  %out = arm_sme.fmopa_2way %vecA, %vecB masks(%maskA, %maskB) : vector<[8]xf16>, vector<[8]xf16> into vector<[4]x[4]xf32>
  return %out : vector<[4]x[4]xf32>
}
1162
1163// -----
1164
/// Round-trip of arm_sme.fmopa_2way with the optional acc(...) clause.
func.func @arm_sme_fmopa_2way_with_acc(%vecA: vector<[8]xf16>, %vecB: vector<[8]xf16>, %acc : vector<[4]x[4]xf32>) -> vector<[4]x[4]xf32> {
  // CHECK: arm_sme.fmopa_2way {{.*}}, {{.*}} acc({{.*}}) : vector<[8]xf16>, vector<[8]xf16> into vector<[4]x[4]xf32>
  %out = arm_sme.fmopa_2way %vecA, %vecB acc(%acc) : vector<[8]xf16>, vector<[8]xf16> into vector<[4]x[4]xf32>
  return %out : vector<[4]x[4]xf32>
}
1170
1171// -----
1172
/// Round-trip of arm_sme.fmopa_2way with both acc(...) and masks(...) present.
func.func @arm_sme_fmopa_2way_with_everything(%vecA: vector<[8]xf16>, %vecB: vector<[8]xf16>, %acc : vector<[4]x[4]xf32>, %maskA: vector<[8]xi1>, %maskB: vector<[8]xi1>) -> vector<[4]x[4]xf32> {
  // CHECK: arm_sme.fmopa_2way {{.*}}, {{.*}} acc({{.*}}) masks({{.*}}, {{.*}}) : vector<[8]xf16>, vector<[8]xf16> into vector<[4]x[4]xf32>
  %out = arm_sme.fmopa_2way %vecA, %vecB acc(%acc) masks(%maskA, %maskB) : vector<[8]xf16>, vector<[8]xf16> into vector<[4]x[4]xf32>
  return %out : vector<[4]x[4]xf32>
}
1178
1179//===----------------------------------------------------------------------===//
1180// arm_sme.fmops_2way
1181//===----------------------------------------------------------------------===//
1182
1183// -----
1184
/// Round-trip of arm_sme.fmops_2way: two vector<[8]xf16> operands into an f32 tile.
func.func @arm_sme_fmops_2way_f16f16_to_f32(%vecA: vector<[8]xf16>, %vecB: vector<[8]xf16>) -> vector<[4]x[4]xf32> {
  // CHECK: arm_sme.fmops_2way {{.*}}, {{.*}} : vector<[8]xf16>, vector<[8]xf16> into vector<[4]x[4]xf32>
  %out = arm_sme.fmops_2way %vecA, %vecB : vector<[8]xf16>, vector<[8]xf16> into vector<[4]x[4]xf32>
  return %out : vector<[4]x[4]xf32>
}
1190
1191// -----
1192
/// Round-trip of arm_sme.fmops_2way: two vector<[8]xbf16> operands into an f32 tile.
func.func @arm_sme_fmops_2way_bf16bf16_to_f32(%vecA: vector<[8]xbf16>, %vecB: vector<[8]xbf16>) -> vector<[4]x[4]xf32> {
  // CHECK: arm_sme.fmops_2way {{.*}}, {{.*}} : vector<[8]xbf16>, vector<[8]xbf16> into vector<[4]x[4]xf32>
  %out = arm_sme.fmops_2way %vecA, %vecB : vector<[8]xbf16>, vector<[8]xbf16> into vector<[4]x[4]xf32>
  return %out : vector<[4]x[4]xf32>
}
1198
1199//===----------------------------------------------------------------------===//
1200// arm_sme.smopa_2way
1201//===----------------------------------------------------------------------===//
1202
1203// -----
1204
/// Round-trip of arm_sme.smopa_2way: two vector<[8]xi16> operands into an i32 tile.
func.func @arm_sme_smopa_2way_i16i16_to_i32(%vecA: vector<[8]xi16>, %vecB: vector<[8]xi16>) -> vector<[4]x[4]xi32> {
  // CHECK: arm_sme.smopa_2way {{.*}}, {{.*}} : vector<[8]xi16>, vector<[8]xi16> into vector<[4]x[4]xi32>
  %out = arm_sme.smopa_2way %vecA, %vecB : vector<[8]xi16>, vector<[8]xi16> into vector<[4]x[4]xi32>
  return %out : vector<[4]x[4]xi32>
}
1210
1211//===----------------------------------------------------------------------===//
1212// arm_sme.smops_2way
1213//===----------------------------------------------------------------------===//
1214
1215// -----
1216
/// Round-trip of arm_sme.smops_2way: two vector<[8]xi16> operands into an i32 tile.
func.func @arm_sme_smops_2way_i16i16_to_i32(%vecA: vector<[8]xi16>, %vecB: vector<[8]xi16>) -> vector<[4]x[4]xi32> {
  // CHECK: arm_sme.smops_2way {{.*}}, {{.*}} : vector<[8]xi16>, vector<[8]xi16> into vector<[4]x[4]xi32>
  %out = arm_sme.smops_2way %vecA, %vecB : vector<[8]xi16>, vector<[8]xi16> into vector<[4]x[4]xi32>
  return %out : vector<[4]x[4]xi32>
}
1222
1223//===----------------------------------------------------------------------===//
1224// arm_sme.umopa_2way
1225//===----------------------------------------------------------------------===//
1226
1227// -----
1228
/// Round-trip of arm_sme.umopa_2way: two vector<[8]xi16> operands into an i32 tile.
func.func @arm_sme_umopa_2way_i16i16_to_i32(%vecA: vector<[8]xi16>, %vecB: vector<[8]xi16>) -> vector<[4]x[4]xi32> {
  // CHECK: arm_sme.umopa_2way {{.*}}, {{.*}} : vector<[8]xi16>, vector<[8]xi16> into vector<[4]x[4]xi32>
  %out = arm_sme.umopa_2way %vecA, %vecB : vector<[8]xi16>, vector<[8]xi16> into vector<[4]x[4]xi32>
  return %out : vector<[4]x[4]xi32>
}
1234
1235//===----------------------------------------------------------------------===//
1236// arm_sme.umops_2way
1237//===----------------------------------------------------------------------===//
1238
1239// -----
1240
/// Round-trip of arm_sme.umops_2way: two vector<[8]xi16> operands into an i32 tile.
func.func @arm_sme_umops_2way_i16i16_to_i32(%vecA: vector<[8]xi16>, %vecB: vector<[8]xi16>) -> vector<[4]x[4]xi32> {
  // CHECK: arm_sme.umops_2way {{.*}}, {{.*}} : vector<[8]xi16>, vector<[8]xi16> into vector<[4]x[4]xi32>
  %out = arm_sme.umops_2way %vecA, %vecB : vector<[8]xi16>, vector<[8]xi16> into vector<[4]x[4]xi32>
  return %out : vector<[4]x[4]xi32>
}
1246
1247//===----------------------------------------------------------------------===//
1248// arm_sme.smopa_4way
1249//===----------------------------------------------------------------------===//
1250
1251// -----
1252
/// Round-trip of arm_sme.smopa_4way: two vector<[16]xi8> operands into an i32 tile.
func.func @arm_sme_smopa_4way_i8i8_to_i32(%vecA: vector<[16]xi8>, %vecB: vector<[16]xi8>) -> vector<[4]x[4]xi32> {
  // CHECK: arm_sme.smopa_4way {{.*}}, {{.*}} : vector<[16]xi8>, vector<[16]xi8> into vector<[4]x[4]xi32>
  %out = arm_sme.smopa_4way %vecA, %vecB : vector<[16]xi8>, vector<[16]xi8> into vector<[4]x[4]xi32>
  return %out : vector<[4]x[4]xi32>
}
1258
1259// -----
1260
/// Round-trip of arm_sme.smopa_4way: two vector<[8]xi16> operands into an i64 tile.
func.func @arm_sme_smopa_4way_i16i16_to_i64(%vecA: vector<[8]xi16>, %vecB: vector<[8]xi16>) -> vector<[2]x[2]xi64> {
  // CHECK: arm_sme.smopa_4way {{.*}}, {{.*}} : vector<[8]xi16>, vector<[8]xi16> into vector<[2]x[2]xi64>
  %out = arm_sme.smopa_4way %vecA, %vecB : vector<[8]xi16>, vector<[8]xi16> into vector<[2]x[2]xi64>
  return %out : vector<[2]x[2]xi64>
}
1266
1267//===----------------------------------------------------------------------===//
1268// arm_sme.smops_4way
1269//===----------------------------------------------------------------------===//
1270
1271// -----
1272
/// Round-trip of arm_sme.smops_4way: two vector<[16]xi8> operands into an i32 tile.
func.func @arm_sme_smops_4way_i8i8_to_i32(%vecA: vector<[16]xi8>, %vecB: vector<[16]xi8>) -> vector<[4]x[4]xi32> {
  // CHECK: arm_sme.smops_4way {{.*}}, {{.*}} : vector<[16]xi8>, vector<[16]xi8> into vector<[4]x[4]xi32>
  %out = arm_sme.smops_4way %vecA, %vecB : vector<[16]xi8>, vector<[16]xi8> into vector<[4]x[4]xi32>
  return %out : vector<[4]x[4]xi32>
}
1278
1279// -----
1280
/// Round-trip of arm_sme.smops_4way: two vector<[8]xi16> operands into an i64 tile.
func.func @arm_sme_smops_4way_i16i16_to_i64(%vecA: vector<[8]xi16>, %vecB: vector<[8]xi16>) -> vector<[2]x[2]xi64> {
  // CHECK: arm_sme.smops_4way {{.*}}, {{.*}} : vector<[8]xi16>, vector<[8]xi16> into vector<[2]x[2]xi64>
  %out = arm_sme.smops_4way %vecA, %vecB : vector<[8]xi16>, vector<[8]xi16> into vector<[2]x[2]xi64>
  return %out : vector<[2]x[2]xi64>
}
1286
1287//===----------------------------------------------------------------------===//
1288// arm_sme.umopa_4way
1289//===----------------------------------------------------------------------===//
1290
1291// -----
1292
/// Round-trip of arm_sme.umopa_4way: two vector<[16]xi8> operands into an i32 tile.
func.func @arm_sme_umopa_4way_i8i8_to_i32(%vecA: vector<[16]xi8>, %vecB: vector<[16]xi8>) -> vector<[4]x[4]xi32> {
  // CHECK: arm_sme.umopa_4way {{.*}}, {{.*}} : vector<[16]xi8>, vector<[16]xi8> into vector<[4]x[4]xi32>
  %out = arm_sme.umopa_4way %vecA, %vecB : vector<[16]xi8>, vector<[16]xi8> into vector<[4]x[4]xi32>
  return %out : vector<[4]x[4]xi32>
}
1298
1299// -----
1300
/// Round-trip of arm_sme.umopa_4way: two vector<[8]xi16> operands into an i64 tile.
func.func @arm_sme_umopa_4way_i16i16_to_i64(%vecA: vector<[8]xi16>, %vecB: vector<[8]xi16>) -> vector<[2]x[2]xi64> {
  // CHECK: arm_sme.umopa_4way {{.*}}, {{.*}} : vector<[8]xi16>, vector<[8]xi16> into vector<[2]x[2]xi64>
  %out = arm_sme.umopa_4way %vecA, %vecB : vector<[8]xi16>, vector<[8]xi16> into vector<[2]x[2]xi64>
  return %out : vector<[2]x[2]xi64>
}
1306
1307//===----------------------------------------------------------------------===//
1308// arm_sme.umops_4way
1309//===----------------------------------------------------------------------===//
1310
1311// -----
1312
/// Round-trip of arm_sme.umops_4way: two vector<[16]xi8> operands into an i32 tile.
func.func @arm_sme_umops_4way_i8i8_to_i32(%vecA: vector<[16]xi8>, %vecB: vector<[16]xi8>) -> vector<[4]x[4]xi32> {
  // CHECK: arm_sme.umops_4way {{.*}}, {{.*}} : vector<[16]xi8>, vector<[16]xi8> into vector<[4]x[4]xi32>
  %out = arm_sme.umops_4way %vecA, %vecB : vector<[16]xi8>, vector<[16]xi8> into vector<[4]x[4]xi32>
  return %out : vector<[4]x[4]xi32>
}
1318
1319// -----
1320
/// Round-trip of arm_sme.umops_4way: two vector<[8]xi16> operands into an i64 tile.
func.func @arm_sme_umops_4way_i16i16_to_i64(%vecA: vector<[8]xi16>, %vecB: vector<[8]xi16>) -> vector<[2]x[2]xi64> {
  // CHECK: arm_sme.umops_4way {{.*}}, {{.*}} : vector<[8]xi16>, vector<[8]xi16> into vector<[2]x[2]xi64>
  %out = arm_sme.umops_4way %vecA, %vecB : vector<[8]xi16>, vector<[8]xi16> into vector<[2]x[2]xi64>
  return %out : vector<[2]x[2]xi64>
}
1326
1327//===----------------------------------------------------------------------===//
1328// arm_sme.sumopa_4way
1329//===----------------------------------------------------------------------===//
1330
1331// -----
1332
/// Round-trip of arm_sme.sumopa_4way: two vector<[16]xi8> operands into an i32 tile.
func.func @arm_sme_sumopa_4way_i8i8_to_i32(%vecA: vector<[16]xi8>, %vecB: vector<[16]xi8>) -> vector<[4]x[4]xi32> {
  // CHECK: arm_sme.sumopa_4way {{.*}}, {{.*}} : vector<[16]xi8>, vector<[16]xi8> into vector<[4]x[4]xi32>
  %out = arm_sme.sumopa_4way %vecA, %vecB : vector<[16]xi8>, vector<[16]xi8> into vector<[4]x[4]xi32>
  return %out : vector<[4]x[4]xi32>
}
1338
1339// -----
1340
/// Round-trip of arm_sme.sumopa_4way: two vector<[8]xi16> operands into an i64 tile.
func.func @arm_sme_sumopa_4way_i16i16_to_i64(%vecA: vector<[8]xi16>, %vecB: vector<[8]xi16>) -> vector<[2]x[2]xi64> {
  // CHECK: arm_sme.sumopa_4way {{.*}}, {{.*}} : vector<[8]xi16>, vector<[8]xi16> into vector<[2]x[2]xi64>
  %out = arm_sme.sumopa_4way %vecA, %vecB : vector<[8]xi16>, vector<[8]xi16> into vector<[2]x[2]xi64>
  return %out : vector<[2]x[2]xi64>
}
1346
1347//===----------------------------------------------------------------------===//
1348// arm_sme.sumops_4way
1349//===----------------------------------------------------------------------===//
1350
1351// -----
1352
/// Round-trip of arm_sme.sumops_4way: two vector<[16]xi8> operands into an i32 tile.
func.func @arm_sme_sumops_4way_i8i8_to_i32(%vecA: vector<[16]xi8>, %vecB: vector<[16]xi8>) -> vector<[4]x[4]xi32> {
  // CHECK: arm_sme.sumops_4way {{.*}}, {{.*}} : vector<[16]xi8>, vector<[16]xi8> into vector<[4]x[4]xi32>
  %out = arm_sme.sumops_4way %vecA, %vecB : vector<[16]xi8>, vector<[16]xi8> into vector<[4]x[4]xi32>
  return %out : vector<[4]x[4]xi32>
}
1358
1359// -----
1360
/// Round-trip of arm_sme.sumops_4way: two vector<[8]xi16> operands into an i64 tile.
func.func @arm_sme_sumops_4way_i16i16_to_i64(%vecA: vector<[8]xi16>, %vecB: vector<[8]xi16>) -> vector<[2]x[2]xi64> {
  // CHECK: arm_sme.sumops_4way {{.*}}, {{.*}} : vector<[8]xi16>, vector<[8]xi16> into vector<[2]x[2]xi64>
  %out = arm_sme.sumops_4way %vecA, %vecB : vector<[8]xi16>, vector<[8]xi16> into vector<[2]x[2]xi64>
  return %out : vector<[2]x[2]xi64>
}
1366
1367//===----------------------------------------------------------------------===//
1368// arm_sme.usmopa_4way
1369//===----------------------------------------------------------------------===//
1370
1371// -----
1372
/// Round-trip of arm_sme.usmopa_4way: two vector<[16]xi8> operands into an i32 tile.
func.func @arm_sme_usmopa_4way_i8i8_to_i32(%vecA: vector<[16]xi8>, %vecB: vector<[16]xi8>) -> vector<[4]x[4]xi32> {
  // CHECK: arm_sme.usmopa_4way {{.*}}, {{.*}} : vector<[16]xi8>, vector<[16]xi8> into vector<[4]x[4]xi32>
  %result = arm_sme.usmopa_4way %vecA, %vecB : vector<[16]xi8>, vector<[16]xi8> into vector<[4]x[4]xi32>
  return %result : vector<[4]x[4]xi32>
}
1378
1379// -----
1380
/// Round-trip of arm_sme.usmopa_4way: two vector<[8]xi16> operands into an i64 tile.
func.func @arm_sme_usmopa_4way_i16i16_to_i64(%vecA: vector<[8]xi16>, %vecB: vector<[8]xi16>) -> vector<[2]x[2]xi64> {
  // CHECK: arm_sme.usmopa_4way {{.*}}, {{.*}} : vector<[8]xi16>, vector<[8]xi16> into vector<[2]x[2]xi64>
  %result = arm_sme.usmopa_4way %vecA, %vecB : vector<[8]xi16>, vector<[8]xi16> into vector<[2]x[2]xi64>
  return %result : vector<[2]x[2]xi64>
}
1386
1387//===----------------------------------------------------------------------===//
1388// arm_sme.usmops_4way
1389//===----------------------------------------------------------------------===//
1390
1391// -----
1392
/// Round-trip of arm_sme.usmops_4way: two vector<[16]xi8> operands into an i32 tile.
func.func @arm_sme_usmops_4way_i8i8_to_i32(%vecA: vector<[16]xi8>, %vecB: vector<[16]xi8>) -> vector<[4]x[4]xi32> {
  // CHECK: arm_sme.usmops_4way {{.*}}, {{.*}} : vector<[16]xi8>, vector<[16]xi8> into vector<[4]x[4]xi32>
  %result = arm_sme.usmops_4way %vecA, %vecB : vector<[16]xi8>, vector<[16]xi8> into vector<[4]x[4]xi32>
  return %result : vector<[4]x[4]xi32>
}
1398
1399// -----
1400
/// Round-trip of arm_sme.usmops_4way: two vector<[8]xi16> operands into an i64 tile.
func.func @arm_sme_usmops_4way_i16i16_to_i64(%vecA: vector<[8]xi16>, %vecB: vector<[8]xi16>) -> vector<[2]x[2]xi64> {
  // CHECK: arm_sme.usmops_4way {{.*}}, {{.*}} : vector<[8]xi16>, vector<[8]xi16> into vector<[2]x[2]xi64>
  %result = arm_sme.usmops_4way %vecA, %vecB : vector<[8]xi16>, vector<[8]xi16> into vector<[2]x[2]xi64>
  return %result : vector<[2]x[2]xi64>
}
1406
1407//===----------------------------------------------------------------------===//
1408// arm_sme.copy_tile
1409//===----------------------------------------------------------------------===//
1410
/// Round-trip of arm_sme.copy_tile on an f32 tile (vector<[4]x[4]xf32>).
func.func @arm_sme_copy_tile(%vec: vector<[4]x[4]xf32>) -> vector<[4]x[4]xf32> {
  // CHECK: arm_sme.copy_tile {{.*}} : vector<[4]x[4]xf32>
  %result = arm_sme.copy_tile %vec : vector<[4]x[4]xf32>
  return %result : vector<[4]x[4]xf32>
}
1415