// RUN: mlir-opt -split-input-file -verify-diagnostics %s | mlir-opt | FileCheck %s

//===----------------------------------------------------------------------===//
// arm_sme.get_tile
//===----------------------------------------------------------------------===//

/// One case per tile element type. Each case expects the op to be printed
/// back with the same scalable 2-D vector type it was written with.

func.func @arm_sme_get_tile_i8() {
  // CHECK: arm_sme.get_tile : vector<[16]x[16]xi8>
  %tile = arm_sme.get_tile : vector<[16]x[16]xi8>
  return
}

// -----

func.func @arm_sme_get_tile_i16() {
  // CHECK: arm_sme.get_tile : vector<[8]x[8]xi16>
  %tile = arm_sme.get_tile : vector<[8]x[8]xi16>
  return
}

// -----

func.func @arm_sme_get_tile_i32() {
  // CHECK: arm_sme.get_tile : vector<[4]x[4]xi32>
  %tile = arm_sme.get_tile : vector<[4]x[4]xi32>
  return
}

// -----

func.func @arm_sme_get_tile_i64() {
  // CHECK: arm_sme.get_tile : vector<[2]x[2]xi64>
  %tile = arm_sme.get_tile : vector<[2]x[2]xi64>
  return
}

// -----

func.func @arm_sme_get_tile_i128() {
  // CHECK: arm_sme.get_tile : vector<[1]x[1]xi128>
  %tile = arm_sme.get_tile : vector<[1]x[1]xi128>
  return
}

// -----

func.func @arm_sme_get_tile_f16() {
  // CHECK: arm_sme.get_tile : vector<[8]x[8]xf16>
  %tile = arm_sme.get_tile : vector<[8]x[8]xf16>
  return
}

// -----

func.func @arm_sme_get_tile_bf16() {
  // CHECK: arm_sme.get_tile : vector<[8]x[8]xbf16>
  %tile = arm_sme.get_tile : vector<[8]x[8]xbf16>
  return
}

// -----

func.func @arm_sme_get_tile_f32() {
  // CHECK: arm_sme.get_tile : vector<[4]x[4]xf32>
  %tile = arm_sme.get_tile : vector<[4]x[4]xf32>
  return
}

// -----

func.func @arm_sme_get_tile_f64() {
  // CHECK: arm_sme.get_tile : vector<[2]x[2]xf64>
  %tile = arm_sme.get_tile : vector<[2]x[2]xf64>
  return
}

//===----------------------------------------------------------------------===//
// arm_sme.zero
//===----------------------------------------------------------------------===//

// -----

func.func @arm_sme_zero_i8() {
  // CHECK: arm_sme.zero : vector<[16]x[16]xi8>
  %tile = arm_sme.zero : vector<[16]x[16]xi8>
  return
}

// -----

func.func @arm_sme_zero_i16() {
  // CHECK: arm_sme.zero : vector<[8]x[8]xi16>
  %tile = arm_sme.zero : vector<[8]x[8]xi16>
  return
}

// -----

func.func @arm_sme_zero_i32() {
  // CHECK: arm_sme.zero : vector<[4]x[4]xi32>
  %tile = arm_sme.zero : vector<[4]x[4]xi32>
  return
}

// -----

func.func @arm_sme_zero_i64() {
  // CHECK: arm_sme.zero : vector<[2]x[2]xi64>
  %tile = arm_sme.zero : vector<[2]x[2]xi64>
  return
}

// -----

func.func @arm_sme_zero_i128() {
  // CHECK: arm_sme.zero : vector<[1]x[1]xi128>
  %tile = arm_sme.zero : vector<[1]x[1]xi128>
  return
}

// -----

func.func @arm_sme_zero_f16() {
  // CHECK: arm_sme.zero : vector<[8]x[8]xf16>
  %tile = arm_sme.zero : vector<[8]x[8]xf16>
  return
}

// -----

func.func @arm_sme_zero_bf16() {
  // CHECK: arm_sme.zero : vector<[8]x[8]xbf16>
  %tile = arm_sme.zero : vector<[8]x[8]xbf16>
  return
}

// -----

func.func @arm_sme_zero_f32() {
  // CHECK: arm_sme.zero : vector<[4]x[4]xf32>
  %tile = arm_sme.zero : vector<[4]x[4]xf32>
  return
}

// -----

func.func @arm_sme_zero_f64() {
  // CHECK: arm_sme.zero : vector<[2]x[2]xf64>
  %tile = arm_sme.zero : vector<[2]x[2]xf64>
  return
}

//===----------------------------------------------------------------------===//
// arm_sme.tile_load
//===----------------------------------------------------------------------===//

/// Cases without a layout attribute come first, then the same element types
/// with an explicit layout<vertical>.

// -----

func.func @arm_sme_tile_load_hor_i8(%src : memref<?x?xi8>) {
  // CHECK: arm_sme.tile_load %{{.*}}[{{.*}}] : memref<?x?xi8>, vector<[16]x[16]xi8>
  %zero = arith.constant 0 : index
  %loaded = arm_sme.tile_load %src[%zero, %zero] : memref<?x?xi8>, vector<[16]x[16]xi8>
  return
}

// -----

func.func @arm_sme_tile_load_hor_i16(%src : memref<?x?xi16>) {
  // CHECK: arm_sme.tile_load %{{.*}}[{{.*}}] : memref<?x?xi16>, vector<[8]x[8]xi16>
  %zero = arith.constant 0 : index
  %loaded = arm_sme.tile_load %src[%zero, %zero] : memref<?x?xi16>, vector<[8]x[8]xi16>
  return
}

// -----

func.func @arm_sme_tile_load_hor_i32(%src : memref<?x?xi32>) {
  // CHECK: arm_sme.tile_load %{{.*}}[{{.*}}] : memref<?x?xi32>, vector<[4]x[4]xi32>
  %zero = arith.constant 0 : index
  %loaded = arm_sme.tile_load %src[%zero, %zero] : memref<?x?xi32>, vector<[4]x[4]xi32>
  return
}

// -----

func.func @arm_sme_tile_load_hor_i64(%src : memref<?x?xi64>) {
  // CHECK: arm_sme.tile_load %{{.*}}[{{.*}}] : memref<?x?xi64>, vector<[2]x[2]xi64>
  %zero = arith.constant 0 : index
  %loaded = arm_sme.tile_load %src[%zero, %zero] : memref<?x?xi64>, vector<[2]x[2]xi64>
  return
}

// -----

func.func @arm_sme_tile_load_hor_i128(%src : memref<?x?xi128>) {
  // CHECK: arm_sme.tile_load %{{.*}}[{{.*}}] : memref<?x?xi128>, vector<[1]x[1]xi128>
  %zero = arith.constant 0 : index
  %loaded = arm_sme.tile_load %src[%zero, %zero] : memref<?x?xi128>, vector<[1]x[1]xi128>
  return
}

// -----

func.func @arm_sme_tile_load_hor_f16(%src : memref<?x?xf16>) {
  // CHECK: arm_sme.tile_load %{{.*}}[{{.*}}] : memref<?x?xf16>, vector<[8]x[8]xf16>
  %zero = arith.constant 0 : index
  %loaded = arm_sme.tile_load %src[%zero, %zero] : memref<?x?xf16>, vector<[8]x[8]xf16>
  return
}

// -----

func.func @arm_sme_tile_load_hor_bf16(%src : memref<?x?xbf16>) {
  // CHECK: arm_sme.tile_load %{{.*}}[{{.*}}] : memref<?x?xbf16>, vector<[8]x[8]xbf16>
  %zero = arith.constant 0 : index
  %loaded = arm_sme.tile_load %src[%zero, %zero] : memref<?x?xbf16>, vector<[8]x[8]xbf16>
  return
}

// -----

func.func @arm_sme_tile_load_hor_f32(%src : memref<?x?xf32>) {
  // CHECK: arm_sme.tile_load %{{.*}}[{{.*}}] : memref<?x?xf32>, vector<[4]x[4]xf32>
  %zero = arith.constant 0 : index
  %loaded = arm_sme.tile_load %src[%zero, %zero] : memref<?x?xf32>, vector<[4]x[4]xf32>
  return
}

// -----

func.func @arm_sme_tile_load_hor_f64(%src : memref<?x?xf64>) {
  // CHECK: arm_sme.tile_load %{{.*}}[{{.*}}] : memref<?x?xf64>, vector<[2]x[2]xf64>
  %zero = arith.constant 0 : index
  %loaded = arm_sme.tile_load %src[%zero, %zero] : memref<?x?xf64>, vector<[2]x[2]xf64>
  return
}

// -----

func.func @arm_sme_tile_load_ver_i8(%src : memref<?x?xi8>) {
  // CHECK: arm_sme.tile_load {{.*}} layout<vertical> : memref<?x?xi8>, vector<[16]x[16]xi8>
  %zero = arith.constant 0 : index
  %loaded = arm_sme.tile_load %src[%zero, %zero] layout<vertical> : memref<?x?xi8>, vector<[16]x[16]xi8>
  return
}

// -----

func.func @arm_sme_tile_load_ver_i16(%src : memref<?x?xi16>) {
  // CHECK: arm_sme.tile_load {{.*}} layout<vertical> : memref<?x?xi16>, vector<[8]x[8]xi16>
  %zero = arith.constant 0 : index
  %loaded = arm_sme.tile_load %src[%zero, %zero] layout<vertical> : memref<?x?xi16>, vector<[8]x[8]xi16>
  return
}

// -----

func.func @arm_sme_tile_load_ver_i32(%src : memref<?x?xi32>) {
  // CHECK: arm_sme.tile_load {{.*}} layout<vertical> : memref<?x?xi32>, vector<[4]x[4]xi32>
  %zero = arith.constant 0 : index
  %loaded = arm_sme.tile_load %src[%zero, %zero] layout<vertical> : memref<?x?xi32>, vector<[4]x[4]xi32>
  return
}

// -----

func.func @arm_sme_tile_load_ver_i64(%src : memref<?x?xi64>) {
  // CHECK: arm_sme.tile_load {{.*}} layout<vertical> : memref<?x?xi64>, vector<[2]x[2]xi64>
  %zero = arith.constant 0 : index
  %loaded = arm_sme.tile_load %src[%zero, %zero] layout<vertical> : memref<?x?xi64>, vector<[2]x[2]xi64>
  return
}

// -----

func.func @arm_sme_tile_load_ver_i128(%src : memref<?x?xi128>) {
  // CHECK: arm_sme.tile_load {{.*}} layout<vertical> : memref<?x?xi128>, vector<[1]x[1]xi128>
  %zero = arith.constant 0 : index
  %loaded = arm_sme.tile_load %src[%zero, %zero] layout<vertical> : memref<?x?xi128>, vector<[1]x[1]xi128>
  return
}

// -----

func.func @arm_sme_tile_load_ver_f16(%src : memref<?x?xf16>) {
  // CHECK: arm_sme.tile_load {{.*}} layout<vertical> : memref<?x?xf16>, vector<[8]x[8]xf16>
  %zero = arith.constant 0 : index
  %loaded = arm_sme.tile_load %src[%zero, %zero] layout<vertical> : memref<?x?xf16>, vector<[8]x[8]xf16>
  return
}

// -----

func.func @arm_sme_tile_load_ver_bf16(%src : memref<?x?xbf16>) {
  // CHECK: arm_sme.tile_load {{.*}} layout<vertical> : memref<?x?xbf16>, vector<[8]x[8]xbf16>
  %zero = arith.constant 0 : index
  %loaded = arm_sme.tile_load %src[%zero, %zero] layout<vertical> : memref<?x?xbf16>, vector<[8]x[8]xbf16>
  return
}

// -----

func.func @arm_sme_tile_load_ver_f32(%src : memref<?x?xf32>) {
  // CHECK: arm_sme.tile_load {{.*}} layout<vertical> : memref<?x?xf32>, vector<[4]x[4]xf32>
  %zero = arith.constant 0 : index
  %loaded = arm_sme.tile_load %src[%zero, %zero] layout<vertical> : memref<?x?xf32>, vector<[4]x[4]xf32>
  return
}

// -----

func.func @arm_sme_tile_load_ver_f64(%src : memref<?x?xf64>) {
  // CHECK: arm_sme.tile_load {{.*}} layout<vertical> : memref<?x?xf64>, vector<[2]x[2]xf64>
  %zero = arith.constant 0 : index
  %loaded = arm_sme.tile_load %src[%zero, %zero] layout<vertical> : memref<?x?xf64>, vector<[2]x[2]xf64>
  return
}

// -----

/// The padding and mask operands are optional; this case supplies both.
func.func @arm_sme_tile_load_hor_pad_f64(%src : memref<?x?xf64>, %pad : f64, %mask : vector<[2]x[2]xi1>) {
  // CHECK: arm_sme.tile_load %{{.*}}[{{.*}}], {{.*}}, {{.*}} : memref<?x?xf64>, vector<[2]x[2]xf64>
  %zero = arith.constant 0 : index
  %loaded = arm_sme.tile_load %src[%zero, %zero], %pad, %mask : memref<?x?xf64>, vector<[2]x[2]xf64>
  return
}

// -----

/// Layout is optional and horizontal is the default, verify it's still parsed.
func.func @arm_sme_tile_load_explicit_hor(%src : memref<?x?xi8>) {
  // The expected output carries no layout attribute: nothing may appear
  // between the index list and the type list.
  // CHECK: arm_sme.tile_load %{{.*}}[{{.*}}] : memref<?x?xi8>, vector<[16]x[16]xi8>
  %c0 = arith.constant 0 : index
  %tile = arm_sme.tile_load %src[%c0, %c0] layout<horizontal> : memref<?x?xi8>, vector<[16]x[16]xi8>
  return
}

//===----------------------------------------------------------------------===//
// arm_sme.tile_store
//===----------------------------------------------------------------------===//

/// Cases without a layout attribute come first, then the same element types
/// with an explicit layout<vertical>, then a masked store.

// -----

func.func @arm_sme_tile_store_hor_i8(%tile : vector<[16]x[16]xi8>, %dest : memref<?x?xi8>) {
  // CHECK: arm_sme.tile_store %{{.*}}[{{.*}}] : memref<?x?xi8>, vector<[16]x[16]xi8>
  %c0 = arith.constant 0 : index
  arm_sme.tile_store %tile, %dest[%c0, %c0] : memref<?x?xi8>, vector<[16]x[16]xi8>
  return
}

// -----

func.func @arm_sme_tile_store_hor_i16(%tile : vector<[8]x[8]xi16>, %dest : memref<?x?xi16>) {
  // CHECK: arm_sme.tile_store %{{.*}}[{{.*}}] : memref<?x?xi16>, vector<[8]x[8]xi16>
  %c0 = arith.constant 0 : index
  arm_sme.tile_store %tile, %dest[%c0, %c0] : memref<?x?xi16>, vector<[8]x[8]xi16>
  return
}

// -----

func.func @arm_sme_tile_store_hor_i32(%tile : vector<[4]x[4]xi32>, %dest : memref<?x?xi32>) {
  // CHECK: arm_sme.tile_store %{{.*}}[{{.*}}] : memref<?x?xi32>, vector<[4]x[4]xi32>
  %c0 = arith.constant 0 : index
  arm_sme.tile_store %tile, %dest[%c0, %c0] : memref<?x?xi32>, vector<[4]x[4]xi32>
  return
}

// -----

func.func @arm_sme_tile_store_hor_i64(%tile : vector<[2]x[2]xi64>, %dest : memref<?x?xi64>) {
  // CHECK: arm_sme.tile_store %{{.*}}[{{.*}}] : memref<?x?xi64>, vector<[2]x[2]xi64>
  %c0 = arith.constant 0 : index
  arm_sme.tile_store %tile, %dest[%c0, %c0] : memref<?x?xi64>, vector<[2]x[2]xi64>
  return
}

// -----

func.func @arm_sme_tile_store_hor_i128(%tile : vector<[1]x[1]xi128>, %dest : memref<?x?xi128>) {
  // CHECK: arm_sme.tile_store %{{.*}}[{{.*}}] : memref<?x?xi128>, vector<[1]x[1]xi128>
  %c0 = arith.constant 0 : index
  arm_sme.tile_store %tile, %dest[%c0, %c0] : memref<?x?xi128>, vector<[1]x[1]xi128>
  return
}

// -----

func.func @arm_sme_tile_store_hor_f16(%tile : vector<[8]x[8]xf16>, %dest : memref<?x?xf16>) {
  // CHECK: arm_sme.tile_store %{{.*}}[{{.*}}] : memref<?x?xf16>, vector<[8]x[8]xf16>
  %c0 = arith.constant 0 : index
  arm_sme.tile_store %tile, %dest[%c0, %c0] : memref<?x?xf16>, vector<[8]x[8]xf16>
  return
}

// -----

func.func @arm_sme_tile_store_hor_bf16(%tile : vector<[8]x[8]xbf16>, %dest : memref<?x?xbf16>) {
  // CHECK: arm_sme.tile_store %{{.*}}[{{.*}}] : memref<?x?xbf16>, vector<[8]x[8]xbf16>
  %c0 = arith.constant 0 : index
  arm_sme.tile_store %tile, %dest[%c0, %c0] : memref<?x?xbf16>, vector<[8]x[8]xbf16>
  return
}

// -----

func.func @arm_sme_tile_store_hor_f32(%tile : vector<[4]x[4]xf32>, %dest : memref<?x?xf32>) {
  // CHECK: arm_sme.tile_store %{{.*}}[{{.*}}] : memref<?x?xf32>, vector<[4]x[4]xf32>
  %c0 = arith.constant 0 : index
  arm_sme.tile_store %tile, %dest[%c0, %c0] : memref<?x?xf32>, vector<[4]x[4]xf32>
  return
}

// -----

func.func @arm_sme_tile_store_hor_f64(%tile : vector<[2]x[2]xf64>, %dest : memref<?x?xf64>) {
  // CHECK: arm_sme.tile_store %{{.*}}[{{.*}}] : memref<?x?xf64>, vector<[2]x[2]xf64>
  %c0 = arith.constant 0 : index
  arm_sme.tile_store %tile, %dest[%c0, %c0] : memref<?x?xf64>, vector<[2]x[2]xf64>
  return
}

// -----

func.func @arm_sme_tile_store_ver_i8(%tile : vector<[16]x[16]xi8>, %dest : memref<?x?xi8>) {
  // CHECK: arm_sme.tile_store {{.*}} layout<vertical> : memref<?x?xi8>, vector<[16]x[16]xi8>
  %c0 = arith.constant 0 : index
  arm_sme.tile_store %tile, %dest[%c0, %c0] layout<vertical> : memref<?x?xi8>, vector<[16]x[16]xi8>
  return
}

// -----

func.func @arm_sme_tile_store_ver_i16(%tile : vector<[8]x[8]xi16>, %dest : memref<?x?xi16>) {
  // CHECK: arm_sme.tile_store {{.*}} layout<vertical> : memref<?x?xi16>, vector<[8]x[8]xi16>
  %c0 = arith.constant 0 : index
  arm_sme.tile_store %tile, %dest[%c0, %c0] layout<vertical> : memref<?x?xi16>, vector<[8]x[8]xi16>
  return
}

// -----

func.func @arm_sme_tile_store_ver_i32(%tile : vector<[4]x[4]xi32>, %dest : memref<?x?xi32>) {
  // CHECK: arm_sme.tile_store {{.*}} layout<vertical> : memref<?x?xi32>, vector<[4]x[4]xi32>
  %c0 = arith.constant 0 : index
  arm_sme.tile_store %tile, %dest[%c0, %c0] layout<vertical> : memref<?x?xi32>, vector<[4]x[4]xi32>
  return
}

// -----

func.func @arm_sme_tile_store_ver_i64(%tile : vector<[2]x[2]xi64>, %dest : memref<?x?xi64>) {
  // CHECK: arm_sme.tile_store {{.*}} layout<vertical> : memref<?x?xi64>, vector<[2]x[2]xi64>
  %c0 = arith.constant 0 : index
  arm_sme.tile_store %tile, %dest[%c0, %c0] layout<vertical> : memref<?x?xi64>, vector<[2]x[2]xi64>
  return
}

// -----

func.func @arm_sme_tile_store_ver_i128(%tile : vector<[1]x[1]xi128>, %dest : memref<?x?xi128>) {
  // CHECK: arm_sme.tile_store {{.*}} layout<vertical> : memref<?x?xi128>, vector<[1]x[1]xi128>
  %c0 = arith.constant 0 : index
  arm_sme.tile_store %tile, %dest[%c0, %c0] layout<vertical> : memref<?x?xi128>, vector<[1]x[1]xi128>
  return
}

// -----

func.func @arm_sme_tile_store_ver_f16(%tile : vector<[8]x[8]xf16>, %dest : memref<?x?xf16>) {
  // CHECK: arm_sme.tile_store {{.*}} layout<vertical> : memref<?x?xf16>, vector<[8]x[8]xf16>
  %c0 = arith.constant 0 : index
  arm_sme.tile_store %tile, %dest[%c0, %c0] layout<vertical> : memref<?x?xf16>, vector<[8]x[8]xf16>
  return
}

// -----

func.func @arm_sme_tile_store_ver_bf16(%tile : vector<[8]x[8]xbf16>, %dest : memref<?x?xbf16>) {
  // CHECK: arm_sme.tile_store {{.*}} layout<vertical> : memref<?x?xbf16>, vector<[8]x[8]xbf16>
  %c0 = arith.constant 0 : index
  arm_sme.tile_store %tile, %dest[%c0, %c0] layout<vertical> : memref<?x?xbf16>, vector<[8]x[8]xbf16>
  return
}

// -----

func.func @arm_sme_tile_store_ver_f32(%tile : vector<[4]x[4]xf32>, %dest : memref<?x?xf32>) {
  // CHECK: arm_sme.tile_store {{.*}} layout<vertical> : memref<?x?xf32>, vector<[4]x[4]xf32>
  %c0 = arith.constant 0 : index
  arm_sme.tile_store %tile, %dest[%c0, %c0] layout<vertical> : memref<?x?xf32>, vector<[4]x[4]xf32>
  return
}

// -----

func.func @arm_sme_tile_store_ver_f64(%tile : vector<[2]x[2]xf64>, %dest : memref<?x?xf64>) {
  // CHECK: arm_sme.tile_store {{.*}} layout<vertical> : memref<?x?xf64>, vector<[2]x[2]xf64>
  %c0 = arith.constant 0 : index
  arm_sme.tile_store %tile, %dest[%c0, %c0] layout<vertical> : memref<?x?xf64>, vector<[2]x[2]xf64>
  return
}

// -----

/// Masked store: the expected output requires an extra operand after the index
/// list, so a printer that dropped the mask would fail this case.
func.func @arm_sme_tile_store_with_mask_ver_f32(%tile : vector<[4]x[4]xf32>, %dest : memref<?x?xf32>, %mask : vector<[4]x[4]xi1>) {
  // CHECK: arm_sme.tile_store %{{.*}}[{{.*}}], %{{.*}} layout<vertical> : memref<?x?xf32>, vector<[4]x[4]xf32>
  %c0 = arith.constant 0 : index
  arm_sme.tile_store %tile, %dest[%c0, %c0], %mask layout<vertical> : memref<?x?xf32>, vector<[4]x[4]xf32>
  return
}

// -----

/// Layout is optional and horizontal is the default, verify it's still parsed.
func.func @arm_sme_tile_store_explicit_hor(%tile : vector<[16]x[16]xi8>, %dest : memref<?x?xi8>) {
  // Named for the explicit layout<horizontal> spelling exercised here (same
  // scheme as @arm_sme_tile_load_explicit_hor); this is not a vertical-layout
  // case. The expected output carries no layout attribute.
  // CHECK: arm_sme.tile_store %{{.*}}[{{.*}}] : memref<?x?xi8>, vector<[16]x[16]xi8>
  %c0 = arith.constant 0 : index
  arm_sme.tile_store %tile, %dest[%c0, %c0] layout<horizontal> : memref<?x?xi8>, vector<[16]x[16]xi8>
  return
}

//===----------------------------------------------------------------------===//
// arm_sme.load_tile_slice
//===----------------------------------------------------------------------===//

/// Each case loads one slice into an existing tile, using a 1-D scalable mask
/// whose length matches the tile dimension for that element type.

// -----

func.func @arm_sme_load_tile_slice_hor_i8(%src : memref<?x?xi8>, %mask : vector<[16]xi1>, %tile : vector<[16]x[16]xi8>, %tile_slice_index : index) {
  // CHECK: arm_sme.load_tile_slice %{{.*}}[{{.*}}], %{{.*}}, %{{.*}} : memref<?x?xi8>, vector<[16]xi1>, vector<[16]x[16]xi8>
  %c0 = arith.constant 0 : index
  %tile_update = arm_sme.load_tile_slice %src[%c0], %mask, %tile, %tile_slice_index : memref<?x?xi8>, vector<[16]xi1>, vector<[16]x[16]xi8>
  return
}

// -----

func.func @arm_sme_load_tile_slice_hor_i16(%src : memref<?x?xi16>, %mask : vector<[8]xi1>, %tile : vector<[8]x[8]xi16>, %tile_slice_index : index) {
  // CHECK: arm_sme.load_tile_slice %{{.*}}[{{.*}}], %{{.*}}, %{{.*}} : memref<?x?xi16>, vector<[8]xi1>, vector<[8]x[8]xi16>
  %c0 = arith.constant 0 : index
  %tile_update = arm_sme.load_tile_slice %src[%c0], %mask, %tile, %tile_slice_index : memref<?x?xi16>, vector<[8]xi1>, vector<[8]x[8]xi16>
  return
}

// -----

func.func @arm_sme_load_tile_slice_hor_i32(%src : memref<?x?xi32>, %mask : vector<[4]xi1>, %tile : vector<[4]x[4]xi32>, %tile_slice_index : index) {
  // CHECK: arm_sme.load_tile_slice %{{.*}}[{{.*}}], %{{.*}}, %{{.*}} : memref<?x?xi32>, vector<[4]xi1>, vector<[4]x[4]xi32>
  %c0 = arith.constant 0 : index
  %tile_update = arm_sme.load_tile_slice %src[%c0], %mask, %tile, %tile_slice_index : memref<?x?xi32>, vector<[4]xi1>, vector<[4]x[4]xi32>
  return
}

// -----

func.func @arm_sme_load_tile_slice_hor_i64(%src : memref<?x?xi64>, %mask : vector<[2]xi1>, %tile : vector<[2]x[2]xi64>, %tile_slice_index : index) {
  // CHECK: arm_sme.load_tile_slice %{{.*}}[{{.*}}], %{{.*}}, %{{.*}} : memref<?x?xi64>, vector<[2]xi1>, vector<[2]x[2]xi64>
  %c0 = arith.constant 0 : index
  %tile_update = arm_sme.load_tile_slice %src[%c0], %mask, %tile, %tile_slice_index : memref<?x?xi64>, vector<[2]xi1>, vector<[2]x[2]xi64>
  return
}

// -----

func.func @arm_sme_load_tile_slice_hor_i128(%src : memref<?x?xi128>, %mask : vector<[1]xi1>, %tile : vector<[1]x[1]xi128>, %tile_slice_index : index) {
  // CHECK: arm_sme.load_tile_slice %{{.*}}[{{.*}}], %{{.*}}, %{{.*}} : memref<?x?xi128>, vector<[1]xi1>, vector<[1]x[1]xi128>
  %c0 = arith.constant 0 : index
  %tile_update = arm_sme.load_tile_slice %src[%c0], %mask, %tile, %tile_slice_index : memref<?x?xi128>, vector<[1]xi1>, vector<[1]x[1]xi128>
  return
}

// -----

func.func @arm_sme_load_tile_slice_hor_f16(%src : memref<?x?xf16>, %mask : vector<[8]xi1>, %tile : vector<[8]x[8]xf16>, %tile_slice_index : index) {
  // CHECK: arm_sme.load_tile_slice %{{.*}}[{{.*}}], %{{.*}}, %{{.*}} : memref<?x?xf16>, vector<[8]xi1>, vector<[8]x[8]xf16>
  %c0 = arith.constant 0 : index
  %tile_update = arm_sme.load_tile_slice %src[%c0], %mask, %tile, %tile_slice_index : memref<?x?xf16>, vector<[8]xi1>, vector<[8]x[8]xf16>
  return
}

// -----

func.func @arm_sme_load_tile_slice_hor_bf16(%src : memref<?x?xbf16>, %mask : vector<[8]xi1>, %tile : vector<[8]x[8]xbf16>, %tile_slice_index : index) {
  // CHECK: arm_sme.load_tile_slice %{{.*}}[{{.*}}], %{{.*}}, %{{.*}} : memref<?x?xbf16>, vector<[8]xi1>, vector<[8]x[8]xbf16>
  %c0 = arith.constant 0 : index
  %tile_update = arm_sme.load_tile_slice %src[%c0], %mask, %tile, %tile_slice_index : memref<?x?xbf16>, vector<[8]xi1>, vector<[8]x[8]xbf16>
  return
}

// -----

/// Remaining default-layout slice loads (f32, f64), followed by the first
/// explicit layout<vertical> slice loads (i8, i16).
func.func @arm_sme_load_tile_slice_hor_f32(%src : memref<?x?xf32>, %mask : vector<[4]xi1>, %tile : vector<[4]x[4]xf32>, %tile_slice_index : index) {
  // CHECK: arm_sme.load_tile_slice %{{.*}}[{{.*}}], %{{.*}}, %{{.*}} : memref<?x?xf32>, vector<[4]xi1>, vector<[4]x[4]xf32>
  %zero = arith.constant 0 : index
  %updated = arm_sme.load_tile_slice %src[%zero], %mask, %tile, %tile_slice_index : memref<?x?xf32>, vector<[4]xi1>, vector<[4]x[4]xf32>
  return
}

// -----

func.func @arm_sme_load_tile_slice_hor_f64(%src : memref<?x?xf64>, %mask : vector<[2]xi1>, %tile : vector<[2]x[2]xf64>, %tile_slice_index : index) {
  // CHECK: arm_sme.load_tile_slice %{{.*}}[{{.*}}], %{{.*}}, %{{.*}} : memref<?x?xf64>, vector<[2]xi1>, vector<[2]x[2]xf64>
  %zero = arith.constant 0 : index
  %updated = arm_sme.load_tile_slice %src[%zero], %mask, %tile, %tile_slice_index : memref<?x?xf64>, vector<[2]xi1>, vector<[2]x[2]xf64>
  return
}

// -----

func.func @arm_sme_load_tile_slice_ver_i8(%src : memref<?x?xi8>, %mask : vector<[16]xi1>, %tile : vector<[16]x[16]xi8>, %tile_slice_index : index) {
  // CHECK: arm_sme.load_tile_slice {{.*}} layout<vertical> : memref<?x?xi8>, vector<[16]xi1>, vector<[16]x[16]xi8>
  %zero = arith.constant 0 : index
  %updated = arm_sme.load_tile_slice %src[%zero], %mask, %tile, %tile_slice_index layout<vertical> : memref<?x?xi8>, vector<[16]xi1>, vector<[16]x[16]xi8>
  return
}

// -----

func.func @arm_sme_load_tile_slice_ver_i16(%src : memref<?x?xi16>, %mask : vector<[8]xi1>, %tile : vector<[8]x[8]xi16>, %tile_slice_index : index) {
  // CHECK: arm_sme.load_tile_slice {{.*}} layout<vertical> : memref<?x?xi16>, vector<[8]xi1>, vector<[8]x[8]xi16>
  %zero = arith.constant 0 : index
  %updated = arm_sme.load_tile_slice %src[%zero], %mask, %tile, %tile_slice_index layout<vertical> : memref<?x?xi16>, vector<[8]xi1>, vector<[8]x[8]xi16>
  return
}

// -----

func.func @arm_sme_load_tile_slice_ver_i32(%src : memref<?x?xi32>, %mask : vector<[4]xi1>, %tile : vector<[4]x[4]xi32>, %tile_slice_index : index) {
  // Vertical-layout slice load; the layout attribute must survive printing.
  // CHECK: arm_sme.load_tile_slice {{.*}} layout<vertical> : memref<?x?xi32>, vector<[4]xi1>, vector<[4]x[4]xi32>
  %zero = arith.constant 0 : index
  %updated = arm_sme.load_tile_slice %src[%zero], %mask, %tile, %tile_slice_index layout<vertical> : memref<?x?xi32>, vector<[4]xi1>, vector<[4]x[4]xi32>
  return
}

// -----

func.func @arm_sme_load_tile_slice_ver_i64(%src : memref<?x?xi64>, %mask : vector<[2]xi1>, %tile : vector<[2]x[2]xi64>, %tile_slice_index : index) {
  // CHECK: arm_sme.load_tile_slice {{.*}} layout<vertical> : memref<?x?xi64>, vector<[2]xi1>, vector<[2]x[2]xi64>
  %zero = arith.constant 0 : index
  %updated = arm_sme.load_tile_slice %src[%zero], %mask, %tile, %tile_slice_index layout<vertical> : memref<?x?xi64>, vector<[2]xi1>, vector<[2]x[2]xi64>
  return
}

// -----

func.func @arm_sme_load_tile_slice_ver_i128(%src : memref<?x?xi128>, %mask : vector<[1]xi1>, %tile : vector<[1]x[1]xi128>, %tile_slice_index : index) {
  // CHECK: arm_sme.load_tile_slice {{.*}} layout<vertical> : memref<?x?xi128>, vector<[1]xi1>, vector<[1]x[1]xi128>
  %zero = arith.constant 0 : index
  %updated = arm_sme.load_tile_slice %src[%zero], %mask, %tile, %tile_slice_index layout<vertical> : memref<?x?xi128>, vector<[1]xi1>, vector<[1]x[1]xi128>
  return
}

// -----

func.func @arm_sme_load_tile_slice_ver_f16(%src : memref<?x?xf16>, %mask : vector<[8]xi1>, %tile : vector<[8]x[8]xf16>, %tile_slice_index : index) {
  // CHECK: arm_sme.load_tile_slice {{.*}} layout<vertical> : memref<?x?xf16>, vector<[8]xi1>, vector<[8]x[8]xf16>
  %zero = arith.constant 0 : index
  %updated = arm_sme.load_tile_slice %src[%zero], %mask, %tile, %tile_slice_index layout<vertical> : memref<?x?xf16>, vector<[8]xi1>, vector<[8]x[8]xf16>
  return
}

// -----

func.func @arm_sme_load_tile_slice_ver_bf16(%src : memref<?x?xbf16>, %mask : vector<[8]xi1>, %tile : vector<[8]x[8]xbf16>, %tile_slice_index : index) {
  // Vertical-layout slice load; the layout attribute must survive printing.
  // CHECK: arm_sme.load_tile_slice {{.*}} layout<vertical> : memref<?x?xbf16>, vector<[8]xi1>, vector<[8]x[8]xbf16>
  %zero = arith.constant 0 : index
  %updated = arm_sme.load_tile_slice %src[%zero], %mask, %tile, %tile_slice_index layout<vertical> : memref<?x?xbf16>, vector<[8]xi1>, vector<[8]x[8]xbf16>
  return
}

// -----

func.func @arm_sme_load_tile_slice_ver_f32(%src : memref<?x?xf32>, %mask : vector<[4]xi1>, %tile : vector<[4]x[4]xf32>, %tile_slice_index : index) {
  // CHECK: arm_sme.load_tile_slice {{.*}} layout<vertical> : memref<?x?xf32>, vector<[4]xi1>, vector<[4]x[4]xf32>
  %zero = arith.constant 0 : index
  %updated = arm_sme.load_tile_slice %src[%zero], %mask, %tile, %tile_slice_index layout<vertical> : memref<?x?xf32>, vector<[4]xi1>, vector<[4]x[4]xf32>
  return
}

// -----

func.func @arm_sme_load_tile_slice_ver_f64(%src : memref<?x?xf64>, %mask : vector<[2]xi1>, %tile : vector<[2]x[2]xf64>, %tile_slice_index : index) {
  // CHECK: arm_sme.load_tile_slice {{.*}} layout<vertical> : memref<?x?xf64>, vector<[2]xi1>, vector<[2]x[2]xf64>
  %zero = arith.constant 0 : index
  %updated = arm_sme.load_tile_slice %src[%zero], %mask, %tile, %tile_slice_index layout<vertical> : memref<?x?xf64>, vector<[2]xi1>, vector<[2]x[2]xf64>
  return
}

// -----

/// Layout is optional and horizontal is the default, verify it's still parsed.
func.func @arm_sme_load_tile_slice_explicit_hor(%src : memref<?x?xi8>, %mask : vector<[16]xi1>, %tile : vector<[16]x[16]xi8>, %tile_slice_index : index) {
  // Named for the explicit layout<horizontal> spelling exercised here (same
  // scheme as @arm_sme_tile_load_explicit_hor), so it no longer shares a name
  // with the default-layout i8 case.
  // CHECK: arm_sme.load_tile_slice %{{.*}}[{{.*}}], %{{.*}}, %{{.*}} : memref<?x?xi8>, vector<[16]xi1>, vector<[16]x[16]xi8>
  %c0 = arith.constant 0 : index
  %tile_update = arm_sme.load_tile_slice %src[%c0], %mask, %tile, %tile_slice_index layout<horizontal> : memref<?x?xi8>, vector<[16]xi1>, vector<[16]x[16]xi8>
  return
}

//===----------------------------------------------------------------------===//
// arm_sme.store_tile_slice
//===----------------------------------------------------------------------===//

/// Each case stores one slice of a tile under a 1-D scalable mask. Cases
/// without a layout attribute come first, then explicit layout<vertical>.

// -----

func.func @arm_sme_store_tile_slice_hor_i8(%tile : vector<[16]x[16]xi8>, %tile_slice_index : index, %mask : vector<[16]xi1>, %dest : memref<?x?xi8>) -> () {
  // CHECK: arm_sme.store_tile_slice {{.*}}, {{.*}}, %{{.*}}[{{.*}}] : memref<?x?xi8>, vector<[16]xi1>, vector<[16]x[16]xi8>
  %c0 = arith.constant 0 : index
  arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%c0] : memref<?x?xi8>, vector<[16]xi1>, vector<[16]x[16]xi8>
  return
}

// -----

func.func @arm_sme_store_tile_slice_hor_i16(%tile : vector<[8]x[8]xi16>, %tile_slice_index : index, %mask : vector<[8]xi1>, %dest : memref<?x?xi16>) -> () {
  // CHECK: arm_sme.store_tile_slice {{.*}}, {{.*}}, %{{.*}}[{{.*}}] : memref<?x?xi16>, vector<[8]xi1>, vector<[8]x[8]xi16>
  %c0 = arith.constant 0 : index
  arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%c0] : memref<?x?xi16>, vector<[8]xi1>, vector<[8]x[8]xi16>
  return
}

// -----

func.func @arm_sme_store_tile_slice_hor_i32(%tile : vector<[4]x[4]xi32>, %tile_slice_index : index, %mask : vector<[4]xi1>, %dest : memref<?x?xi32>) -> () {
  // CHECK: arm_sme.store_tile_slice {{.*}}, {{.*}}, %{{.*}}[{{.*}}] : memref<?x?xi32>, vector<[4]xi1>, vector<[4]x[4]xi32>
  %c0 = arith.constant 0 : index
  arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%c0] : memref<?x?xi32>, vector<[4]xi1>, vector<[4]x[4]xi32>
  return
}

// -----

func.func @arm_sme_store_tile_slice_hor_i64(%tile : vector<[2]x[2]xi64>, %tile_slice_index : index, %mask : vector<[2]xi1>, %dest : memref<?x?xi64>) -> () {
  // CHECK: arm_sme.store_tile_slice {{.*}}, {{.*}}, %{{.*}}[{{.*}}] : memref<?x?xi64>, vector<[2]xi1>, vector<[2]x[2]xi64>
  %c0 = arith.constant 0 : index
  arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%c0] : memref<?x?xi64>, vector<[2]xi1>, vector<[2]x[2]xi64>
  return
}

// -----

func.func @arm_sme_store_tile_slice_hor_i128(%tile : vector<[1]x[1]xi128>, %tile_slice_index : index, %mask : vector<[1]xi1>, %dest : memref<?x?xi128>) -> () {
  // CHECK: arm_sme.store_tile_slice {{.*}}, {{.*}}, %{{.*}}[{{.*}}] : memref<?x?xi128>, vector<[1]xi1>, vector<[1]x[1]xi128>
  %c0 = arith.constant 0 : index
  arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%c0] : memref<?x?xi128>, vector<[1]xi1>, vector<[1]x[1]xi128>
  return
}

// -----

func.func @arm_sme_store_tile_slice_hor_f16(%tile : vector<[8]x[8]xf16>, %tile_slice_index : index, %mask : vector<[8]xi1>, %dest : memref<?x?xf16>) -> () {
  // CHECK: arm_sme.store_tile_slice {{.*}}, {{.*}}, %{{.*}}[{{.*}}] : memref<?x?xf16>, vector<[8]xi1>, vector<[8]x[8]xf16>
  %c0 = arith.constant 0 : index
  arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%c0] : memref<?x?xf16>, vector<[8]xi1>, vector<[8]x[8]xf16>
  return
}

// -----

func.func @arm_sme_store_tile_slice_hor_bf16(%tile : vector<[8]x[8]xbf16>, %tile_slice_index : index, %mask : vector<[8]xi1>, %dest : memref<?x?xbf16>) -> () {
  // CHECK: arm_sme.store_tile_slice {{.*}}, {{.*}}, %{{.*}}[{{.*}}] : memref<?x?xbf16>, vector<[8]xi1>, vector<[8]x[8]xbf16>
  %c0 = arith.constant 0 : index
  arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%c0] : memref<?x?xbf16>, vector<[8]xi1>, vector<[8]x[8]xbf16>
  return
}

// -----

func.func @arm_sme_store_tile_slice_hor_f32(%tile : vector<[4]x[4]xf32>, %tile_slice_index : index, %mask : vector<[4]xi1>, %dest : memref<?x?xf32>) -> () {
  // CHECK: arm_sme.store_tile_slice {{.*}}, {{.*}}, %{{.*}}[{{.*}}] : memref<?x?xf32>, vector<[4]xi1>, vector<[4]x[4]xf32>
  %c0 = arith.constant 0 : index
  arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%c0] : memref<?x?xf32>, vector<[4]xi1>, vector<[4]x[4]xf32>
  return
}

// -----

func.func @arm_sme_store_tile_slice_hor_f64(%tile : vector<[2]x[2]xf64>, %tile_slice_index : index, %mask : vector<[2]xi1>, %dest : memref<?x?xf64>) -> () {
  // CHECK: arm_sme.store_tile_slice {{.*}}, {{.*}}, %{{.*}}[{{.*}}] : memref<?x?xf64>, vector<[2]xi1>, vector<[2]x[2]xf64>
  %c0 = arith.constant 0 : index
  arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%c0] : memref<?x?xf64>, vector<[2]xi1>, vector<[2]x[2]xf64>
  return
}

// -----

func.func @arm_sme_store_tile_slice_ver_i8(%tile : vector<[16]x[16]xi8>, %tile_slice_index : index, %mask : vector<[16]xi1>, %dest : memref<?x?xi8>) -> () {
  // CHECK: arm_sme.store_tile_slice {{.*}} layout<vertical> : memref<?x?xi8>, vector<[16]xi1>, vector<[16]x[16]xi8>
  %c0 = arith.constant 0 : index
  arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%c0] layout<vertical> : memref<?x?xi8>, vector<[16]xi1>, vector<[16]x[16]xi8>
  return
}

// -----

func.func @arm_sme_store_tile_slice_ver_i16(%tile : vector<[8]x[8]xi16>, %tile_slice_index : index, %mask : vector<[8]xi1>, %dest : memref<?x?xi16>) -> () {
  // CHECK: arm_sme.store_tile_slice {{.*}} layout<vertical> : memref<?x?xi16>, vector<[8]xi1>, vector<[8]x[8]xi16>
  %c0 = arith.constant 0 : index
  arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%c0] layout<vertical> : memref<?x?xi16>, vector<[8]xi1>, vector<[8]x[8]xi16>
  return
}

// -----

func.func @arm_sme_store_tile_slice_ver_i32(%tile : vector<[4]x[4]xi32>, %tile_slice_index : index, %mask : vector<[4]xi1>, %dest : memref<?x?xi32>) -> () {
  // CHECK: arm_sme.store_tile_slice {{.*}} layout<vertical> : memref<?x?xi32>, vector<[4]xi1>, vector<[4]x[4]xi32>
  %c0 = arith.constant 0 : index
  arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%c0] layout<vertical> : memref<?x?xi32>, vector<[4]xi1>, vector<[4]x[4]xi32>
  return
}

// -----

func.func @arm_sme_store_tile_slice_ver_i64(%tile : vector<[2]x[2]xi64>, %tile_slice_index : index, %mask : vector<[2]xi1>, %dest : memref<?x?xi64>) -> () {
  // CHECK: arm_sme.store_tile_slice {{.*}} layout<vertical> : memref<?x?xi64>, vector<[2]xi1>, vector<[2]x[2]xi64>
  %c0 = arith.constant 0 : index
  arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%c0] layout<vertical> : memref<?x?xi64>, vector<[2]xi1>, vector<[2]x[2]xi64>
  return
}

// -----

func.func @arm_sme_store_tile_slice_ver_i128(%tile : vector<[1]x[1]xi128>, %tile_slice_index : index, %mask : vector<[1]xi1>, %dest : memref<?x?xi128>) -> () {
  // CHECK: arm_sme.store_tile_slice {{.*}} layout<vertical> : memref<?x?xi128>, vector<[1]xi1>, vector<[1]x[1]xi128>
  %c0 = arith.constant 0 : index
  arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%c0] layout<vertical> : memref<?x?xi128>, vector<[1]xi1>, vector<[1]x[1]xi128>
  return
}

// -----

func.func @arm_sme_store_tile_slice_ver_f16(%tile : vector<[8]x[8]xf16>, %tile_slice_index : index, %mask : vector<[8]xi1>, %dest : memref<?x?xf16>) -> () {
  // CHECK: arm_sme.store_tile_slice {{.*}} layout<vertical> : memref<?x?xf16>, vector<[8]xi1>, vector<[8]x[8]xf16>
  %c0 = arith.constant 0 : index
  arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%c0] layout<vertical> : memref<?x?xf16>, vector<[8]xi1>, vector<[8]x[8]xf16>
  return
}

// -----

func.func @arm_sme_store_tile_slice_ver_bf16(%tile : vector<[8]x[8]xbf16>, %tile_slice_index : index, %mask : vector<[8]xi1>, %dest : memref<?x?xbf16>) -> () {
  // CHECK: arm_sme.store_tile_slice {{.*}} layout<vertical> : memref<?x?xbf16>, vector<[8]xi1>, vector<[8]x[8]xbf16>
  %c0 = arith.constant 0 : index
  arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%c0] layout<vertical> : memref<?x?xbf16>, vector<[8]xi1>, vector<[8]x[8]xbf16>
  return
}

// -----

func.func @arm_sme_store_tile_slice_ver_f32(%tile : vector<[4]x[4]xf32>, %tile_slice_index : index, %mask : vector<[4]xi1>, %dest : memref<?x?xf32>) -> () {
  // CHECK: arm_sme.store_tile_slice {{.*}} layout<vertical> : memref<?x?xf32>, vector<[4]xi1>, vector<[4]x[4]xf32>
  %c0 = arith.constant 0 : index
  arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%c0] layout<vertical> : memref<?x?xf32>, vector<[4]xi1>, vector<[4]x[4]xf32>
  return
}

// -----

func.func @arm_sme_store_tile_slice_ver_f64(%tile : vector<[2]x[2]xf64>, %tile_slice_index : index, %mask : vector<[2]xi1>, %dest : memref<?x?xf64>) -> () {
  // CHECK: arm_sme.store_tile_slice {{.*}} layout<vertical> : memref<?x?xf64>, vector<[2]xi1>, vector<[2]x[2]xf64>
  %c0 = arith.constant 0 : index
  arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%c0] layout<vertical> : memref<?x?xf64>, vector<[2]xi1>, vector<[2]x[2]xf64>
  return
}

// -----

/// Layout is optional and horizontal is the default, verify it's still parsed.
func.func @arm_sme_store_tile_slice_hor_i8(%tile : vector<[16]x[16]xi8>, %tile_slice_index : index, %mask : vector<[16]xi1>, %dest : memref<?x?xi8>) -> () {
  // The op below spells out layout<horizontal>; the pattern on the next line
  // has no layout clause, so the round-tripped form must print without one.
  // CHECK: arm_sme.store_tile_slice {{.*}}, {{.*}}, %{{.*}}[{{.*}}] : memref<?x?xi8>, vector<[16]xi1>, vector<[16]x[16]xi8>
  %c0 = arith.constant 0 : index
  arm_sme.store_tile_slice %tile, %tile_slice_index, %mask, %dest[%c0] layout<horizontal> : memref<?x?xi8>, vector<[16]xi1>, vector<[16]x[16]xi8>
  return
}

//===----------------------------------------------------------------------===//
// arm_sme.insert_tile_slice
//===----------------------------------------------------------------------===//

// -----

/// insert_tile_slice round-trip, i8 elements.
func.func @arm_sme_insert_tile_slice_i8(%vector : vector<[16]xi8>, %tile : vector<[16]x[16]xi8>, %tile_slice_index : index) -> () {
  // CHECK: arm_sme.insert_tile_slice {{.*}} : vector<[16]xi8> into vector<[16]x[16]xi8>
  arm_sme.insert_tile_slice %vector, %tile[%tile_slice_index] : vector<[16]xi8> into vector<[16]x[16]xi8>
  return
}

// -----

/// insert_tile_slice round-trip, i16 elements.
func.func @arm_sme_insert_tile_slice_i16(%vector : vector<[8]xi16>, %tile : vector<[8]x[8]xi16>, %tile_slice_index : index) -> () {
  // CHECK: arm_sme.insert_tile_slice {{.*}} : vector<[8]xi16> into vector<[8]x[8]xi16>
  arm_sme.insert_tile_slice %vector, %tile[%tile_slice_index] : vector<[8]xi16> into vector<[8]x[8]xi16>
  return
}

// -----

/// insert_tile_slice round-trip, i32 elements.
func.func @arm_sme_insert_tile_slice_i32(%vector : vector<[4]xi32>, %tile : vector<[4]x[4]xi32>, %tile_slice_index : index) -> () {
  // CHECK: arm_sme.insert_tile_slice {{.*}} : vector<[4]xi32> into vector<[4]x[4]xi32>
  arm_sme.insert_tile_slice %vector, %tile[%tile_slice_index] : vector<[4]xi32> into vector<[4]x[4]xi32>
  return
}

// -----

/// insert_tile_slice round-trip, i64 elements.
func.func @arm_sme_insert_tile_slice_i64(%vector : vector<[2]xi64>, %tile : vector<[2]x[2]xi64>, %tile_slice_index : index) -> () {
  // CHECK: arm_sme.insert_tile_slice {{.*}} : vector<[2]xi64> into vector<[2]x[2]xi64>
  arm_sme.insert_tile_slice %vector, %tile[%tile_slice_index] : vector<[2]xi64> into vector<[2]x[2]xi64>
  return
}

// -----

/// insert_tile_slice round-trip, i128 elements.
func.func @arm_sme_insert_tile_slice_i128(%vector : vector<[1]xi128>, %tile : vector<[1]x[1]xi128>, %tile_slice_index : index) -> () {
  // CHECK: arm_sme.insert_tile_slice {{.*}} : vector<[1]xi128> into vector<[1]x[1]xi128>
  arm_sme.insert_tile_slice %vector, %tile[%tile_slice_index] : vector<[1]xi128> into vector<[1]x[1]xi128>
  return
}

// -----

/// insert_tile_slice round-trip, f16 elements.
func.func @arm_sme_insert_tile_slice_f16(%vector : vector<[8]xf16>, %tile : vector<[8]x[8]xf16>, %tile_slice_index : index) -> () {
  // CHECK: arm_sme.insert_tile_slice {{.*}} : vector<[8]xf16> into vector<[8]x[8]xf16>
  arm_sme.insert_tile_slice %vector, %tile[%tile_slice_index] : vector<[8]xf16> into vector<[8]x[8]xf16>
  return
}

// -----

/// insert_tile_slice round-trip, bf16 elements.
func.func @arm_sme_insert_tile_slice_bf16(%vector : vector<[8]xbf16>, %tile : vector<[8]x[8]xbf16>, %tile_slice_index : index) -> () {
  // CHECK: arm_sme.insert_tile_slice {{.*}} : vector<[8]xbf16> into vector<[8]x[8]xbf16>
  arm_sme.insert_tile_slice %vector, %tile[%tile_slice_index] : vector<[8]xbf16> into vector<[8]x[8]xbf16>
  return
}

// -----

/// insert_tile_slice round-trip, f32 elements.
func.func @arm_sme_insert_tile_slice_f32(%vector : vector<[4]xf32>, %tile : vector<[4]x[4]xf32>, %tile_slice_index : index) -> () {
  // CHECK: arm_sme.insert_tile_slice {{.*}} : vector<[4]xf32> into vector<[4]x[4]xf32>
  arm_sme.insert_tile_slice %vector, %tile[%tile_slice_index] : vector<[4]xf32> into vector<[4]x[4]xf32>
  return
}

// -----

/// insert_tile_slice round-trip, f64 elements.
func.func @arm_sme_insert_tile_slice_f64(%vector : vector<[2]xf64>, %tile : vector<[2]x[2]xf64>, %tile_slice_index : index) -> () {
  // CHECK: arm_sme.insert_tile_slice {{.*}} : vector<[2]xf64> into vector<[2]x[2]xf64>
  arm_sme.insert_tile_slice %vector, %tile[%tile_slice_index] : vector<[2]xf64> into vector<[2]x[2]xf64>
  return
}

// -----

/// insert_tile_slice with an explicit layout<vertical>, which must be printed.
func.func @arm_sme_insert_tile_slice_ver_i8(%vector : vector<[16]xi8>, %tile : vector<[16]x[16]xi8>, %tile_slice_index : index) -> () {
  // CHECK: arm_sme.insert_tile_slice {{.*}} layout<vertical> : vector<[16]xi8> into vector<[16]x[16]xi8>
  arm_sme.insert_tile_slice %vector, %tile[%tile_slice_index] layout<vertical> : vector<[16]xi8> into vector<[16]x[16]xi8>
  return
}

//===----------------------------------------------------------------------===//
// arm_sme.extract_tile_slice
//===----------------------------------------------------------------------===//

// -----

/// extract_tile_slice round-trip, i8 elements.
func.func @arm_sme_extract_tile_slice_i8(%tile : vector<[16]x[16]xi8>, %tile_slice_index : index) -> vector<[16]xi8> {
  // CHECK: arm_sme.extract_tile_slice {{.*}} : vector<[16]xi8> from vector<[16]x[16]xi8>
  %slice = arm_sme.extract_tile_slice %tile[%tile_slice_index] : vector<[16]xi8> from vector<[16]x[16]xi8>
  return %slice : vector<[16]xi8>
}

// -----

/// extract_tile_slice round-trip, i16 elements.
func.func @arm_sme_extract_tile_slice_i16(%tile : vector<[8]x[8]xi16>, %tile_slice_index : index) -> vector<[8]xi16> {
  // CHECK: arm_sme.extract_tile_slice {{.*}} : vector<[8]xi16> from vector<[8]x[8]xi16>
  %slice = arm_sme.extract_tile_slice %tile[%tile_slice_index] : vector<[8]xi16> from vector<[8]x[8]xi16>
  return %slice : vector<[8]xi16>
}

// -----

/// extract_tile_slice round-trip, i32 elements.
func.func @arm_sme_extract_tile_slice_i32(%tile : vector<[4]x[4]xi32>, %tile_slice_index : index) -> vector<[4]xi32> {
  // CHECK: arm_sme.extract_tile_slice {{.*}} : vector<[4]xi32> from vector<[4]x[4]xi32>
  %slice = arm_sme.extract_tile_slice %tile[%tile_slice_index] : vector<[4]xi32> from vector<[4]x[4]xi32>
  return %slice : vector<[4]xi32>
}

// -----

/// extract_tile_slice round-trip, i64 elements.
func.func @arm_sme_extract_tile_slice_i64(%tile : vector<[2]x[2]xi64>, %tile_slice_index : index) -> vector<[2]xi64> {
  // CHECK: arm_sme.extract_tile_slice {{.*}} : vector<[2]xi64> from vector<[2]x[2]xi64>
  %slice = arm_sme.extract_tile_slice %tile[%tile_slice_index] : vector<[2]xi64> from vector<[2]x[2]xi64>
  return %slice : vector<[2]xi64>
}

// -----

/// extract_tile_slice round-trip, i128 elements.
func.func @arm_sme_extract_tile_slice_i128(%tile : vector<[1]x[1]xi128>, %tile_slice_index : index) -> vector<[1]xi128> {
  // CHECK: arm_sme.extract_tile_slice {{.*}} : vector<[1]xi128> from vector<[1]x[1]xi128>
  %slice = arm_sme.extract_tile_slice %tile[%tile_slice_index] : vector<[1]xi128> from vector<[1]x[1]xi128>
  return %slice : vector<[1]xi128>
}

// -----

/// extract_tile_slice round-trip, f16 elements.
func.func @arm_sme_extract_tile_slice_f16(%tile : vector<[8]x[8]xf16>, %tile_slice_index : index) -> vector<[8]xf16> {
  // CHECK: arm_sme.extract_tile_slice {{.*}} : vector<[8]xf16> from vector<[8]x[8]xf16>
  %slice = arm_sme.extract_tile_slice %tile[%tile_slice_index] : vector<[8]xf16> from vector<[8]x[8]xf16>
  return %slice : vector<[8]xf16>
}

// -----

/// extract_tile_slice round-trip, bf16 elements.
func.func @arm_sme_extract_tile_slice_bf16(%tile : vector<[8]x[8]xbf16>, %tile_slice_index : index) -> vector<[8]xbf16> {
  // CHECK: arm_sme.extract_tile_slice {{.*}} : vector<[8]xbf16> from vector<[8]x[8]xbf16>
  %slice = arm_sme.extract_tile_slice %tile[%tile_slice_index] : vector<[8]xbf16> from vector<[8]x[8]xbf16>
  return %slice : vector<[8]xbf16>
}

// -----

/// extract_tile_slice round-trip, f32 elements.
func.func @arm_sme_extract_tile_slice_f32(%tile : vector<[4]x[4]xf32>, %tile_slice_index : index) -> vector<[4]xf32> {
  // CHECK: arm_sme.extract_tile_slice {{.*}} : vector<[4]xf32> from vector<[4]x[4]xf32>
  %slice = arm_sme.extract_tile_slice %tile[%tile_slice_index] : vector<[4]xf32> from vector<[4]x[4]xf32>
  return %slice : vector<[4]xf32>
}

// -----

/// extract_tile_slice round-trip, f64 elements.
func.func @arm_sme_extract_tile_slice_f64(%tile : vector<[2]x[2]xf64>, %tile_slice_index : index) -> vector<[2]xf64> {
  // CHECK: arm_sme.extract_tile_slice {{.*}} : vector<[2]xf64> from vector<[2]x[2]xf64>
  %slice = arm_sme.extract_tile_slice %tile[%tile_slice_index] : vector<[2]xf64> from vector<[2]x[2]xf64>
  return %slice : vector<[2]xf64>
}

// -----

/// extract_tile_slice with an explicit layout<vertical>, which must be printed.
func.func @arm_sme_extract_tile_slice_ver_f64(%tile : vector<[2]x[2]xf64>, %tile_slice_index : index) -> vector<[2]xf64> {
  // CHECK: arm_sme.extract_tile_slice {{.*}} layout<vertical> : vector<[2]xf64> from vector<[2]x[2]xf64>
  %slice = arm_sme.extract_tile_slice %tile[%tile_slice_index] layout<vertical> : vector<[2]xf64> from vector<[2]x[2]xf64>
  return %slice : vector<[2]xf64>
}

//===----------------------------------------------------------------------===//
// arm_sme.outerproduct
//===----------------------------------------------------------------------===//

// -----

/// outerproduct with only the two vector operands.
func.func @arm_sme_outerproduct(%vecA: vector<[8]xi16>, %vecB: vector<[8]xi16>) -> vector<[8]x[8]xi16> {
  // CHECK: arm_sme.outerproduct {{.*}}, {{.*}} : vector<[8]xi16>, vector<[8]xi16>
  %result = arm_sme.outerproduct %vecA, %vecB : vector<[8]xi16>, vector<[8]xi16>
  return %result : vector<[8]x[8]xi16>
}

// -----

/// outerproduct with a masks(...) clause.
func.func @arm_sme_outerproduct_with_masking(%vecA: vector<[4]xf32>, %vecB: vector<[4]xf32>, %maskA: vector<[4]xi1>, %maskB: vector<[4]xi1>) -> vector<[4]x[4]xf32> {
  // CHECK: arm_sme.outerproduct {{.*}}, {{.*}} masks({{.*}}, {{.*}}) : vector<[4]xf32>, vector<[4]xf32>
  %result = arm_sme.outerproduct %vecA, %vecB masks(%maskA, %maskB) : vector<[4]xf32>, vector<[4]xf32>
  return %result : vector<[4]x[4]xf32>
}

// -----

/// outerproduct with an acc(...) clause.
func.func @arm_sme_outerproduct_with_acc(%vecA: vector<[2]xi64>, %vecB: vector<[2]xi64>, %acc: vector<[2]x[2]xi64>) -> vector<[2]x[2]xi64> {
  // CHECK: arm_sme.outerproduct {{.*}}, {{.*}} acc({{.*}}) : vector<[2]xi64>, vector<[2]xi64>
  %tile = arm_sme.outerproduct %vecA, %vecB acc(%acc) : vector<[2]xi64>, vector<[2]xi64>
  return %tile : vector<[2]x[2]xi64>
}

// -----

/// outerproduct with a kind<sub> attribute.
func.func @arm_sme_outerproduct_with_kind(%vecA: vector<[2]xf64>, %vecB: vector<[2]xf64>) -> vector<[2]x[2]xf64> {
  // CHECK: arm_sme.outerproduct {{.*}}, {{.*}} kind<sub> : vector<[2]xf64>, vector<[2]xf64>
  %tile = arm_sme.outerproduct %vecA, %vecB kind<sub> : vector<[2]xf64>, vector<[2]xf64>
  return %tile : vector<[2]x[2]xf64>
}

// -----

/// outerproduct with kind, acc and masks all present, in that printed order.
func.func @arm_sme_outerproduct_with_everything(%vecA: vector<[16]xi8>, %vecB: vector<[16]xi8>, %acc: vector<[16]x[16]xi8>, %maskA: vector<[16]xi1>, %maskB: vector<[16]xi1>) -> vector<[16]x[16]xi8> {
  // CHECK: arm_sme.outerproduct {{.*}}, {{.*}} kind<sub> acc({{.*}}) masks({{.*}}, {{.*}}) : vector<[16]xi8>, vector<[16]xi8>
  %tile = arm_sme.outerproduct %vecA, %vecB kind<sub> acc(%acc) masks(%maskA, %maskB) : vector<[16]xi8>, vector<[16]xi8>
  return %tile : vector<[16]x[16]xi8>
}

//===----------------------------------------------------------------------===//
// arm_sme.streaming_vl
//===----------------------------------------------------------------------===//

// -----

/// streaming_vl with the <byte> selector; the result is an index.
func.func @arm_sme_streaming_vl_bytes() -> index {
  // CHECK: arm_sme.streaming_vl <byte>
  %svl = arm_sme.streaming_vl <byte>
  return %svl : index
}

// -----

/// streaming_vl with the <half> selector.
func.func @arm_sme_streaming_vl_half_words() -> index {
  // CHECK: arm_sme.streaming_vl <half>
  %svl = arm_sme.streaming_vl <half>
  return %svl : index
}

// -----

/// streaming_vl with the <word> selector.
func.func @arm_sme_streaming_vl_words() -> index {
  // CHECK: arm_sme.streaming_vl <word>
  %svl = arm_sme.streaming_vl <word>
  return %svl : index
}

// -----

/// streaming_vl with the <double> selector.
func.func @arm_sme_streaming_vl_double_words() -> index {
  // CHECK: arm_sme.streaming_vl <double>
  %svl = arm_sme.streaming_vl <double>
  return %svl : index
}

//===----------------------------------------------------------------------===//
// arm_sme.fmopa_2way
//===----------------------------------------------------------------------===//

// -----

/// fmopa_2way: f16 inputs into an f32 result.
func.func @arm_sme_fmopa_2way_f16f16_to_f32(%vecA: vector<[8]xf16>, %vecB: vector<[8]xf16>) -> vector<[4]x[4]xf32> {
  // CHECK: arm_sme.fmopa_2way {{.*}}, {{.*}} : vector<[8]xf16>, vector<[8]xf16> into vector<[4]x[4]xf32>
  %tile = arm_sme.fmopa_2way %vecA, %vecB : vector<[8]xf16>, vector<[8]xf16> into vector<[4]x[4]xf32>
  return %tile : vector<[4]x[4]xf32>
}

// -----

/// fmopa_2way: bf16 inputs into an f32 result.
func.func @arm_sme_fmopa_2way_bf16bf16_to_f32(%vecA: vector<[8]xbf16>, %vecB: vector<[8]xbf16>) -> vector<[4]x[4]xf32> {
  // CHECK: arm_sme.fmopa_2way {{.*}}, {{.*}} : vector<[8]xbf16>, vector<[8]xbf16> into vector<[4]x[4]xf32>
  %tile = arm_sme.fmopa_2way %vecA, %vecB : vector<[8]xbf16>, vector<[8]xbf16> into vector<[4]x[4]xf32>
  return %tile : vector<[4]x[4]xf32>
}

// -----

/// fmopa_2way with a masks(...) clause.
func.func @arm_sme_fmopa_2way_with_masking(%vecA: vector<[8]xf16>, %vecB: vector<[8]xf16>, %maskA: vector<[8]xi1>, %maskB: vector<[8]xi1>) -> vector<[4]x[4]xf32> {
  // CHECK: arm_sme.fmopa_2way {{.*}}, {{.*}} masks({{.*}}, {{.*}}) : vector<[8]xf16>, vector<[8]xf16> into vector<[4]x[4]xf32>
  %tile = arm_sme.fmopa_2way %vecA, %vecB masks(%maskA, %maskB) : vector<[8]xf16>, vector<[8]xf16> into vector<[4]x[4]xf32>
  return %tile : vector<[4]x[4]xf32>
}

// -----

/// fmopa_2way with an acc(...) clause.
func.func @arm_sme_fmopa_2way_with_acc(%vecA: vector<[8]xf16>, %vecB: vector<[8]xf16>, %acc : vector<[4]x[4]xf32>) -> vector<[4]x[4]xf32> {
  // CHECK: arm_sme.fmopa_2way {{.*}}, {{.*}} acc({{.*}}) : vector<[8]xf16>, vector<[8]xf16> into vector<[4]x[4]xf32>
  %tile = arm_sme.fmopa_2way %vecA, %vecB acc(%acc) : vector<[8]xf16>, vector<[8]xf16> into vector<[4]x[4]xf32>
  return %tile : vector<[4]x[4]xf32>
}

// -----

/// fmopa_2way with both acc and masks, in that printed order.
func.func @arm_sme_fmopa_2way_with_everything(%vecA: vector<[8]xf16>, %vecB: vector<[8]xf16>, %acc : vector<[4]x[4]xf32>, %maskA: vector<[8]xi1>, %maskB: vector<[8]xi1>) -> vector<[4]x[4]xf32> {
  // CHECK: arm_sme.fmopa_2way {{.*}}, {{.*}} acc({{.*}}) masks({{.*}}, {{.*}}) : vector<[8]xf16>, vector<[8]xf16> into vector<[4]x[4]xf32>
  %tile = arm_sme.fmopa_2way %vecA, %vecB acc(%acc) masks(%maskA, %maskB) : vector<[8]xf16>, vector<[8]xf16> into vector<[4]x[4]xf32>
  return %tile : vector<[4]x[4]xf32>
}

//===----------------------------------------------------------------------===//
// arm_sme.fmops_2way
//===----------------------------------------------------------------------===//

// -----

/// fmops_2way: f16 inputs into an f32 result.
func.func @arm_sme_fmops_2way_f16f16_to_f32(%vecA: vector<[8]xf16>, %vecB: vector<[8]xf16>) -> vector<[4]x[4]xf32> {
  // CHECK: arm_sme.fmops_2way {{.*}}, {{.*}} : vector<[8]xf16>, vector<[8]xf16> into vector<[4]x[4]xf32>
  %tile = arm_sme.fmops_2way %vecA, %vecB : vector<[8]xf16>, vector<[8]xf16> into vector<[4]x[4]xf32>
  return %tile : vector<[4]x[4]xf32>
}

// -----

/// fmops_2way: bf16 inputs into an f32 result.
func.func @arm_sme_fmops_2way_bf16bf16_to_f32(%vecA: vector<[8]xbf16>, %vecB: vector<[8]xbf16>) -> vector<[4]x[4]xf32> {
  // CHECK: arm_sme.fmops_2way {{.*}}, {{.*}} : vector<[8]xbf16>, vector<[8]xbf16> into vector<[4]x[4]xf32>
  %tile = arm_sme.fmops_2way %vecA, %vecB : vector<[8]xbf16>, vector<[8]xbf16> into vector<[4]x[4]xf32>
  return %tile : vector<[4]x[4]xf32>
}

//===----------------------------------------------------------------------===//
// arm_sme.smopa_2way
//===----------------------------------------------------------------------===//

// -----

/// smopa_2way: i16 inputs into an i32 result.
func.func @arm_sme_smopa_2way_i16i16_to_i32(%vecA: vector<[8]xi16>, %vecB: vector<[8]xi16>) -> vector<[4]x[4]xi32> {
  // CHECK: arm_sme.smopa_2way {{.*}}, {{.*}} : vector<[8]xi16>, vector<[8]xi16> into vector<[4]x[4]xi32>
  %tile = arm_sme.smopa_2way %vecA, %vecB : vector<[8]xi16>, vector<[8]xi16> into vector<[4]x[4]xi32>
  return %tile : vector<[4]x[4]xi32>
}

//===----------------------------------------------------------------------===//
// arm_sme.smops_2way
//===----------------------------------------------------------------------===//

// -----

/// smops_2way: i16 inputs into an i32 result.
func.func @arm_sme_smops_2way_i16i16_to_i32(%vecA: vector<[8]xi16>, %vecB: vector<[8]xi16>) -> vector<[4]x[4]xi32> {
  // CHECK: arm_sme.smops_2way {{.*}}, {{.*}} : vector<[8]xi16>, vector<[8]xi16> into vector<[4]x[4]xi32>
  %tile = arm_sme.smops_2way %vecA, %vecB : vector<[8]xi16>, vector<[8]xi16> into vector<[4]x[4]xi32>
  return %tile : vector<[4]x[4]xi32>
}

//===----------------------------------------------------------------------===//
// arm_sme.umopa_2way
//===----------------------------------------------------------------------===//

// -----

/// umopa_2way: i16 inputs into an i32 result.
func.func @arm_sme_umopa_2way_i16i16_to_i32(%vecA: vector<[8]xi16>, %vecB: vector<[8]xi16>) -> vector<[4]x[4]xi32> {
  // CHECK: arm_sme.umopa_2way {{.*}}, {{.*}} : vector<[8]xi16>, vector<[8]xi16> into vector<[4]x[4]xi32>
  %tile = arm_sme.umopa_2way %vecA, %vecB : vector<[8]xi16>, vector<[8]xi16> into vector<[4]x[4]xi32>
  return %tile : vector<[4]x[4]xi32>
}

//===----------------------------------------------------------------------===//
// arm_sme.umops_2way
//===----------------------------------------------------------------------===//

// -----

/// umops_2way: i16 inputs into an i32 result.
func.func @arm_sme_umops_2way_i16i16_to_i32(%vecA: vector<[8]xi16>, %vecB: vector<[8]xi16>) -> vector<[4]x[4]xi32> {
  // CHECK: arm_sme.umops_2way {{.*}}, {{.*}} : vector<[8]xi16>, vector<[8]xi16> into vector<[4]x[4]xi32>
  %tile = arm_sme.umops_2way %vecA, %vecB : vector<[8]xi16>, vector<[8]xi16> into vector<[4]x[4]xi32>
  return %tile : vector<[4]x[4]xi32>
}

//===----------------------------------------------------------------------===//
// arm_sme.smopa_4way
//===----------------------------------------------------------------------===//

// -----

/// smopa_4way: i8 inputs into an i32 result.
func.func @arm_sme_smopa_4way_i8i8_to_i32(%vecA: vector<[16]xi8>, %vecB: vector<[16]xi8>) -> vector<[4]x[4]xi32> {
  // CHECK: arm_sme.smopa_4way {{.*}}, {{.*}} : vector<[16]xi8>, vector<[16]xi8> into vector<[4]x[4]xi32>
  %tile = arm_sme.smopa_4way %vecA, %vecB : vector<[16]xi8>, vector<[16]xi8> into vector<[4]x[4]xi32>
  return %tile : vector<[4]x[4]xi32>
}

// -----

/// smopa_4way: i16 inputs into an i64 result.
func.func @arm_sme_smopa_4way_i16i16_to_i64(%vecA: vector<[8]xi16>, %vecB: vector<[8]xi16>) -> vector<[2]x[2]xi64> {
  // CHECK: arm_sme.smopa_4way {{.*}}, {{.*}} : vector<[8]xi16>, vector<[8]xi16> into vector<[2]x[2]xi64>
  %tile = arm_sme.smopa_4way %vecA, %vecB : vector<[8]xi16>, vector<[8]xi16> into vector<[2]x[2]xi64>
  return %tile : vector<[2]x[2]xi64>
}

//===----------------------------------------------------------------------===//
// arm_sme.smops_4way
//===----------------------------------------------------------------------===//

// -----

/// smops_4way: i8 inputs into an i32 result.
func.func @arm_sme_smops_4way_i8i8_to_i32(%vecA: vector<[16]xi8>, %vecB: vector<[16]xi8>) -> vector<[4]x[4]xi32> {
  // CHECK: arm_sme.smops_4way {{.*}}, {{.*}} : vector<[16]xi8>, vector<[16]xi8> into vector<[4]x[4]xi32>
  %tile = arm_sme.smops_4way %vecA, %vecB : vector<[16]xi8>, vector<[16]xi8> into vector<[4]x[4]xi32>
  return %tile : vector<[4]x[4]xi32>
}

// -----

/// smops_4way: i16 inputs into an i64 result.
func.func @arm_sme_smops_4way_i16i16_to_i64(%vecA: vector<[8]xi16>, %vecB: vector<[8]xi16>) -> vector<[2]x[2]xi64> {
  // CHECK: arm_sme.smops_4way {{.*}}, {{.*}} : vector<[8]xi16>, vector<[8]xi16> into vector<[2]x[2]xi64>
  %tile = arm_sme.smops_4way %vecA, %vecB : vector<[8]xi16>, vector<[8]xi16> into vector<[2]x[2]xi64>
  return %tile : vector<[2]x[2]xi64>
}

//===----------------------------------------------------------------------===//
// arm_sme.umopa_4way
//===----------------------------------------------------------------------===//

// -----

/// umopa_4way: i8 inputs into an i32 result.
func.func @arm_sme_umopa_4way_i8i8_to_i32(%vecA: vector<[16]xi8>, %vecB: vector<[16]xi8>) -> vector<[4]x[4]xi32> {
  // CHECK: arm_sme.umopa_4way {{.*}}, {{.*}} : vector<[16]xi8>, vector<[16]xi8> into vector<[4]x[4]xi32>
  %tile = arm_sme.umopa_4way %vecA, %vecB : vector<[16]xi8>, vector<[16]xi8> into vector<[4]x[4]xi32>
  return %tile : vector<[4]x[4]xi32>
}

// -----

/// umopa_4way: i16 inputs into an i64 result.
func.func @arm_sme_umopa_4way_i16i16_to_i64(%vecA: vector<[8]xi16>, %vecB: vector<[8]xi16>) -> vector<[2]x[2]xi64> {
  // CHECK: arm_sme.umopa_4way {{.*}}, {{.*}} : vector<[8]xi16>, vector<[8]xi16> into vector<[2]x[2]xi64>
  %tile = arm_sme.umopa_4way %vecA, %vecB : vector<[8]xi16>, vector<[8]xi16> into vector<[2]x[2]xi64>
  return %tile : vector<[2]x[2]xi64>
}

//===----------------------------------------------------------------------===//
// arm_sme.umops_4way
//===----------------------------------------------------------------------===//

// -----

/// umops_4way: i8 inputs into an i32 result.
func.func @arm_sme_umops_4way_i8i8_to_i32(%vecA: vector<[16]xi8>, %vecB: vector<[16]xi8>) -> vector<[4]x[4]xi32> {
  // CHECK: arm_sme.umops_4way {{.*}}, {{.*}} : vector<[16]xi8>, vector<[16]xi8> into vector<[4]x[4]xi32>
  %tile = arm_sme.umops_4way %vecA, %vecB : vector<[16]xi8>, vector<[16]xi8> into vector<[4]x[4]xi32>
  return %tile : vector<[4]x[4]xi32>
}

// -----

/// umops_4way: i16 inputs into an i64 result.
func.func @arm_sme_umops_4way_i16i16_to_i64(%vecA: vector<[8]xi16>, %vecB: vector<[8]xi16>) -> vector<[2]x[2]xi64> {
  // CHECK: arm_sme.umops_4way {{.*}}, {{.*}} : vector<[8]xi16>, vector<[8]xi16> into vector<[2]x[2]xi64>
  %result = arm_sme.umops_4way %vecA, %vecB : vector<[8]xi16>, vector<[8]xi16> into vector<[2]x[2]xi64>
  return %result : vector<[2]x[2]xi64>
}

//===----------------------------------------------------------------------===//
// arm_sme.sumopa_4way
//===----------------------------------------------------------------------===//

// -----

/// sumopa_4way: i8 inputs into an i32 result.
func.func @arm_sme_sumopa_4way_i8i8_to_i32(%vecA: vector<[16]xi8>, %vecB: vector<[16]xi8>) -> vector<[4]x[4]xi32> {
  // CHECK: arm_sme.sumopa_4way {{.*}}, {{.*}} : vector<[16]xi8>, vector<[16]xi8> into vector<[4]x[4]xi32>
  %result = arm_sme.sumopa_4way %vecA, %vecB : vector<[16]xi8>, vector<[16]xi8> into vector<[4]x[4]xi32>
  return %result : vector<[4]x[4]xi32>
}

// -----

/// sumopa_4way: i16 inputs into an i64 result.
func.func @arm_sme_sumopa_4way_i16i16_to_i64(%vecA: vector<[8]xi16>, %vecB: vector<[8]xi16>) -> vector<[2]x[2]xi64> {
  // CHECK: arm_sme.sumopa_4way {{.*}}, {{.*}} : vector<[8]xi16>, vector<[8]xi16> into vector<[2]x[2]xi64>
  %result = arm_sme.sumopa_4way %vecA, %vecB : vector<[8]xi16>, vector<[8]xi16> into vector<[2]x[2]xi64>
  return %result : vector<[2]x[2]xi64>
}

//===----------------------------------------------------------------------===//
// arm_sme.sumops_4way
//===----------------------------------------------------------------------===//

// -----

/// sumops_4way: i8 inputs into an i32 result.
func.func @arm_sme_sumops_4way_i8i8_to_i32(%vecA: vector<[16]xi8>, %vecB: vector<[16]xi8>) -> vector<[4]x[4]xi32> {
  // CHECK: arm_sme.sumops_4way {{.*}}, {{.*}} : vector<[16]xi8>, vector<[16]xi8> into vector<[4]x[4]xi32>
  %result = arm_sme.sumops_4way %vecA, %vecB : vector<[16]xi8>, vector<[16]xi8> into vector<[4]x[4]xi32>
  return %result : vector<[4]x[4]xi32>
}

// -----

/// sumops_4way: i16 inputs into an i64 result.
func.func @arm_sme_sumops_4way_i16i16_to_i64(%vecA: vector<[8]xi16>, %vecB: vector<[8]xi16>) -> vector<[2]x[2]xi64> {
  // CHECK: arm_sme.sumops_4way {{.*}}, {{.*}} : vector<[8]xi16>, vector<[8]xi16> into vector<[2]x[2]xi64>
  %result = arm_sme.sumops_4way %vecA, %vecB : vector<[8]xi16>, vector<[8]xi16> into vector<[2]x[2]xi64>
  return %result : vector<[2]x[2]xi64>
}

//===----------------------------------------------------------------------===//
// arm_sme.usmopa_4way
//===----------------------------------------------------------------------===//

// -----

/// usmopa_4way: i8 inputs into an i32 result.
func.func @arm_sme_usmopa_4way_i8i8_to_i32(%vecA: vector<[16]xi8>, %vecB: vector<[16]xi8>) -> vector<[4]x[4]xi32> {
  // CHECK: arm_sme.usmopa_4way {{.*}}, {{.*}} : vector<[16]xi8>, vector<[16]xi8> into vector<[4]x[4]xi32>
  %result = arm_sme.usmopa_4way %vecA, %vecB : vector<[16]xi8>, vector<[16]xi8> into vector<[4]x[4]xi32>
  return %result : vector<[4]x[4]xi32>
}

// -----

/// usmopa_4way: i16 inputs into an i64 result.
func.func @arm_sme_usmopa_4way_i16i16_to_i64(%vecA: vector<[8]xi16>, %vecB: vector<[8]xi16>) -> vector<[2]x[2]xi64> {
  // CHECK: arm_sme.usmopa_4way {{.*}}, {{.*}} : vector<[8]xi16>, vector<[8]xi16> into vector<[2]x[2]xi64>
  %result = arm_sme.usmopa_4way %vecA, %vecB : vector<[8]xi16>, vector<[8]xi16> into vector<[2]x[2]xi64>
  return %result : vector<[2]x[2]xi64>
}

//===----------------------------------------------------------------------===//
// arm_sme.usmops_4way
//===----------------------------------------------------------------------===//

// -----

/// usmops_4way: i8 inputs into an i32 result.
func.func @arm_sme_usmops_4way_i8i8_to_i32(%vecA: vector<[16]xi8>, %vecB: vector<[16]xi8>) -> vector<[4]x[4]xi32> {
  // CHECK: arm_sme.usmops_4way {{.*}}, {{.*}} : vector<[16]xi8>, vector<[16]xi8> into vector<[4]x[4]xi32>
  %result = arm_sme.usmops_4way %vecA, %vecB : vector<[16]xi8>, vector<[16]xi8> into vector<[4]x[4]xi32>
  return %result : vector<[4]x[4]xi32>
}

// -----

/// usmops_4way: i16 inputs into an i64 result.
func.func @arm_sme_usmops_4way_i16i16_to_i64(%vecA: vector<[8]xi16>, %vecB: vector<[8]xi16>) -> vector<[2]x[2]xi64> {
  // CHECK: arm_sme.usmops_4way {{.*}}, {{.*}} : vector<[8]xi16>, vector<[8]xi16> into vector<[2]x[2]xi64>
  %result = arm_sme.usmops_4way %vecA, %vecB : vector<[8]xi16>, vector<[8]xi16> into vector<[2]x[2]xi64>
  return %result : vector<[2]x[2]xi64>
}

//===----------------------------------------------------------------------===//
// arm_sme.copy_tile
//===----------------------------------------------------------------------===//

// -----

/// copy_tile round-trip. Like every other test in this file it sits in its own
/// split chunk and pins the printed form of the op.
func.func @arm_sme_copy_tile(%vec: vector<[4]x[4]xf32>) -> vector<[4]x[4]xf32> {
  // CHECK: arm_sme.copy_tile {{.*}} : vector<[4]x[4]xf32>
  %result = arm_sme.copy_tile %vec : vector<[4]x[4]xf32>
  return %result : vector<[4]x[4]xf32>
}