xref: /llvm-project/mlir/test/Dialect/ArmSME/tile-allocation-spills-with-mixed-tile-types.mlir (revision 6c7ec6e1e6646fb334064bda6f301fdb52390d9b)
1// RUN: mlir-opt %s -test-arm-sme-tile-allocation -split-input-file | FileCheck %s
2
3// CHECK-LABEL: @always_spill_larger_or_equal_tile_type
4// CHECK: arm_sme.zero {tile_id = 0 : i32} : vector<[4]x[4]xf32>
5// CHECK: arm_sme.zero {tile_id = 1 : i32} : vector<[4]x[4]xf32>
6// CHECK: arm_sme.zero {tile_id = 2 : i32} : vector<[4]x[4]xf32>
7// CHECK: arm_sme.zero {tile_id = 3 : i32} : vector<[4]x[4]xf32>
8// CHECK: arm_sme.tile_load {{.*}} {tile_id = 16 : i32} : memref<?x?xf16>, vector<[8]x[8]xf16>
9func.func @always_spill_larger_or_equal_tile_type(%memref: memref<?x?xf16>) -> (vector<[4]x[4]xf32>, vector<[4]x[4]xf32>, vector<[4]x[4]xf32>, vector<[4]x[4]xf32>, vector<[8]x[8]xf16>) {
10  %origin = arith.constant 0 : index
11  %zero_a = arm_sme.zero : vector<[4]x[4]xf32>
12  %zero_b = arm_sme.zero : vector<[4]x[4]xf32>
13  %zero_c = arm_sme.zero : vector<[4]x[4]xf32>
14  %zero_d = arm_sme.zero : vector<[4]x[4]xf32>
15  // All four f32 zeros are still live (they are returned) when the f16 load is created. The zeros are 'trivial' spills, yet the load is the value that gets spilled (the CHECK lines expect tile_id = 16 for it), as freeing a single `f32` tile would not make room for the load.
16  %tile_f16 = arm_sme.tile_load %memref[%origin, %origin] : memref<?x?xf16>, vector<[8]x[8]xf16>
17  return %zero_a, %zero_b, %zero_c, %zero_d, %tile_f16 : vector<[4]x[4]xf32>, vector<[4]x[4]xf32>, vector<[4]x[4]xf32>, vector<[4]x[4]xf32>, vector<[8]x[8]xf16>
18}
19
20// -----
21
22// CHECK-LABEL: @spill_larger_tile_type
23// CHECK: arm_sme.zero {tile_id = 16 : i32} : vector<[16]x[16]xi8>
24// CHECK: arm_sme.tile_load {{.*}} {tile_id = 0 : i32} : memref<?x?xf32>, vector<[4]x[4]xf32>
25// CHECK: arm_sme.tile_load {{.*}} {tile_id = 1 : i32} : memref<?x?xf32>, vector<[4]x[4]xf32>
26// CHECK: arm_sme.tile_load {{.*}} {tile_id = 2 : i32} : memref<?x?xf32>, vector<[4]x[4]xf32>
27// CHECK: arm_sme.tile_load {{.*}} {tile_id = 3 : i32} : memref<?x?xf32>, vector<[4]x[4]xf32>
28func.func @spill_larger_tile_type(%memref: memref<?x?xf32>) -> (vector<[16]x[16]xi8>, vector<[4]x[4]xf32>, vector<[4]x[4]xf32>, vector<[4]x[4]xf32>, vector<[4]x[4]xf32>) {
29  %origin = arith.constant 0 : index
30  // The i8 zero is defined first and is the value expected to be spilled (tile_id = 16 in the CHECK lines); spilling it should free up space for all four f32 loads (tile_ids 0 to 3).
31  %zero_i8 = arm_sme.zero : vector<[16]x[16]xi8>
32  %load_a = arm_sme.tile_load %memref[%origin, %origin] : memref<?x?xf32>, vector<[4]x[4]xf32>
33  %load_b = arm_sme.tile_load %memref[%origin, %origin] : memref<?x?xf32>, vector<[4]x[4]xf32>
34  %load_c = arm_sme.tile_load %memref[%origin, %origin] : memref<?x?xf32>, vector<[4]x[4]xf32>
35  %load_d = arm_sme.tile_load %memref[%origin, %origin] : memref<?x?xf32>, vector<[4]x[4]xf32>
36  return %zero_i8, %load_a, %load_b, %load_c, %load_d : vector<[16]x[16]xi8>, vector<[4]x[4]xf32>, vector<[4]x[4]xf32>, vector<[4]x[4]xf32>, vector<[4]x[4]xf32>
37}
38