xref: /llvm-project/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/use-too-many-tiles.mlir (revision fe55c34d19628304e0ca6a0e14a0b786b93d0e02)
1// RUN: mlir-opt %s \
2// RUN:   -test-lower-to-arm-sme -test-lower-to-llvm -verify-diagnostics | \
3// RUN: %mcr_aarch64_cmd \
4// RUN:   -e=main -entry-point-result=void \
5// RUN:   -march=aarch64 -mattr="+sve,+sme" \
6// RUN:   -shared-libs=%native_mlir_runner_utils,%native_mlir_c_runner_utils,%native_arm_sme_abi_shlib | \
7// RUN: FileCheck %s
8
9/// This function uses too many tiles! There are only two i16 tiles (ZA0.H
10/// and ZA1.H), but this function uses five i16 tiles! Very expensive
11/// spills/reloads will be inserted to emulate the extra three tiles. Note:
12/// This is only done to avoid the compiler erroring out but is expected to
13/// have very poor performance (hence the warning).
14func.func @use_too_many_tiles(%a: memref<?x?xi16>, %b:  memref<?x?xi16>, %c: memref<?x?xi16>) {
  // Creates five [8]x[8] i16 tile values -- two splat constants and three
  // loads from %a, %b and %c at offset [0, 0] -- then prints each one after a
  // label string.
  //
  // NOTE(maintenance): each `@below` diagnostic annotation in this function
  // refers to the line that follows it, so do not insert new lines between an
  // annotation and the op underneath it.
  //
  // Only the ops defining and printing tile_a, tile_b and tile_c carry the
  // "tile value will go through memory" warning annotation; the tile_d and
  // tile_e ops carry none (presumably those two receive the real tiles --
  // confirm against the tile allocator).
15  %c0 = arith.constant 0 : index
  // tile_a: constant tile with every element 0.
16  // expected-warning @below {{failed to allocate SME virtual tile to operation, tile value will go through memory, expect degraded performance}}
17  %tile_a = arith.constant dense<0> : vector<[8]x[8]xi16>
  // tile_b: constant tile with every element 1.
18  // expected-warning @below {{failed to allocate SME virtual tile to operation, tile value will go through memory, expect degraded performance}}
19  %tile_b = arith.constant dense<1> : vector<[8]x[8]xi16>
  // tile_c, tile_d, tile_e: loaded from %a, %b and %c respectively, starting
  // at [0, 0]. @main fills those buffers with 2, 3 and 4.
20  // expected-warning @below {{failed to allocate SME virtual tile to operation, tile value will go through memory, expect degraded performance}}
21  %tile_c = arm_sme.tile_load %a[%c0, %c0] : memref<?x?xi16>, vector<[8]x[8]xi16>
22  %tile_d = arm_sme.tile_load %b[%c0, %c0] : memref<?x?xi16>, vector<[8]x[8]xi16>
23  %tile_e = arm_sme.tile_load %c[%c0, %c0] : memref<?x?xi16>, vector<[8]x[8]xi16>
24
  // Print every tile preceded by its label. For each tile the FileCheck
  // directives look for the label and then eight matches of a row prefix
  // holding eight copies of that tile's value.
25  // CHECK-LABEL: tile_a:
26  // CHECK-COUNT-8: ( 0, 0, 0, 0, 0, 0, 0, 0
27  vector.print str "tile_a:\n"
28  // expected-warning @below {{failed to allocate SME virtual tile to operation, tile value will go through memory, expect degraded performance}}
29  vector.print %tile_a : vector<[8]x[8]xi16>
30  // CHECK-LABEL: tile_b:
31  // CHECK-COUNT-8: ( 1, 1, 1, 1, 1, 1, 1, 1
32  vector.print str "tile_b:\n"
33  // expected-warning @below {{failed to allocate SME virtual tile to operation, tile value will go through memory, expect degraded performance}}
34  vector.print %tile_b : vector<[8]x[8]xi16>
35  // CHECK-LABEL: tile_c:
36  // CHECK-COUNT-8: ( 2, 2, 2, 2, 2, 2, 2, 2
37  vector.print str "tile_c:\n"
38  // expected-warning @below {{failed to allocate SME virtual tile to operation, tile value will go through memory, expect degraded performance}}
39  vector.print %tile_c : vector<[8]x[8]xi16>
40  // CHECK-LABEL: tile_d:
41  // CHECK-COUNT-8: ( 3, 3, 3, 3, 3, 3, 3, 3
42  vector.print str "tile_d:\n"
43  vector.print %tile_d : vector<[8]x[8]xi16>
44  // CHECK-LABEL: tile_e:
45  // CHECK-COUNT-8: ( 4, 4, 4, 4, 4, 4, 4, 4
46  vector.print str "tile_e:\n"
47  vector.print %tile_e : vector<[8]x[8]xi16>
48  return
49}
50
51func.func @main() {
  // Test entry point (selected with `-e=main` on the runner command line):
  // builds three i16 buffers filled with 2, 3 and 4 and passes them to
  // @use_too_many_tiles, which loads and prints them.

  // Streaming vector length queried with the <half> type-size attribute; used
  // below as both dimensions of every buffer.
53  %svl_h = arm_sme.streaming_vl <half>
54
  // Fill values, one per buffer.
55  %c2 = arith.constant 2 : i16
56  %c3 = arith.constant 3 : i16
57  %c4 = arith.constant 4 : i16
58
  // Dynamically sized %svl_h x %svl_h buffers created with memref.alloca.
59  %memA = memref.alloca(%svl_h, %svl_h) : memref<?x?xi16>
60  %memB = memref.alloca(%svl_h, %svl_h) : memref<?x?xi16>
61  %memC = memref.alloca(%svl_h, %svl_h) : memref<?x?xi16>
62
  // memA <- 2, memB <- 3, memC <- 4; these are the values the callee's
  // tile_c / tile_d / tile_e output checks look for.
63  linalg.fill ins(%c2 : i16) outs(%memA : memref<?x?xi16>)
64  linalg.fill ins(%c3 : i16) outs(%memB : memref<?x?xi16>)
65  linalg.fill ins(%c4 : i16) outs(%memC : memref<?x?xi16>)
66
67  func.call @use_too_many_tiles(%memA, %memB, %memC) : (memref<?x?xi16>, memref<?x?xi16>, memref<?x?xi16>) -> ()
68  return
69}
70