// RUN: mlir-opt %s \
// RUN:   -test-lower-to-arm-sme -test-lower-to-llvm -verify-diagnostics | \
// RUN: %mcr_aarch64_cmd \
// RUN:   -e=main -entry-point-result=void \
// RUN:   -march=aarch64 -mattr="+sve,+sme" \
// RUN:   -shared-libs=%native_mlir_runner_utils,%native_mlir_c_runner_utils,%native_arm_sme_abi_shlib | \
// RUN: FileCheck %s

/// This function uses too many tiles! There are only two i16 tiles (ZA0.H and
/// ZA1.H), but this function uses five i16 tiles! Very expensive spills/reloads
/// will be inserted to emulate the extra three tiles. Note: This is only done
/// to avoid the compiler erroring out but is expected to have very poor
/// performance (hence the warning).
///
/// Arguments:
///   %a, %b, %c - dynamically sized 2-D i16 buffers; one tile is loaded from
///                the origin ([0, 0]) of each of them.
///
/// Every tile is printed after a textual label so that the runner output can
/// be matched per tile. NOTE: each diagnostic annotation below must stay on
/// the line directly above the operation it refers to.
func.func @use_too_many_tiles(%a: memref<?x?xi16>, %b: memref<?x?xi16>, %c: memref<?x?xi16>) {
  %c0 = arith.constant 0 : index

  // Five tile values are created up front and only consumed by the prints
  // further down. The first three definitions carry the spill warning.
  // expected-warning @below {{failed to allocate SME virtual tile to operation, tile value will go through memory, expect degraded performance}}
  %tile_a = arith.constant dense<0> : vector<[8]x[8]xi16>
  // expected-warning @below {{failed to allocate SME virtual tile to operation, tile value will go through memory, expect degraded performance}}
  %tile_b = arith.constant dense<1> : vector<[8]x[8]xi16>
  // expected-warning @below {{failed to allocate SME virtual tile to operation, tile value will go through memory, expect degraded performance}}
  %tile_c = arm_sme.tile_load %a[%c0, %c0] : memref<?x?xi16>, vector<[8]x[8]xi16>
  %tile_d = arm_sme.tile_load %b[%c0, %c0] : memref<?x?xi16>, vector<[8]x[8]xi16>
  %tile_e = arm_sme.tile_load %c[%c0, %c0] : memref<?x?xi16>, vector<[8]x[8]xi16>

  // The printed rows must show the value each tile was created with, even for
  // the tiles whose uses carry the spill warning.

  // CHECK-LABEL: tile_a:
  // CHECK-COUNT-8: ( 0, 0, 0, 0, 0, 0, 0, 0
  vector.print str "tile_a:\n"
  // expected-warning @below {{failed to allocate SME virtual tile to operation, tile value will go through memory, expect degraded performance}}
  vector.print %tile_a : vector<[8]x[8]xi16>

  // CHECK-LABEL: tile_b:
  // CHECK-COUNT-8: ( 1, 1, 1, 1, 1, 1, 1, 1
  vector.print str "tile_b:\n"
  // expected-warning @below {{failed to allocate SME virtual tile to operation, tile value will go through memory, expect degraded performance}}
  vector.print %tile_b : vector<[8]x[8]xi16>

  // CHECK-LABEL: tile_c:
  // CHECK-COUNT-8: ( 2, 2, 2, 2, 2, 2, 2, 2
  vector.print str "tile_c:\n"
  // expected-warning @below {{failed to allocate SME virtual tile to operation, tile value will go through memory, expect degraded performance}}
  vector.print %tile_c : vector<[8]x[8]xi16>

  // No warning is annotated on the prints of tile_d and tile_e.

  // CHECK-LABEL: tile_d:
  // CHECK-COUNT-8: ( 3, 3, 3, 3, 3, 3, 3, 3
  vector.print str "tile_d:\n"
  vector.print %tile_d : vector<[8]x[8]xi16>

  // CHECK-LABEL: tile_e:
  // CHECK-COUNT-8: ( 4, 4, 4, 4, 4, 4, 4, 4
  vector.print str "tile_e:\n"
  vector.print %tile_e : vector<[8]x[8]xi16>
  return
}

/// Test entry point (selected with `-e=main` in the run line above).
///
/// Allocates three square i16 buffers, fills them with 2, 3 and 4
/// respectively, and passes them to @use_too_many_tiles. Those fill values are
/// the ones matched for tile_c, tile_d and tile_e.
func.func @main() {
  // Side length of each buffer, as reported by `arm_sme.streaming_vl <half>`.
  %svl_h = arm_sme.streaming_vl <half>

  %c2 = arith.constant 2 : i16
  %c3 = arith.constant 3 : i16
  %c4 = arith.constant 4 : i16

  %memA = memref.alloca(%svl_h, %svl_h) : memref<?x?xi16>
  %memB = memref.alloca(%svl_h, %svl_h) : memref<?x?xi16>
  %memC = memref.alloca(%svl_h, %svl_h) : memref<?x?xi16>

  linalg.fill ins(%c2 : i16) outs(%memA : memref<?x?xi16>)
  linalg.fill ins(%c3 : i16) outs(%memB : memref<?x?xi16>)
  linalg.fill ins(%c4 : i16) outs(%memC : memref<?x?xi16>)

  func.call @use_too_many_tiles(%memA, %memB, %memC) : (memref<?x?xi16>, memref<?x?xi16>, memref<?x?xi16>) -> ()
  return
}