//--------------------------------------------------------------------------------------------------
// WHEN CREATING A NEW TEST, PLEASE JUST COPY & PASTE WITHOUT EDITS.
//
// Set-up that's shared across all tests in this directory. In principle, this
// config could be moved to lit.local.cfg. However, there are downstream users that
// do not use these LIT config files. Hence why this is kept inline.
//
// DEFINE: %{sparsifier_opts} = enable-runtime-library=true
// DEFINE: %{sparsifier_opts_sve} = enable-arm-sve=true %{sparsifier_opts}
// DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}"
// DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}"
// DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils
// DEFINE: %{run_libs_sve} = -shared-libs=%native_mlir_runner_utils,%native_mlir_c_runner_utils
// DEFINE: %{run_opts} = -e main -entry-point-result=void
// DEFINE: %{run} = mlir-runner %{run_opts} %{run_libs}
// DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs_sve}
//
// DEFINE: %{env} =
//--------------------------------------------------------------------------------------------------

// REDEFINE: %{env} = TENSOR0="%mlir_src_dir/test/Integration/data/test.mtx"
// RUN: %{compile} | env %{env} %{run} | FileCheck %s
//
// Do the same run, but now with direct IR generation.
// REDEFINE: %{sparsifier_opts} = enable-runtime-library=false
// RUN: %{compile} | env %{env} %{run} | FileCheck %s
//
// Do the same run, but now with direct IR generation and vectorization.
// REDEFINE: %{sparsifier_opts} = enable-runtime-library=false vl=2 reassociate-fp-reductions=true enable-index-optimizations=true
// RUN: %{compile} | env %{env} %{run} | FileCheck %s
//
// Do the same run, but now with direct IR generation and, if available, VLA
// vectorization.
// RUN: %if mlir_arm_sve_tests %{ %{compile_sve} | env %{env} %{run_sve} | FileCheck %s %}

// Opaque pointer type used to pass the tensor file name to the runtime.
!Filename = !llvm.ptr

// Sparse encoding: both dimensions stored compressed, with 32-bit
// position (posWidth) and coordinate (crdWidth) overhead storage.
#SparseMatrix = #sparse_tensor.encoding<{
  map = (d0, d1) -> (d0 : compressed, d1 : compressed),
  posWidth = 32,
  crdWidth = 32
}>

// Trait for a sampled dense-dense matrix multiplication: the dense
// product A*B is scaled element-wise by the sampling matrix S.
#trait_sampled_dense_dense = {
  indexing_maps = [
    affine_map<(i,j,k) -> (i,j)>,  // S
    affine_map<(i,j,k) -> (i,k)>,  // A
    affine_map<(i,j,k) -> (k,j)>,  // B
    affine_map<(i,j,k) -> (i,j)>   // X (out)
  ],
  iterator_types = ["parallel", "parallel", "reduction"],
  doc = "X(i,j) += S(i,j) SUM_k A(i,k) B(k,j)"
}

//
// Integration test that lowers a kernel annotated as sparse to
// actual sparse code, initializes a matching sparse storage scheme
// from file, and runs the resulting code with the JIT compiler.
//
module {
  //
  // A kernel that computes a sampled matrix matrix multiplication:
  // each output element X(i,j) accumulates S(i,j) * A(i,k) * B(k,j)
  // over the reduction index k (see #trait_sampled_dense_dense).
  //
  func.func @sampled_dense_dense(%args: tensor<?x?xf32, #SparseMatrix>,
                                 %arga: tensor<?x?xf32>,
                                 %argb: tensor<?x?xf32>,
                                 %argx: tensor<?x?xf32>) -> tensor<?x?xf32> {
    %0 = linalg.generic #trait_sampled_dense_dense
      ins(%args, %arga, %argb: tensor<?x?xf32, #SparseMatrix>, tensor<?x?xf32>, tensor<?x?xf32>)
      outs(%argx: tensor<?x?xf32>) {
        ^bb(%s: f32, %a: f32, %b: f32, %x: f32):
          // x += s * (a * b)
          %0 = arith.mulf %a, %b : f32
          %1 = arith.mulf %s, %0 : f32
          %2 = arith.addf %x, %1 : f32
          linalg.yield %2 : f32
    } -> tensor<?x?xf32>
    return %0 : tensor<?x?xf32>
  }

  // Runtime helper that maps a tensor id to a file path; presumably it
  // resolves the TENSOR0 environment variable set by %{env} in the RUN
  // lines above — confirm against the runner-utils implementation.
  func.func private @getTensorFilename(index) -> (!Filename)

  //
  // Main driver that reads matrix from file and calls the sparse kernel.
  //
  func.func @main() {
    %d0 = arith.constant 0.0 : f32
    %c0 = arith.constant 0 : index
    %c1 = arith.constant 1 : index
    %c5 = arith.constant 5 : index
    %c10 = arith.constant 10 : index

    // Initialize dense matrices.
    // %x: 5x5 zero-initialized output tensor.
    %x = tensor.generate %c5, %c5 {
    ^bb0(%i : index, %j : index):
      tensor.yield %d0 : f32
    } : tensor<?x?xf32>

    // %a: 5x10 tensor with A(i,j) = i + 1 (constant along each row).
    %a = tensor.generate %c5, %c10 {
    ^bb0(%i: index, %j: index):
      %p = arith.addi %i, %c1 : index
      %q = arith.index_cast %p : index to i32
      %d = arith.sitofp %q : i32 to f32
      tensor.yield %d : f32
    } : tensor<?x?xf32>

    // %b: 10x5 tensor with B(i,j) = j + 1 (constant along each column).
    %b = tensor.generate %c10, %c5 {
    ^bb0(%i: index, %j: index):
      %p = arith.addi %j, %c1 : index
      %q = arith.index_cast %p : index to i32
      %d = arith.sitofp %q : i32 to f32
      tensor.yield %d : f32
    } : tensor<?x?xf32>

    // Read the sparse matrix from file, construct sparse storage.
    %fileName = call @getTensorFilename(%c0) : (index) -> (!Filename)
    %s = sparse_tensor.new %fileName : !Filename to tensor<?x?xf32, #SparseMatrix>

    // Call the kernel.
    %0 = call @sampled_dense_dense(%s, %a, %b, %x)
       : (tensor<?x?xf32, #SparseMatrix>,
          tensor<?x?xf32>, tensor<?x?xf32>, tensor<?x?xf32>) -> tensor<?x?xf32>

    // Print the result for verification, one 5-element row per line.
    //
    // CHECK: ( 10, 0, 0, 56, 0 )
    // CHECK: ( 0, 80, 0, 0, 250 )
    // CHECK: ( 0, 0, 270, 0, 0 )
    // CHECK: ( 164, 0, 0, 640, 0 )
    // CHECK: ( 0, 520, 0, 0, 1250 )
    //
    scf.for %i = %c0 to %c5 step %c1 {
      %v = vector.transfer_read %0[%i, %c0], %d0: tensor<?x?xf32>, vector<5xf32>
      vector.print %v : vector<5xf32>
    }

    // Release the resources.
    // NOTE(review): %x is not deallocated here; presumably the kernel
    // result %0 reuses its buffer after bufferization — verify no leak
    // is reported when running with leak checking enabled.
    bufferization.dealloc_tensor %s : tensor<?x?xf32, #SparseMatrix>
    bufferization.dealloc_tensor %0 : tensor<?x?xf32>
    bufferization.dealloc_tensor %a : tensor<?x?xf32>
    bufferization.dealloc_tensor %b : tensor<?x?xf32>

    return
  }
}