//--------------------------------------------------------------------------------------------------
// WHEN CREATING A NEW TEST, PLEASE JUST COPY & PASTE WITHOUT EDITS.
//
// Set-up that's shared across all tests in this directory. In principle, this
// config could be moved to lit.local.cfg. However, there are downstream users that
// do not use these LIT config files. Hence why this is kept inline.
//
// DEFINE: %{sparsifier_opts} = enable-runtime-library=true
// DEFINE: %{sparsifier_opts_sve} = enable-arm-sve=true %{sparsifier_opts}
// DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}"
// DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}"
// DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils
// DEFINE: %{run_libs_sve} = -shared-libs=%native_mlir_runner_utils,%native_mlir_c_runner_utils
// DEFINE: %{run_opts} = -e main -entry-point-result=void
// DEFINE: %{run} = mlir-runner %{run_opts} %{run_libs}
// DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs_sve}
//
// DEFINE: %{env} =
//--------------------------------------------------------------------------------------------------

// RUN: %{compile} | %{run} | FileCheck %s
//
// Do the same run, but now with direct IR generation.
// REDEFINE: %{sparsifier_opts} = enable-runtime-library=false enable-buffer-initialization=true
// RUN: %{compile} | %{run} | FileCheck %s
//
// Do the same run, but now with vectorization.
// REDEFINE: %{sparsifier_opts} = enable-runtime-library=false enable-buffer-initialization=true vl=2 reassociate-fp-reductions=true enable-index-optimizations=true
// RUN: %{compile} | %{run} | FileCheck %s
//
// Do the same run, but now with VLA vectorization.
// RUN: %if mlir_arm_sve_tests %{ %{compile_sve} | %{run_sve} | FileCheck %s %}

#DCSR = #sparse_tensor.encoding<{
  map = (d0, d1) -> (d0 : compressed, d1 : compressed)
}>

#DCSC = #sparse_tensor.encoding<{
  map = (d0, d1) -> (d1 : compressed, d0 : compressed)
}>

#transpose_trait = {
  indexing_maps = [
    affine_map<(i,j) -> (j,i)>,  // A
    affine_map<(i,j) -> (i,j)>   // X
  ],
  iterator_types = ["parallel", "parallel"],
  doc = "X(i,j) = A(j,i)"
}

module {

  //
  // Transposing a sparse row-wise matrix into another sparse row-wise
  // matrix introduces a cycle in the iteration graph. This complication
  // can be avoided by manually inserting a conversion of the incoming
  // matrix into a sparse column-wise matrix first.
  //
  func.func @sparse_transpose(%arga: tensor<3x4xf64, #DCSR>)
                                  -> tensor<4x3xf64, #DCSR> {
    %t = sparse_tensor.convert %arga
      : tensor<3x4xf64, #DCSR> to tensor<3x4xf64, #DCSC>

    %i = tensor.empty() : tensor<4x3xf64, #DCSR>
    %0 = linalg.generic #transpose_trait
       ins(%t: tensor<3x4xf64, #DCSC>)
       outs(%i: tensor<4x3xf64, #DCSR>) {
       ^bb(%a: f64, %x: f64):
         linalg.yield %a : f64
    } -> tensor<4x3xf64, #DCSR>

    bufferization.dealloc_tensor %t : tensor<3x4xf64, #DCSC>

    return %0 : tensor<4x3xf64, #DCSR>
  }
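
  //
  // For illustration only (a sketch, not used by the test): with the 3x4
  // input matrix constructed in @main below, the #DCSC conversion stores
  // the nonzeros of A column by column, which is exactly the order in
  // which the rows of the transposed output are produced:
  //
  //   column of A : 0          1     2     3
  //   values      : 1.1, 3.1 | 1.2 | 3.3 | 1.4, 3.4
  //
  // The same order shows up in the "values" line of the CHECK output below.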

  //
  // However, even better, the sparsifier is able to insert such a
  // conversion automatically to resolve a cycle in the iteration graph!
  //
  func.func @sparse_transpose_auto(%arga: tensor<3x4xf64, #DCSR>)
                                       -> tensor<4x3xf64, #DCSR> {
    %i = tensor.empty() : tensor<4x3xf64, #DCSR>
    %0 = linalg.generic #transpose_trait
       ins(%arga: tensor<3x4xf64, #DCSR>)
       outs(%i: tensor<4x3xf64, #DCSR>) {
       ^bb(%a: f64, %x: f64):
         linalg.yield %a : f64
    } -> tensor<4x3xf64, #DCSR>
    return %0 : tensor<4x3xf64, #DCSR>
  }

  //
  // Main driver.
  //
  func.func @main() {
    // Set up the input sparse matrix from a dense constant.
    %d = arith.constant dense<[
       [ 1.1, 1.2, 0.0, 1.4 ],
       [ 0.0, 0.0, 0.0, 0.0 ],
       [ 3.1, 0.0, 3.3, 3.4 ]
    ]> : tensor<3x4xf64>
    %a = sparse_tensor.convert %d : tensor<3x4xf64> to tensor<3x4xf64, #DCSR>

    // Call the kernels.
    %0 = call @sparse_transpose(%a)
       : (tensor<3x4xf64, #DCSR>) -> tensor<4x3xf64, #DCSR>
    %1 = call @sparse_transpose_auto(%a)
       : (tensor<3x4xf64, #DCSR>) -> tensor<4x3xf64, #DCSR>

    //
    // Verify result.
    //
    // CHECK:      ---- Sparse Tensor ----
    // CHECK-NEXT: nse = 6
    // CHECK-NEXT: dim = ( 4, 3 )
    // CHECK-NEXT: lvl = ( 4, 3 )
    // CHECK-NEXT: pos[0] : ( 0, 4 )
    // CHECK-NEXT: crd[0] : ( 0, 1, 2, 3 )
    // CHECK-NEXT: pos[1] : ( 0, 2, 3, 4, 6 )
    // CHECK-NEXT: crd[1] : ( 0, 2, 0, 2, 0, 2 )
    // CHECK-NEXT: values : ( 1.1, 3.1, 1.2, 3.3, 1.4, 3.4 )
    // CHECK-NEXT: ----
    // CHECK:      ---- Sparse Tensor ----
    // CHECK-NEXT: nse = 6
    // CHECK-NEXT: dim = ( 4, 3 )
    // CHECK-NEXT: lvl = ( 4, 3 )
    // CHECK-NEXT: pos[0] : ( 0, 4 )
    // CHECK-NEXT: crd[0] : ( 0, 1, 2, 3 )
    // CHECK-NEXT: pos[1] : ( 0, 2, 3, 4, 6 )
    // CHECK-NEXT: crd[1] : ( 0, 2, 0, 2, 0, 2 )
    // CHECK-NEXT: values : ( 1.1, 3.1, 1.2, 3.3, 1.4, 3.4 )
    // CHECK-NEXT: ----
    //
    sparse_tensor.print %0 : tensor<4x3xf64, #DCSR>
    sparse_tensor.print %1 : tensor<4x3xf64, #DCSR>

    // Release resources.
    bufferization.dealloc_tensor %a : tensor<3x4xf64, #DCSR>
    bufferization.dealloc_tensor %0 : tensor<4x3xf64, #DCSR>
    bufferization.dealloc_tensor %1 : tensor<4x3xf64, #DCSR>

    return
  }
}
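
// For reference, a sketch of what the first RUN line above expands to once
// lit substitutes the %{...} variables; the file name and the runner-utils
// library paths are illustrative and depend on the local build:
//
//   mlir-opt sparse_transpose.mlir --sparsifier="enable-runtime-library=true" \
//     | mlir-runner -e main -entry-point-result=void \
//         -shared-libs=libmlir_c_runner_utils.so,libmlir_runner_utils.so \
//     | FileCheck sparse_transpose.mlir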