//--------------------------------------------------------------------------------------------------
// WHEN CREATING A NEW TEST, PLEASE JUST COPY & PASTE WITHOUT EDITS.
//
// Set-up that's shared across all tests in this directory. In principle, this
// config could be moved to lit.local.cfg. However, there are downstream users that
// do not use these LIT config files. Hence why this is kept inline.
//
// DEFINE: %{sparsifier_opts} = enable-runtime-library=true
// DEFINE: %{sparsifier_opts_sve} = enable-arm-sve=true %{sparsifier_opts}
// DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}"
// DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}"
// DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils
// DEFINE: %{run_libs_sve} = -shared-libs=%native_mlir_runner_utils,%native_mlir_c_runner_utils
// DEFINE: %{run_opts} = -e main -entry-point-result=void
// DEFINE: %{run} = mlir-runner %{run_opts} %{run_libs}
// DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs_sve}
//
// DEFINE: %{env} =
//--------------------------------------------------------------------------------------------------

// RUN: %{compile} | %{run} | FileCheck %s
//
// Do the same run, but now with direct IR generation.
// REDEFINE: %{sparsifier_opts} = enable-runtime-library=false
// RUN: %{compile} | %{run} | FileCheck %s
//
// Do the same run, but now with direct IR generation and vectorization.
// REDEFINE: %{sparsifier_opts} = enable-runtime-library=false vl=2 reassociate-fp-reductions=true enable-index-optimizations=true
// RUN: %{compile} | %{run} | FileCheck %s
//
// Do the same run, but now with direct IR generation and VLA vectorization.
// RUN: %if mlir_arm_sve_tests %{ %{compile_sve} | %{run_sve} | FileCheck %s %}

// COO with AoS coordinate storage: level-1 coordinates interleaved.
#SortedCOO = #sparse_tensor.encoding<{
  map = (d0, d1) -> (d0 : compressed(nonunique), d1 : singleton)
}>

// COO with SoA coordinate storage: one coordinate array per level.
#SortedCOOSoA = #sparse_tensor.encoding<{
  map = (d0, d1) -> (d0 : compressed(nonunique), d1 : singleton(soa))
}>

#CSR = #sparse_tensor.encoding<{
  map = (d0, d1) -> (d0 : dense, d1 : compressed)
}>

// Elementwise matrix addition, shared by all kernels below.
#trait = {
  indexing_maps = [
    affine_map<(i,j) -> (i,j)>, // A
    affine_map<(i,j) -> (i,j)>, // B
    affine_map<(i,j) -> (i,j)>  // X (out)
  ],
  iterator_types = ["parallel", "parallel"],
  doc = "X(i,j) = A(i,j) + B(i,j)"
}

module {
  // CSR + COO(SoA) -> dense.
  func.func @add_coo_csr(%arga: tensor<8x8xf32, #CSR>,
                         %argb: tensor<8x8xf32, #SortedCOOSoA>)
                         -> tensor<8x8xf32> {
    %empty = tensor.empty() : tensor<8x8xf32>
    %zero = arith.constant 0.000000e+00 : f32
    %init = linalg.fill
        ins(%zero : f32)
        outs(%empty : tensor<8x8xf32>) -> tensor<8x8xf32>
    %0 = linalg.generic #trait
       ins(%arga, %argb: tensor<8x8xf32, #CSR>,
                         tensor<8x8xf32, #SortedCOOSoA>)
       outs(%init: tensor<8x8xf32>) {
         ^bb(%a: f32, %b: f32, %x: f32):
           %0 = arith.addf %a, %b : f32
           linalg.yield %0 : f32
    } -> tensor<8x8xf32>
    return %0 : tensor<8x8xf32>
  }

  // COO(AoS) + COO(SoA) -> dense.
  func.func @add_coo_coo(%arga: tensor<8x8xf32, #SortedCOO>,
                         %argb: tensor<8x8xf32, #SortedCOOSoA>)
                         -> tensor<8x8xf32> {
    %empty = tensor.empty() : tensor<8x8xf32>
    %zero = arith.constant 0.000000e+00 : f32
    %init = linalg.fill
        ins(%zero : f32)
        outs(%empty : tensor<8x8xf32>) -> tensor<8x8xf32>
    %0 = linalg.generic #trait
       ins(%arga, %argb: tensor<8x8xf32, #SortedCOO>,
                         tensor<8x8xf32, #SortedCOOSoA>)
       outs(%init: tensor<8x8xf32>) {
         ^bb(%a: f32, %b: f32, %x: f32):
           %0 = arith.addf %a, %b : f32
           linalg.yield %0 : f32
    } -> tensor<8x8xf32>
    return %0 : tensor<8x8xf32>
  }

  // COO(AoS) + COO(SoA) -> COO(SoA), i.e. a sparse output.
  func.func @add_coo_coo_out_coo(%arga: tensor<8x8xf32, #SortedCOO>,
                                 %argb: tensor<8x8xf32, #SortedCOOSoA>)
                                 -> tensor<8x8xf32, #SortedCOOSoA> {
    %init = tensor.empty() : tensor<8x8xf32, #SortedCOOSoA>
    %0 = linalg.generic #trait
       ins(%arga, %argb: tensor<8x8xf32, #SortedCOO>,
                         tensor<8x8xf32, #SortedCOOSoA>)
       outs(%init: tensor<8x8xf32, #SortedCOOSoA>) {
         ^bb(%a: f32, %b: f32, %x: f32):
           %0 = arith.addf %a, %b : f32
           linalg.yield %0 : f32
    } -> tensor<8x8xf32, #SortedCOOSoA>
    return %0 : tensor<8x8xf32, #SortedCOOSoA>
  }

  // dense + COO(SoA) -> dense.
  func.func @add_coo_dense(%arga: tensor<8x8xf32>,
                           %argb: tensor<8x8xf32, #SortedCOOSoA>)
                           -> tensor<8x8xf32> {
    %empty = tensor.empty() : tensor<8x8xf32>
    %zero = arith.constant 0.000000e+00 : f32
    %init = linalg.fill
        ins(%zero : f32)
        outs(%empty : tensor<8x8xf32>) -> tensor<8x8xf32>
    %0 = linalg.generic #trait
       ins(%arga, %argb: tensor<8x8xf32>,
                         tensor<8x8xf32, #SortedCOOSoA>)
       outs(%init: tensor<8x8xf32>) {
         ^bb(%a: f32, %b: f32, %x: f32):
           %0 = arith.addf %a, %b : f32
           linalg.yield %0 : f32
    } -> tensor<8x8xf32>
    return %0 : tensor<8x8xf32>
  }

  // Driver: computes A + B through four kernel variants and verifies that
  // every variant produces the same result.
  func.func @main() {
    %c0 = arith.constant 0 : index
    %c1 = arith.constant 1 : index
    %c8 = arith.constant 8 : index

    %A = arith.constant dense<
        [ [ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0 ],
          [ 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1, 8.1 ],
          [ 2.2, 2.2, 3.2, 4.2, 5.2, 6.2, 7.2, 8.2 ],
          [ 3.3, 2.3, 3.3, 4.3, 5.3, 6.3, 7.3, 8.3 ],
          [ 4.4, 2.4, 3.4, 4.4, 5.4, 6.4, 7.4, 8.4 ],
          [ 5.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5 ],
          [ 6.6, 2.6, 3.6, 4.6, 5.6, 6.6, 7.6, 8.6 ],
          [ 7.7, 2.7, 3.7, 4.7, 5.7, 6.7, 7.7, 8.7 ] ]
    > : tensor<8x8xf32>
    %B = arith.constant dense<
        [ [ 7.8, 2.8, 3.8, 0.8, 3.8, 0.1, 7.8, 8.8 ],
          [ 3.3, 2.3, 1.3, 4.3, 3.3, 6.3, 9.3, 8.3 ],
          [ 6.6, 2.6, 3.6, 4.6, 3.6, 6.6, 7.6, 7.6 ],
          [ 1.0, 3.0, 3.0, 4.0, 3.0, 6.0, 7.0, 8.0 ],
          [ 0.1, 2.1, 3.1, 4.1, 3.1, 6.1, 7.1, 8.1 ],
          [ 4.4, 2.4, 3.4, 4.4, 3.4, 6.4, 8.4, 8.4 ],
          [ 5.5, 3.5, 1.5, 4.5, 3.5, 6.5, 7.5, 8.5 ],
          [ 7.7, 2.7, 3.7, 0.7, 5.7, 3.7, 3.7, 0.7 ] ]
    > : tensor<8x8xf32>

    // Stress test with a "sparse" version of A and B.
    %CSR_A = sparse_tensor.convert %A
      : tensor<8x8xf32> to tensor<8x8xf32, #CSR>
    %COO_A = sparse_tensor.convert %A
      : tensor<8x8xf32> to tensor<8x8xf32, #SortedCOO>
    %COO_B = sparse_tensor.convert %B
      : tensor<8x8xf32> to tensor<8x8xf32, #SortedCOOSoA>

    %C1 = call @add_coo_dense(%A, %COO_B) : (tensor<8x8xf32>,
                                             tensor<8x8xf32, #SortedCOOSoA>)
                                          -> tensor<8x8xf32>
    %C2 = call @add_coo_csr(%CSR_A, %COO_B) : (tensor<8x8xf32, #CSR>,
                                               tensor<8x8xf32, #SortedCOOSoA>)
                                            -> tensor<8x8xf32>
    %C3 = call @add_coo_coo(%COO_A, %COO_B) : (tensor<8x8xf32, #SortedCOO>,
                                               tensor<8x8xf32, #SortedCOOSoA>)
                                            -> tensor<8x8xf32>
    %COO_RET = call @add_coo_coo_out_coo(%COO_A, %COO_B) : (tensor<8x8xf32, #SortedCOO>,
                                                            tensor<8x8xf32, #SortedCOOSoA>)
                                                         -> tensor<8x8xf32, #SortedCOOSoA>
    %C4 = sparse_tensor.convert %COO_RET : tensor<8x8xf32, #SortedCOOSoA> to tensor<8x8xf32>

    //
    // Verify computed matrix C. Each row is printed four times in a row
    // (once per kernel variant, C1..C4), in row order.
    //
    // NOTE: FileCheck does not support combining the -NEXT and -COUNT
    // suffixes; a "CHECK-NEXT-COUNT-4:" line is silently ignored as a
    // plain comment. Plain CHECK-COUNT-4 directives are used instead,
    // which match the in-order output and actually verify every row.
    //
    // CHECK-COUNT-4: ( 8.8, 4.8, 6.8, 4.8, 8.8, 6.1, 14.8, 16.8 )
    // CHECK-COUNT-4: ( 4.4, 4.4, 4.4, 8.4, 8.4, 12.4, 16.4, 16.4 )
    // CHECK-COUNT-4: ( 8.8, 4.8, 6.8, 8.8, 8.8, 12.8, 14.8, 15.8 )
    // CHECK-COUNT-4: ( 4.3, 5.3, 6.3, 8.3, 8.3, 12.3, 14.3, 16.3 )
    // CHECK-COUNT-4: ( 4.5, 4.5, 6.5, 8.5, 8.5, 12.5, 14.5, 16.5 )
    // CHECK-COUNT-4: ( 9.9, 4.9, 6.9, 8.9, 8.9, 12.9, 15.9, 16.9 )
    // CHECK-COUNT-4: ( 12.1, 6.1, 5.1, 9.1, 9.1, 13.1, 15.1, 17.1 )
    // CHECK-COUNT-4: ( 15.4, 5.4, 7.4, 5.4, 11.4, 10.4, 11.4, 9.4 )
    //
    %f0 = arith.constant 0.0 : f32
    scf.for %i = %c0 to %c8 step %c1 {
      %v1 = vector.transfer_read %C1[%i, %c0], %f0
          : tensor<8x8xf32>, vector<8xf32>
      %v2 = vector.transfer_read %C2[%i, %c0], %f0
          : tensor<8x8xf32>, vector<8xf32>
      %v3 = vector.transfer_read %C3[%i, %c0], %f0
          : tensor<8x8xf32>, vector<8xf32>
      %v4 = vector.transfer_read %C4[%i, %c0], %f0
          : tensor<8x8xf32>, vector<8xf32>
      vector.print %v1 : vector<8xf32>
      vector.print %v2 : vector<8xf32>
      vector.print %v3 : vector<8xf32>
      vector.print %v4 : vector<8xf32>
    }

    //
    // Ensure that COO-SoA output has the same values.
    //
    // CHECK:      ---- Sparse Tensor ----
    // CHECK-NEXT: nse = 64
    // CHECK-NEXT: dim = ( 8, 8 )
    // CHECK-NEXT: lvl = ( 8, 8 )
    // CHECK-NEXT: pos[0] : ( 0, 64 )
    // CHECK-NEXT: crd[0] : ( 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2,
    // CHECK-SAME:            2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
    // CHECK-SAME:            5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7,
    // CHECK-SAME:            7, 7, 7, 7 )
    // CHECK-NEXT: crd[1] : ( 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3,
    // CHECK-SAME:            4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7,
    // CHECK-SAME:            0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3,
    // CHECK-SAME:            4, 5, 6, 7 )
    // CHECK-NEXT: values : ( 8.8, 4.8, 6.8, 4.8, 8.8, 6.1, 14.8, 16.8, 4.4, 4.4, 4.4, 8.4,
    // CHECK-SAME:            8.4, 12.4, 16.4, 16.4, 8.8, 4.8, 6.8, 8.8, 8.8, 12.8, 14.8,
    // CHECK-SAME:            15.8, 4.3, 5.3, 6.3, 8.3, 8.3, 12.3, 14.3, 16.3, 4.5, 4.5,
    // CHECK-SAME:            6.5, 8.5, 8.5, 12.5, 14.5, 16.5, 9.9, 4.9, 6.9, 8.9, 8.9,
    // CHECK-SAME:            12.9, 15.9, 16.9, 12.1, 6.1, 5.1, 9.1, 9.1, 13.1, 15.1, 17.1,
    // CHECK-SAME:            15.4, 5.4, 7.4, 5.4, 11.4, 10.4, 11.4, 9.4 )
    // CHECK-NEXT: ----
    //
    sparse_tensor.print %COO_RET : tensor<8x8xf32, #SortedCOOSoA>

    // Release resources.
    bufferization.dealloc_tensor %C1 : tensor<8x8xf32>
    bufferization.dealloc_tensor %C2 : tensor<8x8xf32>
    bufferization.dealloc_tensor %C3 : tensor<8x8xf32>
    bufferization.dealloc_tensor %C4 : tensor<8x8xf32>
    bufferization.dealloc_tensor %CSR_A : tensor<8x8xf32, #CSR>
    bufferization.dealloc_tensor %COO_A : tensor<8x8xf32, #SortedCOO>
    bufferization.dealloc_tensor %COO_B : tensor<8x8xf32, #SortedCOOSoA>
    bufferization.dealloc_tensor %COO_RET : tensor<8x8xf32, #SortedCOOSoA>

    return
  }
}