//--------------------------------------------------------------------------------------------------
// WHEN CREATING A NEW TEST, PLEASE JUST COPY & PASTE WITHOUT EDITS.
//
// Set-up that's shared across all tests in this directory. In principle, this
// config could be moved to lit.local.cfg. However, there are downstream users that
// do not use these LIT config files. Hence why this is kept inline.
//
// DEFINE: %{sparsifier_opts} = enable-runtime-library=true
// DEFINE: %{sparsifier_opts_sve} = enable-arm-sve=true %{sparsifier_opts}
// DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}"
// DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}"
// DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils
// DEFINE: %{run_libs_sve} = -shared-libs=%native_mlir_runner_utils,%native_mlir_c_runner_utils
// DEFINE: %{run_opts} = -e main -entry-point-result=void
// DEFINE: %{run} = mlir-runner %{run_opts} %{run_libs}
// DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs_sve}
//
// DEFINE: %{env} =
//--------------------------------------------------------------------------------------------------

// RUN: %{compile} | %{run} | FileCheck %s
//
// Do the same run, but now with direct IR generation.
// REDEFINE: %{sparsifier_opts} = enable-runtime-library=false
// RUN: %{compile} | %{run} | FileCheck %s
//
// Do the same run, but now with direct IR generation and vectorization.
// REDEFINE: %{sparsifier_opts} = enable-runtime-library=false vl=2 reassociate-fp-reductions=true enable-index-optimizations=true
// RUN: %{compile} | %{run} | FileCheck %s
//
// Do the same run, but now with direct IR generation and VLA vectorization.
// RUN: %if mlir_arm_sve_tests %{ %{compile_sve} | %{run_sve} | FileCheck %s %}

#SortedCOO = #sparse_tensor.encoding<{
  map = (d0, d1) -> (d0 : compressed(nonunique), d1 : singleton)
}>

#SortedCOOI32 = #sparse_tensor.encoding<{
  map = (d0, d1) -> (d0 : compressed(nonunique), d1 : singleton),
  posWidth = 32,
  crdWidth = 32
}>

#CSR = #sparse_tensor.encoding<{
  map = (d0, d1) -> (d0 : dense, d1 : compressed),
  posWidth = 32,
  crdWidth = 32
}>

#BCOO = #sparse_tensor.encoding<{
  map = (d0, d1, d2) -> (d0 : dense, d1 : loose_compressed(nonunique), d2 : singleton)
}>

module {
  //
  // Main driver.
  //
  func.func @main() {
    %c0 = arith.constant 0 : index
    %f0 = arith.constant 0.0 : f64
    %i0 = arith.constant 0 : i32

    //
    // Setup COO.
    //

    %data = arith.constant dense<
      [ 1.0, 2.0, 3.0 ]
    > : tensor<3xf64>

    %pos = arith.constant dense<
      [0, 3]
    > : tensor<2xindex>

    %index = arith.constant dense<
      [[ 1, 2],
       [ 5, 6],
       [ 7, 8]]
    > : tensor<3x2xindex>

    %pos32 = arith.constant dense<
      [0, 3]
    > : tensor<2xi32>

    %index32 = arith.constant dense<
      [[ 1, 2],
       [ 5, 6],
       [ 7, 8]]
    > : tensor<3x2xi32>

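    // Assemble the two 10x10 COO tensors from the buffers above: the positions
    // [0, 3] delimit a single segment of three entries, and each row of the
    // coordinate tensors is a (row, col) pair, so the values 1.0, 2.0, 3.0 end
    // up at (1, 2), (5, 6), and (7, 8).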
    %s4 = sparse_tensor.assemble (%pos, %index), %data : (tensor<2xindex>, tensor<3x2xindex>), tensor<3xf64>
                                 to tensor<10x10xf64, #SortedCOO>
    %s5 = sparse_tensor.assemble (%pos32, %index32), %data : (tensor<2xi32>, tensor<3x2xi32>), tensor<3xf64>
                                 to tensor<10x10xf64, #SortedCOOI32>

    //
    // Setup CSR.
    //

    %csr_data = arith.constant dense<
      [ 1.0, 2.0, 3.0 ]
    > : tensor<3xf64>

    %csr_pos32 = arith.constant dense<
      [0, 1, 3]
    > : tensor<3xi32>

    %csr_index32 = arith.constant dense<
      [1, 0, 1]
    > : tensor<3xi32>
    %csr = sparse_tensor.assemble (%csr_pos32, %csr_index32), %csr_data : (tensor<3xi32>, tensor<3xi32>), tensor<3xf64>
                                 to tensor<2x2xf64, #CSR>

    //
    // Setup BCOO.
    //

    %bdata = arith.constant dense<
      [ 1.0, 2.0, 3.0, 4.0, 5.0 ]
    > : tensor<5xf64>

    %bpos = arith.constant dense<
      [0, 3, 3, 5]
    > : tensor<4xindex>

    %bindex = arith.constant dense<
      [[ 1, 2],
       [ 5, 6],
       [ 7, 8],
       [ 2, 3],
       [ 4, 2],
       [ 10, 10]]
    > : tensor<6x2xindex>

    %bs = sparse_tensor.assemble (%bpos, %bindex), %bdata :
          (tensor<4xindex>, tensor<6x2xindex>), tensor<5xf64> to tensor<2x10x10xf64, #BCOO>

    //
    // Verify results.
    //

    // CHECK: 1
    // CHECK-NEXT:2
    // CHECK-NEXT:1
    //
    // CHECK-NEXT:5
    // CHECK-NEXT:6
    // CHECK-NEXT:2
    //
    // CHECK-NEXT:7
    // CHECK-NEXT:8
    // CHECK-NEXT:3
    sparse_tensor.foreach in %s4 : tensor<10x10xf64, #SortedCOO> do {
      ^bb0(%1: index, %2: index, %v: f64) :
        vector.print %1: index
        vector.print %2: index
        vector.print %v: f64
    }

    // CHECK-NEXT:1
    // CHECK-NEXT:2
    // CHECK-NEXT:1
    //
    // CHECK-NEXT:5
    // CHECK-NEXT:6
    // CHECK-NEXT:2
    //
    // CHECK-NEXT:7
    // CHECK-NEXT:8
    // CHECK-NEXT:3
    sparse_tensor.foreach in %s5 : tensor<10x10xf64, #SortedCOOI32> do {
      ^bb0(%1: index, %2: index, %v: f64) :
        vector.print %1: index
        vector.print %2: index
        vector.print %v: f64
    }

    // CHECK-NEXT:0
    // CHECK-NEXT:1
    // CHECK-NEXT:1
    //
    // CHECK-NEXT:1
    // CHECK-NEXT:0
    // CHECK-NEXT:2
    //
    // CHECK-NEXT:1
    // CHECK-NEXT:1
    // CHECK-NEXT:3
    sparse_tensor.foreach in %csr : tensor<2x2xf64, #CSR> do {
      ^bb0(%1: index, %2: index, %v: f64) :
        vector.print %1: index
        vector.print %2: index
        vector.print %v: f64
    }

    // CHECK-NEXT:0
    // CHECK-NEXT:1
    // CHECK-NEXT:2
    // CHECK-NEXT:1
    //
    // CHECK-NEXT:0
    // CHECK-NEXT:5
    // CHECK-NEXT:6
    // CHECK-NEXT:2
    //
    // CHECK-NEXT:0
    // CHECK-NEXT:7
    // CHECK-NEXT:8
    // CHECK-NEXT:3
    //
    // CHECK-NEXT:1
    // CHECK-NEXT:2
    // CHECK-NEXT:3
    // CHECK-NEXT:4
    //
    // CHECK-NEXT:1
    // CHECK-NEXT:4
    // CHECK-NEXT:2
    // CHECK-NEXT:5
    sparse_tensor.foreach in %bs : tensor<2x10x10xf64, #BCOO> do {
      ^bb0(%0: index, %1: index, %2: index, %v: f64) :
        vector.print %0: index
        vector.print %1: index
        vector.print %2: index
        vector.print %v: f64
    }

    //
    // Verify disassemble operations.
    //
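
    // Each disassemble below writes the per-level position/coordinate buffers
    // and the values of a sparse tensor into the pre-allocated tensor.empty()
    // outputs; the trailing results report how many elements were actually
    // written to each output and are checked against the expected counts.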

    %od = tensor.empty() : tensor<3xf64>
    %op = tensor.empty() : tensor<2xi32>
    %oi = tensor.empty() : tensor<3x2xi32>
    %p, %i, %d, %pl, %il, %dl = sparse_tensor.disassemble %s5 : tensor<10x10xf64, #SortedCOOI32>
                 out_lvls(%op, %oi : tensor<2xi32>, tensor<3x2xi32>)
                 out_vals(%od : tensor<3xf64>)
                 -> (tensor<2xi32>, tensor<3x2xi32>), tensor<3xf64>, (i32, i64), index

    // CHECK-NEXT: ( 1, 2, 3 )
    %vd = vector.transfer_read %d[%c0], %f0 : tensor<3xf64>, vector<3xf64>
    vector.print %vd : vector<3xf64>

    // CHECK-NEXT: ( ( 1, 2 ), ( 5, 6 ), ( 7, 8 ) )
    %vi = vector.transfer_read %i[%c0, %c0], %i0 : tensor<3x2xi32>, vector<3x2xi32>
    vector.print %vi : vector<3x2xi32>

    // CHECK-NEXT: 3
    vector.print %dl : index

    %d_csr = tensor.empty() : tensor<4xf64>
    %p_csr = tensor.empty() : tensor<3xi32>
    %i_csr = tensor.empty() : tensor<3xi32>
    %rp_csr, %ri_csr, %rd_csr, %lp_csr, %li_csr, %ld_csr = sparse_tensor.disassemble %csr : tensor<2x2xf64, #CSR>
                 out_lvls(%p_csr, %i_csr : tensor<3xi32>, tensor<3xi32>)
                 out_vals(%d_csr : tensor<4xf64>)
                 -> (tensor<3xi32>, tensor<3xi32>), tensor<4xf64>, (i32, i64), index

    // CHECK-NEXT: ( 1, 2, 3 )
    %vd_csr = vector.transfer_read %rd_csr[%c0], %f0 : tensor<4xf64>, vector<3xf64>
    vector.print %vd_csr : vector<3xf64>

    // CHECK-NEXT: 3
    vector.print %ld_csr : index

    %bod = tensor.empty() : tensor<6xf64>
    %bop = tensor.empty() : tensor<4xindex>
    %boi = tensor.empty() : tensor<6x2xindex>
    %bp, %bi, %bd, %lp, %li, %ld = sparse_tensor.disassemble %bs : tensor<2x10x10xf64, #BCOO>
                 out_lvls(%bop, %boi : tensor<4xindex>, tensor<6x2xindex>)
                 out_vals(%bod : tensor<6xf64>)
                 -> (tensor<4xindex>, tensor<6x2xindex>), tensor<6xf64>, (i32, tensor<i64>), index

    // CHECK-NEXT: ( 1, 2, 3, 4, 5 )
    %vbd = vector.transfer_read %bd[%c0], %f0 : tensor<6xf64>, vector<5xf64>
    vector.print %vbd : vector<5xf64>

    // CHECK-NEXT: 5
    vector.print %ld : index
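
    // Only five of the six rows in the coordinate buffer belong to stored
    // entries, so the contents of the last row after the round trip are
    // unspecified (matched with wildcards below), and the reported coordinate
    // length is 10, i.e. 5 entries times 2 coordinates per entry.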

    // CHECK-NEXT: ( ( 1, 2 ), ( 5, 6 ), ( 7, 8 ), ( 2, 3 ), ( 4, 2 ), ( {{.*}}, {{.*}} ) )
    %vbi = vector.transfer_read %bi[%c0, %c0], %c0 : tensor<6x2xindex>, vector<6x2xindex>
    vector.print %vbi : vector<6x2xindex>

    // CHECK-NEXT: 10
    %si = tensor.extract %li[] : tensor<i64>
    vector.print %si : i64

    // TODO: This check will no longer be needed once the codegen path uses the
    // buffer deallocation pass. "dealloc_tensor" turns into a no-op in the
    // codegen path.
    %has_runtime = sparse_tensor.has_runtime_library
    scf.if %has_runtime {
      // sparse_tensor.assemble copies buffers when running with the runtime
      // library. Deallocations are not needed when running in codegen mode.
      bufferization.dealloc_tensor %s4 : tensor<10x10xf64, #SortedCOO>
      bufferization.dealloc_tensor %s5 : tensor<10x10xf64, #SortedCOOI32>
      bufferization.dealloc_tensor %csr : tensor<2x2xf64, #CSR>
      bufferization.dealloc_tensor %bs : tensor<2x10x10xf64, #BCOO>
    }

    bufferization.dealloc_tensor %li : tensor<i64>
    bufferization.dealloc_tensor %od : tensor<3xf64>
    bufferization.dealloc_tensor %op : tensor<2xi32>
    bufferization.dealloc_tensor %oi : tensor<3x2xi32>
    bufferization.dealloc_tensor %d_csr : tensor<4xf64>
    bufferization.dealloc_tensor %p_csr : tensor<3xi32>
    bufferization.dealloc_tensor %i_csr : tensor<3xi32>
    bufferization.dealloc_tensor %bod : tensor<6xf64>
    bufferization.dealloc_tensor %bop : tensor<4xindex>
    bufferization.dealloc_tensor %boi : tensor<6x2xindex>

    return
  }
}