//--------------------------------------------------------------------------------------------------
// WHEN CREATING A NEW TEST, PLEASE JUST COPY & PASTE WITHOUT EDITS.
//
// Set-up that's shared across all tests in this directory. In principle, this
// config could be moved to lit.local.cfg. However, there are downstream users that
// do not use these LIT config files. Hence why this is kept inline.
//
// DEFINE: %{sparsifier_opts} = enable-runtime-library=true
// DEFINE: %{sparsifier_opts_sve} = enable-arm-sve=true %{sparsifier_opts}
// DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}"
// DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}"
// DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils
// DEFINE: %{run_libs_sve} = -shared-libs=%native_mlir_runner_utils,%native_mlir_c_runner_utils
// DEFINE: %{run_opts} = -e main -entry-point-result=void
// DEFINE: %{run} = mlir-runner %{run_opts} %{run_libs}
// DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs_sve}
//
// DEFINE: %{env} =
//--------------------------------------------------------------------------------------------------

// REDEFINE: %{sparsifier_opts} = enable-runtime-library=false
// RUN: %{compile} | %{run} | FileCheck %s

// TODO: support lib path.

#DCSR = #sparse_tensor.encoding<{
  map = (d0, d1) -> (d0 : compressed, d1 : compressed)
}>

#DCSR_SLICE = #sparse_tensor.encoding<{
  map = (d0 : #sparse_tensor<slice(0, 4, 1)>, d1 : #sparse_tensor<slice(0, 8, 1)>) -> (d0 : compressed, d1 : compressed)
}>

#CSR = #sparse_tensor.encoding<{
  map = (d0, d1) -> (d0 : dense, d1 : compressed)
}>

#CSR_SLICE = #sparse_tensor.encoding<{
  map = (d0 : #sparse_tensor<slice(0, 4, 1)>, d1 : #sparse_tensor<slice(0, 8, 1)>) -> (d0 : dense, d1 : compressed)
}>

#COO = #sparse_tensor.encoding<{
  map = (d0, d1) -> (d0 : compressed(nonunique), d1 : singleton)
}>

#CSR_SLICE_1 = #sparse_tensor.encoding<{
  map = (d0 : #sparse_tensor<slice(0, 4, 2)>, d1 : #sparse_tensor<slice(0, 4, 1)>) -> (d0 : dense, d1 : compressed)
}>

#DCSR_SLICE_1 = #sparse_tensor.encoding<{
  map = (d0 : #sparse_tensor<slice(0, 4, 2)>, d1 : #sparse_tensor<slice(1, 4, 1)>) -> (d0 : compressed, d1 : compressed)
}>

#COO_SLICE_1 = #sparse_tensor.encoding<{
  map = (d0 : #sparse_tensor<slice(0, 4, 2)>, d1 : #sparse_tensor<slice(0, 4, 1)>) -> (d0 : compressed(nonunique), d1 : singleton)
}>

#COO_SLICE_2 = #sparse_tensor.encoding<{
  map = (d0 : #sparse_tensor<slice(0, 4, 2)>, d1 : #sparse_tensor<slice(1, 4, 1)>) -> (d0 : compressed(nonunique), d1 : singleton)
}>

#CSR_SLICE_dyn = #sparse_tensor.encoding<{
  map = (d0 : #sparse_tensor<slice(?, 4, ?)>, d1 : #sparse_tensor<slice(?, 4, ?)>) -> (d0 : dense, d1 : compressed)
}>

#DCSR_SLICE_dyn = #sparse_tensor.encoding<{
  map = (d0 : #sparse_tensor<slice(?, 4, ?)>, d1 : #sparse_tensor<slice(?, 4, ?)>) -> (d0 : compressed, d1 : compressed)
}>

module {
  func.func private @printMemrefF64(%ptr : tensor<*xf64>)
  func.func private @printMemref1dF64(%ptr : memref<?xf64>) attributes { llvm.emit_c_interface }

  //
  // Computes C = A x B with all matrices dynamic sparse slice (SpMSpM) in CSR and DCSR
  //
  func.func @matmul_dyn(%A: tensor<4x4xf64, #CSR_SLICE_dyn>,
                        %B: tensor<4x4xf64, #DCSR_SLICE_dyn>) -> tensor<4x4xf64, #CSR> {
    %C = tensor.empty() : tensor<4x4xf64, #CSR>
    %D = linalg.matmul
      ins(%A, %B: tensor<4x4xf64, #CSR_SLICE_dyn>, tensor<4x4xf64, #DCSR_SLICE_dyn>)
      outs(%C: tensor<4x4xf64, #CSR>) -> tensor<4x4xf64, #CSR>
    return %D: tensor<4x4xf64, #CSR>
  }

  //
  // Computes C = A x B with one matrix CSR sparse slices and the other DCSR sparse slice.
  //
  func.func @matmul1(%A: tensor<4x4xf64, #CSR_SLICE_1>,
                     %B: tensor<4x4xf64, #DCSR_SLICE_1>) -> tensor<4x4xf64, #CSR> {
    %C = tensor.empty() : tensor<4x4xf64, #CSR>
    %D = linalg.matmul
      ins(%A, %B: tensor<4x4xf64, #CSR_SLICE_1>, tensor<4x4xf64, #DCSR_SLICE_1>)
      outs(%C: tensor<4x4xf64, #CSR>) -> tensor<4x4xf64, #CSR>
    return %D: tensor<4x4xf64, #CSR>
  }

  //
  // Computes C = A x B with one matrix CSR sparse slice and the other CSR sparse tensor.
  //
  func.func @matmul2(%A: tensor<4x8xf64, #CSR_SLICE>,
                     %B: tensor<8x4xf64, #CSR>) -> tensor<4x4xf64, #CSR> {
    %C = tensor.empty() : tensor<4x4xf64, #CSR>
    %D = linalg.matmul
      ins(%A, %B: tensor<4x8xf64, #CSR_SLICE>, tensor<8x4xf64, #CSR>)
      outs(%C: tensor<4x4xf64, #CSR>) -> tensor<4x4xf64, #CSR>
    return %D: tensor<4x4xf64, #CSR>
  }

  //
  // Computes C = A x B with one matrix DCSR sparse slice and the other DCSR sparse tensor.
  //
  func.func @matmul3(%A: tensor<4x8xf64, #DCSR_SLICE>,
                     %B: tensor<8x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR> {
    %C = tensor.empty() : tensor<4x4xf64, #DCSR>
    %D = linalg.matmul
      ins(%A, %B: tensor<4x8xf64, #DCSR_SLICE>, tensor<8x4xf64, #DCSR>)
      outs(%C: tensor<4x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR>
    return %D: tensor<4x4xf64, #DCSR>
  }

  //
  // Computes C = A x B with two COO slices.
  //
  func.func @matmul5(%A: tensor<4x4xf64, #COO_SLICE_1>,
                     %B: tensor<4x4xf64, #COO_SLICE_2>) -> tensor<4x4xf64, #COO> {
    %C = tensor.empty() : tensor<4x4xf64, #COO>
    %D = linalg.matmul
      ins(%A, %B: tensor<4x4xf64, #COO_SLICE_1>, tensor<4x4xf64, #COO_SLICE_2>)
      outs(%C: tensor<4x4xf64, #COO>) -> tensor<4x4xf64, #COO>
    return %D: tensor<4x4xf64, #COO>
  }

  //
  // Main driver.
  //
  func.func @main() {
    %c_0 = arith.constant 0 : index
    %c_1 = arith.constant 1 : index
    %c_2 = arith.constant 2 : index
    %f0 = arith.constant 0.0 : f64

    %sa = arith.constant dense<[
        [ 0.0, 2.1, 0.0, 0.0, 0.0, 6.1, 0.0, 0.0 ],
        [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ],
        [ 0.0, 2.3, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ],
        [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0 ],
        [ 0.0, 2.1, 0.0, 0.0, 0.0, 6.1, 0.0, 0.0 ],
        [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ],
        [ 0.0, 2.3, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ],
        [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0 ]
    ]> : tensor<8x8xf64>
    %sb = arith.constant dense<[
        [ 0.0, 0.0, 0.0, 1.0 ],
        [ 0.0, 0.0, 2.0, 0.0 ],
        [ 0.0, 3.0, 0.0, 0.0 ],
        [ 4.0, 0.0, 0.0, 0.0 ],
        [ 0.0, 0.0, 0.0, 0.0 ],
        [ 0.0, 5.0, 0.0, 0.0 ],
        [ 0.0, 0.0, 6.0, 0.0 ],
        [ 0.0, 0.0, 7.0, 8.0 ]
    ]> : tensor<8x4xf64>

    // Convert all these matrices to sparse format.
    %tmp = sparse_tensor.convert %sa : tensor<8x8xf64> to tensor<8x8xf64, #DCSR>
    %a = tensor.extract_slice %tmp[0, 0][4, 8][1, 1] : tensor<8x8xf64, #DCSR> to tensor<4x8xf64, #DCSR_SLICE>
    %b = sparse_tensor.convert %sb : tensor<8x4xf64> to tensor<8x4xf64, #DCSR>

    %2 = call @matmul3(%a, %b)
       : (tensor<4x8xf64, #DCSR_SLICE>,
          tensor<8x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR>

    // DCSR test
    //
    // CHECK:      ---- Sparse Tensor ----
    // CHECK-NEXT: nse = 5
    // CHECK-NEXT: dim = ( 4, 4 )
    // CHECK-NEXT: lvl = ( 4, 4 )
    // CHECK-NEXT: pos[0] : ( 0, 3 )
    // CHECK-NEXT: crd[0] : ( 0, 2, 3 )
    // CHECK-NEXT: pos[1] : ( 0, 2, 3, 5 )
    // CHECK-NEXT: crd[1] : ( 1, 2, 2, 2, 3 )
    // CHECK-NEXT: values : ( 30.5, 4.2, 4.6, 7, 8 )
    // CHECK-NEXT: ----
    //
    sparse_tensor.print %2 : tensor<4x4xf64, #DCSR>

    %t1 = sparse_tensor.convert %sa : tensor<8x8xf64> to tensor<8x8xf64, #CSR>
    %a1 = tensor.extract_slice %t1[0, 0][4, 8][1, 1] : tensor<8x8xf64, #CSR> to tensor<4x8xf64, #CSR_SLICE>
    %b1 = sparse_tensor.convert %sb : tensor<8x4xf64> to tensor<8x4xf64, #CSR>
    %3 = call @matmul2(%a1, %b1)
       : (tensor<4x8xf64, #CSR_SLICE>,
          tensor<8x4xf64, #CSR>) -> tensor<4x4xf64, #CSR>

    // CSR test
    //
    // CHECK:      ---- Sparse Tensor ----
    // CHECK-NEXT: nse = 5
    // CHECK-NEXT: dim = ( 4, 4 )
    // CHECK-NEXT: lvl = ( 4, 4 )
    // CHECK-NEXT: pos[1] : ( 0, 2, 2, 3, 5 )
    // CHECK-NEXT: crd[1] : ( 1, 2, 2, 2, 3 )
    // CHECK-NEXT: values : ( 30.5, 4.2, 4.6, 7, 8 )
    // CHECK-NEXT: ----
    //
    sparse_tensor.print %3 : tensor<4x4xf64, #CSR>


    // slice x slice
    //
    // CHECK:      ---- Sparse Tensor ----
    // CHECK-NEXT: nse = 3
    // CHECK-NEXT: dim = ( 4, 4 )
    // CHECK-NEXT: lvl = ( 4, 4 )
    // CHECK-NEXT: pos[1] : ( 0, 1, 2, 2, 3 )
    // CHECK-NEXT: crd[1] : ( 0, 0, 0 )
    // CHECK-NEXT: values : ( 2.3, 6.9, 12.6 )
    // CHECK-NEXT: ----
    //
    %s1 = tensor.extract_slice %tmp[0, 1][4, 4][2, 1] : tensor<8x8xf64, #DCSR> to tensor<4x4xf64, #DCSR_SLICE_1>
    %s2 = tensor.extract_slice %b1[0, 0][4, 4][2, 1] : tensor<8x4xf64, #CSR> to tensor<4x4xf64, #CSR_SLICE_1>
    %4 = call @matmul1(%s2, %s1)
       : (tensor<4x4xf64, #CSR_SLICE_1>,
          tensor<4x4xf64, #DCSR_SLICE_1>) -> tensor<4x4xf64, #CSR>
    sparse_tensor.print %4 : tensor<4x4xf64, #CSR>

    // slice coo x slice coo
    //
    // CHECK:      ---- Sparse Tensor ----
    // CHECK-NEXT: nse = 3
    // CHECK-NEXT: dim = ( 4, 4 )
    // CHECK-NEXT: lvl = ( 4, 4 )
    // CHECK-NEXT: pos[0] : ( 0, 3 )
    // CHECK-NEXT: crd[0] : ( 0, 0, 1, 0, 3, 0 )
    // CHECK-NEXT: values : ( 2.3, 6.9, 12.6 )
    // CHECK-NEXT: ----
    //
    %t1_coo = sparse_tensor.convert %sa : tensor<8x8xf64> to tensor<8x8xf64, #COO>
    %b1_coo = sparse_tensor.convert %sb : tensor<8x4xf64> to tensor<8x4xf64, #COO>
    %s2_coo = tensor.extract_slice %b1_coo[0, 0][4, 4][2, 1] : tensor<8x4xf64, #COO> to tensor<4x4xf64, #COO_SLICE_1>
    %s1_coo = tensor.extract_slice %t1_coo[0, 1][4, 4][2, 1] : tensor<8x8xf64, #COO> to tensor<4x4xf64, #COO_SLICE_2>
    %o_coo = call @matmul5(%s2_coo, %s1_coo) : (tensor<4x4xf64, #COO_SLICE_1>, tensor<4x4xf64, #COO_SLICE_2>) -> tensor<4x4xf64, #COO>
    sparse_tensor.print %o_coo : tensor<4x4xf64, #COO>

    // slice x slice (same as above, but with dynamic stride information)
    //
    // CHECK:      ---- Sparse Tensor ----
    // CHECK-NEXT: nse = 3
    // CHECK-NEXT: dim = ( 4, 4 )
    // CHECK-NEXT: lvl = ( 4, 4 )
    // CHECK-NEXT: pos[1] : ( 0, 1, 2, 2, 3 )
    // CHECK-NEXT: crd[1] : ( 0, 0, 0 )
    // CHECK-NEXT: values : ( 2.3, 6.9, 12.6 )
    // CHECK-NEXT: ----
    //
    %s1_dyn = tensor.extract_slice %tmp[%c_0, %c_1][4, 4][%c_2, %c_1] : tensor<8x8xf64, #DCSR> to tensor<4x4xf64, #DCSR_SLICE_dyn>
    %s2_dyn = tensor.extract_slice %b1[%c_0, %c_0][4, 4][%c_2, %c_1] : tensor<8x4xf64, #CSR> to tensor<4x4xf64, #CSR_SLICE_dyn>
    %dyn_4 = call @matmul_dyn(%s2_dyn, %s1_dyn)
       : (tensor<4x4xf64, #CSR_SLICE_dyn>,
          tensor<4x4xf64, #DCSR_SLICE_dyn>) -> tensor<4x4xf64, #CSR>
    sparse_tensor.print %dyn_4 : tensor<4x4xf64, #CSR>

    // sparse slices should generate the same result as dense slices
    //
    // CHECK:      [2.3,   0,   0,   0],
    // CHECK-NEXT: [6.9,   0,   0,   0],
    // CHECK-NEXT: [0,   0,   0,   0],
    // CHECK-NEXT: [12.6,   0,   0,   0]]
    //
    %ds1 = tensor.extract_slice %sa[0, 1][4, 4][2, 1] : tensor<8x8xf64> to tensor<4x4xf64>
    %ds2 = tensor.extract_slice %sb[0, 0][4, 4][2, 1] : tensor<8x4xf64> to tensor<4x4xf64>

    %d = tensor.empty() : tensor<4x4xf64>
    %zeroed = linalg.fill ins(%f0 : f64) outs(%d : tensor<4x4xf64>)
        -> tensor<4x4xf64>
    %r = linalg.matmul ins(%ds2, %ds1: tensor<4x4xf64>, tensor<4x4xf64>)
                       outs(%zeroed: tensor<4x4xf64>) -> tensor<4x4xf64>
    %du = tensor.cast %r : tensor<4x4xf64> to tensor<*xf64>
    call @printMemrefF64(%du) : (tensor<*xf64>) -> ()

    // Releases resources.
    bufferization.dealloc_tensor %d : tensor<4x4xf64>
    bufferization.dealloc_tensor %b1 : tensor<8x4xf64, #CSR>
    bufferization.dealloc_tensor %t1 : tensor<8x8xf64, #CSR>
    bufferization.dealloc_tensor %b1_coo : tensor<8x4xf64, #COO>
    bufferization.dealloc_tensor %t1_coo : tensor<8x8xf64, #COO>
    bufferization.dealloc_tensor %o_coo : tensor<4x4xf64, #COO>
    bufferization.dealloc_tensor %b : tensor<8x4xf64, #DCSR>
    bufferization.dealloc_tensor %tmp: tensor<8x8xf64, #DCSR>
    bufferization.dealloc_tensor %4 : tensor<4x4xf64, #CSR>
    bufferization.dealloc_tensor %3 : tensor<4x4xf64, #CSR>
    bufferization.dealloc_tensor %2 : tensor<4x4xf64, #DCSR>
    bufferization.dealloc_tensor %dyn_4 : tensor<4x4xf64, #CSR>

    return
  }
}