// RUN: mlir-opt %s --sparse-reinterpret-map -sparsification="parallelization-strategy=any-storage-any-loop" | \
// RUN: FileCheck %s

// Test: sparsification of a CSR matrix-vector product with the most
// aggressive parallelization strategy. The outer (dense) loop and the inner
// (sparse, reduction) loop must both lower to scf.parallel, with the
// reduction expressed through scf.reduce.

#CSR = #sparse_tensor.encoding<{
  map = (d0, d1) -> (d0 : dense, d1 : compressed)
}>

#trait_matvec = {
  indexing_maps = [
    affine_map<(i,j) -> (i,j)>, // A
    affine_map<(i,j) -> (j)>,   // b
    affine_map<(i,j) -> (i)>    // x (out)
  ],
  iterator_types = ["parallel", "reduction"],
  doc = "x(i) += A(i,j) * b(j)"
}
// CHECK-LABEL: func.func @matvec(
// CHECK-SAME: %[[TMP_arg0:.*]]: tensor<16x32xf32, #sparse{{[0-9]*}}>,
// CHECK-SAME: %[[TMP_arg1:.*]]: tensor<32xf32>,
// CHECK-SAME: %[[TMP_arg2:.*]]: tensor<16xf32>) -> tensor<16xf32> {
// CHECK-DAG: %[[TMP_c16:.*]] = arith.constant 16 : index
// CHECK-DAG: %[[TMP_c0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[TMP_c1:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[TMP_0:.*]] = sparse_tensor.positions %[[TMP_arg0]] {level = 1 : index}
// CHECK-DAG: %[[TMP_1:.*]] = sparse_tensor.coordinates %[[TMP_arg0]] {level = 1 : index}
// CHECK-DAG: %[[TMP_2:.*]] = sparse_tensor.values %[[TMP_arg0]]
// CHECK-DAG: %[[TMP_3:.*]] = bufferization.to_memref %[[TMP_arg1]] : tensor<32xf32> to memref<32xf32>
// CHECK-DAG: %[[TMP_4:.*]] = bufferization.to_memref %[[TMP_arg2]] : tensor<16xf32> to memref<16xf32>
// CHECK: scf.parallel (%[[TMP_arg3:.*]]) = (%[[TMP_c0]]) to (%[[TMP_c16]]) step (%[[TMP_c1]]) {
// CHECK: %[[TMP_6:.*]] = memref.load %[[TMP_4]][%[[TMP_arg3]]] : memref<16xf32>
// CHECK: %[[TMP_7:.*]] = memref.load %[[TMP_0]][%[[TMP_arg3]]] : memref<?xindex>
// CHECK: %[[TMP_8:.*]] = arith.addi %[[TMP_arg3]], %[[TMP_c1]] : index
// CHECK: %[[TMP_9:.*]] = memref.load %[[TMP_0]][%[[TMP_8]]] : memref<?xindex>
// CHECK: %[[TMP_10:.*]] = scf.parallel (%[[TMP_arg4:.*]]) = (%[[TMP_7]]) to (%[[TMP_9]]) step (%[[TMP_c1]]) init (%[[TMP_6]]) -> f32 {
// CHECK: %[[TMP_11:.*]] = memref.load %[[TMP_1]][%[[TMP_arg4]]] : memref<?xindex>
// CHECK: %[[TMP_12:.*]] = memref.load %[[TMP_2]][%[[TMP_arg4]]] : memref<?xf32>
// CHECK: %[[TMP_13:.*]] = memref.load %[[TMP_3]][%[[TMP_11]]] : memref<32xf32>
// CHECK: %[[TMP_14:.*]] = arith.mulf %[[TMP_12]], %[[TMP_13]] : f32
// CHECK: scf.reduce(%[[TMP_14]] : f32) {
// CHECK: ^bb0(%[[TMP_arg5:.*]]: f32, %[[TMP_arg6:.*]]: f32):
// CHECK: %[[TMP_15:.*]] = arith.addf %[[TMP_arg5]], %[[TMP_arg6]] : f32
// CHECK: scf.reduce.return %[[TMP_15]] : f32
// CHECK: }
// CHECK: }
// CHECK: memref.store %[[TMP_10]], %[[TMP_4]][%[[TMP_arg3]]] : memref<16xf32>
// CHECK: scf.reduce
// CHECK: }
// CHECK: %[[TMP_5:.*]] = bufferization.to_tensor %[[TMP_4]] : memref<16xf32>
// CHECK: return %[[TMP_5]] : tensor<16xf32>
func.func @matvec(%arga: tensor<16x32xf32, #CSR>,
                  %argb: tensor<32xf32>,
                  %argx: tensor<16xf32>) -> tensor<16xf32> {
  %0 = linalg.generic #trait_matvec
      ins(%arga, %argb : tensor<16x32xf32, #CSR>, tensor<32xf32>)
     outs(%argx: tensor<16xf32>) {
    ^bb(%A: f32, %b: f32, %x: f32):
      %0 = arith.mulf %A, %b : f32
      %1 = arith.addf %0, %x : f32
      linalg.yield %1 : f32
  } -> tensor<16xf32>
  return %0 : tensor<16xf32>
}