//--------------------------------------------------------------------------------------------------
2// WHEN CREATING A NEW TEST, PLEASE JUST COPY & PASTE WITHOUT EDITS.
3//
4// Set-up that's shared across all tests in this directory. In principle, this
5// config could be moved to lit.local.cfg. However, there are downstream users that
6//  do not use these LIT config files. Hence why this is kept inline.
7//
8// DEFINE: %{sparsifier_opts} = enable-runtime-library=true
9// DEFINE: %{sparsifier_opts_sve} = enable-arm-sve=true %{sparsifier_opts}
10// DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}"
11// DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}"
12// DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils
13// DEFINE: %{run_libs_sve} = -shared-libs=%native_mlir_runner_utils,%native_mlir_c_runner_utils
14// DEFINE: %{run_opts} = -e main -entry-point-result=void
15// DEFINE: %{run} = mlir-runner %{run_opts} %{run_libs}
16// DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs_sve}
17//
18// DEFINE: %{env} =
19//--------------------------------------------------------------------------------------------------
20
21// REDEFINE: %{sparsifier_opts} = enable-runtime-library=false
22// RUN: %{compile} | %{run} | FileCheck %s
23
24// TODO: support lib path.
25
// Doubly compressed sparse row: both dimensions stored compressed.
#DCSR = #sparse_tensor.encoding<{
  map = (d0, d1) -> (d0 : compressed, d1 : compressed)
}>

// DCSR view of a 4x8 slice at offset (0, 0) with unit strides; slice
// parameters are (offset, size, stride) per dimension, matching the
// [offsets][sizes][strides] of the tensor.extract_slice ops below.
#DCSR_SLICE = #sparse_tensor.encoding<{
  map = (d0 : #sparse_tensor<slice(0, 4, 1)>, d1 : #sparse_tensor<slice(0, 8, 1)>) -> (d0 : compressed, d1 : compressed)
}>

// Compressed sparse row: dense rows, compressed columns.
#CSR = #sparse_tensor.encoding<{
  map = (d0, d1) -> (d0 : dense, d1 : compressed)
}>

// CSR view of a 4x8 slice at offset (0, 0) with unit strides.
#CSR_SLICE = #sparse_tensor.encoding<{
  map = (d0 : #sparse_tensor<slice(0, 4, 1)>, d1 : #sparse_tensor<slice(0, 8, 1)>) -> (d0 : dense, d1 : compressed)
}>

// Coordinate (COO) format: nonunique compressed rows plus singleton columns.
#COO = #sparse_tensor.encoding<{
  map = (d0, d1) -> (d0 : compressed(nonunique), d1 : singleton)
}>

// CSR view of a 4x4 slice: offsets (0, 0), strides (2, 1).
#CSR_SLICE_1 = #sparse_tensor.encoding<{
  map = (d0 : #sparse_tensor<slice(0, 4, 2)>, d1 : #sparse_tensor<slice(0, 4, 1)>) -> (d0 : dense, d1 : compressed)
}>

// DCSR view of a 4x4 slice: offsets (0, 1), strides (2, 1).
#DCSR_SLICE_1 = #sparse_tensor.encoding<{
  map = (d0 : #sparse_tensor<slice(0, 4, 2)>, d1 : #sparse_tensor<slice(1, 4, 1)>) -> (d0 : compressed, d1 : compressed)
}>

// COO view of a 4x4 slice: offsets (0, 0), strides (2, 1).
#COO_SLICE_1 = #sparse_tensor.encoding<{
  map = (d0 : #sparse_tensor<slice(0, 4, 2)>, d1 : #sparse_tensor<slice(0, 4, 1)>) -> (d0 : compressed(nonunique), d1 : singleton)
}>

// COO view of a 4x4 slice: offsets (0, 1), strides (2, 1).
#COO_SLICE_2 = #sparse_tensor.encoding<{
  map = (d0 : #sparse_tensor<slice(0, 4, 2)>, d1 : #sparse_tensor<slice(1, 4, 1)>) -> (d0 : compressed(nonunique), d1 : singleton)
}>

// CSR view of a 4x4 slice whose offsets and strides are dynamic (?).
#CSR_SLICE_dyn = #sparse_tensor.encoding<{
  map = (d0 : #sparse_tensor<slice(?, 4, ?)>, d1 : #sparse_tensor<slice(?, 4, ?)>) -> (d0 : dense, d1 : compressed)
}>

// DCSR view of a 4x4 slice whose offsets and strides are dynamic (?).
#DCSR_SLICE_dyn = #sparse_tensor.encoding<{
  map = (d0 : #sparse_tensor<slice(?, 4, ?)>, d1 : #sparse_tensor<slice(?, 4, ?)>) -> (d0 : compressed, d1 : compressed)
}>
69
70module {
71  func.func private @printMemrefF64(%ptr : tensor<*xf64>)
72  func.func private @printMemref1dF64(%ptr : memref<?xf64>) attributes { llvm.emit_c_interface }
73
74  //
75  // Computes C = A x B with all matrices dynamic sparse slice (SpMSpM) in CSR and DCSR
76  //
77  func.func @matmul_dyn(%A: tensor<4x4xf64, #CSR_SLICE_dyn>,
78                        %B: tensor<4x4xf64, #DCSR_SLICE_dyn>) -> tensor<4x4xf64, #CSR> {
79    %C = tensor.empty() : tensor<4x4xf64, #CSR>
80    %D = linalg.matmul
81      ins(%A, %B: tensor<4x4xf64, #CSR_SLICE_dyn>, tensor<4x4xf64, #DCSR_SLICE_dyn>)
82         outs(%C: tensor<4x4xf64, #CSR>) -> tensor<4x4xf64, #CSR>
83    return %D: tensor<4x4xf64, #CSR>
84  }
85
  //
  // Computes C = A x B with one matrix a CSR sparse slice and the other a DCSR sparse slice.
  //
89  func.func @matmul1(%A: tensor<4x4xf64, #CSR_SLICE_1>,
90                     %B: tensor<4x4xf64, #DCSR_SLICE_1>) -> tensor<4x4xf64, #CSR> {
91    %C = tensor.empty() : tensor<4x4xf64, #CSR>
92    %D = linalg.matmul
93      ins(%A, %B: tensor<4x4xf64, #CSR_SLICE_1>, tensor<4x4xf64, #DCSR_SLICE_1>)
94         outs(%C: tensor<4x4xf64, #CSR>) -> tensor<4x4xf64, #CSR>
95    return %D: tensor<4x4xf64, #CSR>
96  }
97
98  //
99  // Computes C = A x B with one matrix CSR sparse slice and the other CSR sparse tensor.
100  //
101  func.func @matmul2(%A: tensor<4x8xf64, #CSR_SLICE>,
102                     %B: tensor<8x4xf64, #CSR>) -> tensor<4x4xf64, #CSR> {
103    %C = tensor.empty() : tensor<4x4xf64, #CSR>
104    %D = linalg.matmul
105      ins(%A, %B: tensor<4x8xf64, #CSR_SLICE>, tensor<8x4xf64, #CSR>)
106         outs(%C: tensor<4x4xf64, #CSR>) -> tensor<4x4xf64, #CSR>
107    return %D: tensor<4x4xf64, #CSR>
108  }
109
110  //
111  // Computes C = A x B with one matrix DCSR sparse slice and the other DCSR sparse tensor.
112  //
113  func.func @matmul3(%A: tensor<4x8xf64, #DCSR_SLICE>,
114                     %B: tensor<8x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR> {
115    %C = tensor.empty() : tensor<4x4xf64, #DCSR>
116    %D = linalg.matmul
117      ins(%A, %B: tensor<4x8xf64, #DCSR_SLICE>, tensor<8x4xf64, #DCSR>)
118         outs(%C: tensor<4x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR>
119    return %D: tensor<4x4xf64, #DCSR>
120  }
121
122  //
123  // Computes C = A x B with two COO slices.
124  //
125  func.func @matmul5(%A: tensor<4x4xf64, #COO_SLICE_1>,
126                     %B: tensor<4x4xf64, #COO_SLICE_2>) -> tensor<4x4xf64, #COO> {
127    %C = tensor.empty() : tensor<4x4xf64, #COO>
128    %D = linalg.matmul
129      ins(%A, %B: tensor<4x4xf64, #COO_SLICE_1>, tensor<4x4xf64, #COO_SLICE_2>)
130         outs(%C: tensor<4x4xf64, #COO>) -> tensor<4x4xf64, #COO>
131    return %D: tensor<4x4xf64, #COO>
132  }
133  //
134  // Main driver.
135  //
  func.func @main() {
    // Index constants (used below as dynamic slice offsets/strides) and
    // a float zero for the dense reference computation.
    %c_0 = arith.constant 0 : index
    %c_1 = arith.constant 1 : index
    %c_2 = arith.constant 2 : index
    %f0 = arith.constant 0.0 : f64

    // Dense source matrices; every sparse tensor and slice below is
    // derived from these two constants.
    %sa = arith.constant dense<[
        [ 0.0, 2.1, 0.0, 0.0, 0.0, 6.1, 0.0, 0.0 ],
        [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ],
        [ 0.0, 2.3, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ],
        [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0 ],
        [ 0.0, 2.1, 0.0, 0.0, 0.0, 6.1, 0.0, 0.0 ],
        [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ],
        [ 0.0, 2.3, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ],
        [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0 ]
    ]> : tensor<8x8xf64>
    %sb = arith.constant dense<[
        [ 0.0, 0.0, 0.0, 1.0 ],
        [ 0.0, 0.0, 2.0, 0.0 ],
        [ 0.0, 3.0, 0.0, 0.0 ],
        [ 4.0, 0.0, 0.0, 0.0 ],
        [ 0.0, 0.0, 0.0, 0.0 ],
        [ 0.0, 5.0, 0.0, 0.0 ],
        [ 0.0, 0.0, 6.0, 0.0 ],
        [ 0.0, 0.0, 7.0, 8.0 ]
    ]> : tensor<8x4xf64>

    // Convert all these matrices to sparse format.
    %tmp = sparse_tensor.convert %sa : tensor<8x8xf64> to tensor<8x8xf64, #DCSR>
    %a = tensor.extract_slice %tmp[0, 0][4, 8][1, 1] : tensor<8x8xf64, #DCSR> to tensor<4x8xf64, #DCSR_SLICE>
    %b = sparse_tensor.convert %sb : tensor<8x4xf64> to tensor<8x4xf64, #DCSR>

    // DCSR slice x DCSR tensor.
    %2 = call @matmul3(%a, %b)
       : (tensor<4x8xf64, #DCSR_SLICE>,
          tensor<8x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR>

    // DCSR test
    //
    // CHECK:      ---- Sparse Tensor ----
    // CHECK-NEXT: nse = 5
    // CHECK-NEXT: dim = ( 4, 4 )
    // CHECK-NEXT: lvl = ( 4, 4 )
    // CHECK-NEXT: pos[0] : ( 0, 3 )
    // CHECK-NEXT: crd[0] : ( 0, 2, 3 )
    // CHECK-NEXT: pos[1] : ( 0, 2, 3, 5 )
    // CHECK-NEXT: crd[1] : ( 1, 2, 2, 2, 3 )
    // CHECK-NEXT: values : ( 30.5, 4.2, 4.6, 7, 8 )
    // CHECK-NEXT: ----
    //
    sparse_tensor.print %2 : tensor<4x4xf64, #DCSR>

    // Same product with CSR storage: CSR slice x CSR tensor.
    %t1 = sparse_tensor.convert %sa : tensor<8x8xf64> to tensor<8x8xf64, #CSR>
    %a1 = tensor.extract_slice %t1[0, 0][4, 8][1, 1] : tensor<8x8xf64, #CSR> to tensor<4x8xf64, #CSR_SLICE>
    %b1 = sparse_tensor.convert %sb : tensor<8x4xf64> to tensor<8x4xf64, #CSR>
    %3 = call @matmul2(%a1, %b1)
       : (tensor<4x8xf64, #CSR_SLICE>,
          tensor<8x4xf64, #CSR>) -> tensor<4x4xf64, #CSR>

    // CSR test
    //
    // CHECK:      ---- Sparse Tensor ----
    // CHECK-NEXT: nse = 5
    // CHECK-NEXT: dim = ( 4, 4 )
    // CHECK-NEXT: lvl = ( 4, 4 )
    // CHECK-NEXT: pos[1] : ( 0, 2, 2, 3, 5 )
    // CHECK-NEXT: crd[1] : ( 1, 2, 2, 2, 3 )
    // CHECK-NEXT: values : ( 30.5, 4.2, 4.6, 7, 8 )
    // CHECK-NEXT: ----
    //
    sparse_tensor.print %3 : tensor<4x4xf64, #CSR>


    // slice x slice
    //
    // CHECK:      ---- Sparse Tensor ----
    // CHECK-NEXT: nse = 3
    // CHECK-NEXT: dim = ( 4, 4 )
    // CHECK-NEXT: lvl = ( 4, 4 )
    // CHECK-NEXT: pos[1] : ( 0, 1, 2, 2, 3 )
    // CHECK-NEXT: crd[1] : ( 0, 0, 0 )
    // CHECK-NEXT: values : ( 2.3, 6.9, 12.6 )
    // CHECK-NEXT: ----
    //
    // Strided slices with static offsets [0,1]/[0,0] and strides [2,1].
    %s1 = tensor.extract_slice %tmp[0, 1][4, 4][2, 1] : tensor<8x8xf64, #DCSR> to tensor<4x4xf64, #DCSR_SLICE_1>
    %s2 = tensor.extract_slice %b1[0, 0][4, 4][2, 1] : tensor<8x4xf64, #CSR> to tensor<4x4xf64, #CSR_SLICE_1>
    %4 = call @matmul1(%s2, %s1)
       : (tensor<4x4xf64, #CSR_SLICE_1>,
          tensor<4x4xf64, #DCSR_SLICE_1>) -> tensor<4x4xf64, #CSR>
    sparse_tensor.print %4 : tensor<4x4xf64, #CSR>

    // slice coo x slice coo
    //
    // CHECK:      ---- Sparse Tensor ----
    // CHECK-NEXT: nse = 3
    // CHECK-NEXT: dim = ( 4, 4 )
    // CHECK-NEXT: lvl = ( 4, 4 )
    // CHECK-NEXT: pos[0] : ( 0, 3 )
    // CHECK-NEXT: crd[0] : ( 0, 0, 1, 0, 3, 0 )
    // CHECK-NEXT: values : ( 2.3, 6.9, 12.6 )
    // CHECK-NEXT: ----
    //
    %t1_coo = sparse_tensor.convert %sa : tensor<8x8xf64> to tensor<8x8xf64, #COO>
    %b1_coo = sparse_tensor.convert %sb : tensor<8x4xf64> to tensor<8x4xf64, #COO>
    %s2_coo = tensor.extract_slice %b1_coo[0, 0][4, 4][2, 1] : tensor<8x4xf64, #COO> to tensor<4x4xf64, #COO_SLICE_1>
    %s1_coo = tensor.extract_slice %t1_coo[0, 1][4, 4][2, 1] : tensor<8x8xf64, #COO> to tensor<4x4xf64, #COO_SLICE_2>
    %o_coo = call @matmul5(%s2_coo, %s1_coo) : (tensor<4x4xf64, #COO_SLICE_1>, tensor<4x4xf64, #COO_SLICE_2>) -> tensor<4x4xf64, #COO>
    sparse_tensor.print %o_coo : tensor<4x4xf64, #COO>

    // slice x slice (same as above, but with dynamic stride information)
    //
    // CHECK:      ---- Sparse Tensor ----
    // CHECK-NEXT: nse = 3
    // CHECK-NEXT: dim = ( 4, 4 )
    // CHECK-NEXT: lvl = ( 4, 4 )
    // CHECK-NEXT: pos[1] : ( 0, 1, 2, 2, 3 )
    // CHECK-NEXT: crd[1] : ( 0, 0, 0 )
    // CHECK-NEXT: values : ( 2.3, 6.9, 12.6 )
    // CHECK-NEXT: ----
    //
    %s1_dyn = tensor.extract_slice %tmp[%c_0, %c_1][4, 4][%c_2, %c_1] : tensor<8x8xf64, #DCSR> to tensor<4x4xf64, #DCSR_SLICE_dyn>
    %s2_dyn = tensor.extract_slice %b1[%c_0, %c_0][4, 4][%c_2, %c_1] : tensor<8x4xf64, #CSR> to tensor<4x4xf64, #CSR_SLICE_dyn>
    %dyn_4 = call @matmul_dyn(%s2_dyn, %s1_dyn)
       : (tensor<4x4xf64, #CSR_SLICE_dyn>,
          tensor<4x4xf64, #DCSR_SLICE_dyn>) -> tensor<4x4xf64, #CSR>
    sparse_tensor.print %dyn_4 : tensor<4x4xf64, #CSR>

    // sparse slices should generate the same result as dense slices
    //
    // CHECK:      [2.3,   0,   0,   0],
    // CHECK-NEXT: [6.9,   0,   0,   0],
    // CHECK-NEXT: [0,   0,   0,   0],
    // CHECK-NEXT: [12.6,   0,   0,   0]]
    //
    %ds1 = tensor.extract_slice %sa[0, 1][4, 4][2, 1] : tensor<8x8xf64> to tensor<4x4xf64>
    %ds2 = tensor.extract_slice %sb[0, 0][4, 4][2, 1] : tensor<8x4xf64> to tensor<4x4xf64>

    // Dense reference: zero-fill the output buffer, then matmul.
    %d = tensor.empty() : tensor<4x4xf64>
    %zeroed = linalg.fill ins(%f0 : f64) outs(%d : tensor<4x4xf64>)
        -> tensor<4x4xf64>
    %r = linalg.matmul ins(%ds2, %ds1: tensor<4x4xf64>, tensor<4x4xf64>)
                       outs(%zeroed: tensor<4x4xf64>) -> tensor<4x4xf64>
    %du = tensor.cast %r : tensor<4x4xf64> to tensor<*xf64>
    call @printMemrefF64(%du) : (tensor<*xf64>) -> ()

    // Releases resources.
    bufferization.dealloc_tensor %d : tensor<4x4xf64>
    bufferization.dealloc_tensor %b1 : tensor<8x4xf64, #CSR>
    bufferization.dealloc_tensor %t1 : tensor<8x8xf64, #CSR>
    bufferization.dealloc_tensor %b1_coo : tensor<8x4xf64, #COO>
    bufferization.dealloc_tensor %t1_coo : tensor<8x8xf64, #COO>
    bufferization.dealloc_tensor %o_coo : tensor<4x4xf64, #COO>
    bufferization.dealloc_tensor %b  : tensor<8x4xf64, #DCSR>
    bufferization.dealloc_tensor %tmp: tensor<8x8xf64, #DCSR>
    bufferization.dealloc_tensor %4  : tensor<4x4xf64, #CSR>
    bufferization.dealloc_tensor %3  : tensor<4x4xf64, #CSR>
    bufferization.dealloc_tensor %2  : tensor<4x4xf64, #DCSR>
    bufferization.dealloc_tensor %dyn_4 : tensor<4x4xf64, #CSR>

    return
  }
296}
297